facebook · terrelln · Apr 8, 2020 · Mar 14, 2020 · davidbolvansky · Mar 17, 2020
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
@@ -114,6 +114,9 @@
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+#    elif defined(__aarch64__)
+#     define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#     define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
 #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
 #    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
 #    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)

diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
@@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
         }   }
 
         ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+        PREFETCH_L1(ip+256);
+#endif
         continue;
 
 _search_next_long:

diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
@@ -102,6 +102,11 @@ ZSTD_compressBlock_fast_generic(
         const BYTE* match0 = base + matchIndex0;
         const BYTE* match1 = base + matchIndex1;
         U32 offcode;
+
+#if defined(__aarch64__)
+        PREFETCH_L1(ip0+256);
+#endif
+
         hashTable[h0] = current0;   /* update hash table */
         hashTable[h1] = current1;   /* update hash table */