21 #if (defined(__x86_64__) || defined(_M_AMD64))
31 #include <emmintrin.h>
34 typedef __m128i Vector32;
36 #elif defined(__aarch64__) && defined(__ARM_NEON)
48 typedef uint32x4_t Vector32;
66 static inline void vector32_load(Vector32 *v,
const uint32 *s);
72 static inline Vector32 vector32_broadcast(
const uint32 c);
81 static inline bool vector32_is_highbit_set(
const Vector32 v);
87 static inline Vector32 vector32_or(
const Vector32 v1,
const Vector32 v2);
99 static inline Vector32 vector32_eq(
const Vector32 v1,
const Vector32 v2);
108 #if defined(USE_SSE2)
109 *v = _mm_loadu_si128((
const __m128i *) s);
110 #elif defined(USE_NEON)
119 vector32_load(Vector32 *v,
const uint32 *s)
122 *v = _mm_loadu_si128((
const __m128i *) s);
123 #elif defined(USE_NEON)
135 #if defined(USE_SSE2)
136 return _mm_set1_epi8(
c);
137 #elif defined(USE_NEON)
138 return vdupq_n_u8(
c);
140 return ~UINT64CONST(0) / 0xFF *
c;
145 static inline Vector32
146 vector32_broadcast(
const uint32 c)
149 return _mm_set1_epi32(
c);
150 #elif defined(USE_NEON)
151 return vdupq_n_u32(
c);
165 #ifdef USE_ASSERT_CHECKING
166 bool assert_result =
false;
170 if (((
const uint8 *) &v)[
i] ==
c)
172 assert_result =
true;
178 #if defined(USE_NO_SIMD)
185 Assert(assert_result == result);
195 #if defined(USE_NO_SIMD)
216 #ifdef USE_ASSERT_CHECKING
217 bool assert_result =
false;
221 if (((
const uint8 *) &v)[
i] <=
c)
223 assert_result =
true;
229 #if defined(USE_NO_SIMD)
237 if ((int64) v >= 0 &&
c < 0x80)
244 if (((
const uint8 *) &v)[
i] <=
c)
261 Assert(assert_result == result);
272 return _mm_movemask_epi8(v) != 0;
273 #elif defined(USE_NEON)
274 return vmaxvq_u8(v) > 0x7F;
292 vector32_is_highbit_set(
const Vector32 v)
294 #if defined(USE_NEON)
309 return _mm_or_si128(v1, v2);
310 #elif defined(USE_NEON)
311 return vorrq_u8(v1, v2);
318 static inline Vector32
319 vector32_or(
const Vector32 v1,
const Vector32 v2)
322 return _mm_or_si128(v1, v2);
323 #elif defined(USE_NEON)
324 return vorrq_u32(v1, v2);
340 return _mm_subs_epu8(v1, v2);
341 #elif defined(USE_NEON)
342 return vqsubq_u8(v1, v2);
356 return _mm_cmpeq_epi8(v1, v2);
357 #elif defined(USE_NEON)
358 return vceqq_u8(v1, v2);
364 static inline Vector32
365 vector32_eq(
const Vector32 v1,
const Vector32 v2)
368 return _mm_cmpeq_epi32(v1, v2);
369 #elif defined(USE_NEON)
370 return vceqq_u32(v1, v2);
Assert(fmt[strlen(fmt) - 1] !='\n')
static bool vector8_has_le(const Vector8 v, const uint8 c)
static Vector8 vector8_broadcast(const uint8 c)
static void vector8_load(Vector8 *v, const uint8 *s)
static bool vector8_has_zero(const Vector8 v)
static Vector8 vector8_or(const Vector8 v1, const Vector8 v2)
static bool vector8_is_highbit_set(const Vector8 v)
static bool vector8_has(const Vector8 v, const uint8 c)