21 #if (defined(__x86_64__) || defined(_M_AMD64))
31 #include <emmintrin.h>
34 typedef __m128i Vector32;
36 #elif defined(__aarch64__) && defined(__ARM_NEON)
48 typedef uint32x4_t Vector32;
/*
 * Forward declaration of vector32_load().
 *
 * Fills *v with one vector's worth of data read from memory starting at s.
 * The SSE2 body in this file uses _mm_loadu_si128, i.e. an unaligned 128-bit
 * load, so on that path s is not required to be 16-byte aligned.
 */
66 static inline void vector32_load(Vector32 *v,
const uint32 *s);
/*
 * Forward declaration of vector32_broadcast().
 *
 * Returns a Vector32 in which every 32-bit element holds the value c
 * (implemented with _mm_set1_epi32 on SSE2 and vdupq_n_u32 on NEON).
 */
72 static inline Vector32 vector32_broadcast(
const uint32 c);
/*
 * Forward declaration of vector32_is_highbit_set().
 *
 * NOTE(review): the body is not visible in this excerpt; judging by the name
 * it presumably reports whether any element of v has its most significant
 * bit set -- confirm against the definition.
 */
81 static inline bool vector32_is_highbit_set(
const Vector32 v);
/*
 * Forward declaration of vector32_or().
 *
 * Returns the bitwise OR of v1 and v2 (_mm_or_si128 on SSE2, vorrq_u32 on
 * NEON).
 */
88 static inline Vector32 vector32_or(
const Vector32 v1,
const Vector32 v2);
/*
 * Forward declaration of vector32_eq().
 *
 * Compares v1 and v2 one 32-bit element at a time and returns the comparison
 * result as a vector (_mm_cmpeq_epi32 on SSE2, vceqq_u32 on NEON): an element
 * of the result has all bits set where the inputs are equal and is zero
 * otherwise.
 */
101 static inline Vector32 vector32_eq(
const Vector32 v1,
const Vector32 v2);
110 #if defined(USE_SSE2)
111 *v = _mm_loadu_si128((
const __m128i *) s);
112 #elif defined(USE_NEON)
121 vector32_load(Vector32 *v,
const uint32 *s)
124 *v = _mm_loadu_si128((
const __m128i *) s);
125 #elif defined(USE_NEON)
137 #if defined(USE_SSE2)
138 return _mm_set1_epi8(
c);
139 #elif defined(USE_NEON)
140 return vdupq_n_u8(
c);
147 static inline Vector32
148 vector32_broadcast(
const uint32 c)
151 return _mm_set1_epi32(
c);
152 #elif defined(USE_NEON)
153 return vdupq_n_u32(
c);
167 #ifdef USE_ASSERT_CHECKING
168 bool assert_result =
false;
172 if (((
const uint8 *) &v)[
i] ==
c)
174 assert_result =
true;
180 #if defined(USE_NO_SIMD)
187 Assert(assert_result == result);
197 #if defined(USE_NO_SIMD)
218 #ifdef USE_ASSERT_CHECKING
219 bool assert_result =
false;
223 if (((
const uint8 *) &v)[
i] <=
c)
225 assert_result =
true;
231 #if defined(USE_NO_SIMD)
239 if ((
int64) v >= 0 &&
c < 0x80)
246 if (((
const uint8 *) &v)[
i] <=
c)
263 Assert(assert_result == result);
274 return _mm_movemask_epi8(v) != 0;
275 #elif defined(USE_NEON)
276 return vmaxvq_u8(v) > 0x7F;
294 vector32_is_highbit_set(
const Vector32 v)
296 #if defined(USE_NEON)
309 vector8_highbit_mask(
const Vector8 v)
312 return (
uint32) _mm_movemask_epi8(v);
313 #elif defined(USE_NEON)
319 static const uint8 mask[16] = {
320 1 << 0, 1 << 1, 1 << 2, 1 << 3,
321 1 << 4, 1 << 5, 1 << 6, 1 << 7,
322 1 << 0, 1 << 1, 1 << 2, 1 << 3,
323 1 << 4, 1 << 5, 1 << 6, 1 << 7,
326 uint8x16_t masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8((int8x16_t) v, 7));
327 uint8x16_t maskedhi = vextq_u8(masked, masked, 8);
329 return (
uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
341 return _mm_or_si128(v1, v2);
342 #elif defined(USE_NEON)
343 return vorrq_u8(v1, v2);
350 static inline Vector32
351 vector32_or(
const Vector32 v1,
const Vector32 v2)
354 return _mm_or_si128(v1, v2);
355 #elif defined(USE_NEON)
356 return vorrq_u32(v1, v2);
372 return _mm_subs_epu8(v1, v2);
373 #elif defined(USE_NEON)
374 return vqsubq_u8(v1, v2);
388 return _mm_cmpeq_epi8(v1, v2);
389 #elif defined(USE_NEON)
390 return vceqq_u8(v1, v2);
396 static inline Vector32
397 vector32_eq(
const Vector32 v1,
const Vector32 v2)
400 return _mm_cmpeq_epi32(v1, v2);
401 #elif defined(USE_NEON)
402 return vceqq_u32(v1, v2);
415 return _mm_min_epu8(v1, v2);
416 #elif defined(USE_NEON)
417 return vminq_u8(v1, v2);
#define Assert(condition)
static bool vector8_has_le(const Vector8 v, const uint8 c)
static Vector8 vector8_broadcast(const uint8 c)
static void vector8_load(Vector8 *v, const uint8 *s)
static bool vector8_has_zero(const Vector8 v)
static Vector8 vector8_or(const Vector8 v1, const Vector8 v2)
static bool vector8_is_highbit_set(const Vector8 v)
static bool vector8_has(const Vector8 v, const uint8 c)