15 #ifdef HAVE__GET_CPUID
/*
 * 256 lookup entries, one per possible byte value (16 rows of 16).
 * Entry i is the zero-based index of the most significant set bit of i:
 * 1 -> 0, 2..3 -> 1, 4..7 -> 2, ... 128..255 -> 7.  Entry 0 is stored as 0
 * even though a zero byte has no set bit.
 * NOTE(review): the array header is outside this view; presumably this is
 * the initializer of pg_leftmost_one_pos[256] -- confirm.
 */
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
/*
 * 256 lookup entries, one per possible byte value (16 rows of 16).
 * Entry i is the zero-based index of the least significant set bit of i,
 * i.e. the number of trailing zero bits: odd values -> 0, 2 -> 1, 4 -> 2,
 * 16 -> 4, 128 -> 7.  Entry 0 is stored as 0 even though a zero byte has no
 * set bit.
 * NOTE(review): the array header is outside this view; presumably this is
 * the initializer of pg_rightmost_one_pos[256] -- confirm.
 */
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
/*
 * 256 lookup entries, one per possible byte value (16 rows of 16).
 * Entry i is the number of set bits in i: 0 -> 0, 1 -> 1, 3 -> 2,
 * 255 -> 8.
 * NOTE(review): the array header is outside this view; presumably this is
 * the initializer of pg_number_of_ones[256] -- confirm.
 */
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
111 #ifdef TRY_POPCNT_FAST
/*
 * Forward declarations for the file-local popcount helpers: the CPU
 * feature probe, the "_choose" entry points (each of which calls
 * choose_popcount_functions() before doing anything else) and the "_fast"
 * variants built on the popcnt instruction / intrinsic.
 * NOTE(review): the embedded numbering skips 113, so at least one
 * declaration of this group is missing from this view.
 */
112 static bool pg_popcount_available(
void);
114 static int pg_popcount64_choose(uint64
word);
115 static uint64 pg_popcount_choose(
const char *
buf,
int bytes);
116 static uint64 pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask);
117 static inline int pg_popcount32_fast(
uint32 word);
118 static inline int pg_popcount64_fast(uint64
word);
119 static uint64 pg_popcount_fast(
const char *
buf,
int bytes);
120 static uint64 pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask);
128 #ifdef TRY_POPCNT_FAST
/*
 * Queries CPUID leaf 1 and returns true when bit 23 of the third output
 * register (exx[2]) is set.
 * NOTE(review): presumably that bit is the POPCNT feature flag in ECX --
 * confirm against the CPUID documentation.
 */
134 pg_popcount_available(
void)
/* Starts zeroed, so the test below yields false unless the query sets the bit. */
136 unsigned int exx[4] = {0, 0, 0, 0};
/*
 * Use whichever CPUID accessor the build detected.  Only the
 * __get_cpuid() call is visible in this view; the #error reports a build
 * that has no CPUID accessor at all.
 */
138 #if defined(HAVE__GET_CPUID)
139 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140 #elif defined(HAVE__CPUID)
143 #error cpuid instruction not available
/* Isolate bit 23 of the third CPUID output and convert it to a boolean. */
146 return (exx[2] & (1 << 23)) != 0;
/*
 * Runs the runtime CPU feature checks; judging by its name, the outcome
 * decides which popcount routines are used afterwards.  Only the two
 * feature tests are visible here -- what each branch does lies outside
 * this view.
 */
156 choose_popcount_functions(
void)
158 if (pg_popcount_available())
/*
 * Extra probe that is compiled only when
 * USE_AVX512_POPCNT_WITH_RUNTIME_CHECK is defined.
 */
173 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 if (pg_popcount_avx512_available())
185 choose_popcount_functions();
/*
 * "_choose" entry point for a single 64-bit word: it first runs
 * choose_popcount_functions().
 * NOTE(review): the statement that follows (presumably a call through the
 * selected implementation) is not visible here -- confirm.
 */
190 pg_popcount64_choose(uint64
word)
192 choose_popcount_functions();
/*
 * "_choose" entry point for a buffer (buf, bytes): it first runs
 * choose_popcount_functions().
 * NOTE(review): the statement that follows (presumably a call through the
 * selected implementation) is not visible here -- confirm.
 */
197 pg_popcount_choose(
const char *
buf,
int bytes)
199 choose_popcount_functions();
/*
 * "_choose" entry point for a buffer (buf, bytes) combined with a
 * per-byte mask: it first runs choose_popcount_functions().
 * NOTE(review): the statement that follows (presumably a call through the
 * selected implementation) is not visible here -- confirm.
 */
204 pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask)
206 choose_popcount_functions();
218 return __popcnt(
word);
222 __asm__ __volatile__(
" popcntl %1,%0\n":
"=q"(
res):
"rm"(
word):
"cc");
/*
 * Population count of one 64-bit word via the hardware instruction.
 * Two alternatives are visible: the __popcnt64() intrinsic and an
 * inline-assembly popcntq.  The preprocessor conditions that select
 * between them, and the declaration of res, are outside this view.
 */
232 pg_popcount64_fast(uint64
word)
235 return __popcnt64(
word);
/*
 * popcntq takes word as a register-or-memory input ("rm") and writes the
 * count to res; "cc" tells the compiler the condition flags are modified.
 */
239 __asm__ __volatile__(
" popcntq %1,%0\n":
"=q"(
res):
"rm"(
word):
"cc");
/*
 * Adds up the set bits of a buffer a machine word at a time, accumulating
 * into popcnt with the "_fast" single-word helpers.  Loop headers, any
 * alignment guard and the handling of leftover bytes are not visible in
 * this view.
 */
249 pg_popcount_fast(
const char *
buf,
int bytes)
/* Builds with pointers of at least 8 bytes read the buffer as uint64 words. */
253 #if SIZEOF_VOID_P >= 8
/*
 * NOTE(review): this cast requires buf to be suitably aligned for uint64
 * access; the check that ensures it is not visible here -- confirm.
 */
257 const uint64 *words = (
const uint64 *)
buf;
/* Count one word and step to the next. */
261 popcnt += pg_popcount64_fast(*words++);
/* Carry the advanced position back into buf. */
265 buf = (
const char *) words;
/*
 * NOTE(review): presumably the alternative branch for narrower pointers
 * (the directive and the declaration of words are not visible), counting
 * 32 bits per step -- confirm.
 */
275 popcnt += pg_popcount32_fast(*words++);
279 buf = (
const char *) words;
/*
 * Like the unmasked buffer count, but every word is ANDed with a mask
 * before its bits are counted.  Loop headers, any alignment guard and the
 * handling of leftover bytes are not visible in this view.
 */
295 pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask)
/* Builds with pointers of at least 8 bytes read the buffer as uint64 words. */
299 #if SIZEOF_VOID_P >= 8
/*
 * Replicate the 8-bit mask into all eight bytes of a 64-bit word:
 * ~0 / 0xFF is 0x0101010101010101, and multiplying that by mask copies
 * mask into each byte position.
 */
301 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
/*
 * NOTE(review): this cast requires buf to be suitably aligned for uint64
 * access; the check that ensures it is not visible here -- confirm.
 */
305 const uint64 *words = (
const uint64 *)
buf;
/* Count only the bits that survive the replicated mask, then step on. */
309 popcnt += pg_popcount64_fast(*words++ & maskv);
/* Carry the advanced position back into buf. */
313 buf = (
const char *) words;
/*
 * NOTE(review): presumably the alternative branch for narrower pointers,
 * with its own declarations of maskv and words that are not visible
 * here -- confirm.
 */
325 popcnt += pg_popcount32_fast(*words++ & maskv);
329 buf = (
const char *) words;
350 #ifdef HAVE__BUILTIN_POPCOUNT
351 return __builtin_popcount(
word);
372 #ifdef HAVE__BUILTIN_POPCOUNT
373 #if defined(HAVE_LONG_INT_64)
374 return __builtin_popcountl(
word);
375 #elif defined(HAVE_LONG_LONG_INT_64)
376 return __builtin_popcountll(
word);
378 #error must have a working 64-bit integer datatype
402 #if SIZEOF_VOID_P >= 8
406 const uint64 *words = (
const uint64 *)
buf;
414 buf = (
const char *) words;
428 buf = (
const char *) words;
448 #if SIZEOF_VOID_P >= 8
450 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
454 const uint64 *words = (
const uint64 *)
buf;
462 buf = (
const char *) words;
478 buf = (
const char *) words;
489 #ifndef TRY_POPCNT_FAST
#define TYPEALIGN(ALIGNVAL, LEN)
static void PGresult * res
const uint8 pg_number_of_ones[256]
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
static int pg_popcount32_slow(uint32 word)
uint64 pg_popcount_optimized(const char *buf, int bytes)
int pg_popcount64(uint64 word)
int pg_popcount32(uint32 word)
const uint8 pg_rightmost_one_pos[256]
static int pg_popcount64_slow(uint64 word)
const uint8 pg_leftmost_one_pos[256]
static uint64 pg_popcount_slow(const char *buf, int bytes)
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)