35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
111#ifdef TRY_POPCNT_FAST
112static bool pg_popcount_available(
void);
115static uint64 pg_popcount_choose(
const char *
buf,
int bytes);
116static uint64 pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask);
117static inline int pg_popcount32_fast(
uint32 word);
118static inline int pg_popcount64_fast(
uint64 word);
119static uint64 pg_popcount_fast(
const char *
buf,
int bytes);
120static uint64 pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask);
128#ifdef TRY_POPCNT_FAST
134pg_popcount_available(
void)
136 unsigned int exx[4] = {0, 0, 0, 0};
138#if defined(HAVE__GET_CPUID)
139 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140#elif defined(HAVE__CPUID)
143#error cpuid instruction not available
146 return (exx[2] & (1 << 23)) != 0;
156choose_popcount_functions(
void)
158 if (pg_popcount_available())
173#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 if (pg_popcount_avx512_available())
185 choose_popcount_functions();
192 choose_popcount_functions();
197pg_popcount_choose(
const char *
buf,
int bytes)
199 choose_popcount_functions();
204pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask)
206 choose_popcount_functions();
218 return __popcnt(
word);
222__asm__ __volatile__(
" popcntl %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
235 return __popcnt64(
word);
239__asm__ __volatile__(
" popcntq %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
249pg_popcount_fast(
const char *
buf,
int bytes)
253#if SIZEOF_VOID_P >= 8
261 popcnt += pg_popcount64_fast(*words++);
265 buf = (
const char *) words;
275 popcnt += pg_popcount32_fast(*words++);
279 buf = (
const char *) words;
295pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask)
299#if SIZEOF_VOID_P >= 8
301 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
309 popcnt += pg_popcount64_fast(*words++ & maskv);
313 buf = (
const char *) words;
325 popcnt += pg_popcount32_fast(*words++ & maskv);
329 buf = (
const char *) words;
350#ifdef HAVE__BUILTIN_POPCOUNT
351 return __builtin_popcount(
word);
372#ifdef HAVE__BUILTIN_POPCOUNT
374 return __builtin_popcountl(
word);
375#elif SIZEOF_LONG_LONG == 8
376 return __builtin_popcountll(
word);
378#error "cannot find integer of the same size as uint64_t"
402#if SIZEOF_VOID_P >= 8
414 buf = (
const char *) words;
428 buf = (
const char *) words;
448#if SIZEOF_VOID_P >= 8
450 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
462 buf = (
const char *) words;
478 buf = (
const char *) words;
489#ifndef TRY_POPCNT_FAST
#define TYPEALIGN(ALIGNVAL, LEN)
const uint8 pg_number_of_ones[256]
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
static int pg_popcount32_slow(uint32 word)
uint64 pg_popcount_optimized(const char *buf, int bytes)
int pg_popcount64(uint64 word)
int pg_popcount32(uint32 word)
const uint8 pg_rightmost_one_pos[256]
static int pg_popcount64_slow(uint64 word)
const uint8 pg_leftmost_one_pos[256]
static uint64 pg_popcount_slow(const char *buf, int bytes)
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)