35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
109#ifndef POPCNT_AARCH64
116#ifdef TRY_POPCNT_X86_64
117static bool pg_popcount_available(
void);
120static uint64 pg_popcount_choose(
const char *
buf,
int bytes);
121static uint64 pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask);
122static inline int pg_popcount32_fast(
uint32 word);
123static inline int pg_popcount64_fast(
uint64 word);
124static uint64 pg_popcount_fast(
const char *
buf,
int bytes);
125static uint64 pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask);
133#ifdef TRY_POPCNT_X86_64
139pg_popcount_available(
void)
141 unsigned int exx[4] = {0, 0, 0, 0};
143#if defined(HAVE__GET_CPUID)
144 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
145#elif defined(HAVE__CPUID)
148#error cpuid instruction not available
151 return (exx[2] & (1 << 23)) != 0;
161choose_popcount_functions(
void)
163 if (pg_popcount_available())
178#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
179 if (pg_popcount_avx512_available())
190 choose_popcount_functions();
197 choose_popcount_functions();
202pg_popcount_choose(
const char *
buf,
int bytes)
204 choose_popcount_functions();
209pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask)
211 choose_popcount_functions();
223 return __popcnt(
word);
227__asm__ __volatile__(
" popcntl %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
240 return __popcnt64(
word);
244__asm__ __volatile__(
" popcntq %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
254pg_popcount_fast(
const char *
buf,
int bytes)
258#if SIZEOF_VOID_P >= 8
266 popcnt += pg_popcount64_fast(*words++);
270 buf = (
const char *) words;
280 popcnt += pg_popcount32_fast(*words++);
284 buf = (
const char *) words;
300pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask)
304#if SIZEOF_VOID_P >= 8
306 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
314 popcnt += pg_popcount64_fast(*words++ & maskv);
318 buf = (
const char *) words;
330 popcnt += pg_popcount32_fast(*words++ & maskv);
334 buf = (
const char *) words;
350#ifndef POPCNT_AARCH64
359#ifdef HAVE__BUILTIN_POPCOUNT
360 return __builtin_popcount(
word);
381#ifdef HAVE__BUILTIN_POPCOUNT
383 return __builtin_popcountl(
word);
384#elif SIZEOF_LONG_LONG == 8
385 return __builtin_popcountll(
word);
387#error "cannot find integer of the same size as uint64_t"
411#if SIZEOF_VOID_P >= 8
423 buf = (
const char *) words;
437 buf = (
const char *) words;
457#if SIZEOF_VOID_P >= 8
459 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
471 buf = (
const char *) words;
487 buf = (
const char *) words;
500#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
#define TYPEALIGN(ALIGNVAL, LEN)
const uint8 pg_number_of_ones[256]
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
static int pg_popcount32_slow(uint32 word)
uint64 pg_popcount_optimized(const char *buf, int bytes)
int pg_popcount64(uint64 word)
int pg_popcount32(uint32 word)
const uint8 pg_rightmost_one_pos[256]
static int pg_popcount64_slow(uint64 word)
const uint8 pg_leftmost_one_pos[256]
static uint64 pg_popcount_slow(const char *buf, int bytes)
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)