PostgreSQL Source Code git master
pg_bitutils.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_bitutils.c
4 * Miscellaneous functions for bit-wise operations.
5 *
6 * Copyright (c) 2019-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/port/pg_bitutils.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "c.h"
14
15#ifdef HAVE__GET_CPUID
16#include <cpuid.h>
17#endif
18#ifdef HAVE__CPUID
19#include <intrin.h>
20#endif
21
22#include "port/pg_bitutils.h"
23
24
25/*
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
29 *
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
33 */
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51};
52
53/*
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
57 *
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
61 */
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79};
80
81/*
82 * Array giving the number of 1-bits in each possible byte value.
83 *
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
86 */
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104};
105
106static inline int pg_popcount32_slow(uint32 word);
107static inline int pg_popcount64_slow(uint64 word);
108static uint64 pg_popcount_slow(const char *buf, int bytes);
109static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
110
111#ifdef TRY_POPCNT_FAST
112static bool pg_popcount_available(void);
113static int pg_popcount32_choose(uint32 word);
114static int pg_popcount64_choose(uint64 word);
115static uint64 pg_popcount_choose(const char *buf, int bytes);
116static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
117static inline int pg_popcount32_fast(uint32 word);
118static inline int pg_popcount64_fast(uint64 word);
119static uint64 pg_popcount_fast(const char *buf, int bytes);
120static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
121
122int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
123int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
124uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
125uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
126#endif /* TRY_POPCNT_FAST */
127
128#ifdef TRY_POPCNT_FAST
129
130/*
131 * Return true if CPUID indicates that the POPCNT instruction is available.
132 */
133static bool
134pg_popcount_available(void)
135{
136 unsigned int exx[4] = {0, 0, 0, 0};
137
138#if defined(HAVE__GET_CPUID)
139 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140#elif defined(HAVE__CPUID)
141 __cpuid(exx, 1);
142#else
143#error cpuid instruction not available
144#endif
145
146 return (exx[2] & (1 << 23)) != 0; /* POPCNT */
147}
148
149/*
150 * These functions get called on the first call to pg_popcount32 etc.
151 * They detect whether we can use the asm implementations, and replace
152 * the function pointers so that subsequent calls are routed directly to
153 * the chosen implementation.
154 */
155static inline void
156choose_popcount_functions(void)
157{
158 if (pg_popcount_available())
159 {
160 pg_popcount32 = pg_popcount32_fast;
161 pg_popcount64 = pg_popcount64_fast;
162 pg_popcount_optimized = pg_popcount_fast;
163 pg_popcount_masked_optimized = pg_popcount_masked_fast;
164 }
165 else
166 {
171 }
172
173#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 if (pg_popcount_avx512_available())
175 {
176 pg_popcount_optimized = pg_popcount_avx512;
177 pg_popcount_masked_optimized = pg_popcount_masked_avx512;
178 }
179#endif
180}
181
182static int
183pg_popcount32_choose(uint32 word)
184{
185 choose_popcount_functions();
186 return pg_popcount32(word);
187}
188
189static int
190pg_popcount64_choose(uint64 word)
191{
192 choose_popcount_functions();
193 return pg_popcount64(word);
194}
195
196static uint64
197pg_popcount_choose(const char *buf, int bytes)
198{
199 choose_popcount_functions();
200 return pg_popcount_optimized(buf, bytes);
201}
202
203static uint64
204pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
205{
206 choose_popcount_functions();
207 return pg_popcount_masked(buf, bytes, mask);
208}
209
210/*
211 * pg_popcount32_fast
212 * Return the number of 1 bits set in word
213 */
214static inline int
215pg_popcount32_fast(uint32 word)
216{
217#ifdef _MSC_VER
218 return __popcnt(word);
219#else
220 uint32 res;
221
222__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
223 return (int) res;
224#endif
225}
226
227/*
228 * pg_popcount64_fast
229 * Return the number of 1 bits set in word
230 */
231static inline int
232pg_popcount64_fast(uint64 word)
233{
234#ifdef _MSC_VER
235 return __popcnt64(word);
236#else
237 uint64 res;
238
239__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
240 return (int) res;
241#endif
242}
243
244/*
245 * pg_popcount_fast
246 * Returns the number of 1-bits in buf
247 */
248static uint64
249pg_popcount_fast(const char *buf, int bytes)
250{
251 uint64 popcnt = 0;
252
253#if SIZEOF_VOID_P >= 8
254 /* Process in 64-bit chunks if the buffer is aligned. */
255 if (buf == (const char *) TYPEALIGN(8, buf))
256 {
257 const uint64 *words = (const uint64 *) buf;
258
259 while (bytes >= 8)
260 {
261 popcnt += pg_popcount64_fast(*words++);
262 bytes -= 8;
263 }
264
265 buf = (const char *) words;
266 }
267#else
268 /* Process in 32-bit chunks if the buffer is aligned. */
269 if (buf == (const char *) TYPEALIGN(4, buf))
270 {
271 const uint32 *words = (const uint32 *) buf;
272
273 while (bytes >= 4)
274 {
275 popcnt += pg_popcount32_fast(*words++);
276 bytes -= 4;
277 }
278
279 buf = (const char *) words;
280 }
281#endif
282
283 /* Process any remaining bytes */
284 while (bytes--)
285 popcnt += pg_number_of_ones[(unsigned char) *buf++];
286
287 return popcnt;
288}
289
290/*
291 * pg_popcount_masked_fast
292 * Returns the number of 1-bits in buf after applying the mask to each byte
293 */
294static uint64
295pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
296{
297 uint64 popcnt = 0;
298
299#if SIZEOF_VOID_P >= 8
300 /* Process in 64-bit chunks if the buffer is aligned */
301 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
302
303 if (buf == (const char *) TYPEALIGN(8, buf))
304 {
305 const uint64 *words = (const uint64 *) buf;
306
307 while (bytes >= 8)
308 {
309 popcnt += pg_popcount64_fast(*words++ & maskv);
310 bytes -= 8;
311 }
312
313 buf = (const char *) words;
314 }
315#else
316 /* Process in 32-bit chunks if the buffer is aligned. */
317 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
318
319 if (buf == (const char *) TYPEALIGN(4, buf))
320 {
321 const uint32 *words = (const uint32 *) buf;
322
323 while (bytes >= 4)
324 {
325 popcnt += pg_popcount32_fast(*words++ & maskv);
326 bytes -= 4;
327 }
328
329 buf = (const char *) words;
330 }
331#endif
332
333 /* Process any remaining bytes */
334 while (bytes--)
335 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
336
337 return popcnt;
338}
339
340#endif /* TRY_POPCNT_FAST */
341
342
343/*
344 * pg_popcount32_slow
345 * Return the number of 1 bits set in word
346 */
347static inline int
349{
350#ifdef HAVE__BUILTIN_POPCOUNT
351 return __builtin_popcount(word);
352#else /* !HAVE__BUILTIN_POPCOUNT */
353 int result = 0;
354
355 while (word != 0)
356 {
357 result += pg_number_of_ones[word & 255];
358 word >>= 8;
359 }
360
361 return result;
362#endif /* HAVE__BUILTIN_POPCOUNT */
363}
364
365/*
366 * pg_popcount64_slow
367 * Return the number of 1 bits set in word
368 */
369static inline int
371{
372#ifdef HAVE__BUILTIN_POPCOUNT
373#if SIZEOF_LONG == 8
374 return __builtin_popcountl(word);
375#elif SIZEOF_LONG_LONG == 8
376 return __builtin_popcountll(word);
377#else
378#error "cannot find integer of the same size as uint64_t"
379#endif
380#else /* !HAVE__BUILTIN_POPCOUNT */
381 int result = 0;
382
383 while (word != 0)
384 {
385 result += pg_number_of_ones[word & 255];
386 word >>= 8;
387 }
388
389 return result;
390#endif /* HAVE__BUILTIN_POPCOUNT */
391}
392
393/*
394 * pg_popcount_slow
395 * Returns the number of 1-bits in buf
396 */
397static uint64
398pg_popcount_slow(const char *buf, int bytes)
399{
400 uint64 popcnt = 0;
401
402#if SIZEOF_VOID_P >= 8
403 /* Process in 64-bit chunks if the buffer is aligned. */
404 if (buf == (const char *) TYPEALIGN(8, buf))
405 {
406 const uint64 *words = (const uint64 *) buf;
407
408 while (bytes >= 8)
409 {
410 popcnt += pg_popcount64_slow(*words++);
411 bytes -= 8;
412 }
413
414 buf = (const char *) words;
415 }
416#else
417 /* Process in 32-bit chunks if the buffer is aligned. */
418 if (buf == (const char *) TYPEALIGN(4, buf))
419 {
420 const uint32 *words = (const uint32 *) buf;
421
422 while (bytes >= 4)
423 {
424 popcnt += pg_popcount32_slow(*words++);
425 bytes -= 4;
426 }
427
428 buf = (const char *) words;
429 }
430#endif
431
432 /* Process any remaining bytes */
433 while (bytes--)
434 popcnt += pg_number_of_ones[(unsigned char) *buf++];
435
436 return popcnt;
437}
438
439/*
440 * pg_popcount_masked_slow
441 * Returns the number of 1-bits in buf after applying the mask to each byte
442 */
443static uint64
444pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
445{
446 uint64 popcnt = 0;
447
448#if SIZEOF_VOID_P >= 8
449 /* Process in 64-bit chunks if the buffer is aligned */
450 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
451
452 if (buf == (const char *) TYPEALIGN(8, buf))
453 {
454 const uint64 *words = (const uint64 *) buf;
455
456 while (bytes >= 8)
457 {
458 popcnt += pg_popcount64_slow(*words++ & maskv);
459 bytes -= 8;
460 }
461
462 buf = (const char *) words;
463 }
464#else
465 /* Process in 32-bit chunks if the buffer is aligned. */
466 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
467
468 if (buf == (const char *) TYPEALIGN(4, buf))
469 {
470 const uint32 *words = (const uint32 *) buf;
471
472 while (bytes >= 4)
473 {
474 popcnt += pg_popcount32_slow(*words++ & maskv);
475 bytes -= 4;
476 }
477
478 buf = (const char *) words;
479 }
480#endif
481
482 /* Process any remaining bytes */
483 while (bytes--)
484 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
485
486 return popcnt;
487}
488
489#ifndef TRY_POPCNT_FAST
490
491/*
492 * When the POPCNT instruction is not available, there's no point in using
493 * function pointers to vary the implementation between the fast and slow
494 * method. We instead just make these actual external functions when
495 * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
496 * the slow versions here.
497 */
498int
500{
501 return pg_popcount32_slow(word);
502}
503
504int
506{
507 return pg_popcount64_slow(word);
508}
509
510/*
511 * pg_popcount_optimized
512 * Returns the number of 1-bits in buf
513 */
514uint64
515pg_popcount_optimized(const char *buf, int bytes)
516{
517 return pg_popcount_slow(buf, bytes);
518}
519
520/*
521 * pg_popcount_masked_optimized
522 * Returns the number of 1-bits in buf after applying the mask to each byte
523 */
524uint64
525pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
526{
527 return pg_popcount_masked_slow(buf, bytes, mask);
528}
529
530#endif /* !TRY_POPCNT_FAST */
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:775
uint8_t uint8
Definition: c.h:500
uint8 bits8
Definition: c.h:509
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:525
static int pg_popcount32_slow(uint32 word)
Definition: pg_bitutils.c:348
uint64 pg_popcount_optimized(const char *buf, int bytes)
Definition: pg_bitutils.c:515
int pg_popcount64(uint64 word)
Definition: pg_bitutils.c:505
int pg_popcount32(uint32 word)
Definition: pg_bitutils.c:499
const uint8 pg_rightmost_one_pos[256]
Definition: pg_bitutils.c:62
static int pg_popcount64_slow(uint64 word)
Definition: pg_bitutils.c:370
const uint8 pg_leftmost_one_pos[256]
Definition: pg_bitutils.c:34
static uint64 pg_popcount_slow(const char *buf, int bytes)
Definition: pg_bitutils.c:398
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:444
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.h:368
static char * buf
Definition: pg_test_fsync.c:72
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476