PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_bitutils.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_bitutils.c
4 * Miscellaneous functions for bit-wise operations.
5 *
6 * Copyright (c) 2019-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/port/pg_bitutils.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "c.h"
14
15#ifdef HAVE__GET_CPUID
16#include <cpuid.h>
17#endif
18#ifdef HAVE__CPUID
19#include <intrin.h>
20#endif
21
22#include "port/pg_bitutils.h"
23
24
25/*
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
29 *
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
33 */
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51};
52
53/*
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
57 *
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
61 */
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79};
80
81/*
82 * Array giving the number of 1-bits in each possible byte value.
83 *
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
86 */
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104};
105
106/*
107 * If we are building the Neon versions, we don't need the "slow" fallbacks.
108 */
109#ifndef POPCNT_AARCH64
110static inline int pg_popcount32_slow(uint32 word);
111static inline int pg_popcount64_slow(uint64 word);
112static uint64 pg_popcount_slow(const char *buf, int bytes);
113static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
114#endif
115
116#ifdef TRY_POPCNT_X86_64
117static bool pg_popcount_available(void);
118static int pg_popcount32_choose(uint32 word);
119static int pg_popcount64_choose(uint64 word);
120static uint64 pg_popcount_choose(const char *buf, int bytes);
121static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
122static inline int pg_popcount32_fast(uint32 word);
123static inline int pg_popcount64_fast(uint64 word);
124static uint64 pg_popcount_fast(const char *buf, int bytes);
125static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
126
127int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
128int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
129uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
130uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
131#endif /* TRY_POPCNT_X86_64 */
132
133#ifdef TRY_POPCNT_X86_64
134
135/*
136 * Return true if CPUID indicates that the POPCNT instruction is available.
137 */
138static bool
139pg_popcount_available(void)
140{
141 unsigned int exx[4] = {0, 0, 0, 0};
142
143#if defined(HAVE__GET_CPUID)
144 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
145#elif defined(HAVE__CPUID)
146 __cpuid(exx, 1);
147#else
148#error cpuid instruction not available
149#endif
150
151 return (exx[2] & (1 << 23)) != 0; /* POPCNT */
152}
153
154/*
155 * These functions get called on the first call to pg_popcount32 etc.
156 * They detect whether we can use the asm implementations, and replace
157 * the function pointers so that subsequent calls are routed directly to
158 * the chosen implementation.
159 */
160static inline void
161choose_popcount_functions(void)
162{
163 if (pg_popcount_available())
164 {
165 pg_popcount32 = pg_popcount32_fast;
166 pg_popcount64 = pg_popcount64_fast;
167 pg_popcount_optimized = pg_popcount_fast;
168 pg_popcount_masked_optimized = pg_popcount_masked_fast;
169 }
170 else
171 {
176 }
177
178#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
179 if (pg_popcount_avx512_available())
180 {
181 pg_popcount_optimized = pg_popcount_avx512;
182 pg_popcount_masked_optimized = pg_popcount_masked_avx512;
183 }
184#endif
185}
186
187static int
188pg_popcount32_choose(uint32 word)
189{
190 choose_popcount_functions();
191 return pg_popcount32(word);
192}
193
194static int
195pg_popcount64_choose(uint64 word)
196{
197 choose_popcount_functions();
198 return pg_popcount64(word);
199}
200
201static uint64
202pg_popcount_choose(const char *buf, int bytes)
203{
204 choose_popcount_functions();
205 return pg_popcount_optimized(buf, bytes);
206}
207
208static uint64
209pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
210{
211 choose_popcount_functions();
212 return pg_popcount_masked(buf, bytes, mask);
213}
214
215/*
216 * pg_popcount32_fast
217 * Return the number of 1 bits set in word
218 */
219static inline int
220pg_popcount32_fast(uint32 word)
221{
222#ifdef _MSC_VER
223 return __popcnt(word);
224#else
225 uint32 res;
226
227__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
228 return (int) res;
229#endif
230}
231
232/*
233 * pg_popcount64_fast
234 * Return the number of 1 bits set in word
235 */
236static inline int
237pg_popcount64_fast(uint64 word)
238{
239#ifdef _MSC_VER
240 return __popcnt64(word);
241#else
242 uint64 res;
243
244__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
245 return (int) res;
246#endif
247}
248
249/*
250 * pg_popcount_fast
251 * Returns the number of 1-bits in buf
252 */
253static uint64
254pg_popcount_fast(const char *buf, int bytes)
255{
256 uint64 popcnt = 0;
257
258#if SIZEOF_VOID_P >= 8
259 /* Process in 64-bit chunks if the buffer is aligned. */
260 if (buf == (const char *) TYPEALIGN(8, buf))
261 {
262 const uint64 *words = (const uint64 *) buf;
263
264 while (bytes >= 8)
265 {
266 popcnt += pg_popcount64_fast(*words++);
267 bytes -= 8;
268 }
269
270 buf = (const char *) words;
271 }
272#else
273 /* Process in 32-bit chunks if the buffer is aligned. */
274 if (buf == (const char *) TYPEALIGN(4, buf))
275 {
276 const uint32 *words = (const uint32 *) buf;
277
278 while (bytes >= 4)
279 {
280 popcnt += pg_popcount32_fast(*words++);
281 bytes -= 4;
282 }
283
284 buf = (const char *) words;
285 }
286#endif
287
288 /* Process any remaining bytes */
289 while (bytes--)
290 popcnt += pg_number_of_ones[(unsigned char) *buf++];
291
292 return popcnt;
293}
294
295/*
296 * pg_popcount_masked_fast
297 * Returns the number of 1-bits in buf after applying the mask to each byte
298 */
299static uint64
300pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
301{
302 uint64 popcnt = 0;
303
304#if SIZEOF_VOID_P >= 8
305 /* Process in 64-bit chunks if the buffer is aligned */
306 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
307
308 if (buf == (const char *) TYPEALIGN(8, buf))
309 {
310 const uint64 *words = (const uint64 *) buf;
311
312 while (bytes >= 8)
313 {
314 popcnt += pg_popcount64_fast(*words++ & maskv);
315 bytes -= 8;
316 }
317
318 buf = (const char *) words;
319 }
320#else
321 /* Process in 32-bit chunks if the buffer is aligned. */
322 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
323
324 if (buf == (const char *) TYPEALIGN(4, buf))
325 {
326 const uint32 *words = (const uint32 *) buf;
327
328 while (bytes >= 4)
329 {
330 popcnt += pg_popcount32_fast(*words++ & maskv);
331 bytes -= 4;
332 }
333
334 buf = (const char *) words;
335 }
336#endif
337
338 /* Process any remaining bytes */
339 while (bytes--)
340 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
341
342 return popcnt;
343}
344
345#endif /* TRY_POPCNT_X86_64 */
346
347/*
348 * If we are building the Neon versions, we don't need the "slow" fallbacks.
349 */
350#ifndef POPCNT_AARCH64
351
352/*
353 * pg_popcount32_slow
354 * Return the number of 1 bits set in word
355 */
356static inline int
358{
359#ifdef HAVE__BUILTIN_POPCOUNT
360 return __builtin_popcount(word);
361#else /* !HAVE__BUILTIN_POPCOUNT */
362 int result = 0;
363
364 while (word != 0)
365 {
366 result += pg_number_of_ones[word & 255];
367 word >>= 8;
368 }
369
370 return result;
371#endif /* HAVE__BUILTIN_POPCOUNT */
372}
373
374/*
375 * pg_popcount64_slow
376 * Return the number of 1 bits set in word
377 */
378static inline int
380{
381#ifdef HAVE__BUILTIN_POPCOUNT
382#if SIZEOF_LONG == 8
383 return __builtin_popcountl(word);
384#elif SIZEOF_LONG_LONG == 8
385 return __builtin_popcountll(word);
386#else
387#error "cannot find integer of the same size as uint64_t"
388#endif
389#else /* !HAVE__BUILTIN_POPCOUNT */
390 int result = 0;
391
392 while (word != 0)
393 {
394 result += pg_number_of_ones[word & 255];
395 word >>= 8;
396 }
397
398 return result;
399#endif /* HAVE__BUILTIN_POPCOUNT */
400}
401
402/*
403 * pg_popcount_slow
404 * Returns the number of 1-bits in buf
405 */
406static uint64
407pg_popcount_slow(const char *buf, int bytes)
408{
409 uint64 popcnt = 0;
410
411#if SIZEOF_VOID_P >= 8
412 /* Process in 64-bit chunks if the buffer is aligned. */
413 if (buf == (const char *) TYPEALIGN(8, buf))
414 {
415 const uint64 *words = (const uint64 *) buf;
416
417 while (bytes >= 8)
418 {
419 popcnt += pg_popcount64_slow(*words++);
420 bytes -= 8;
421 }
422
423 buf = (const char *) words;
424 }
425#else
426 /* Process in 32-bit chunks if the buffer is aligned. */
427 if (buf == (const char *) TYPEALIGN(4, buf))
428 {
429 const uint32 *words = (const uint32 *) buf;
430
431 while (bytes >= 4)
432 {
433 popcnt += pg_popcount32_slow(*words++);
434 bytes -= 4;
435 }
436
437 buf = (const char *) words;
438 }
439#endif
440
441 /* Process any remaining bytes */
442 while (bytes--)
443 popcnt += pg_number_of_ones[(unsigned char) *buf++];
444
445 return popcnt;
446}
447
448/*
449 * pg_popcount_masked_slow
450 * Returns the number of 1-bits in buf after applying the mask to each byte
451 */
452static uint64
453pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
454{
455 uint64 popcnt = 0;
456
457#if SIZEOF_VOID_P >= 8
458 /* Process in 64-bit chunks if the buffer is aligned */
459 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
460
461 if (buf == (const char *) TYPEALIGN(8, buf))
462 {
463 const uint64 *words = (const uint64 *) buf;
464
465 while (bytes >= 8)
466 {
467 popcnt += pg_popcount64_slow(*words++ & maskv);
468 bytes -= 8;
469 }
470
471 buf = (const char *) words;
472 }
473#else
474 /* Process in 32-bit chunks if the buffer is aligned. */
475 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
476
477 if (buf == (const char *) TYPEALIGN(4, buf))
478 {
479 const uint32 *words = (const uint32 *) buf;
480
481 while (bytes >= 4)
482 {
483 popcnt += pg_popcount32_slow(*words++ & maskv);
484 bytes -= 4;
485 }
486
487 buf = (const char *) words;
488 }
489#endif
490
491 /* Process any remaining bytes */
492 while (bytes--)
493 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
494
495 return popcnt;
496}
497
498#endif /* ! POPCNT_AARCH64 */
499
500#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
501
502/*
503 * When special CPU instructions are not available, there's no point in using
504 * function pointers to vary the implementation between the fast and slow
505 * method. We instead just make these actual external functions. The compiler
506 * should be able to inline the slow versions here.
507 */
508int
510{
511 return pg_popcount32_slow(word);
512}
513
514int
516{
517 return pg_popcount64_slow(word);
518}
519
520/*
521 * pg_popcount_optimized
522 * Returns the number of 1-bits in buf
523 */
524uint64
525pg_popcount_optimized(const char *buf, int bytes)
526{
527 return pg_popcount_slow(buf, bytes);
528}
529
530/*
531 * pg_popcount_masked_optimized
532 * Returns the number of 1-bits in buf after applying the mask to each byte
533 */
534uint64
535pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
536{
537 return pg_popcount_masked_slow(buf, bytes, mask);
538}
539
540#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:775
uint8_t uint8
Definition: c.h:500
uint8 bits8
Definition: c.h:509
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:535
static int pg_popcount32_slow(uint32 word)
Definition: pg_bitutils.c:357
uint64 pg_popcount_optimized(const char *buf, int bytes)
Definition: pg_bitutils.c:525
int pg_popcount64(uint64 word)
Definition: pg_bitutils.c:515
int pg_popcount32(uint32 word)
Definition: pg_bitutils.c:509
const uint8 pg_rightmost_one_pos[256]
Definition: pg_bitutils.c:62
static int pg_popcount64_slow(uint64 word)
Definition: pg_bitutils.c:379
const uint8 pg_leftmost_one_pos[256]
Definition: pg_bitutils.c:34
static uint64 pg_popcount_slow(const char *buf, int bytes)
Definition: pg_bitutils.c:407
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:453
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.h:394
static char * buf
Definition: pg_test_fsync.c:72
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476