PostgreSQL Source Code  git master
pg_bitutils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_bitutils.c
4  * Miscellaneous functions for bit-wise operations.
5  *
6  * Copyright (c) 2019-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/port/pg_bitutils.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "c.h"
14 
15 #ifdef HAVE__GET_CPUID
16 #include <cpuid.h>
17 #endif
18 #ifdef HAVE__CPUID
19 #include <intrin.h>
20 #endif
21 
22 #include "port/pg_bitutils.h"
23 
24 
25 /*
26  * Array giving the position of the left-most set bit for each possible
27  * byte value. We count the right-most position as the 0th bit, and the
28  * left-most the 7th bit. The 0th entry of the array should not be used.
29  *
30  * Note: this is not used by the functions in pg_bitutils.h when
31  * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32  * extensions possibly compiled with a different compiler can use it.
33  */
34 const uint8 pg_leftmost_one_pos[256] = {
35  0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51 };
52 
53 /*
54  * Array giving the position of the right-most set bit for each possible
55  * byte value. We count the right-most position as the 0th bit, and the
56  * left-most the 7th bit. The 0th entry of the array should not be used.
57  *
58  * Note: this is not used by the functions in pg_bitutils.h when
59  * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60  * extensions possibly compiled with a different compiler can use it.
61  */
63  0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79 };
80 
81 /*
82  * Array giving the number of 1-bits in each possible byte value.
83  *
84  * Note: we export this for use by functions in which explicit use
85  * of the popcount functions seems unlikely to be a win.
86  */
87 const uint8 pg_number_of_ones[256] = {
88  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104 };
105 
106 static inline int pg_popcount32_slow(uint32 word);
107 static inline int pg_popcount64_slow(uint64 word);
108 static uint64 pg_popcount_slow(const char *buf, int bytes);
109 static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
110 
111 #ifdef TRY_POPCNT_FAST
112 static bool pg_popcount_available(void);
113 static int pg_popcount32_choose(uint32 word);
114 static int pg_popcount64_choose(uint64 word);
115 static uint64 pg_popcount_choose(const char *buf, int bytes);
116 static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
117 static inline int pg_popcount32_fast(uint32 word);
118 static inline int pg_popcount64_fast(uint64 word);
119 static uint64 pg_popcount_fast(const char *buf, int bytes);
120 static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
121 
122 int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
123 int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
124 uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
125 uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
126 #endif /* TRY_POPCNT_FAST */
127 
128 #ifdef TRY_POPCNT_FAST
129 
130 /*
131  * Return true if CPUID indicates that the POPCNT instruction is available.
132  */
133 static bool
134 pg_popcount_available(void)
135 {
136  unsigned int exx[4] = {0, 0, 0, 0};
137 
138 #if defined(HAVE__GET_CPUID)
139  __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140 #elif defined(HAVE__CPUID)
141  __cpuid(exx, 1);
142 #else
143 #error cpuid instruction not available
144 #endif
145 
146  return (exx[2] & (1 << 23)) != 0; /* POPCNT */
147 }
148 
149 /*
150  * These functions get called on the first call to pg_popcount32 etc.
151  * They detect whether we can use the asm implementations, and replace
152  * the function pointers so that subsequent calls are routed directly to
153  * the chosen implementation.
154  */
155 static inline void
156 choose_popcount_functions(void)
157 {
158  if (pg_popcount_available())
159  {
160  pg_popcount32 = pg_popcount32_fast;
161  pg_popcount64 = pg_popcount64_fast;
162  pg_popcount_optimized = pg_popcount_fast;
163  pg_popcount_masked_optimized = pg_popcount_masked_fast;
164  }
165  else
166  {
171  }
172 
173 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174  if (pg_popcount_avx512_available())
175  {
176  pg_popcount_optimized = pg_popcount_avx512;
177  pg_popcount_masked_optimized = pg_popcount_masked_avx512;
178  }
179 #endif
180 }
181 
182 static int
183 pg_popcount32_choose(uint32 word)
184 {
185  choose_popcount_functions();
186  return pg_popcount32(word);
187 }
188 
189 static int
190 pg_popcount64_choose(uint64 word)
191 {
192  choose_popcount_functions();
193  return pg_popcount64(word);
194 }
195 
196 static uint64
197 pg_popcount_choose(const char *buf, int bytes)
198 {
199  choose_popcount_functions();
200  return pg_popcount_optimized(buf, bytes);
201 }
202 
203 static uint64
204 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
205 {
206  choose_popcount_functions();
207  return pg_popcount_masked(buf, bytes, mask);
208 }
209 
210 /*
211  * pg_popcount32_fast
212  * Return the number of 1 bits set in word
213  */
214 static inline int
215 pg_popcount32_fast(uint32 word)
216 {
217 #ifdef _MSC_VER
218  return __popcnt(word);
219 #else
220  uint32 res;
221 
222 __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
223  return (int) res;
224 #endif
225 }
226 
227 /*
228  * pg_popcount64_fast
229  * Return the number of 1 bits set in word
230  */
231 static inline int
232 pg_popcount64_fast(uint64 word)
233 {
234 #ifdef _MSC_VER
235  return __popcnt64(word);
236 #else
237  uint64 res;
238 
239 __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
240  return (int) res;
241 #endif
242 }
243 
244 /*
245  * pg_popcount_fast
246  * Returns the number of 1-bits in buf
247  */
248 static uint64
249 pg_popcount_fast(const char *buf, int bytes)
250 {
251  uint64 popcnt = 0;
252 
253 #if SIZEOF_VOID_P >= 8
254  /* Process in 64-bit chunks if the buffer is aligned. */
255  if (buf == (const char *) TYPEALIGN(8, buf))
256  {
257  const uint64 *words = (const uint64 *) buf;
258 
259  while (bytes >= 8)
260  {
261  popcnt += pg_popcount64_fast(*words++);
262  bytes -= 8;
263  }
264 
265  buf = (const char *) words;
266  }
267 #else
268  /* Process in 32-bit chunks if the buffer is aligned. */
269  if (buf == (const char *) TYPEALIGN(4, buf))
270  {
271  const uint32 *words = (const uint32 *) buf;
272 
273  while (bytes >= 4)
274  {
275  popcnt += pg_popcount32_fast(*words++);
276  bytes -= 4;
277  }
278 
279  buf = (const char *) words;
280  }
281 #endif
282 
283  /* Process any remaining bytes */
284  while (bytes--)
285  popcnt += pg_number_of_ones[(unsigned char) *buf++];
286 
287  return popcnt;
288 }
289 
290 /*
291  * pg_popcount_masked_fast
292  * Returns the number of 1-bits in buf after applying the mask to each byte
293  */
294 static uint64
295 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
296 {
297  uint64 popcnt = 0;
298 
299 #if SIZEOF_VOID_P >= 8
300  /* Process in 64-bit chunks if the buffer is aligned */
301  uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
302 
303  if (buf == (const char *) TYPEALIGN(8, buf))
304  {
305  const uint64 *words = (const uint64 *) buf;
306 
307  while (bytes >= 8)
308  {
309  popcnt += pg_popcount64_fast(*words++ & maskv);
310  bytes -= 8;
311  }
312 
313  buf = (const char *) words;
314  }
315 #else
316  /* Process in 32-bit chunks if the buffer is aligned. */
317  uint32 maskv = ~((uint32) 0) / 0xFF * mask;
318 
319  if (buf == (const char *) TYPEALIGN(4, buf))
320  {
321  const uint32 *words = (const uint32 *) buf;
322 
323  while (bytes >= 4)
324  {
325  popcnt += pg_popcount32_fast(*words++ & maskv);
326  bytes -= 4;
327  }
328 
329  buf = (const char *) words;
330  }
331 #endif
332 
333  /* Process any remaining bytes */
334  while (bytes--)
335  popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
336 
337  return popcnt;
338 }
339 
340 #endif /* TRY_POPCNT_FAST */
341 
342 
343 /*
344  * pg_popcount32_slow
345  * Return the number of 1 bits set in word
346  */
347 static inline int
349 {
350 #ifdef HAVE__BUILTIN_POPCOUNT
351  return __builtin_popcount(word);
352 #else /* !HAVE__BUILTIN_POPCOUNT */
353  int result = 0;
354 
355  while (word != 0)
356  {
357  result += pg_number_of_ones[word & 255];
358  word >>= 8;
359  }
360 
361  return result;
362 #endif /* HAVE__BUILTIN_POPCOUNT */
363 }
364 
365 /*
366  * pg_popcount64_slow
367  * Return the number of 1 bits set in word
368  */
369 static inline int
371 {
372 #ifdef HAVE__BUILTIN_POPCOUNT
373 #if defined(HAVE_LONG_INT_64)
374  return __builtin_popcountl(word);
375 #elif defined(HAVE_LONG_LONG_INT_64)
376  return __builtin_popcountll(word);
377 #else
378 #error must have a working 64-bit integer datatype
379 #endif
380 #else /* !HAVE__BUILTIN_POPCOUNT */
381  int result = 0;
382 
383  while (word != 0)
384  {
385  result += pg_number_of_ones[word & 255];
386  word >>= 8;
387  }
388 
389  return result;
390 #endif /* HAVE__BUILTIN_POPCOUNT */
391 }
392 
393 /*
394  * pg_popcount_slow
395  * Returns the number of 1-bits in buf
396  */
397 static uint64
398 pg_popcount_slow(const char *buf, int bytes)
399 {
400  uint64 popcnt = 0;
401 
402 #if SIZEOF_VOID_P >= 8
403  /* Process in 64-bit chunks if the buffer is aligned. */
404  if (buf == (const char *) TYPEALIGN(8, buf))
405  {
406  const uint64 *words = (const uint64 *) buf;
407 
408  while (bytes >= 8)
409  {
410  popcnt += pg_popcount64_slow(*words++);
411  bytes -= 8;
412  }
413 
414  buf = (const char *) words;
415  }
416 #else
417  /* Process in 32-bit chunks if the buffer is aligned. */
418  if (buf == (const char *) TYPEALIGN(4, buf))
419  {
420  const uint32 *words = (const uint32 *) buf;
421 
422  while (bytes >= 4)
423  {
424  popcnt += pg_popcount32_slow(*words++);
425  bytes -= 4;
426  }
427 
428  buf = (const char *) words;
429  }
430 #endif
431 
432  /* Process any remaining bytes */
433  while (bytes--)
434  popcnt += pg_number_of_ones[(unsigned char) *buf++];
435 
436  return popcnt;
437 }
438 
439 /*
440  * pg_popcount_masked_slow
441  * Returns the number of 1-bits in buf after applying the mask to each byte
442  */
443 static uint64
444 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
445 {
446  uint64 popcnt = 0;
447 
448 #if SIZEOF_VOID_P >= 8
449  /* Process in 64-bit chunks if the buffer is aligned */
450  uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
451 
452  if (buf == (const char *) TYPEALIGN(8, buf))
453  {
454  const uint64 *words = (const uint64 *) buf;
455 
456  while (bytes >= 8)
457  {
458  popcnt += pg_popcount64_slow(*words++ & maskv);
459  bytes -= 8;
460  }
461 
462  buf = (const char *) words;
463  }
464 #else
465  /* Process in 32-bit chunks if the buffer is aligned. */
466  uint32 maskv = ~((uint32) 0) / 0xFF * mask;
467 
468  if (buf == (const char *) TYPEALIGN(4, buf))
469  {
470  const uint32 *words = (const uint32 *) buf;
471 
472  while (bytes >= 4)
473  {
474  popcnt += pg_popcount32_slow(*words++ & maskv);
475  bytes -= 4;
476  }
477 
478  buf = (const char *) words;
479  }
480 #endif
481 
482  /* Process any remaining bytes */
483  while (bytes--)
484  popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
485 
486  return popcnt;
487 }
488 
489 #ifndef TRY_POPCNT_FAST
490 
491 /*
492  * When the POPCNT instruction is not available, there's no point in using
493  * function pointers to vary the implementation between the fast and slow
494  * method. We instead just make these actual external functions when
495  * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
496  * the slow versions here.
497  */
498 int
500 {
501  return pg_popcount32_slow(word);
502 }
503 
504 int
506 {
507  return pg_popcount64_slow(word);
508 }
509 
510 /*
511  * pg_popcount_optimized
512  * Returns the number of 1-bits in buf
513  */
514 uint64
515 pg_popcount_optimized(const char *buf, int bytes)
516 {
517  return pg_popcount_slow(buf, bytes);
518 }
519 
520 /*
521  * pg_popcount_masked_optimized
522  * Returns the number of 1-bits in buf after applying the mask to each byte
523  */
524 uint64
525 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
526 {
527  return pg_popcount_masked_slow(buf, bytes, mask);
528 }
529 
530 #endif /* !TRY_POPCNT_FAST */
unsigned int uint32
Definition: c.h:518
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:807
uint8 bits8
Definition: c.h:524
unsigned char uint8
Definition: c.h:516
const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:525
static int pg_popcount32_slow(uint32 word)
Definition: pg_bitutils.c:348
uint64 pg_popcount_optimized(const char *buf, int bytes)
Definition: pg_bitutils.c:515
int pg_popcount64(uint64 word)
Definition: pg_bitutils.c:505
int pg_popcount32(uint32 word)
Definition: pg_bitutils.c:499
const uint8 pg_rightmost_one_pos[256]
Definition: pg_bitutils.c:62
static int pg_popcount64_slow(uint64 word)
Definition: pg_bitutils.c:370
const uint8 pg_leftmost_one_pos[256]
Definition: pg_bitutils.c:34
static uint64 pg_popcount_slow(const char *buf, int bytes)
Definition: pg_bitutils.c:398
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.c:444
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
Definition: pg_bitutils.h:370
static char * buf
Definition: pg_test_fsync.c:72
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1474