PostgreSQL Source Code  git master
pg_lfind.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_lfind.h
4  * Optimized linear search routines using SIMD intrinsics where
5  * available.
6  *
7  * Copyright (c) 2022-2024, PostgreSQL Global Development Group
8  *
9  * IDENTIFICATION
10  * src/include/port/pg_lfind.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PG_LFIND_H
15 #define PG_LFIND_H
16 
17 #include "port/simd.h"
18 
19 /*
20  * pg_lfind8
21  *
22  * Return true if there is an element in 'base' that equals 'key', otherwise
23  * return false.
24  */
25 static inline bool
26 pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
27 {
28  uint32 i;
29 
30  /* round down to multiple of vector length */
31  uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
32  Vector8 chunk;
33 
34  for (i = 0; i < tail_idx; i += sizeof(Vector8))
35  {
36  vector8_load(&chunk, &base[i]);
37  if (vector8_has(chunk, key))
38  return true;
39  }
40 
41  /* Process the remaining elements one at a time. */
42  for (; i < nelem; i++)
43  {
44  if (key == base[i])
45  return true;
46  }
47 
48  return false;
49 }
50 
51 /*
52  * pg_lfind8_le
53  *
54  * Return true if there is an element in 'base' that is less than or equal to
55  * 'key', otherwise return false.
56  */
57 static inline bool
59 {
60  uint32 i;
61 
62  /* round down to multiple of vector length */
63  uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
64  Vector8 chunk;
65 
66  for (i = 0; i < tail_idx; i += sizeof(Vector8))
67  {
68  vector8_load(&chunk, &base[i]);
69  if (vector8_has_le(chunk, key))
70  return true;
71  }
72 
73  /* Process the remaining elements one at a time. */
74  for (; i < nelem; i++)
75  {
76  if (base[i] <= key)
77  return true;
78  }
79 
80  return false;
81 }
82 
83 /*
84  * pg_lfind32_one_by_one_helper
85  *
86  * Searches the array of integers one-by-one. The caller is responsible for
87  * ensuring that there are at least "nelem" integers in the array.
88  */
89 static inline bool
91 {
92  for (uint32 i = 0; i < nelem; i++)
93  {
94  if (key == base[i])
95  return true;
96  }
97 
98  return false;
99 }
100 
101 #ifndef USE_NO_SIMD
102 /*
103  * pg_lfind32_simd_helper
104  *
105  * Searches one 4-register-block of integers. The caller is responsible for
106  * ensuring that there are at least 4-registers-worth of integers remaining.
107  */
108 static inline bool
109 pg_lfind32_simd_helper(const Vector32 keys, const uint32 *base)
110 {
111  const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
112  Vector32 vals1,
113  vals2,
114  vals3,
115  vals4,
116  result1,
117  result2,
118  result3,
119  result4,
120  tmp1,
121  tmp2,
122  result;
123 
124  /* load the next block into 4 registers */
125  vector32_load(&vals1, base);
126  vector32_load(&vals2, &base[nelem_per_vector]);
127  vector32_load(&vals3, &base[nelem_per_vector * 2]);
128  vector32_load(&vals4, &base[nelem_per_vector * 3]);
129 
130  /* compare each value to the key */
131  result1 = vector32_eq(keys, vals1);
132  result2 = vector32_eq(keys, vals2);
133  result3 = vector32_eq(keys, vals3);
134  result4 = vector32_eq(keys, vals4);
135 
136  /* combine the results into a single variable */
137  tmp1 = vector32_or(result1, result2);
138  tmp2 = vector32_or(result3, result4);
139  result = vector32_or(tmp1, tmp2);
140 
141  /* return whether there was a match */
142  return vector32_is_highbit_set(result);
143 }
144 #endif /* ! USE_NO_SIMD */
145 
146 /*
147  * pg_lfind32
148  *
149  * Return true if there is an element in 'base' that equals 'key', otherwise
150  * return false.
151  */
152 static inline bool
153 pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
154 {
155 #ifndef USE_NO_SIMD
156  uint32 i = 0;
157 
158  /*
159  * For better instruction-level parallelism, each loop iteration operates
160  * on a block of four registers.
161  */
162  const Vector32 keys = vector32_broadcast(key); /* load copies of key */
163  const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
164  const uint32 nelem_per_iteration = 4 * nelem_per_vector;
165 
166  /* round down to multiple of elements per iteration */
167  const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
168 
169 #if defined(USE_ASSERT_CHECKING)
170  bool assert_result = pg_lfind32_one_by_one_helper(key, base, nelem);
171 #endif
172 
173  /*
174  * If there aren't enough elements for the SIMD code, use the standard
175  * one-by-one linear search code.
176  */
177  if (nelem < nelem_per_iteration)
178  return pg_lfind32_one_by_one_helper(key, base, nelem);
179 
180  /*
181  * Process as many elements as possible with a block of 4 registers.
182  */
183  do
184  {
185  if (pg_lfind32_simd_helper(keys, &base[i]))
186  {
187  Assert(assert_result == true);
188  return true;
189  }
190 
191  i += nelem_per_iteration;
192 
193  } while (i < tail_idx);
194 
195  /*
196  * Process the last 'nelem_per_iteration' elements in the array with a
197  * 4-register block. This will cause us to check a subset of the elements
198  * more than once, but that won't affect correctness, and testing has
199  * demonstrated that this helps more cases than it harms.
200  */
201  Assert(assert_result == pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]));
202  return pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]);
203 #else
204  /* Process the elements one at a time. */
205  return pg_lfind32_one_by_one_helper(key, base, nelem);
206 #endif
207 }
208 
209 #endif /* PG_LFIND_H */
unsigned int uint32
Definition: c.h:493
unsigned char uint8
Definition: c.h:491
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
static bool pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:58
static bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
Definition: pg_lfind.h:153
static bool pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:26
static bool pg_lfind32_one_by_one_helper(uint32 key, const uint32 *base, uint32 nelem)
Definition: pg_lfind.h:90
static bool vector8_has_le(const Vector8 v, const uint8 c)
Definition: simd.h:213
static void vector8_load(Vector8 *v, const uint8 *s)
Definition: simd.h:108
uint64 Vector8
Definition: simd.h:60
static bool vector8_has(const Vector8 v, const uint8 c)
Definition: simd.h:162