PostgreSQL Source Code  git master
ginbulk.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ginbulk.c
4  * routines for fast build of inverted index
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gin/ginbulk.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include <limits.h>
18 
19 #include "access/gin_private.h"
20 #include "utils/datum.h"
21 #include "utils/memutils.h"
22 
23 
24 #define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */
25 #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
26 
27 
28 /* Combiner function for rbtree.c */
29 static void
30 ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg)
31 {
32  GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
33  const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
35 
36  /*
37  * Note this code assumes that newdata contains only one itempointer.
38  */
39  if (eo->count >= eo->maxcount)
40  {
41  if (eo->maxcount > INT_MAX)
42  ereport(ERROR,
43  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
44  errmsg("posting list is too long"),
45  errhint("Reduce maintenance_work_mem.")));
46 
48  eo->maxcount *= 2;
49  eo->list = (ItemPointerData *)
50  repalloc_huge(eo->list, sizeof(ItemPointerData) * eo->maxcount);
52  }
53 
54  /* If item pointers are not ordered, they will need to be sorted later */
55  if (eo->shouldSort == false)
56  {
57  int res;
58 
59  res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
60  Assert(res != 0);
61 
62  if (res > 0)
63  eo->shouldSort = true;
64  }
65 
66  eo->list[eo->count] = en->list[0];
67  eo->count++;
68 }
69 
70 /* Comparator function for rbtree.c */
71 static int
72 cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg)
73 {
74  const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
75  const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
77 
78  return ginCompareAttEntries(accum->ginstate,
79  ea->attnum, ea->key, ea->category,
80  eb->attnum, eb->key, eb->category);
81 }
82 
83 /* Allocator function for rbtree.c */
84 static RBTNode *
86 {
89 
90  /*
91  * Allocate memory by rather big chunks to decrease overhead. We have no
92  * need to reclaim RBTNodes individually, so this costs nothing.
93  */
94  if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
95  {
98  accum->eas_used = 0;
99  }
100 
101  /* Allocate new RBTNode from current chunk */
102  ea = accum->entryallocator + accum->eas_used;
103  accum->eas_used++;
104 
105  return (RBTNode *) ea;
106 }
107 
108 void
110 {
111  /* accum->ginstate is intentionally not set here */
112  accum->allocatedMemory = 0;
113  accum->entryallocator = NULL;
114  accum->eas_used = 0;
115  accum->tree = rbt_create(sizeof(GinEntryAccumulator),
119  NULL, /* no freefunc needed */
120  (void *) accum);
121 }
122 
123 /*
124  * This is basically the same as datumCopy(), but extended to count
125  * palloc'd space in accum->allocatedMemory.
126  */
127 static Datum
129 {
130  Form_pg_attribute att;
131  Datum res;
132 
133  att = TupleDescAttr(accum->ginstate->origTupdesc, attnum - 1);
134  if (att->attbyval)
135  res = value;
136  else
137  {
138  res = datumCopy(value, false, att->attlen);
140  }
141  return res;
142 }
143 
144 /*
145  * Find/store one entry from indexed value.
146  */
147 static void
150  Datum key, GinNullCategory category)
151 {
152  GinEntryAccumulator eatmp;
154  bool isNew;
155 
156  /*
157  * For the moment, fill only the fields of eatmp that will be looked at by
158  * cmpEntryAccumulator or ginCombineData.
159  */
160  eatmp.attnum = attnum;
161  eatmp.key = key;
162  eatmp.category = category;
163  /* temporarily set up single-entry itempointer list */
164  eatmp.list = heapptr;
165 
166  ea = (GinEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp,
167  &isNew);
168 
169  if (isNew)
170  {
171  /*
172  * Finish initializing new tree entry, including making permanent
173  * copies of the datum (if it's not null) and itempointer.
174  */
175  if (category == GIN_CAT_NORM_KEY)
176  ea->key = getDatumCopy(accum, attnum, key);
177  ea->maxcount = DEF_NPTR;
178  ea->count = 1;
179  ea->shouldSort = false;
180  ea->list =
182  ea->list[0] = *heapptr;
184  }
185  else
186  {
187  /*
188  * ginCombineData did everything needed.
189  */
190  }
191 }
192 
193 /*
194  * Insert the entries for one heap pointer.
195  *
196  * Since the entries are being inserted into a balanced binary tree, you
197  * might think that the order of insertion wouldn't be critical, but it turns
198  * out that inserting the entries in sorted order results in a lot of
199  * rebalancing operations and is slow. To prevent this, we attempt to insert
200  * the nodes in an order that will produce a nearly-balanced tree if the input
201  * is in fact sorted.
202  *
203  * We do this as follows. First, we imagine that we have an array whose size
204  * is the smallest power of two greater than or equal to the actual array
205  * size. Second, we insert the middle entry of our virtual array into the
206  * tree; then, we insert the middles of each half of our virtual array, then
207  * middles of quarters, etc.
208  */
209 void
212  Datum *entries, GinNullCategory *categories,
213  int32 nentries)
214 {
215  uint32 step = nentries;
216 
217  if (nentries <= 0)
218  return;
219 
221 
222  /*
223  * step will contain largest power of 2 and <= nentries
224  */
225  step |= (step >> 1);
226  step |= (step >> 2);
227  step |= (step >> 4);
228  step |= (step >> 8);
229  step |= (step >> 16);
230  step >>= 1;
231  step++;
232 
233  while (step > 0)
234  {
235  int i;
236 
237  for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
238  ginInsertBAEntry(accum, heapptr, attnum,
239  entries[i], categories[i]);
240 
241  step >>= 1; /* /2 */
242  }
243 }
244 
245 static int
246 qsortCompareItemPointers(const void *a, const void *b)
247 {
249 
250  /* Assert that there are no equal item pointers being sorted */
251  Assert(res != 0);
252  return res;
253 }
254 
255 /* Prepare to read out the rbtree contents using ginGetBAEntry */
256 void
258 {
259  rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk);
260 }
261 
262 /*
263  * Get the next entry in sequence from the BuildAccumulator's rbtree.
264  * This consists of a single key datum and a list (array) of one or more
265  * heap TIDs in which that key is found. The list is guaranteed sorted.
266  */
270  uint32 *n)
271 {
272  GinEntryAccumulator *entry;
274 
275  entry = (GinEntryAccumulator *) rbt_iterate(&accum->tree_walk);
276 
277  if (entry == NULL)
278  return NULL; /* no more entries */
279 
280  *attnum = entry->attnum;
281  *key = entry->key;
282  *category = entry->category;
283  list = entry->list;
284  *n = entry->count;
285 
286  Assert(list != NULL && entry->count > 0);
287 
288  if (entry->shouldSort && entry->count > 1)
289  qsort(list, entry->count, sizeof(ItemPointerData),
291 
292  return list;
293 }
unsigned int uint32
Definition: c.h:506
signed int int32
Definition: c.h:494
#define Assert(condition)
Definition: c.h:858
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
static int ginCompareItemPointers(ItemPointer a, ItemPointer b)
Definition: gin_private.h:488
#define GIN_CAT_NORM_KEY
Definition: ginblock.h:208
signed char GinNullCategory
Definition: ginblock.h:206
static int qsortCompareItemPointers(const void *a, const void *b)
Definition: ginbulk.c:246
void ginBeginBAScan(BuildAccumulator *accum)
Definition: ginbulk.c:257
static void ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg)
Definition: ginbulk.c:30
#define DEF_NPTR
Definition: ginbulk.c:25
void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum *entries, GinNullCategory *categories, int32 nentries)
Definition: ginbulk.c:210
static int cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg)
Definition: ginbulk.c:72
#define DEF_NENTRY
Definition: ginbulk.c:24
void ginInitBA(BuildAccumulator *accum)
Definition: ginbulk.c:109
static Datum getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
Definition: ginbulk.c:128
static RBTNode * ginAllocEntryAccumulator(void *arg)
Definition: ginbulk.c:85
ItemPointerData * ginGetBAEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *key, GinNullCategory *category, uint32 *n)
Definition: ginbulk.c:268
static void ginInsertBAEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum key, GinNullCategory category)
Definition: ginbulk.c:148
int ginCompareAttEntries(GinState *ginstate, OffsetNumber attnuma, Datum a, GinNullCategory categorya, OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
Definition: ginutil.c:410
static struct @155 value
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
void * palloc(Size size)
Definition: mcxt.c:1316
void * repalloc_huge(void *pointer, Size size)
Definition: mcxt.c:1671
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
void * arg
#define qsort(a, b, c, d)
Definition: port.h:449
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
RBTNode * rbt_iterate(RBTreeIterator *iter)
Definition: rbtree.c:826
RBTNode * rbt_insert(RBTree *rbt, const RBTNode *data, bool *isNew)
Definition: rbtree.c:453
void rbt_begin_iterate(RBTree *rbt, RBTOrderControl ctrl, RBTreeIterator *iter)
Definition: rbtree.c:802
RBTree * rbt_create(Size node_size, rbt_comparator comparator, rbt_combiner combiner, rbt_allocfunc allocfunc, rbt_freefunc freefunc, void *arg)
Definition: rbtree.c:102
@ LeftRightWalk
Definition: rbtree.h:37
GinState * ginstate
Definition: gin_private.h:433
RBTreeIterator tree_walk
Definition: gin_private.h:438
GinEntryAccumulator * entryallocator
Definition: gin_private.h:435
OffsetNumber attnum
Definition: gin_private.h:424
ItemPointerData * list
Definition: gin_private.h:426
GinNullCategory category
Definition: gin_private.h:423
TupleDesc origTupdesc
Definition: gin_private.h:73
Definition: rbtree.h:24
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92