PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
gininsert.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * gininsert.c
4 * insert routines for the postgres inverted index access method.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/gin/gininsert.c
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include "access/gin_private.h"
18#include "access/gin_tuple.h"
19#include "access/parallel.h"
20#include "access/table.h"
21#include "access/tableam.h"
22#include "access/xloginsert.h"
23#include "catalog/index.h"
25#include "commands/progress.h"
26#include "miscadmin.h"
27#include "nodes/execnodes.h"
28#include "pgstat.h"
29#include "storage/bufmgr.h"
30#include "storage/predicate.h"
31#include "tcop/tcopprot.h"
32#include "utils/datum.h"
33#include "utils/memutils.h"
34#include "utils/rel.h"
35#include "utils/builtins.h"
36
37
38/* Magic numbers for parallel state sharing */
39#define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001)
40#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002)
41#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003)
42#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004)
43#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
44
45/*
46 * Status for index builds performed in parallel. This is allocated in a
47 * dynamic shared memory segment.
48 */
49typedef struct GinBuildShared
50{
51 /*
52 * These fields are not modified during the build. They primarily exist
53 * for the benefit of worker processes that need to create state
54 * corresponding to that used by the leader.
55 */
60
61 /*
62 * workersdonecv is used to monitor the progress of workers. All parallel
63 * participants must indicate that they are done before leader can use
64 * results built by the workers (and before leader can write the data into
65 * the index).
66 */
68
69 /*
70 * mutex protects all following fields
71 *
72 * These fields contain status information of interest to GIN index builds
73 * that must work just the same when an index is built in parallel.
74 */
75 slock_t mutex;
76
77 /*
78 * Mutable state that is maintained by workers, and reported back to
79 * leader at end of the scans.
80 *
81 * nparticipantsdone is number of worker processes finished.
82 *
83 * reltuples is the total number of input heap tuples.
84 *
85 * indtuples is the total number of tuples that made it into the index.
86 */
88 double reltuples;
89 double indtuples;
90
91 /*
92 * ParallelTableScanDescData data follows. Can't directly embed here, as
93 * implementations of the parallel table scan desc interface might need
94 * stronger alignment.
95 */
97
98/*
99 * Return pointer to a GinBuildShared's parallel table scan.
100 *
101 * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
102 * MAXALIGN.
103 */
104#define ParallelTableScanFromGinBuildShared(shared) \
105 (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
106
107/*
108 * Status for leader in parallel index build.
109 */
110typedef struct GinLeader
111{
112 /* parallel context itself */
114
115 /*
116 * nparticipanttuplesorts is the exact number of worker processes
117 * successfully launched, plus one leader process if it participates as a
118 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
119 * participating as a worker).
120 */
122
123 /*
124 * Leader process convenience pointers to shared state (leader avoids TOC
125 * lookups).
126 *
127 * GinBuildShared is the shared state for entire build. sharedsort is the
128 * shared, tuplesort-managed state passed to each process tuplesort.
129 * snapshot is the snapshot used by the scan iff an MVCC snapshot is
130 * required.
131 */
138
139typedef struct
140{
142 double indtuples;
149
150 /*
151 * bs_leader is only present when a parallel index build is performed, and
152 * only in the leader process.
153 */
155
156 /* number of participating workers (including leader) */
158
159 /* used to pass information from workers to leader */
162
163 /*
164 * The sortstate is used by workers (including the leader). It has to be
165 * part of the build state, because that's the only thing passed to the
166 * build callback etc.
167 */
169
170 /*
171 * The sortstate used only within a single worker for the first merge pass
172 * happening there. In principle it doesn't need to be part of the build
173 * state and we could pass it around directly, but it's more convenient
174 * this way. And it's part of the build state, after all.
175 */
178
179
180/* parallel index builds */
181static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
182 bool isconcurrent, int request);
183static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
188 Relation heap, Relation index);
190 GinBuildShared *ginshared,
191 Sharedsort *sharedsort,
192 Relation heap, Relation index,
193 int sortmem, bool progress);
194
197
198static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
199 Datum key, int16 typlen, bool typbyval,
201 Size *len);
202
203/*
204 * Adds array of item pointers to tuple's posting list, or
205 * creates posting tree and tuple pointing to tree in case
206 * of not enough space. Max size of tuple is defined in
207 * GinFormTuple(). Returns a new, modified index tuple.
208 * items[] must be in sorted order with no duplicates.
209 */
210static IndexTuple
212 IndexTuple old,
214 GinStatsData *buildStats, Buffer buffer)
215{
217 Datum key;
218 GinNullCategory category;
219 IndexTuple res;
220 ItemPointerData *newItems,
221 *oldItems;
222 int oldNPosting,
223 newNPosting,
224 nwritten;
225 GinPostingList *compressedList;
226
228
229 attnum = gintuple_get_attrnum(ginstate, old);
230 key = gintuple_get_key(ginstate, old, &category);
231
232 /* merge the old and new posting lists */
233 oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
234
235 newItems = ginMergeItemPointers(items, nitem,
236 oldItems, oldNPosting,
237 &newNPosting);
238
239 /* Compress the posting list, and try to a build tuple with room for it */
240 res = NULL;
241 compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
242 if (nwritten == newNPosting)
243 {
244 res = GinFormTuple(ginstate, attnum, key, category,
245 (char *) compressedList,
246 SizeOfGinPostingList(compressedList),
247 newNPosting,
248 false);
249 }
250
251 pfree(newItems);
252 pfree(compressedList);
253
254 if (!res)
255 {
256 /* posting list would be too big, convert to posting tree */
257 BlockNumber postingRoot;
258
259 /*
260 * Initialize posting tree with the old tuple's posting list. It's
261 * surely small enough to fit on one posting-tree page, and should
262 * already be in order with no duplicates.
263 */
264 postingRoot = createPostingTree(ginstate->index,
265 oldItems,
266 oldNPosting,
267 buildStats,
268 buffer);
269
270 /* Now insert the TIDs-to-be-added into the posting tree */
271 ginInsertItemPointers(ginstate->index, postingRoot,
272 items, nitem,
273 buildStats);
274
275 /* And build a new posting-tree-only result tuple */
276 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
277 GinSetPostingTree(res, postingRoot);
278 }
279 pfree(oldItems);
280
281 return res;
282}
283
284/*
285 * Build a fresh leaf tuple, either posting-list or posting-tree format
286 * depending on whether the given items list will fit.
287 * items[] must be in sorted order with no duplicates.
288 *
289 * This is basically the same logic as in addItemPointersToLeafTuple,
290 * but working from slightly different input.
291 */
292static IndexTuple
296 GinStatsData *buildStats, Buffer buffer)
297{
298 IndexTuple res = NULL;
299 GinPostingList *compressedList;
300 int nwritten;
301
302 /* try to build a posting list tuple with all the items */
303 compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
304 if (nwritten == nitem)
305 {
306 res = GinFormTuple(ginstate, attnum, key, category,
307 (char *) compressedList,
308 SizeOfGinPostingList(compressedList),
309 nitem, false);
310 }
311 pfree(compressedList);
312
313 if (!res)
314 {
315 /* posting list would be too big, build posting tree */
316 BlockNumber postingRoot;
317
318 /*
319 * Build posting-tree-only result tuple. We do this first so as to
320 * fail quickly if the key is too big.
321 */
322 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
323
324 /*
325 * Initialize a new posting tree with the TIDs.
326 */
327 postingRoot = createPostingTree(ginstate->index, items, nitem,
328 buildStats, buffer);
329
330 /* And save the root link in the result tuple */
331 GinSetPostingTree(res, postingRoot);
332 }
333
334 return res;
335}
336
337/*
338 * Insert one or more heap TIDs associated with the given key value.
339 * This will either add a single key entry, or enlarge a pre-existing entry.
340 *
341 * During an index build, buildStats is non-null and the counters
342 * it contains should be incremented as needed.
343 */
344void
348 GinStatsData *buildStats)
349{
350 GinBtreeData btree;
351 GinBtreeEntryInsertData insertdata;
352 GinBtreeStack *stack;
353 IndexTuple itup;
354 Page page;
355
356 insertdata.isDelete = false;
357
358 ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
359 btree.isBuild = (buildStats != NULL);
360
361 stack = ginFindLeafPage(&btree, false, false);
362 page = BufferGetPage(stack->buffer);
363
364 if (btree.findItem(&btree, stack))
365 {
366 /* found pre-existing entry */
367 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
368
369 if (GinIsPostingTree(itup))
370 {
371 /* add entries to existing posting tree */
372 BlockNumber rootPostingTree = GinGetPostingTree(itup);
373
374 /* release all stack */
376 freeGinBtreeStack(stack);
377
378 /* insert into posting tree */
379 ginInsertItemPointers(ginstate->index, rootPostingTree,
380 items, nitem,
381 buildStats);
382 return;
383 }
384
387 /* modify an existing leaf entry */
388 itup = addItemPointersToLeafTuple(ginstate, itup,
389 items, nitem, buildStats, stack->buffer);
390
391 insertdata.isDelete = true;
392 }
393 else
394 {
397 /* no match, so construct a new leaf entry */
398 itup = buildFreshLeafTuple(ginstate, attnum, key, category,
399 items, nitem, buildStats, stack->buffer);
400
401 /*
402 * nEntries counts leaf tuples, so increment it only when we make a
403 * new one.
404 */
405 if (buildStats)
406 buildStats->nEntries++;
407 }
408
409 /* Insert the new or modified leaf tuple */
410 insertdata.entry = itup;
411 ginInsertValue(&btree, stack, &insertdata, buildStats);
412 pfree(itup);
413}
414
415/*
416 * Extract index entries for a single indexable item, and add them to the
417 * BuildAccumulator's state.
418 *
419 * This function is used only during initial index creation.
420 */
421static void
423 Datum value, bool isNull,
424 ItemPointer heapptr)
425{
426 Datum *entries;
427 GinNullCategory *categories;
428 int32 nentries;
429 MemoryContext oldCtx;
430
431 oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
432 entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
433 value, isNull,
434 &nentries, &categories);
435 MemoryContextSwitchTo(oldCtx);
436
437 ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
438 entries, categories, nentries);
439
440 buildstate->indtuples += nentries;
441
442 MemoryContextReset(buildstate->funcCtx);
443}
444
445static void
447 bool *isnull, bool tupleIsAlive, void *state)
448{
449 GinBuildState *buildstate = (GinBuildState *) state;
450 MemoryContext oldCtx;
451 int i;
452
453 oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
454
455 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
456 ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
457 values[i], isnull[i], tid);
458
459 /* If we've maxed out our available memory, dump everything to the index */
460 if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
461 {
463 Datum key;
464 GinNullCategory category;
465 uint32 nlist;
467
468 ginBeginBAScan(&buildstate->accum);
469 while ((list = ginGetBAEntry(&buildstate->accum,
470 &attnum, &key, &category, &nlist)) != NULL)
471 {
472 /* there could be many entries, so be willing to abort here */
474 ginEntryInsert(&buildstate->ginstate, attnum, key, category,
475 list, nlist, &buildstate->buildStats);
476 }
477
478 MemoryContextReset(buildstate->tmpCtx);
479 ginInitBA(&buildstate->accum);
480 }
481
482 MemoryContextSwitchTo(oldCtx);
483}
484
485/*
486 * ginFlushBuildState
487 * Write all data from BuildAccumulator into the tuplesort.
488 *
489 * The number of TIDs written to the tuplesort at once is limited, to reduce
490 * the amount of memory needed when merging the intermediate results later.
491 * The leader will see up to two chunks per worker, so calculate the limit to
492 * not need more than MaxAllocSize overall.
493 *
494 * We don't need to worry about overflowing maintenance_work_mem. We can't
495 * build chunks larger than work_mem, and that limit was set so that workers
496 * produce sufficiently small chunks.
497 */
498static void
500{
502 Datum key;
503 GinNullCategory category;
504 uint32 nlist;
507 uint32 maxlen;
508
509 /* maximum number of TIDs per chunk (two chunks per worker) */
510 maxlen = MaxAllocSize / sizeof(ItemPointerData);
511 maxlen /= (2 * buildstate->bs_num_workers);
512
513 ginBeginBAScan(&buildstate->accum);
514 while ((list = ginGetBAEntry(&buildstate->accum,
515 &attnum, &key, &category, &nlist)) != NULL)
516 {
517 /* information about the key */
518 CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
519
520 /* start of the chunk */
521 uint32 offset = 0;
522
523 /* split the entry into smaller chunk with up to maxlen items */
524 while (offset < nlist)
525 {
526 /* GIN tuple and tuple length */
527 GinTuple *tup;
528 Size tuplen;
529 uint32 len = Min(maxlen, nlist - offset);
530
531 /* there could be many entries, so be willing to abort here */
533
534 tup = _gin_build_tuple(attnum, category,
535 key, attr->attlen, attr->attbyval,
536 &list[offset], len,
537 &tuplen);
538
539 offset += len;
540
541 tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
542
543 pfree(tup);
544 }
545 }
546
547 MemoryContextReset(buildstate->tmpCtx);
548 ginInitBA(&buildstate->accum);
549}
550
551/*
552 * ginBuildCallbackParallel
553 * Callback for the parallel index build.
554 *
555 * This is similar to the serial build callback ginBuildCallback, but
556 * instead of writing the accumulated entries into the index, each worker
557 * writes them into a (local) tuplesort.
558 *
559 * The worker then sorts and combines these entries, before writing them
560 * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
561 * for the whole process).
562 */
563static void
565 bool *isnull, bool tupleIsAlive, void *state)
566{
567 GinBuildState *buildstate = (GinBuildState *) state;
568 MemoryContext oldCtx;
569 int i;
570
571 oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
572
573 /*
574 * if scan wrapped around - flush accumulated entries and start anew
575 *
576 * With parallel scans, we don't have a guarantee the scan does not start
577 * half-way through the relation (serial builds disable sync scans and
578 * always start from block 0, parallel scans require allow_sync=true).
579 *
580 * Building the posting lists assumes the TIDs are monotonic and never go
581 * back, and the wrap around would break that. We handle that by detecting
582 * the wraparound, and flushing all entries. This means we'll later see
583 * two separate entries with non-overlapping TID lists (which can be
584 * combined by merge sort).
585 *
586 * To detect a wraparound, we remember the last TID seen by each worker
587 * (for any key). If the next TID seen by the worker is lower, the scan
588 * must have wrapped around.
589 */
590 if (ItemPointerCompare(tid, &buildstate->tid) < 0)
591 ginFlushBuildState(buildstate, index);
592
593 /* remember the TID we're about to process */
594 buildstate->tid = *tid;
595
596 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
597 ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
598 values[i], isnull[i], tid);
599
600 /*
601 * If we've maxed out our available memory, dump everything to the
602 * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
603 * the other half is used for the tuplesort.
604 */
605 if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
606 ginFlushBuildState(buildstate, index);
607
608 MemoryContextSwitchTo(oldCtx);
609}
610
613{
614 IndexBuildResult *result;
615 double reltuples;
616 GinBuildState buildstate;
617 GinBuildState *state = &buildstate;
618 Buffer RootBuffer,
619 MetaBuffer;
621 Datum key;
622 GinNullCategory category;
623 uint32 nlist;
624 MemoryContext oldCtx;
626
628 elog(ERROR, "index \"%s\" already contains data",
630
631 initGinState(&buildstate.ginstate, index);
632 buildstate.indtuples = 0;
633 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
634
635 /* Initialize fields for parallel build too. */
636 buildstate.bs_numtuples = 0;
637 buildstate.bs_reltuples = 0;
638 buildstate.bs_leader = NULL;
639 memset(&buildstate.tid, 0, sizeof(ItemPointerData));
640
641 /* initialize the meta page */
642 MetaBuffer = GinNewBuffer(index);
643
644 /* initialize the root page */
645 RootBuffer = GinNewBuffer(index);
646
648 GinInitMetabuffer(MetaBuffer);
649 MarkBufferDirty(MetaBuffer);
650 GinInitBuffer(RootBuffer, GIN_LEAF);
651 MarkBufferDirty(RootBuffer);
652
653
654 UnlockReleaseBuffer(MetaBuffer);
655 UnlockReleaseBuffer(RootBuffer);
657
658 /* count the root as first entry page */
659 buildstate.buildStats.nEntryPages++;
660
661 /*
662 * create a temporary memory context that is used to hold data not yet
663 * dumped out to the index
664 */
666 "Gin build temporary context",
668
669 /*
670 * create a temporary memory context that is used for calling
671 * ginExtractEntries(), and can be reset after each tuple
672 */
674 "Gin build temporary context for user-defined function",
676
677 buildstate.accum.ginstate = &buildstate.ginstate;
678 ginInitBA(&buildstate.accum);
679
680 /* Report table scan phase started */
683
684 /*
685 * Attempt to launch parallel worker scan when required
686 *
687 * XXX plan_create_index_workers makes the number of workers dependent on
688 * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
689 * reasonable too, because we sort the data just like btree. It does
690 * ignore the memory used to accumulate data in memory (set by work_mem),
691 * but there is no way to communicate that to plan_create_index_workers.
692 */
693 if (indexInfo->ii_ParallelWorkers > 0)
694 _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
695 indexInfo->ii_ParallelWorkers);
696
697 /*
698 * If parallel build requested and at least one worker process was
699 * successfully launched, set up coordination state, wait for workers to
700 * complete. Then read all tuples from the shared tuplesort and insert
701 * them into the index.
702 *
703 * In serial mode, simply scan the table and build the index one index
704 * tuple at a time.
705 */
706 if (state->bs_leader)
707 {
708 SortCoordinate coordinate;
709
710 coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
711 coordinate->isWorker = false;
712 coordinate->nParticipants =
713 state->bs_leader->nparticipanttuplesorts;
714 coordinate->sharedsort = state->bs_leader->sharedsort;
715
716 /*
717 * Begin leader tuplesort.
718 *
719 * In cases where parallelism is involved, the leader receives the
720 * same share of maintenance_work_mem as a serial sort (it is
721 * generally treated in the same way as a serial sort once we return).
722 * Parallel worker Tuplesortstates will have received only a fraction
723 * of maintenance_work_mem, though.
724 *
725 * We rely on the lifetime of the Leader Tuplesortstate almost not
726 * overlapping with any worker Tuplesortstate's lifetime. There may
727 * be some small overlap, but that's okay because we rely on leader
728 * Tuplesortstate only allocating a small, fixed amount of memory
729 * here. When its tuplesort_performsort() is called (by our caller),
730 * and significant amounts of memory are likely to be used, all
731 * workers must have already freed almost all memory held by their
732 * Tuplesortstates (they are about to go away completely, too). The
733 * overall effect is that maintenance_work_mem always represents an
734 * absolute high watermark on the amount of memory used by a CREATE
735 * INDEX operation, regardless of the use of parallelism or any other
736 * factor.
737 */
738 state->bs_sortstate =
740 maintenance_work_mem, coordinate,
742
743 /* scan the relation in parallel and merge per-worker results */
744 reltuples = _gin_parallel_merge(state);
745
746 _gin_end_parallel(state->bs_leader, state);
747 }
748 else /* no parallel index build */
749 {
750 /*
751 * Do the heap scan. We disallow sync scan here because
752 * dataPlaceToPage prefers to receive tuples in TID order.
753 */
754 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
755 ginBuildCallback, &buildstate, NULL);
756
757 /* dump remaining entries to the index */
758 oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
759 ginBeginBAScan(&buildstate.accum);
760 while ((list = ginGetBAEntry(&buildstate.accum,
761 &attnum, &key, &category, &nlist)) != NULL)
762 {
763 /* there could be many entries, so be willing to abort here */
765 ginEntryInsert(&buildstate.ginstate, attnum, key, category,
766 list, nlist, &buildstate.buildStats);
767 }
768 MemoryContextSwitchTo(oldCtx);
769 }
770
771 MemoryContextDelete(buildstate.funcCtx);
772 MemoryContextDelete(buildstate.tmpCtx);
773
774 /*
775 * Update metapage stats
776 */
778 ginUpdateStats(index, &buildstate.buildStats, true);
779
780 /*
781 * We didn't write WAL records as we built the index, so if WAL-logging is
782 * required, write all pages to the WAL now.
783 */
785 {
788 true);
789 }
790
791 /*
792 * Return statistics
793 */
794 result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
795
796 result->heap_tuples = reltuples;
797 result->index_tuples = buildstate.indtuples;
798
799 return result;
800}
801
802/*
803 * ginbuildempty() -- build an empty gin index in the initialization fork
804 */
805void
807{
808 Buffer RootBuffer,
809 MetaBuffer;
810
811 /* An empty GIN index has two pages. */
812 MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
814 RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
816
817 /* Initialize and xlog metabuffer and root buffer. */
819 GinInitMetabuffer(MetaBuffer);
820 MarkBufferDirty(MetaBuffer);
821 log_newpage_buffer(MetaBuffer, true);
822 GinInitBuffer(RootBuffer, GIN_LEAF);
823 MarkBufferDirty(RootBuffer);
824 log_newpage_buffer(RootBuffer, false);
826
827 /* Unlock and release the buffers. */
828 UnlockReleaseBuffer(MetaBuffer);
829 UnlockReleaseBuffer(RootBuffer);
830}
831
832/*
833 * Insert index entries for a single indexable item during "normal"
834 * (non-fast-update) insertion
835 */
836static void
838 Datum value, bool isNull,
839 ItemPointer item)
840{
841 Datum *entries;
842 GinNullCategory *categories;
843 int32 i,
844 nentries;
845
846 entries = ginExtractEntries(ginstate, attnum, value, isNull,
847 &nentries, &categories);
848
849 for (i = 0; i < nentries; i++)
850 ginEntryInsert(ginstate, attnum, entries[i], categories[i],
851 item, 1, NULL);
852}
853
854bool
856 ItemPointer ht_ctid, Relation heapRel,
857 IndexUniqueCheck checkUnique,
858 bool indexUnchanged,
859 IndexInfo *indexInfo)
860{
861 GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
862 MemoryContext oldCtx;
863 MemoryContext insertCtx;
864 int i;
865
866 /* Initialize GinState cache if first call in this statement */
867 if (ginstate == NULL)
868 {
869 oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
870 ginstate = (GinState *) palloc(sizeof(GinState));
871 initGinState(ginstate, index);
872 indexInfo->ii_AmCache = ginstate;
873 MemoryContextSwitchTo(oldCtx);
874 }
875
877 "Gin insert temporary context",
879
880 oldCtx = MemoryContextSwitchTo(insertCtx);
881
883 {
884 GinTupleCollector collector;
885
886 memset(&collector, 0, sizeof(GinTupleCollector));
887
888 for (i = 0; i < ginstate->origTupdesc->natts; i++)
889 ginHeapTupleFastCollect(ginstate, &collector,
890 (OffsetNumber) (i + 1),
891 values[i], isnull[i],
892 ht_ctid);
893
894 ginHeapTupleFastInsert(ginstate, &collector);
895 }
896 else
897 {
898 for (i = 0; i < ginstate->origTupdesc->natts; i++)
899 ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
900 values[i], isnull[i],
901 ht_ctid);
902 }
903
904 MemoryContextSwitchTo(oldCtx);
905 MemoryContextDelete(insertCtx);
906
907 return false;
908}
909
910/*
911 * Create parallel context, and launch workers for leader.
912 *
913 * buildstate argument should be initialized (with the exception of the
914 * tuplesort states, which may later be created based on shared
915 * state initially set up here).
916 *
917 * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
918 *
919 * request is the target number of parallel worker processes to launch.
920 *
921 * Sets buildstate's GinLeader, which caller must use to shut down parallel
922 * mode by passing it to _gin_end_parallel() at the very end of its index
923 * build. If not even a single worker process can be launched, this is
924 * never set, and caller should proceed with a serial index build.
925 */
926static void
928 bool isconcurrent, int request)
929{
930 ParallelContext *pcxt;
931 int scantuplesortstates;
932 Snapshot snapshot;
933 Size estginshared;
934 Size estsort;
935 GinBuildShared *ginshared;
936 Sharedsort *sharedsort;
937 GinLeader *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));
938 WalUsage *walusage;
939 BufferUsage *bufferusage;
940 bool leaderparticipates = true;
941 int querylen;
942
943#ifdef DISABLE_LEADER_PARTICIPATION
944 leaderparticipates = false;
945#endif
946
947 /*
948 * Enter parallel mode, and create context for parallel build of gin index
949 */
951 Assert(request > 0);
952 pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
953 request);
954
955 scantuplesortstates = leaderparticipates ? request + 1 : request;
956
957 /*
958 * Prepare for scan of the base relation. In a normal index build, we use
959 * SnapshotAny because we must retrieve all tuples and do our own time
960 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
961 * concurrent build, we take a regular MVCC snapshot and index whatever's
962 * live according to that.
963 */
964 if (!isconcurrent)
965 snapshot = SnapshotAny;
966 else
968
969 /*
970 * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
971 */
972 estginshared = _gin_parallel_estimate_shared(heap, snapshot);
973 shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
974 estsort = tuplesort_estimate_shared(scantuplesortstates);
975 shm_toc_estimate_chunk(&pcxt->estimator, estsort);
976
978
979 /*
980 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
981 * and PARALLEL_KEY_BUFFER_USAGE.
982 *
983 * If there are no extensions loaded that care, we could skip this. We
984 * have no way of knowing whether anyone's looking at pgWalUsage or
985 * pgBufferUsage, so do it unconditionally.
986 */
988 mul_size(sizeof(WalUsage), pcxt->nworkers));
991 mul_size(sizeof(BufferUsage), pcxt->nworkers));
993
994 /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
996 {
997 querylen = strlen(debug_query_string);
998 shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
1000 }
1001 else
1002 querylen = 0; /* keep compiler quiet */
1003
1004 /* Everyone's had a chance to ask for space, so now create the DSM */
1006
1007 /* If no DSM segment was available, back out (do serial build) */
1008 if (pcxt->seg == NULL)
1009 {
1010 if (IsMVCCSnapshot(snapshot))
1011 UnregisterSnapshot(snapshot);
1014 return;
1015 }
1016
1017 /* Store shared build state, for which we reserved space */
1018 ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
1019 /* Initialize immutable state */
1020 ginshared->heaprelid = RelationGetRelid(heap);
1021 ginshared->indexrelid = RelationGetRelid(index);
1022 ginshared->isconcurrent = isconcurrent;
1023 ginshared->scantuplesortstates = scantuplesortstates;
1024
1026 SpinLockInit(&ginshared->mutex);
1027
1028 /* Initialize mutable state */
1029 ginshared->nparticipantsdone = 0;
1030 ginshared->reltuples = 0.0;
1031 ginshared->indtuples = 0.0;
1032
1035 snapshot);
1036
1037 /*
1038 * Store shared tuplesort-private state, for which we reserved space.
1039 * Then, initialize opaque state using tuplesort routine.
1040 */
1041 sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1042 tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1043 pcxt->seg);
1044
1045 shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
1046 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1047
1048 /* Store query string for workers */
1050 {
1051 char *sharedquery;
1052
1053 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1054 memcpy(sharedquery, debug_query_string, querylen + 1);
1055 shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1056 }
1057
1058 /*
1059 * Allocate space for each worker's WalUsage and BufferUsage; no need to
1060 * initialize.
1061 */
1062 walusage = shm_toc_allocate(pcxt->toc,
1063 mul_size(sizeof(WalUsage), pcxt->nworkers));
1064 shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1065 bufferusage = shm_toc_allocate(pcxt->toc,
1066 mul_size(sizeof(BufferUsage), pcxt->nworkers));
1067 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
1068
1069 /* Launch workers, saving status for leader/caller */
1071 ginleader->pcxt = pcxt;
1072 ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1073 if (leaderparticipates)
1074 ginleader->nparticipanttuplesorts++;
1075 ginleader->ginshared = ginshared;
1076 ginleader->sharedsort = sharedsort;
1077 ginleader->snapshot = snapshot;
1078 ginleader->walusage = walusage;
1079 ginleader->bufferusage = bufferusage;
1080
1081 /* If no workers were successfully launched, back out (do serial build) */
1082 if (pcxt->nworkers_launched == 0)
1083 {
1084 _gin_end_parallel(ginleader, NULL);
1085 return;
1086 }
1087
1088 /* Save leader state now that it's clear build will be parallel */
1089 buildstate->bs_leader = ginleader;
1090
1091 /* Join heap scan ourselves */
1092 if (leaderparticipates)
1093 _gin_leader_participate_as_worker(buildstate, heap, index);
1094
1095 /*
1096 * Caller needs to wait for all launched workers when we return. Make
1097 * sure that the failure-to-start case will not hang forever.
1098 */
1100}
1101
1102/*
1103 * Shut down workers, destroy parallel context, and end parallel mode.
1104 */
1105static void
1107{
1108 int i;
1109
1110 /* Shutdown worker processes */
1112
1113 /*
1114 * Next, accumulate WAL usage. (This must wait for the workers to finish,
1115 * or we might get incomplete data.)
1116 */
1117 for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
1118 InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
1119
1120 /* Free last reference to MVCC snapshot, if one was used */
1121 if (IsMVCCSnapshot(ginleader->snapshot))
1122 UnregisterSnapshot(ginleader->snapshot);
1123 DestroyParallelContext(ginleader->pcxt);
1125}
1126
1127/*
1128 * Within leader, wait for end of heap scan.
1129 *
1130 * When called, parallel heap scan started by _gin_begin_parallel() will
1131 * already be underway within worker processes (when leader participates
1132 * as a worker, we should end up here just as workers are finishing).
1133 *
1134 * Returns the total number of heap tuples scanned.
1135 */
1136static double
1138{
1139 GinBuildShared *ginshared = state->bs_leader->ginshared;
1140 int nparticipanttuplesorts;
1141
1142 nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
1143 for (;;)
1144 {
1145 SpinLockAcquire(&ginshared->mutex);
1146 if (ginshared->nparticipantsdone == nparticipanttuplesorts)
1147 {
1148 /* copy the data into leader state */
1149 state->bs_reltuples = ginshared->reltuples;
1150 state->bs_numtuples = ginshared->indtuples;
1151
1152 SpinLockRelease(&ginshared->mutex);
1153 break;
1154 }
1155 SpinLockRelease(&ginshared->mutex);
1156
1158 WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1159 }
1160
1162
1163 return state->bs_reltuples;
1164}
1165
1166/*
1167 * Buffer used to accumulate TIDs from multiple GinTuples for the same key
1168 * (we read these from the tuplesort, sorted by the key).
1169 *
1170 * This is similar to BuildAccumulator in that it's used to collect TIDs
1171 * in memory before inserting them into the index, but it's much simpler
1172 * as it only deals with a single index key at a time.
1173 *
1174 * When adding TIDs to the buffer, we make sure to keep them sorted, both
1175 * during the initial table scan (and detecting when the scan wraps around),
1176 * and during merging (where we do mergesort).
1177 */
1178typedef struct GinBuffer
1179{
1182 Datum key; /* 0 if no key (and keylen == 0) */
1183 Size keylen; /* number of bytes (not typlen) */
1184
1185 /* type info */
1188
1189 /* Number of TIDs to collect before attempt to write some out. */
1191
1192 /* array of TID values */
1195 SortSupport ssup; /* for sorting/comparing keys */
1198
1199/*
1200 * Check that TID array contains valid values, and that it's sorted (if we
1201 * expect it to be).
1202 */
1203static void
1205{
1206#ifdef USE_ASSERT_CHECKING
1207 /* we should not have a buffer with no TIDs to sort */
1208 Assert(buffer->items != NULL);
1209 Assert(buffer->nitems > 0);
1210
1211 for (int i = 0; i < buffer->nitems; i++)
1212 {
1213 Assert(ItemPointerIsValid(&buffer->items[i]));
1214
1215 /* don't check ordering for the first TID item */
1216 if (i == 0)
1217 continue;
1218
1219 Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
1220 }
1221#endif
1222}
1223
1224/*
1225 * GinBuffer checks
1226 *
1227 * Make sure the nitems/items fields are consistent (either the array is empty
1228 * or not empty, the fields need to agree). If there are items, check ordering.
1229 */
1230static void
1232{
1233#ifdef USE_ASSERT_CHECKING
1234 /* if we have any items, the array must exist */
1235 Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
1236
1237 /*
1238 * The buffer may be empty, in which case we must not call the check of
1239 * item pointers, because that assumes non-emptiness.
1240 */
1241 if (buffer->nitems == 0)
1242 return;
1243
1244 /* Make sure the item pointers are valid and sorted. */
1246#endif
1247}
1248
1249/*
1250 * GinBufferInit
1251 * Initialize buffer to store tuples for a GIN index.
1252 *
1253 * Initialize the buffer used to accumulate TID for a single key at a time
1254 * (we process the data sorted), so we know when we received all data for
1255 * a given key.
1256 *
1257 * Initializes sort support procedures for all index attributes.
1258 */
1259static GinBuffer *
1261{
1262 GinBuffer *buffer = palloc0(sizeof(GinBuffer));
1263 int i,
1264 nKeys;
1266
1267 /*
1268 * How many items can we fit into the memory limit? We don't want to end
1269 * with too many TIDs. and 64kB seems more than enough. But maybe this
1270 * should be tied to maintenance_work_mem or something like that?
1271 */
1272 buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
1273
1275
1276 buffer->ssup = palloc0(sizeof(SortSupportData) * nKeys);
1277
1278 /*
1279 * Lookup ordering operator for the index key data type, and initialize
1280 * the sort support function.
1281 */
1282 for (i = 0; i < nKeys; i++)
1283 {
1284 Oid cmpFunc;
1285 SortSupport sortKey = &buffer->ssup[i];
1286 Form_pg_attribute att = TupleDescAttr(desc, i);
1287
1288 sortKey->ssup_cxt = CurrentMemoryContext;
1289 sortKey->ssup_collation = index->rd_indcollation[i];
1290
1291 if (!OidIsValid(sortKey->ssup_collation))
1292 sortKey->ssup_collation = DEFAULT_COLLATION_OID;
1293
1294 sortKey->ssup_nulls_first = false;
1295 sortKey->ssup_attno = i + 1;
1296 sortKey->abbreviate = false;
1297
1298 Assert(sortKey->ssup_attno != 0);
1299
1300 /*
1301 * If the compare proc isn't specified in the opclass definition, look
1302 * up the index key type's default btree comparator.
1303 */
1304 cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
1305 if (cmpFunc == InvalidOid)
1306 {
1307 TypeCacheEntry *typentry;
1308
1309 typentry = lookup_type_cache(att->atttypid,
1311 if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
1312 ereport(ERROR,
1313 (errcode(ERRCODE_UNDEFINED_FUNCTION),
1314 errmsg("could not identify a comparison function for type %s",
1315 format_type_be(att->atttypid))));
1316
1317 cmpFunc = typentry->cmp_proc_finfo.fn_oid;
1318 }
1319
1320 PrepareSortSupportComparisonShim(cmpFunc, sortKey);
1321 }
1322
1323 return buffer;
1324}
1325
1326/* Is the buffer empty, i.e. has no TID values in the array? */
1327static bool
1329{
1330 return (buffer->nitems == 0);
1331}
1332
1333/*
1334 * GinBufferKeyEquals
1335 * Can the buffer store TIDs for the provided GIN tuple (same key)?
1336 *
1337 * Compare if the tuple matches the already accumulated data in the GIN
1338 * buffer. Compare scalar fields first, before the actual key.
1339 *
1340 * Returns true if the key matches, and the TID belongs to the buffer, or
1341 * false if the key does not match.
1342 */
1343static bool
1345{
1346 int r;
1347 Datum tupkey;
1348
1349 AssertCheckGinBuffer(buffer);
1350
1351 if (tup->attrnum != buffer->attnum)
1352 return false;
1353
1354 /* same attribute should have the same type info */
1355 Assert(tup->typbyval == buffer->typbyval);
1356 Assert(tup->typlen == buffer->typlen);
1357
1358 if (tup->category != buffer->category)
1359 return false;
1360
1361 /*
1362 * For NULL/empty keys, this means equality, for normal keys we need to
1363 * compare the actual key value.
1364 */
1365 if (buffer->category != GIN_CAT_NORM_KEY)
1366 return true;
1367
1368 /*
1369 * For the tuple, get either the first sizeof(Datum) bytes for byval
1370 * types, or a pointer to the beginning of the data array.
1371 */
1372 tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
1373
1374 r = ApplySortComparator(buffer->key, false,
1375 tupkey, false,
1376 &buffer->ssup[buffer->attnum - 1]);
1377
1378 return (r == 0);
1379}
1380
1381/*
1382 * GinBufferShouldTrim
1383 * Should we trim the list of item pointers?
1384 *
1385 * By trimming we understand writing out and removing the tuple IDs that
1386 * we know can't change by future merges. We can deduce the TID up to which
1387 * this is guaranteed from the "first" TID in each GIN tuple, which provides
1388 * a "horizon" (for a given key) thanks to the sort.
1389 *
1390 * We don't want to do this too often - compressing longer TID lists is more
1391 * efficient. But we also don't want to accumulate too many TIDs, for two
1392 * reasons. First, it consumes memory and we might exceed maintenance_work_mem
1393 * (or whatever limit applies), even if that's unlikely because TIDs are very
1394 * small so we can fit a lot of them. Second, and more importantly, long TID
1395 * lists are an issue if the scan wraps around, because a key may get a very
1396 * wide list (with min/max TID for that key), forcing "full" mergesorts for
1397 * every list merged into it (instead of the efficient append).
1398 *
1399 * So we look at two things when deciding if to trim - if the resulting list
1400 * (after adding TIDs from the new tuple) would be too long, and if there is
1401 * enough TIDs to trim (with values less than "first" TID from the new tuple),
1402 * we do the trim. By enough we mean at least 128 TIDs (mostly an arbitrary
1403 * number).
1404 *
1405 * We try freezing TIDs at the beginning of the list first, before attempting
1406 * to trim the buffer. This may allow trimming the data earlier, reducing the
1407 * memory usage and excluding it from the mergesort.
1408 */
1409static bool
1411{
1412 /*
1413 * Check if the last TID in the current list is frozen. This is the case
1414 * when merging non-overlapping lists, e.g. in each parallel worker.
1415 */
1416 if ((buffer->nitems > 0) &&
1417 (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
1418 GinTupleGetFirst(tup)) == 0))
1419 buffer->nfrozen = buffer->nitems;
1420
1421 /*
1422 * Now find the last TID we know to be frozen, i.e. the last TID right
1423 * before the new GIN tuple.
1424 *
1425 * Start with the first not-yet-frozen tuple, and walk until we find the
1426 * first TID that's higher. If we already know the whole list is frozen
1427 * (i.e. nfrozen == nitems), this does nothing.
1428 *
1429 * XXX This might do a binary search for sufficiently long lists, but it
1430 * does not seem worth the complexity. Overlapping lists should be rare
1431 * common, TID comparisons are cheap, and we should quickly freeze most of
1432 * the list.
1433 */
1434 for (int i = buffer->nfrozen; i < buffer->nitems; i++)
1435 {
1436 /* Is the TID after the first TID of the new tuple? Can't freeze. */
1437 if (ItemPointerCompare(&buffer->items[i],
1438 GinTupleGetFirst(tup)) > 0)
1439 break;
1440
1441 buffer->nfrozen++;
1442 }
1443
1444 /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
1445 if (buffer->nfrozen < 1024)
1446 return false;
1447
1448 /* no need to trim if we have not hit the memory limit yet */
1449 if ((buffer->nitems + tup->nitems) < buffer->maxitems)
1450 return false;
1451
1452 /*
1453 * OK, we have enough frozen TIDs to flush, and we have hit the memory
1454 * limit, so it's time to write it out.
1455 */
1456 return true;
1457}
1458
1459/*
1460 * GinBufferStoreTuple
1461 * Add data (especially TID list) from a GIN tuple to the buffer.
1462 *
1463 * The buffer is expected to be empty (in which case it's initialized), or
1464 * having the same key. The TID values from the tuple are combined with the
1465 * stored values using a merge sort.
1466 *
1467 * The tuples (for the same key) are expected to be sorted by first TID. But
1468 * this does not guarantee the lists do not overlap, especially in the leader,
1469 * because the workers process interleaving data. There should be no overlaps
1470 * in a single worker - it could happen when the parallel scan wraps around,
1471 * but we detect that and flush the data (see ginBuildCallbackParallel).
1472 *
1473 * By sorting the GinTuple not only by key, but also by the first TID, we make
1474 * it more less likely the lists will overlap during merge. We merge them using
1475 * mergesort, but it's cheaper to just append one list to the other.
1476 *
1477 * How often can the lists overlap? There should be no overlaps in workers,
1478 * and in the leader we can see overlaps between lists built by different
1479 * workers. But the workers merge the items as much as possible, so there
1480 * should not be too many.
1481 */
1482static void
1484{
1486 Datum key;
1487
1488 AssertCheckGinBuffer(buffer);
1489
1492
1493 /* if the buffer is empty, set the fields (and copy the key) */
1494 if (GinBufferIsEmpty(buffer))
1495 {
1496 buffer->category = tup->category;
1497 buffer->keylen = tup->keylen;
1498 buffer->attnum = tup->attrnum;
1499
1500 buffer->typlen = tup->typlen;
1501 buffer->typbyval = tup->typbyval;
1502
1503 if (tup->category == GIN_CAT_NORM_KEY)
1504 buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
1505 else
1506 buffer->key = (Datum) 0;
1507 }
1508
1509 /* add the new TIDs into the buffer, combine using merge-sort */
1510 {
1511 int nnew;
1512 ItemPointer new;
1513
1514 /*
1515 * Resize the array - we do this first, because we'll dereference the
1516 * first unfrozen TID, which would fail if the array is NULL. We'll
1517 * still pass 0 as number of elements in that array though.
1518 */
1519 if (buffer->items == NULL)
1520 buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1521 else
1522 buffer->items = repalloc(buffer->items,
1523 (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1524
1525 new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
1526 (buffer->nitems - buffer->nfrozen), /* num of unfrozen */
1527 items, tup->nitems, &nnew);
1528
1529 Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
1530
1531 memcpy(&buffer->items[buffer->nfrozen], new,
1532 nnew * sizeof(ItemPointerData));
1533
1534 pfree(new);
1535
1536 buffer->nitems += tup->nitems;
1537
1539 }
1540
1541 /* free the decompressed TID list */
1542 pfree(items);
1543}
1544
1545/*
1546 * GinBufferReset
1547 * Reset the buffer into a state as if it contains no data.
1548 */
1549static void
1551{
1552 Assert(!GinBufferIsEmpty(buffer));
1553
1554 /* release byref values, do nothing for by-val ones */
1555 if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1556 pfree(DatumGetPointer(buffer->key));
1557
1558 /*
1559 * Not required, but makes it more likely to trigger NULL dereference if
1560 * using the value incorrectly, etc.
1561 */
1562 buffer->key = (Datum) 0;
1563
1564 buffer->attnum = 0;
1565 buffer->category = 0;
1566 buffer->keylen = 0;
1567 buffer->nitems = 0;
1568 buffer->nfrozen = 0;
1569
1570 buffer->typlen = 0;
1571 buffer->typbyval = 0;
1572}
1573
1574/*
1575 * GinBufferTrim
1576 * Discard the "frozen" part of the TID list (which should have been
1577 * written to disk/index before this call).
1578 */
1579static void
1581{
1582 Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
1583
1584 memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
1585 sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
1586
1587 buffer->nitems -= buffer->nfrozen;
1588 buffer->nfrozen = 0;
1589}
1590
1591/*
1592 * GinBufferFree
1593 * Release memory associated with the GinBuffer (including TID array).
1594 */
1595static void
1597{
1598 if (buffer->items)
1599 pfree(buffer->items);
1600
1601 /* release byref values, do nothing for by-val ones */
1602 if (!GinBufferIsEmpty(buffer) &&
1603 (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1604 pfree(DatumGetPointer(buffer->key));
1605
1606 pfree(buffer);
1607}
1608
1609/*
1610 * GinBufferCanAddKey
1611 * Check if a given GIN tuple can be added to the current buffer.
1612 *
1613 * Returns true if the buffer is either empty or for the same index key.
1614 */
1615static bool
1617{
1618 /* empty buffer can accept data for any key */
1619 if (GinBufferIsEmpty(buffer))
1620 return true;
1621
1622 /* otherwise just data for the same key */
1623 return GinBufferKeyEquals(buffer, tup);
1624}
1625
1626/*
1627 * Within leader, wait for end of heap scan and merge per-worker results.
1628 *
1629 * After waiting for all workers to finish, merge the per-worker results into
1630 * the complete index. The results from each worker are sorted by block number
1631 * (start of the page range). While combining the per-worker results we merge
1632 * summaries for the same page range, and also fill-in empty summaries for
1633 * ranges without any tuples.
1634 *
1635 * Returns the total number of heap tuples scanned.
1636 */
1637static double
1639{
1640 GinTuple *tup;
1641 Size tuplen;
1642 double reltuples = 0;
1643 GinBuffer *buffer;
1644
1645 /* GIN tuples from workers, merged by leader */
1646 double numtuples = 0;
1647
1648 /* wait for workers to scan table and produce partial results */
1649 reltuples = _gin_parallel_heapscan(state);
1650
1651 /* Execute the sort */
1654
1655 /* do the actual sort in the leader */
1656 tuplesort_performsort(state->bs_sortstate);
1657
1658 /*
1659 * Initialize buffer to combine entries for the same key.
1660 *
1661 * The leader is allowed to use the whole maintenance_work_mem buffer to
1662 * combine data. The parallel workers already completed.
1663 */
1664 buffer = GinBufferInit(state->ginstate.index);
1665
1666 /*
1667 * Set the progress target for the next phase. Reset the block number
1668 * values set by table_index_build_scan
1669 */
1670 {
1671 const int progress_index[] = {
1676 };
1677 const int64 progress_vals[] = {
1679 state->bs_numtuples,
1680 0, 0
1681 };
1682
1683 pgstat_progress_update_multi_param(4, progress_index, progress_vals);
1684 }
1685
1686 /*
1687 * Read the GIN tuples from the shared tuplesort, sorted by category and
1688 * key. That probably gives us order matching how data is organized in the
1689 * index.
1690 *
1691 * We don't insert the GIN tuples right away, but instead accumulate as
1692 * many TIDs for the same key as possible, and then insert that at once.
1693 * This way we don't need to decompress/recompress the posting lists, etc.
1694 */
1695 while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
1696 {
1697 MemoryContext oldCtx;
1698
1700
1701 /*
1702 * If the buffer can accept the new GIN tuple, just store it there and
1703 * we're done. If it's a different key (or maybe too much data) flush
1704 * the current contents into the index first.
1705 */
1706 if (!GinBufferCanAddKey(buffer, tup))
1707 {
1708 /*
1709 * Buffer is not empty and it's storing a different key - flush
1710 * the data into the insert, and start a new entry for current
1711 * GinTuple.
1712 */
1714
1715 oldCtx = MemoryContextSwitchTo(state->tmpCtx);
1716
1717 ginEntryInsert(&state->ginstate,
1718 buffer->attnum, buffer->key, buffer->category,
1719 buffer->items, buffer->nitems, &state->buildStats);
1720
1721 MemoryContextSwitchTo(oldCtx);
1722 MemoryContextReset(state->tmpCtx);
1723
1724 /* discard the existing data */
1725 GinBufferReset(buffer);
1726 }
1727
1728 /*
1729 * We're about to add a GIN tuple to the buffer - check the memory
1730 * limit first, and maybe write out some of the data into the index
1731 * first, if needed (and possible). We only flush the part of the TID
1732 * list that we know won't change, and only if there's enough data for
1733 * compression to work well.
1734 */
1735 if (GinBufferShouldTrim(buffer, tup))
1736 {
1737 Assert(buffer->nfrozen > 0);
1738
1739 /*
1740 * Buffer is not empty and it's storing a different key - flush
1741 * the data into the insert, and start a new entry for current
1742 * GinTuple.
1743 */
1745
1746 oldCtx = MemoryContextSwitchTo(state->tmpCtx);
1747
1748 ginEntryInsert(&state->ginstate,
1749 buffer->attnum, buffer->key, buffer->category,
1750 buffer->items, buffer->nfrozen, &state->buildStats);
1751
1752 MemoryContextSwitchTo(oldCtx);
1753 MemoryContextReset(state->tmpCtx);
1754
1755 /* truncate the data we've just discarded */
1756 GinBufferTrim(buffer);
1757 }
1758
1759 /*
1760 * Remember data for the current tuple (either remember the new key,
1761 * or append if to the existing data).
1762 */
1763 GinBufferStoreTuple(buffer, tup);
1764
1765 /* Report progress */
1767 ++numtuples);
1768 }
1769
1770 /* flush data remaining in the buffer (for the last key) */
1771 if (!GinBufferIsEmpty(buffer))
1772 {
1774
1775 ginEntryInsert(&state->ginstate,
1776 buffer->attnum, buffer->key, buffer->category,
1777 buffer->items, buffer->nitems, &state->buildStats);
1778
1779 /* discard the existing data */
1780 GinBufferReset(buffer);
1781
1782 /* Report progress */
1784 ++numtuples);
1785 }
1786
1787 /* relase all the memory */
1788 GinBufferFree(buffer);
1789
1790 tuplesort_end(state->bs_sortstate);
1791
1792 return reltuples;
1793}
1794
1795/*
1796 * Returns size of shared memory required to store state for a parallel
1797 * gin index build based on the snapshot its parallel scan will use.
1798 */
1799static Size
1801{
1802 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1803 return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
1804 table_parallelscan_estimate(heap, snapshot));
1805}
1806
1807/*
1808 * Within leader, participate as a parallel worker.
1809 */
1810static void
1812{
1813 GinLeader *ginleader = buildstate->bs_leader;
1814 int sortmem;
1815
1816 /*
1817 * Might as well use reliable figure when doling out maintenance_work_mem
1818 * (when requested number of workers were not launched, this will be
1819 * somewhat higher than it is for other workers).
1820 */
1821 sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
1822
1823 /* Perform work common to all participants */
1824 _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
1825 ginleader->sharedsort, heap, index,
1826 sortmem, true);
1827}
1828
1829/*
1830 * _gin_process_worker_data
1831 * First phase of the key merging, happening in the worker.
1832 *
1833 * Depending on the number of distinct keys, the TID lists produced by the
1834 * callback may be very short (due to frequent evictions in the callback).
1835 * But combining many tiny lists is expensive, so we try to do as much as
1836 * possible in the workers and only then pass the results to the leader.
1837 *
1838 * We read the tuples sorted by the key, and merge them into larger lists.
1839 * At the moment there's no memory limit, so this will just produce one
1840 * huge (sorted) list per key in each worker. Which means the leader will
1841 * do a very limited number of mergesorts, which is good.
1842 */
1843static void
1845 bool progress)
1846{
1847 GinTuple *tup;
1848 Size tuplen;
1849
1850 GinBuffer *buffer;
1851
1852 /*
1853 * Initialize buffer to combine entries for the same key.
1854 *
1855 * The workers are limited to the same amount of memory as during the sort
1856 * in ginBuildCallbackParallel. But this probably should be the 32MB used
1857 * during planning, just like there.
1858 */
1859 buffer = GinBufferInit(state->ginstate.index);
1860
1861 /* sort the raw per-worker data */
1862 if (progress)
1865
1866 tuplesort_performsort(state->bs_worker_sort);
1867
1868 /* reset the number of GIN tuples produced by this worker */
1869 state->bs_numtuples = 0;
1870
1871 if (progress)
1874
1875 /*
1876 * Read the GIN tuples from the shared tuplesort, sorted by the key, and
1877 * merge them into larger chunks for the leader to combine.
1878 */
1879 while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
1880 {
1881
1883
1884 /*
1885 * If the buffer can accept the new GIN tuple, just store it there and
1886 * we're done. If it's a different key (or maybe too much data) flush
1887 * the current contents into the index first.
1888 */
1889 if (!GinBufferCanAddKey(buffer, tup))
1890 {
1891 GinTuple *ntup;
1892 Size ntuplen;
1893
1894 /*
1895 * Buffer is not empty and it's storing a different key - flush
1896 * the data into the insert, and start a new entry for current
1897 * GinTuple.
1898 */
1900
1901 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1902 buffer->key, buffer->typlen, buffer->typbyval,
1903 buffer->items, buffer->nitems, &ntuplen);
1904
1905 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1906 state->bs_numtuples++;
1907
1908 pfree(ntup);
1909
1910 /* discard the existing data */
1911 GinBufferReset(buffer);
1912 }
1913
1914 /*
1915 * We're about to add a GIN tuple to the buffer - check the memory
1916 * limit first, and maybe write out some of the data into the index
1917 * first, if needed (and possible). We only flush the part of the TID
1918 * list that we know won't change, and only if there's enough data for
1919 * compression to work well.
1920 */
1921 if (GinBufferShouldTrim(buffer, tup))
1922 {
1923 GinTuple *ntup;
1924 Size ntuplen;
1925
1926 Assert(buffer->nfrozen > 0);
1927
1928 /*
1929 * Buffer is not empty and it's storing a different key - flush
1930 * the data into the insert, and start a new entry for current
1931 * GinTuple.
1932 */
1934
1935 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1936 buffer->key, buffer->typlen, buffer->typbyval,
1937 buffer->items, buffer->nfrozen, &ntuplen);
1938
1939 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1940
1941 pfree(ntup);
1942
1943 /* truncate the data we've just discarded */
1944 GinBufferTrim(buffer);
1945 }
1946
1947 /*
1948 * Remember data for the current tuple (either remember the new key,
1949 * or append if to the existing data).
1950 */
1951 GinBufferStoreTuple(buffer, tup);
1952 }
1953
1954 /* flush data remaining in the buffer (for the last key) */
1955 if (!GinBufferIsEmpty(buffer))
1956 {
1957 GinTuple *ntup;
1958 Size ntuplen;
1959
1961
1962 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1963 buffer->key, buffer->typlen, buffer->typbyval,
1964 buffer->items, buffer->nitems, &ntuplen);
1965
1966 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1967 state->bs_numtuples++;
1968
1969 pfree(ntup);
1970
1971 /* discard the existing data */
1972 GinBufferReset(buffer);
1973 }
1974
1975 /* relase all the memory */
1976 GinBufferFree(buffer);
1977
1978 tuplesort_end(worker_sort);
1979}
1980
1981/*
1982 * Perform a worker's portion of a parallel GIN index build sort.
1983 *
1984 * This generates a tuplesort for the worker portion of the table.
1985 *
1986 * sortmem is the amount of working memory to use within each worker,
1987 * expressed in KBs.
1988 *
1989 * When this returns, workers are done, and need only release resources.
1990 *
1991 * Before feeding data into a shared tuplesort (for the leader process),
1992 * the workers process data in two phases.
1993 *
1994 * 1) A worker reads a portion of rows from the table, accumulates entries
1995 * in memory, and flushes them into a private tuplesort (e.g. because of
1996 * using too much memory).
1997 *
1998 * 2) The private tuplesort gets sorted (by key and TID), the worker reads
1999 * the data again, and combines the entries as much as possible. This has
2000 * to happen eventually, and this way it's done in workers in parallel.
2001 *
2002 * Finally, the combined entries are written into the shared tuplesort, so
2003 * that the leader can process them.
2004 *
2005 * How well this works (compared to just writing entries into the shared
2006 * tuplesort) depends on the data set. For large tables with many distinct
2007 * keys this helps a lot. With many distinct keys it's likely the buffers has
2008 * to be flushed often, generating many entries with the same key and short
2009 * TID lists. These entries need to be sorted and merged at some point,
2010 * before writing them to the index. The merging is quite expensive, it can
2011 * easily be ~50% of a serial build, and doing as much of it in the workers
2012 * means it's parallelized. The leader still has to merge results from the
2013 * workers, but it's much more efficient to merge few large entries than
2014 * many tiny ones.
2015 *
2016 * This also reduces the amount of data the workers pass to the leader through
2017 * the shared tuplesort. OTOH the workers need more space for the private sort,
2018 * possibly up to 2x of the data, if no entries be merged in a worker. But this
2019 * is very unlikely, and the only consequence is inefficiency, so we ignore it.
2020 */
2021static void
2023 GinBuildShared *ginshared, Sharedsort *sharedsort,
2024 Relation heap, Relation index,
2025 int sortmem, bool progress)
2026{
2027 SortCoordinate coordinate;
2028 TableScanDesc scan;
2029 double reltuples;
2030 IndexInfo *indexInfo;
2031
2032 /* Initialize local tuplesort coordination state */
2033 coordinate = palloc0(sizeof(SortCoordinateData));
2034 coordinate->isWorker = true;
2035 coordinate->nParticipants = -1;
2036 coordinate->sharedsort = sharedsort;
2037
2038 /* remember how much space is allowed for the accumulated entries */
2039 state->work_mem = (sortmem / 2);
2040
2041 /* remember how many workers participate in the build */
2042 state->bs_num_workers = ginshared->scantuplesortstates;
2043
2044 /* Begin "partial" tuplesort */
2045 state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
2046 state->work_mem,
2047 coordinate,
2049
2050 /* Local per-worker sort of raw-data */
2051 state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
2052 state->work_mem,
2053 NULL,
2055
2056 /* Join parallel scan */
2057 indexInfo = BuildIndexInfo(index);
2058 indexInfo->ii_Concurrent = ginshared->isconcurrent;
2059
2060 scan = table_beginscan_parallel(heap,
2062
2063 reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
2065
2066 /* write remaining accumulated entries */
2068
2069 /*
2070 * Do the first phase of in-worker processing - sort the data produced by
2071 * the callback, and combine them into much larger chunks and place that
2072 * into the shared tuplestore for leader to process.
2073 */
2074 _gin_process_worker_data(state, state->bs_worker_sort, progress);
2075
2076 /* sort the GIN tuples built by this worker */
2077 tuplesort_performsort(state->bs_sortstate);
2078
2079 state->bs_reltuples += reltuples;
2080
2081 /*
2082 * Done. Record ambuild statistics.
2083 */
2084 SpinLockAcquire(&ginshared->mutex);
2085 ginshared->nparticipantsdone++;
2086 ginshared->reltuples += state->bs_reltuples;
2087 ginshared->indtuples += state->bs_numtuples;
2088 SpinLockRelease(&ginshared->mutex);
2089
2090 /* Notify leader */
2092
2093 tuplesort_end(state->bs_sortstate);
2094}
2095
2096/*
2097 * Perform work within a launched parallel process.
2098 */
2099void
2101{
2102 char *sharedquery;
2103 GinBuildShared *ginshared;
2104 Sharedsort *sharedsort;
2105 GinBuildState buildstate;
2106 Relation heapRel;
2107 Relation indexRel;
2108 LOCKMODE heapLockmode;
2109 LOCKMODE indexLockmode;
2110 WalUsage *walusage;
2111 BufferUsage *bufferusage;
2112 int sortmem;
2113
2114 /*
2115 * The only possible status flag that can be set to the parallel worker is
2116 * PROC_IN_SAFE_IC.
2117 */
2118 Assert((MyProc->statusFlags == 0) ||
2120
2121 /* Set debug_query_string for individual workers first */
2122 sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
2123 debug_query_string = sharedquery;
2124
2125 /* Report the query string from leader */
2127
2128 /* Look up gin shared state */
2129 ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
2130
2131 /* Open relations using lock modes known to be obtained by index.c */
2132 if (!ginshared->isconcurrent)
2133 {
2134 heapLockmode = ShareLock;
2135 indexLockmode = AccessExclusiveLock;
2136 }
2137 else
2138 {
2139 heapLockmode = ShareUpdateExclusiveLock;
2140 indexLockmode = RowExclusiveLock;
2141 }
2142
2143 /* Open relations within worker */
2144 heapRel = table_open(ginshared->heaprelid, heapLockmode);
2145 indexRel = index_open(ginshared->indexrelid, indexLockmode);
2146
2147 /* initialize the GIN build state */
2148 initGinState(&buildstate.ginstate, indexRel);
2149 buildstate.indtuples = 0;
2150 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
2151 memset(&buildstate.tid, 0, sizeof(ItemPointerData));
2152
2153 /*
2154 * create a temporary memory context that is used to hold data not yet
2155 * dumped out to the index
2156 */
2158 "Gin build temporary context",
2160
2161 /*
2162 * create a temporary memory context that is used for calling
2163 * ginExtractEntries(), and can be reset after each tuple
2164 */
2166 "Gin build temporary context for user-defined function",
2168
2169 buildstate.accum.ginstate = &buildstate.ginstate;
2170 ginInitBA(&buildstate.accum);
2171
2172
2173 /* Look up shared state private to tuplesort.c */
2174 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2175 tuplesort_attach_shared(sharedsort, seg);
2176
2177 /* Prepare to track buffer usage during parallel execution */
2179
2180 /*
2181 * Might as well use reliable figure when doling out maintenance_work_mem
2182 * (when requested number of workers were not launched, this will be
2183 * somewhat higher than it is for other workers).
2184 */
2185 sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
2186
2187 _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
2188 heapRel, indexRel, sortmem, false);
2189
2190 /* Report WAL/buffer usage during parallel execution */
2191 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2192 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2194 &walusage[ParallelWorkerNumber]);
2195
2196 index_close(indexRel, indexLockmode);
2197 table_close(heapRel, heapLockmode);
2198}
2199
2200/*
2201 * Used to keep track of compressed TID lists when building a GIN tuple.
2202 */
2203typedef struct
2204{
2205 dlist_node node; /* linked list pointers */
2208
2209/*
2210 * _gin_build_tuple
2211 * Serialize the state for an index key into a tuple for tuplesort.
2212 *
2213 * The tuple has a number of scalar fields (mostly matching the build state),
2214 * and then a data array that stores the key first, and then the TID list.
2215 *
2216 * For by-reference data types, we store the actual data. For by-val types
2217 * we simply copy the whole Datum, so that we don't have to care about stuff
2218 * like endianess etc. We could make it a little bit smaller, but it's not
2219 * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
2220 * start of the TID list anyway. So we wouldn't save anything. (This would
2221 * not be a good idea for the permanent in-index data, since we'd prefer
2222 * that that not depend on sizeof(Datum). But this is just a transient
2223 * representation to use while sorting the data.)
2224 *
2225 * The TID list is serialized as compressed - it's highly compressible, and
2226 * we already have ginCompressPostingList for this purpose. The list may be
2227 * pretty long, so we compress it into multiple segments and then copy all
2228 * of that into the GIN tuple.
2229 */
2230static GinTuple *
2231_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
2232 Datum key, int16 typlen, bool typbyval,
2234 Size *len)
2235{
2236 GinTuple *tuple;
2237 char *ptr;
2238
2239 Size tuplen;
2240 int keylen;
2241
2242 dlist_mutable_iter iter;
2243 dlist_head segments;
2244 int ncompressed;
2245 Size compresslen;
2246
2247 /*
2248 * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
2249 * have actual non-empty key. We include varlena headers and \0 bytes for
2250 * strings, to make it easier to access the data in-line.
2251 *
2252 * For byval types we simply copy the whole Datum. We could store just the
2253 * necessary bytes, but this is simpler to work with and not worth the
2254 * extra complexity. Moreover we still need to do the MAXALIGN to allow
2255 * direct access to items pointers.
2256 *
2257 * XXX Note that for byval types we store the whole datum, no matter what
2258 * the typlen value is.
2259 */
2260 if (category != GIN_CAT_NORM_KEY)
2261 keylen = 0;
2262 else if (typbyval)
2263 keylen = sizeof(Datum);
2264 else if (typlen > 0)
2265 keylen = typlen;
2266 else if (typlen == -1)
2267 keylen = VARSIZE_ANY(DatumGetPointer(key));
2268 else if (typlen == -2)
2269 keylen = strlen(DatumGetPointer(key)) + 1;
2270 else
2271 elog(ERROR, "unexpected typlen value (%d)", typlen);
2272
2273 /* compress the item pointers */
2274 ncompressed = 0;
2275 compresslen = 0;
2276 dlist_init(&segments);
2277
2278 /* generate compressed segments of TID list chunks */
2279 while (ncompressed < nitems)
2280 {
2281 int cnt;
2282 GinSegmentInfo *seginfo = palloc(sizeof(GinSegmentInfo));
2283
2284 seginfo->seg = ginCompressPostingList(&items[ncompressed],
2285 (nitems - ncompressed),
2286 UINT16_MAX,
2287 &cnt);
2288
2289 ncompressed += cnt;
2290 compresslen += SizeOfGinPostingList(seginfo->seg);
2291
2292 dlist_push_tail(&segments, &seginfo->node);
2293 }
2294
2295 /*
2296 * Determine GIN tuple length with all the data included. Be careful about
2297 * alignment, to allow direct access to compressed segments (those require
2298 * only SHORTALIGN).
2299 */
2300 tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
2301
2302 *len = tuplen;
2303
2304 /*
2305 * Allocate space for the whole GIN tuple.
2306 *
2307 * The palloc0 is needed - writetup_index_gin will write the whole tuple
2308 * to disk, so we need to make sure the padding bytes are defined
2309 * (otherwise valgrind would report this).
2310 */
2311 tuple = palloc0(tuplen);
2312
2313 tuple->tuplen = tuplen;
2314 tuple->attrnum = attrnum;
2315 tuple->category = category;
2316 tuple->keylen = keylen;
2317 tuple->nitems = nitems;
2318
2319 /* key type info */
2320 tuple->typlen = typlen;
2321 tuple->typbyval = typbyval;
2322
2323 /*
2324 * Copy the key and items into the tuple. First the key value, which we
2325 * can simply copy right at the beginning of the data array.
2326 */
2327 if (category == GIN_CAT_NORM_KEY)
2328 {
2329 if (typbyval)
2330 {
2331 memcpy(tuple->data, &key, sizeof(Datum));
2332 }
2333 else if (typlen > 0) /* byref, fixed length */
2334 {
2335 memcpy(tuple->data, DatumGetPointer(key), typlen);
2336 }
2337 else if (typlen == -1)
2338 {
2339 memcpy(tuple->data, DatumGetPointer(key), keylen);
2340 }
2341 else if (typlen == -2)
2342 {
2343 memcpy(tuple->data, DatumGetPointer(key), keylen);
2344 }
2345 }
2346
2347 /* finally, copy the TIDs into the array */
2348 ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
2349
2350 /* copy in the compressed data, and free the segments */
2351 dlist_foreach_modify(iter, &segments)
2352 {
2353 GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
2354
2355 memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
2356
2357 ptr += SizeOfGinPostingList(seginfo->seg);
2358
2359 dlist_delete(&seginfo->node);
2360
2361 pfree(seginfo->seg);
2362 pfree(seginfo);
2363 }
2364
2365 return tuple;
2366}
2367
2368/*
2369 * _gin_parse_tuple_key
2370 * Return a Datum representing the key stored in the tuple.
2371 *
2372 * Most of the tuple fields are directly accessible, the only thing that
2373 * needs more care is the key and the TID list.
2374 *
2375 * For the key, this returns a regular Datum representing it. It's either the
2376 * actual key value, or a pointer to the beginning of the data array (which is
2377 * where the data was copied by _gin_build_tuple).
2378 */
2379static Datum
2381{
2382 Datum key;
2383
2384 if (a->category != GIN_CAT_NORM_KEY)
2385 return (Datum) 0;
2386
2387 if (a->typbyval)
2388 {
2389 memcpy(&key, a->data, a->keylen);
2390 return key;
2391 }
2392
2393 return PointerGetDatum(a->data);
2394}
2395
2396/*
2397* _gin_parse_tuple_items
2398 * Return a pointer to a palloc'd array of decompressed TID array.
2399 */
2400static ItemPointer
2402{
2403 int len;
2404 char *ptr;
2405 int ndecoded;
2407
2408 len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2409 ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2410
2412
2413 Assert(ndecoded == a->nitems);
2414
2415 return (ItemPointer) items;
2416}
2417
2418/*
2419 * _gin_compare_tuples
2420 * Compare GIN tuples, used by tuplesort during parallel index build.
2421 *
2422 * The scalar fields (attrnum, category) are compared first, the key value is
2423 * compared last. The comparisons are done using type-specific sort support
2424 * functions.
2425 *
2426 * If the key value matches, we compare the first TID value in the TID list,
2427 * which means the tuples are merged in an order in which they are most
2428 * likely to be simply concatenated. (This "first" TID will also allow us
2429 * to determine a point up to which the list is fully determined and can be
2430 * written into the index to enforce a memory limit etc.)
2431 */
2432int
2434{
2435 int r;
2436 Datum keya,
2437 keyb;
2438
2439 if (a->attrnum < b->attrnum)
2440 return -1;
2441
2442 if (a->attrnum > b->attrnum)
2443 return 1;
2444
2445 if (a->category < b->category)
2446 return -1;
2447
2448 if (a->category > b->category)
2449 return 1;
2450
2451 if (a->category == GIN_CAT_NORM_KEY)
2452 {
2453 keya = _gin_parse_tuple_key(a);
2454 keyb = _gin_parse_tuple_key(b);
2455
2456 r = ApplySortComparator(keya, false,
2457 keyb, false,
2458 &ssup[a->attrnum - 1]);
2459
2460 /* if the key is the same, consider the first TID in the array */
2461 return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
2463 }
2464
2467}
int ParallelWorkerNumber
Definition: parallel.c:115
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:211
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:796
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:573
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:950
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:173
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:693
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
uint32 BlockNumber
Definition: block.h:31
static Datum values[MAXATTR]
Definition: bootstrap.c:153
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4223
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:845
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5383
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2943
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5604
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:291
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:425
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:75
@ EB_LOCK_FIRST
Definition: bufmgr.h:87
#define BMR_REL(p_rel)
Definition: bufmgr.h:114
static void * PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
PageData * Page
Definition: bufpage.h:81
#define Min(x, y)
Definition: c.h:1007
#define BUFFERALIGN(LEN)
Definition: c.h:816
int64_t int64
Definition: c.h:539
int16_t int16
Definition: c.h:537
#define SHORTALIGN(LEN)
Definition: c.h:810
int32_t int32
Definition: c.h:538
uint32_t uint32
Definition: c.h:542
#define OidIsValid(objectId)
Definition: c.h:778
size_t Size
Definition: c.h:614
bool ConditionVariableCancelSleep(void)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void ConditionVariableSignal(ConditionVariable *cv)
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
#define MaxAllocSize
Definition: fe_memutils.h:22
char * format_type_be(Oid type_oid)
Definition: format_type.c:343
IndexUniqueCheck
Definition: genam.h:143
#define GIN_COMPARE_PROC
Definition: gin.h:24
#define PROGRESS_GIN_PHASE_PERFORMSORT_2
Definition: gin.h:49
#define PROGRESS_GIN_PHASE_MERGE_1
Definition: gin.h:48
#define PROGRESS_GIN_PHASE_PERFORMSORT_1
Definition: gin.h:47
#define PROGRESS_GIN_PHASE_MERGE_2
Definition: gin.h:50
#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN
Definition: gin.h:46
#define GIN_UNLOCK
Definition: gin_private.h:50
#define GinGetUseFastUpdate(relation)
Definition: gin_private.h:35
static ItemPointer GinTupleGetFirst(GinTuple *tup)
Definition: gin_tuple.h:35
#define GinIsPostingTree(itup)
Definition: ginblock.h:231
#define GIN_CAT_NORM_KEY
Definition: ginblock.h:208
#define SizeOfGinPostingList(plist)
Definition: ginblock.h:342
#define GIN_LEAF
Definition: ginblock.h:42
#define GinGetPostingTree(itup)
Definition: ginblock.h:233
signed char GinNullCategory
Definition: ginblock.h:206
#define GinSetPostingTree(itup, blkno)
Definition: ginblock.h:232
#define GinMaxItemSize
Definition: ginblock.h:248
void freeGinBtreeStack(GinBtreeStack *stack)
Definition: ginbtree.c:198
void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats)
Definition: ginbtree.c:816
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, bool rootConflictCheck)
Definition: ginbtree.c:83
void ginBeginBAScan(BuildAccumulator *accum)
Definition: ginbulk.c:257
ItemPointerData * ginGetBAEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *key, GinNullCategory *category, uint32 *n)
Definition: ginbulk.c:268
void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum *entries, GinNullCategory *categories, int32 nentries)
Definition: ginbulk.c:210
void ginInitBA(BuildAccumulator *accum)
Definition: ginbulk.c:109
BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, GinStatsData *buildStats, Buffer entrybuffer)
Definition: gindatapage.c:1775
void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
Definition: gindatapage.c:1908
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
Definition: ginentrypage.c:162
IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, Pointer data, Size dataSize, int nipd, bool errorTooBig)
Definition: ginentrypage.c:44
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
Definition: ginentrypage.c:747
void ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, OffsetNumber attnum, Datum value, bool isNull, ItemPointer ht_ctid)
Definition: ginfast.c:483
void ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Definition: ginfast.c:219
static void ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: gininsert.c:564
#define PARALLEL_KEY_BUFFER_USAGE
Definition: gininsert.c:43
static void AssertCheckItemPointers(GinBuffer *buffer)
Definition: gininsert.c:1204
int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
Definition: gininsert.c:2433
struct GinBuffer GinBuffer
struct GinLeader GinLeader
static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition: gininsert.c:211
static bool GinBufferIsEmpty(GinBuffer *buffer)
Definition: gininsert.c:1328
#define PARALLEL_KEY_GIN_SHARED
Definition: gininsert.c:39
IndexBuildResult * ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: gininsert.c:612
static GinBuffer * GinBufferInit(Relation index)
Definition: gininsert.c:1260
static IndexTuple buildFreshLeafTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition: gininsert.c:293
static void GinBufferReset(GinBuffer *buffer)
Definition: gininsert.c:1550
static void ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr)
Definition: gininsert.c:422
static void AssertCheckGinBuffer(GinBuffer *buffer)
Definition: gininsert.c:1231
static void _gin_parallel_scan_and_build(GinBuildState *state, GinBuildShared *ginshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
Definition: gininsert.c:2022
void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
Definition: gininsert.c:345
static void GinBufferTrim(GinBuffer *buffer)
Definition: gininsert.c:1580
static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition: gininsert.c:1800
static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
Definition: gininsert.c:1106
static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
Definition: gininsert.c:927
static void ginFlushBuildState(GinBuildState *buildstate, Relation index)
Definition: gininsert.c:499
struct GinBuildShared GinBuildShared
static void _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, bool progress)
Definition: gininsert.c:1844
static bool GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1344
static double _gin_parallel_merge(GinBuildState *state)
Definition: gininsert.c:1638
static void ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer item)
Definition: gininsert.c:837
static bool GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1616
void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition: gininsert.c:2100
void ginbuildempty(Relation index)
Definition: gininsert.c:806
bool gininsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: gininsert.c:855
static void _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
Definition: gininsert.c:1811
static GinTuple * _gin_build_tuple(OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len)
Definition: gininsert.c:2231
static Datum _gin_parse_tuple_key(GinTuple *a)
Definition: gininsert.c:2380
#define PARALLEL_KEY_TUPLESORT
Definition: gininsert.c:40
static bool GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1410
#define PARALLEL_KEY_QUERY_TEXT
Definition: gininsert.c:41
static void GinBufferFree(GinBuffer *buffer)
Definition: gininsert.c:1596
static ItemPointer _gin_parse_tuple_items(GinTuple *a)
Definition: gininsert.c:2401
#define ParallelTableScanFromGinBuildShared(shared)
Definition: gininsert.c:104
#define PARALLEL_KEY_WAL_USAGE
Definition: gininsert.c:42
static void GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1483
static void ginBuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: gininsert.c:446
static double _gin_parallel_heapscan(GinBuildState *state)
Definition: gininsert.c:1137
ItemPointer ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
GinPostingList * ginCompressPostingList(const ItemPointerData *ipd, int nipd, int maxsize, int *nwritten)
ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb, int *nmerged)
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
Definition: ginutil.c:231
Buffer GinNewBuffer(Relation index)
Definition: ginutil.c:305
void GinInitBuffer(Buffer b, uint32 f)
Definition: ginutil.c:355
Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories)
Definition: ginutil.c:488
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Definition: ginutil.c:264
void GinInitMetabuffer(Buffer b)
Definition: ginutil.c:361
void initGinState(GinState *state, Relation index)
Definition: ginutil.c:102
void ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
Definition: ginutil.c:655
int maintenance_work_mem
Definition: globals.c:133
Assert(PointerIsAligned(start, uint64))
static void dlist_init(dlist_head *head)
Definition: ilist.h:314
static void dlist_delete(dlist_node *node)
Definition: ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:640
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition: ilist.h:364
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
#define nitems(x)
Definition: indent.h:31
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2428
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:883
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
static struct @171 value
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:218
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:208
void InstrStartParallelQuery(void)
Definition: instrument.c:200
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition: itemptr.c:51
struct ItemPointerData ItemPointerData
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
IndexTupleData * IndexTuple
Definition: itup.h:53
int LOCKMODE
Definition: lockdefs.h:26
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define ShareLock
Definition: lockdefs.h:40
#define RowExclusiveLock
Definition: lockdefs.h:38
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:469
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
uint16 OffsetNumber
Definition: off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:202
const void size_t len
const void * data
static int progress
Definition: pgbench.c:262
const char * debug_query_string
Definition: postgres.c:89
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
uint64_t Datum
Definition: postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition: predicate.c:4336
#define PROC_IN_SAFE_IC
Definition: proc.h:59
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:89
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:125
#define PROGRESS_CREATEIDX_TUPLES_DONE
Definition: progress.h:90
#define PROGRESS_CREATEIDX_SUBPHASE
Definition: progress.h:88
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:124
#define RelationGetRelid(relation)
Definition: rel.h:515
#define RelationGetDescr(relation)
Definition: rel.h:541
#define RelationGetRelationName(relation)
Definition: rel.h:549
#define RelationNeedsWAL(relation)
Definition: rel.h:638
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:534
@ MAIN_FORKNUM
Definition: relpath.h:58
@ INIT_FORKNUM
Definition: relpath.h:61
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:494
Size mul_size(Size s1, Size s2)
Definition: shmem.c:511
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:271
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:864
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:822
#define SnapshotAny
Definition: snapmgr.h:33
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55
void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup)
Definition: sortsupport.c:68
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
PGPROC * MyProc
Definition: proc.c:67
GinState * ginstate
Definition: gin_private.h:442
int16 attlen
Definition: tupdesc.h:71
Oid fn_oid
Definition: fmgr.h:59
bool(* findItem)(GinBtree, GinBtreeStack *)
Definition: gin_private.h:158
OffsetNumber off
Definition: gin_private.h:135
int nfrozen
Definition: gininsert.c:1194
Size keylen
Definition: gininsert.c:1183
int maxitems
Definition: gininsert.c:1190
int nitems
Definition: gininsert.c:1193
GinNullCategory category
Definition: gininsert.c:1181
OffsetNumber attnum
Definition: gininsert.c:1180
Datum key
Definition: gininsert.c:1182
SortSupport ssup
Definition: gininsert.c:1195
ItemPointerData * items
Definition: gininsert.c:1196
int16 typlen
Definition: gininsert.c:1186
bool typbyval
Definition: gininsert.c:1187
double reltuples
Definition: gininsert.c:88
int scantuplesortstates
Definition: gininsert.c:59
double indtuples
Definition: gininsert.c:89
bool isconcurrent
Definition: gininsert.c:58
slock_t mutex
Definition: gininsert.c:75
ConditionVariable workersdonecv
Definition: gininsert.c:67
int nparticipantsdone
Definition: gininsert.c:87
double indtuples
Definition: gininsert.c:142
GinState ginstate
Definition: gininsert.c:141
double bs_reltuples
Definition: gininsert.c:161
MemoryContext tmpCtx
Definition: gininsert.c:144
GinLeader * bs_leader
Definition: gininsert.c:154
GinStatsData buildStats
Definition: gininsert.c:143
Tuplesortstate * bs_worker_sort
Definition: gininsert.c:176
ItemPointerData tid
Definition: gininsert.c:147
double bs_numtuples
Definition: gininsert.c:160
int bs_num_workers
Definition: gininsert.c:157
Tuplesortstate * bs_sortstate
Definition: gininsert.c:168
MemoryContext funcCtx
Definition: gininsert.c:145
BuildAccumulator accum
Definition: gininsert.c:146
Sharedsort * sharedsort
Definition: gininsert.c:133
int nparticipanttuplesorts
Definition: gininsert.c:121
ParallelContext * pcxt
Definition: gininsert.c:113
BufferUsage * bufferusage
Definition: gininsert.c:136
Snapshot snapshot
Definition: gininsert.c:134
GinBuildShared * ginshared
Definition: gininsert.c:132
WalUsage * walusage
Definition: gininsert.c:135
dlist_node node
Definition: gininsert.c:2205
GinPostingList * seg
Definition: gininsert.c:2206
TupleDesc origTupdesc
Definition: gin_private.h:74
Relation index
Definition: gin_private.h:60
BlockNumber nEntryPages
Definition: gin.h:59
int64 nEntries
Definition: gin.h:61
BlockNumber nTotalPages
Definition: gin.h:58
char data[FLEXIBLE_ARRAY_MEMBER]
Definition: gin_tuple.h:31
int nitems
Definition: gin_tuple.h:30
int16 typlen
Definition: gin_tuple.h:27
bool typbyval
Definition: gin_tuple.h:28
signed char category
Definition: gin_tuple.h:29
int tuplen
Definition: gin_tuple.h:24
uint16 keylen
Definition: gin_tuple.h:26
OffsetNumber attrnum
Definition: gin_tuple.h:25
double heap_tuples
Definition: genam.h:59
double index_tuples
Definition: genam.h:60
void * ii_AmCache
Definition: execnodes.h:223
int ii_ParallelWorkers
Definition: execnodes.h:218
bool ii_Concurrent
Definition: execnodes.h:210
MemoryContext ii_Context
Definition: execnodes.h:226
uint8 statusFlags
Definition: proc.h:259
dsm_segment * seg
Definition: parallel.h:42
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
int nworkers_launched
Definition: parallel.h:37
Sharedsort * sharedsort
Definition: tuplesort.h:59
AttrNumber ssup_attno
Definition: sortsupport.h:81
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66
FmgrInfo cmp_proc_finfo
Definition: typcache.h:77
dlist_node * cur
Definition: ilist.h:200
Definition: type.h:96
Definition: regguts.h:323
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition: tableam.c:166
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:131
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition: tableam.c:146
static double table_index_build_scan(Relation table_rel, Relation index_rel, IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1744
static ItemArray items
Definition: test_tidstore.c:48
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:160
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:175
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1359
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition: tuplesort.c:2932
Size tuplesort_estimate_shared(int nWorkers)
Definition: tuplesort.c:2911
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:947
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition: tuplesort.c:2955
struct SortCoordinateData * SortCoordinate
Definition: tuplesort.h:62
#define TUPLESORT_NONE
Definition: tuplesort.h:94
Tuplesortstate * tuplesort_begin_index_gin(Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, int sortopt)
GinTuple * tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:144
static Size VARSIZE_ANY(const void *PTR)
Definition: varatt.h:460
void ExitParallelMode(void)
Definition: xact.c:1065
void EnterParallelMode(void)
Definition: xact.c:1052
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
Definition: xloginsert.c:1292
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1259