PostgreSQL Source Code git master
Loading...
Searching...
No Matches
gininsert.c File Reference
#include "postgres.h"
#include "access/gin_private.h"
#include "access/gin_tuple.h"
#include "access/parallel.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "commands/progress.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/condition_variable.h"
#include "storage/proc.h"
#include "storage/predicate.h"
#include "tcop/tcopprot.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/tuplesort.h"
#include "utils/typcache.h"
#include "utils/wait_event.h"
Include dependency graph for gininsert.c:

Go to the source code of this file.

Data Structures

struct  GinBuildShared
 
struct  GinLeader
 
struct  GinBuildState
 
struct  GinBuffer
 
struct  GinSegmentInfo
 

Macros

#define PARALLEL_KEY_GIN_SHARED   UINT64CONST(0xB000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xB000000000000002)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xB000000000000003)
 
#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xB000000000000004)
 
#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xB000000000000005)
 
#define ParallelTableScanFromGinBuildShared(shared)    (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
 

Typedefs

typedef struct GinBuildShared GinBuildShared
 
typedef struct GinLeader GinLeader
 
typedef struct GinBuffer GinBuffer
 

Functions

static void _gin_begin_parallel (GinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
 
static void _gin_end_parallel (GinLeader *ginleader, GinBuildState *state)
 
static Size _gin_parallel_estimate_shared (Relation heap, Snapshot snapshot)
 
static double _gin_parallel_heapscan (GinBuildState *state)
 
static double _gin_parallel_merge (GinBuildState *state)
 
static void _gin_leader_participate_as_worker (GinBuildState *buildstate, Relation heap, Relation index)
 
static void _gin_parallel_scan_and_build (GinBuildState *state, GinBuildShared *ginshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
 
static ItemPointer _gin_parse_tuple_items (GinTuple *a)
 
static Datum _gin_parse_tuple_key (GinTuple *a)
 
static GinTuple * _gin_build_tuple (OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len)
 
static IndexTuple addItemPointersToLeafTuple (GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
 
static IndexTuple buildFreshLeafTuple (GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
 
void ginEntryInsert (GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
 
static void ginHeapTupleBulkInsert (GinBuildState *buildstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr)
 
static void ginBuildCallback (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static void ginFlushBuildState (GinBuildState *buildstate, Relation index)
 
static void ginBuildCallbackParallel (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
IndexBuildResult * ginbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
void ginbuildempty (Relation index)
 
static void ginHeapTupleInsert (GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer item)
 
bool gininsert (Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
static void AssertCheckItemPointers (GinBuffer *buffer)
 
static void AssertCheckGinBuffer (GinBuffer *buffer)
 
static GinBuffer * GinBufferInit (Relation index)
 
static bool GinBufferIsEmpty (GinBuffer *buffer)
 
static bool GinBufferKeyEquals (GinBuffer *buffer, GinTuple *tup)
 
static bool GinBufferShouldTrim (GinBuffer *buffer, GinTuple *tup)
 
static void GinBufferStoreTuple (GinBuffer *buffer, GinTuple *tup)
 
static void GinBufferReset (GinBuffer *buffer)
 
static void GinBufferTrim (GinBuffer *buffer)
 
static void GinBufferFree (GinBuffer *buffer)
 
static bool GinBufferCanAddKey (GinBuffer *buffer, GinTuple *tup)
 
static void _gin_process_worker_data (GinBuildState *state, Tuplesortstate *worker_sort, bool progress)
 
void _gin_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 
int _gin_compare_tuples (GinTuple *a, GinTuple *b, SortSupport ssup)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BUFFER_USAGE

#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xB000000000000005)

Definition at line 49 of file gininsert.c.

◆ PARALLEL_KEY_GIN_SHARED

#define PARALLEL_KEY_GIN_SHARED   UINT64CONST(0xB000000000000001)

Definition at line 45 of file gininsert.c.

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xB000000000000003)

Definition at line 47 of file gininsert.c.

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xB000000000000002)

Definition at line 46 of file gininsert.c.

◆ PARALLEL_KEY_WAL_USAGE

#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xB000000000000004)

Definition at line 48 of file gininsert.c.

◆ ParallelTableScanFromGinBuildShared

#define ParallelTableScanFromGinBuildShared (   shared)     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))

Definition at line 110 of file gininsert.c.

116{
117 /* parallel context itself */
118 ParallelContext *pcxt;
119
120 /*
121 * nparticipanttuplesorts is the exact number of worker processes
122 * successfully launched, plus one leader process if it participates as a
123 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
124 * participating as a worker).
125 */
126 int nparticipanttuplesorts;
127
128 /*
129 * Leader process convenience pointers to shared state (leader avoids TOC
130 * lookups).
131 *
132 * GinBuildShared is the shared state for entire build. sharedsort is the
133 * shared, tuplesort-managed state passed to each process tuplesort.
134 * snapshot is the snapshot used by the scan iff an MVCC snapshot is
135 * required.
136 */
137 GinBuildShared *ginshared;
138 Sharedsort *sharedsort;
139 Snapshot snapshot;
140 WalUsage *walusage;
141 BufferUsage *bufferusage;
142} GinLeader;
143
144typedef struct
145{
146 GinState ginstate;
147 double indtuples;
148 GinStatsData buildStats;
149 MemoryContext tmpCtx;
150 MemoryContext funcCtx;
151 BuildAccumulator accum;
152 ItemPointerData tid;
153 int work_mem;
154
155 /*
156 * bs_leader is only present when a parallel index build is performed, and
157 * only in the leader process.
158 */
159 GinLeader *bs_leader;
160
161 /* number of participating workers (including leader) */
162 int bs_num_workers;
163
164 /* used to pass information from workers to leader */
165 double bs_numtuples;
166 double bs_reltuples;
167
168 /*
169 * The sortstate is used by workers (including the leader). It has to be
170 * part of the build state, because that's the only thing passed to the
171 * build callback etc.
172 */
173 Tuplesortstate *bs_sortstate;
174
175 /*
176 * The sortstate used only within a single worker for the first merge pass
177 * happening there. In principle it doesn't need to be part of the build
178 * state and we could pass it around directly, but it's more convenient
179 * this way. And it's part of the build state, after all.
180 */
181 Tuplesortstate *bs_worker_sort;
183
184
185/* parallel index builds */
187 bool isconcurrent, int request);
193 Relation heap, Relation index);
195 GinBuildShared *ginshared,
196 Sharedsort *sharedsort,
197 Relation heap, Relation index,
198 int sortmem, bool progress);
199
202
203static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
204 Datum key, int16 typlen, bool typbyval,
206 Size *len);
207
208/*
209 * Adds array of item pointers to tuple's posting list, or
210 * creates posting tree and tuple pointing to tree in case
211 * of not enough space. Max size of tuple is defined in
212 * GinFormTuple(). Returns a new, modified index tuple.
213 * items[] must be in sorted order with no duplicates.
214 */
215static IndexTuple
219 GinStatsData *buildStats, Buffer buffer)
220{
222 Datum key;
223 GinNullCategory category;
224 IndexTuple res;
226 *oldItems;
227 int oldNPosting,
229 nwritten;
231
233
234 attnum = gintuple_get_attrnum(ginstate, old);
235 key = gintuple_get_key(ginstate, old, &category);
236
237 /* merge the old and new posting lists */
239
242 &newNPosting);
243
244 /* Compress the posting list, and try to a build tuple with room for it */
245 res = NULL;
247 if (nwritten == newNPosting)
248 {
249 res = GinFormTuple(ginstate, attnum, key, category,
250 (char *) compressedList,
253 false);
254 }
255
258
259 if (!res)
260 {
261 /* posting list would be too big, convert to posting tree */
263
264 /*
265 * Initialize posting tree with the old tuple's posting list. It's
266 * surely small enough to fit on one posting-tree page, and should
267 * already be in order with no duplicates.
268 */
270 oldItems,
272 buildStats,
273 buffer);
274
275 /* Now insert the TIDs-to-be-added into the posting tree */
277 items, nitem,
278 buildStats);
279
280 /* And build a new posting-tree-only result tuple */
281 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
283 }
285
286 return res;
287}
288
289/*
290 * Build a fresh leaf tuple, either posting-list or posting-tree format
291 * depending on whether the given items list will fit.
292 * items[] must be in sorted order with no duplicates.
293 *
294 * This is basically the same logic as in addItemPointersToLeafTuple,
295 * but working from slightly different input.
296 */
297static IndexTuple
301 GinStatsData *buildStats, Buffer buffer)
302{
303 IndexTuple res = NULL;
305 int nwritten;
306
307 /* try to build a posting list tuple with all the items */
309 if (nwritten == nitem)
310 {
311 res = GinFormTuple(ginstate, attnum, key, category,
312 (char *) compressedList,
314 nitem, false);
315 }
317
318 if (!res)
319 {
320 /* posting list would be too big, build posting tree */
322
323 /*
324 * Build posting-tree-only result tuple. We do this first so as to
325 * fail quickly if the key is too big.
326 */
327 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
328
329 /*
330 * Initialize a new posting tree with the TIDs.
331 */
332 postingRoot = createPostingTree(ginstate->index, items, nitem,
333 buildStats, buffer);
334
335 /* And save the root link in the result tuple */
337 }
338
339 return res;
340}
341
342/*
343 * Insert one or more heap TIDs associated with the given key value.
344 * This will either add a single key entry, or enlarge a pre-existing entry.
345 *
346 * During an index build, buildStats is non-null and the counters
347 * it contains should be incremented as needed.
348 */
349void
350ginEntryInsert(GinState *ginstate,
353 GinStatsData *buildStats)
354{
355 GinBtreeData btree;
357 GinBtreeStack *stack;
358 IndexTuple itup;
359 Page page;
360
361 insertdata.isDelete = false;
362
363 ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
364 btree.isBuild = (buildStats != NULL);
365
366 stack = ginFindLeafPage(&btree, false, false);
367 page = BufferGetPage(stack->buffer);
368
369 if (btree.findItem(&btree, stack))
370 {
371 /* found pre-existing entry */
372 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
373
374 if (GinIsPostingTree(itup))
375 {
376 /* add entries to existing posting tree */
378
379 /* release all stack */
381 freeGinBtreeStack(stack);
382
383 /* insert into posting tree */
385 items, nitem,
386 buildStats);
387 return;
388 }
389
392 /* modify an existing leaf entry */
393 itup = addItemPointersToLeafTuple(ginstate, itup,
394 items, nitem, buildStats, stack->buffer);
395
396 insertdata.isDelete = true;
397 }
398 else
399 {
402 /* no match, so construct a new leaf entry */
403 itup = buildFreshLeafTuple(ginstate, attnum, key, category,
404 items, nitem, buildStats, stack->buffer);
405
406 /*
407 * nEntries counts leaf tuples, so increment it only when we make a
408 * new one.
409 */
410 if (buildStats)
411 buildStats->nEntries++;
412 }
413
414 /* Insert the new or modified leaf tuple */
415 insertdata.entry = itup;
416 ginInsertValue(&btree, stack, &insertdata, buildStats);
417 pfree(itup);
418}
419
420/*
421 * Extract index entries for a single indexable item, and add them to the
422 * BuildAccumulator's state.
423 *
424 * This function is used only during initial index creation.
425 */
426static void
428 Datum value, bool isNull,
430{
431 Datum *entries;
432 GinNullCategory *categories;
433 int32 nentries;
435
437 entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
438 value, isNull,
439 &nentries, &categories);
441
443 entries, categories, nentries);
444
445 buildstate->indtuples += nentries;
446
448}
449
450static void
452 bool *isnull, bool tupleIsAlive, void *state)
453{
456 int i;
457
459
460 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
462 values[i], isnull[i], tid);
463
464 /* If we've maxed out our available memory, dump everything to the index */
465 if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
466 {
468 Datum key;
469 GinNullCategory category;
470 uint32 nlist;
472
473 ginBeginBAScan(&buildstate->accum);
474 while ((list = ginGetBAEntry(&buildstate->accum,
475 &attnum, &key, &category, &nlist)) != NULL)
476 {
477 /* there could be many entries, so be willing to abort here */
479 ginEntryInsert(&buildstate->ginstate, attnum, key, category,
480 list, nlist, &buildstate->buildStats);
481 }
482
484 ginInitBA(&buildstate->accum);
485 }
486
488}
489
490/*
491 * ginFlushBuildState
492 * Write all data from BuildAccumulator into the tuplesort.
493 *
494 * The number of TIDs written to the tuplesort at once is limited, to reduce
495 * the amount of memory needed when merging the intermediate results later.
496 * The leader will see up to two chunks per worker, so calculate the limit to
497 * not need more than MaxAllocSize overall.
498 *
499 * We don't need to worry about overflowing maintenance_work_mem. We can't
500 * build chunks larger than work_mem, and that limit was set so that workers
501 * produce sufficiently small chunks.
502 */
503static void
505{
507 Datum key;
508 GinNullCategory category;
509 uint32 nlist;
512 uint32 maxlen;
513
514 /* maximum number of TIDs per chunk (two chunks per worker) */
515 maxlen = MaxAllocSize / sizeof(ItemPointerData);
516 maxlen /= (2 * buildstate->bs_num_workers);
517
518 ginBeginBAScan(&buildstate->accum);
519 while ((list = ginGetBAEntry(&buildstate->accum,
520 &attnum, &key, &category, &nlist)) != NULL)
521 {
522 /* information about the key */
524
525 /* start of the chunk */
526 uint32 offset = 0;
527
528 /* split the entry into smaller chunk with up to maxlen items */
529 while (offset < nlist)
530 {
531 /* GIN tuple and tuple length */
532 GinTuple *tup;
533 Size tuplen;
534 uint32 len = Min(maxlen, nlist - offset);
535
536 /* there could be many entries, so be willing to abort here */
538
539 tup = _gin_build_tuple(attnum, category,
540 key, attr->attlen, attr->attbyval,
541 &list[offset], len,
542 &tuplen);
543
544 offset += len;
545
546 tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
547
548 pfree(tup);
549 }
550 }
551
553 ginInitBA(&buildstate->accum);
554}
555
556/*
557 * ginBuildCallbackParallel
558 * Callback for the parallel index build.
559 *
560 * This is similar to the serial build callback ginBuildCallback, but
561 * instead of writing the accumulated entries into the index, each worker
562 * writes them into a (local) tuplesort.
563 *
564 * The worker then sorts and combines these entries, before writing them
565 * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
566 * for the whole process).
567 */
568static void
570 bool *isnull, bool tupleIsAlive, void *state)
571{
574 int i;
575
577
578 /*
579 * if scan wrapped around - flush accumulated entries and start anew
580 *
581 * With parallel scans, we don't have a guarantee the scan does not start
582 * half-way through the relation (serial builds disable sync scans and
583 * always start from block 0, parallel scans require allow_sync=true).
584 *
585 * Building the posting lists assumes the TIDs are monotonic and never go
586 * back, and the wrap around would break that. We handle that by detecting
587 * the wraparound, and flushing all entries. This means we'll later see
588 * two separate entries with non-overlapping TID lists (which can be
589 * combined by merge sort).
590 *
591 * To detect a wraparound, we remember the last TID seen by each worker
592 * (for any key). If the next TID seen by the worker is lower, the scan
593 * must have wrapped around.
594 */
595 if (ItemPointerCompare(tid, &buildstate->tid) < 0)
597
598 /* remember the TID we're about to process */
599 buildstate->tid = *tid;
600
601 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
603 values[i], isnull[i], tid);
604
605 /*
606 * If we've maxed out our available memory, dump everything to the
607 * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
608 * the other half is used for the tuplesort.
609 */
610 if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
612
614}
615
617ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
618{
619 IndexBuildResult *result;
620 double reltuples;
626 Datum key;
627 GinNullCategory category;
628 uint32 nlist;
631
633 elog(ERROR, "index \"%s\" already contains data",
635
636 initGinState(&buildstate.ginstate, index);
637 buildstate.indtuples = 0;
638 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
639
640 /* Initialize fields for parallel build too. */
641 buildstate.bs_numtuples = 0;
642 buildstate.bs_reltuples = 0;
643 buildstate.bs_leader = NULL;
644 memset(&buildstate.tid, 0, sizeof(ItemPointerData));
645
646 /* initialize the meta page */
648
649 /* initialize the root page */
651
657
658
662
663 /* count the root as first entry page */
664 buildstate.buildStats.nEntryPages++;
665
666 /*
667 * create a temporary memory context that is used to hold data not yet
668 * dumped out to the index
669 */
671 "Gin build temporary context",
673
674 /*
675 * create a temporary memory context that is used for calling
676 * ginExtractEntries(), and can be reset after each tuple
677 */
679 "Gin build temporary context for user-defined function",
681
682 buildstate.accum.ginstate = &buildstate.ginstate;
683 ginInitBA(&buildstate.accum);
684
685 /* Report table scan phase started */
688
689 /*
690 * Attempt to launch parallel worker scan when required
691 *
692 * XXX plan_create_index_workers makes the number of workers dependent on
693 * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
694 * reasonable too, because we sort the data just like btree. It does
695 * ignore the memory used to accumulate data in memory (set by work_mem),
696 * but there is no way to communicate that to plan_create_index_workers.
697 */
698 if (indexInfo->ii_ParallelWorkers > 0)
699 _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
700 indexInfo->ii_ParallelWorkers);
701
702 /*
703 * If parallel build requested and at least one worker process was
704 * successfully launched, set up coordination state, wait for workers to
705 * complete. Then read all tuples from the shared tuplesort and insert
706 * them into the index.
707 *
708 * In serial mode, simply scan the table and build the index one index
709 * tuple at a time.
710 */
711 if (state->bs_leader)
712 {
714
716 coordinate->isWorker = false;
717 coordinate->nParticipants =
718 state->bs_leader->nparticipanttuplesorts;
719 coordinate->sharedsort = state->bs_leader->sharedsort;
720
721 /*
722 * Begin leader tuplesort.
723 *
724 * In cases where parallelism is involved, the leader receives the
725 * same share of maintenance_work_mem as a serial sort (it is
726 * generally treated in the same way as a serial sort once we return).
727 * Parallel worker Tuplesortstates will have received only a fraction
728 * of maintenance_work_mem, though.
729 *
730 * We rely on the lifetime of the Leader Tuplesortstate almost not
731 * overlapping with any worker Tuplesortstate's lifetime. There may
732 * be some small overlap, but that's okay because we rely on leader
733 * Tuplesortstate only allocating a small, fixed amount of memory
734 * here. When its tuplesort_performsort() is called (by our caller),
735 * and significant amounts of memory are likely to be used, all
736 * workers must have already freed almost all memory held by their
737 * Tuplesortstates (they are about to go away completely, too). The
738 * overall effect is that maintenance_work_mem always represents an
739 * absolute high watermark on the amount of memory used by a CREATE
740 * INDEX operation, regardless of the use of parallelism or any other
741 * factor.
742 */
743 state->bs_sortstate =
747
748 /* scan the relation in parallel and merge per-worker results */
749 reltuples = _gin_parallel_merge(state);
750
751 _gin_end_parallel(state->bs_leader, state);
752 }
753 else /* no parallel index build */
754 {
755 /*
756 * Do the heap scan. We disallow sync scan here because
757 * dataPlaceToPage prefers to receive tuples in TID order.
758 */
759 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
761
762 /* dump remaining entries to the index */
765 while ((list = ginGetBAEntry(&buildstate.accum,
766 &attnum, &key, &category, &nlist)) != NULL)
767 {
768 /* there could be many entries, so be willing to abort here */
770 ginEntryInsert(&buildstate.ginstate, attnum, key, category,
771 list, nlist, &buildstate.buildStats);
772 }
774 }
775
778
779 /*
780 * Update metapage stats
781 */
782 buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
783 ginUpdateStats(index, &buildstate.buildStats, true);
784
785 /*
786 * We didn't write WAL records as we built the index, so if WAL-logging is
787 * required, write all pages to the WAL now.
788 */
790 {
793 true);
794 }
795
796 /*
797 * Return statistics
798 */
800
801 result->heap_tuples = reltuples;
802 result->index_tuples = buildstate.indtuples;
803
804 return result;
805}
806
807/*
808 * ginbuildempty() -- build an empty gin index in the initialization fork
809 */
810void
812{
815
816 /* An empty GIN index has two pages. */
821
822 /* Initialize and xlog metabuffer and root buffer. */
831
832 /* Unlock and release the buffers. */
835}
836
837/*
838 * Insert index entries for a single indexable item during "normal"
839 * (non-fast-update) insertion
840 */
841static void
843 Datum value, bool isNull,
844 ItemPointer item)
845{
846 Datum *entries;
847 GinNullCategory *categories;
848 int32 i,
849 nentries;
850
851 entries = ginExtractEntries(ginstate, attnum, value, isNull,
852 &nentries, &categories);
853
854 for (i = 0; i < nentries; i++)
855 {
856 /* there could be many entries, so be willing to abort here */
858 ginEntryInsert(ginstate, attnum, entries[i], categories[i],
859 item, 1, NULL);
860 }
861}
862
863bool
864gininsert(Relation index, Datum *values, bool *isnull,
867 bool indexUnchanged,
868 IndexInfo *indexInfo)
869{
870 GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
873 int i;
874
875 /* Initialize GinState cache if first call in this statement */
876 if (ginstate == NULL)
877 {
879 ginstate = palloc_object(GinState);
880 initGinState(ginstate, index);
881 indexInfo->ii_AmCache = ginstate;
883 }
884
886 "Gin insert temporary context",
888
890
892 {
894
895 memset(&collector, 0, sizeof(GinTupleCollector));
896
897 for (i = 0; i < ginstate->origTupdesc->natts; i++)
899 (OffsetNumber) (i + 1),
900 values[i], isnull[i],
901 ht_ctid);
902
904 }
905 else
906 {
907 for (i = 0; i < ginstate->origTupdesc->natts; i++)
908 ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
909 values[i], isnull[i],
910 ht_ctid);
911 }
912
915
916 return false;
917}
918
919/*
920 * Create parallel context, and launch workers for leader.
921 *
922 * buildstate argument should be initialized (with the exception of the
923 * tuplesort states, which may later be created based on shared
924 * state initially set up here).
925 *
926 * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
927 *
928 * request is the target number of parallel worker processes to launch.
929 *
930 * Sets buildstate's GinLeader, which caller must use to shut down parallel
931 * mode by passing it to _gin_end_parallel() at the very end of its index
932 * build. If not even a single worker process can be launched, this is
933 * never set, and caller should proceed with a serial index build.
934 */
935static void
937 bool isconcurrent, int request)
938{
939 ParallelContext *pcxt;
940 int scantuplesortstates;
941 Snapshot snapshot;
944 GinBuildShared *ginshared;
945 Sharedsort *sharedsort;
947 WalUsage *walusage;
948 BufferUsage *bufferusage;
949 bool leaderparticipates = true;
950 int querylen;
951
952#ifdef DISABLE_LEADER_PARTICIPATION
953 leaderparticipates = false;
954#endif
955
956 /*
957 * Enter parallel mode, and create context for parallel build of gin index
958 */
960 Assert(request > 0);
961 pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
962 request);
963
964 scantuplesortstates = leaderparticipates ? request + 1 : request;
965
966 /*
967 * Prepare for scan of the base relation. In a normal index build, we use
968 * SnapshotAny because we must retrieve all tuples and do our own time
969 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
970 * concurrent build, we take a regular MVCC snapshot and index whatever's
971 * live according to that.
972 */
973 if (!isconcurrent)
974 snapshot = SnapshotAny;
975 else
977
978 /*
979 * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
980 */
983 estsort = tuplesort_estimate_shared(scantuplesortstates);
985
987
988 /*
989 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
990 * and PARALLEL_KEY_BUFFER_USAGE.
991 *
992 * If there are no extensions loaded that care, we could skip this. We
993 * have no way of knowing whether anyone's looking at pgWalUsage or
994 * pgBufferUsage, so do it unconditionally.
995 */
997 mul_size(sizeof(WalUsage), pcxt->nworkers));
1000 mul_size(sizeof(BufferUsage), pcxt->nworkers));
1002
1003 /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1005 {
1009 }
1010 else
1011 querylen = 0; /* keep compiler quiet */
1012
1013 /* Everyone's had a chance to ask for space, so now create the DSM */
1015
1016 /* If no DSM segment was available, back out (do serial build) */
1017 if (pcxt->seg == NULL)
1018 {
1019 if (IsMVCCSnapshot(snapshot))
1020 UnregisterSnapshot(snapshot);
1023 return;
1024 }
1025
1026 /* Store shared build state, for which we reserved space */
1027 ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
1028 /* Initialize immutable state */
1029 ginshared->heaprelid = RelationGetRelid(heap);
1030 ginshared->indexrelid = RelationGetRelid(index);
1031 ginshared->isconcurrent = isconcurrent;
1032 ginshared->scantuplesortstates = scantuplesortstates;
1033
1035 SpinLockInit(&ginshared->mutex);
1036
1037 /* Initialize mutable state */
1038 ginshared->nparticipantsdone = 0;
1039 ginshared->reltuples = 0.0;
1040 ginshared->indtuples = 0.0;
1041
1044 snapshot);
1045
1046 /*
1047 * Store shared tuplesort-private state, for which we reserved space.
1048 * Then, initialize opaque state using tuplesort routine.
1049 */
1050 sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1051 tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1052 pcxt->seg);
1053
1054 shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
1055 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1056
1057 /* Store query string for workers */
1059 {
1060 char *sharedquery;
1061
1062 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1065 }
1066
1067 /*
1068 * Allocate space for each worker's WalUsage and BufferUsage; no need to
1069 * initialize.
1070 */
1071 walusage = shm_toc_allocate(pcxt->toc,
1072 mul_size(sizeof(WalUsage), pcxt->nworkers));
1073 shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1074 bufferusage = shm_toc_allocate(pcxt->toc,
1075 mul_size(sizeof(BufferUsage), pcxt->nworkers));
1076 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
1077
1078 /* Launch workers, saving status for leader/caller */
1080 ginleader->pcxt = pcxt;
1081 ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1083 ginleader->nparticipanttuplesorts++;
1084 ginleader->ginshared = ginshared;
1085 ginleader->sharedsort = sharedsort;
1086 ginleader->snapshot = snapshot;
1087 ginleader->walusage = walusage;
1088 ginleader->bufferusage = bufferusage;
1089
1090 /* If no workers were successfully launched, back out (do serial build) */
1091 if (pcxt->nworkers_launched == 0)
1092 {
1094 return;
1095 }
1096
1097 /* Save leader state now that it's clear build will be parallel */
1098 buildstate->bs_leader = ginleader;
1099
1100 /* Join heap scan ourselves */
1103
1104 /*
1105 * Caller needs to wait for all launched workers when we return. Make
1106 * sure that the failure-to-start case will not hang forever.
1107 */
1109}
1110
1111/*
1112 * Shut down workers, destroy parallel context, and end parallel mode.
1113 */
1114static void
1116{
1117 int i;
1118
1119 /* Shutdown worker processes */
1121
1122 /*
1123 * Next, accumulate WAL usage. (This must wait for the workers to finish,
1124 * or we might get incomplete data.)
1125 */
1126 for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
1127 InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
1128
1129 /* Free last reference to MVCC snapshot, if one was used */
1130 if (IsMVCCSnapshot(ginleader->snapshot))
1131 UnregisterSnapshot(ginleader->snapshot);
1134}
1135
1136/*
1137 * Within leader, wait for end of heap scan.
1138 *
1139 * When called, parallel heap scan started by _gin_begin_parallel() will
1140 * already be underway within worker processes (when leader participates
1141 * as a worker, we should end up here just as workers are finishing).
1142 *
1143 * Returns the total number of heap tuples scanned.
1144 */
1145static double
1147{
1148 GinBuildShared *ginshared = state->bs_leader->ginshared;
1149 int nparticipanttuplesorts;
1150
1151 nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
1152 for (;;)
1153 {
1154 SpinLockAcquire(&ginshared->mutex);
1155 if (ginshared->nparticipantsdone == nparticipanttuplesorts)
1156 {
1157 /* copy the data into leader state */
1158 state->bs_reltuples = ginshared->reltuples;
1159 state->bs_numtuples = ginshared->indtuples;
1160
1161 SpinLockRelease(&ginshared->mutex);
1162 break;
1163 }
1164 SpinLockRelease(&ginshared->mutex);
1165
1168 }
1169
1171
1172 return state->bs_reltuples;
1173}
1174
1175/*
1176 * Buffer used to accumulate TIDs from multiple GinTuples for the same key
1177 * (we read these from the tuplesort, sorted by the key).
1178 *
1179 * This is similar to BuildAccumulator in that it's used to collect TIDs
1180 * in memory before inserting them into the index, but it's much simpler
1181 * as it only deals with a single index key at a time.
1182 *
1183 * When adding TIDs to the buffer, we make sure to keep them sorted, both
1184 * during the initial table scan (and detecting when the scan wraps around),
1185 * and during merging (where we do mergesort).
1186 */
1187typedef struct GinBuffer
1188{
1191 Datum key; /* 0 if no key (and keylen == 0) */
1192 Size keylen; /* number of bytes (not typlen) */
1193
1194 /* type info */
1195 int16 typlen;
1196 bool typbyval;
1197
1198 /* Number of TIDs to collect before attempt to write some out. */
1199 int maxitems;
1200
1201 /* array of TID values */
1202 int nitems; /* number of TIDs currently stored in the buffer */
1203 int nfrozen; /* leading TIDs known not to change (flushable) */
1204 SortSupport ssup; /* for sorting/comparing keys */
1206} GinBuffer;
1207
1208/*
1209 * Check that TID array contains valid values, and that it's sorted (if we
1210 * expect it to be).
1211 */
1212static void
1214{
1215#ifdef USE_ASSERT_CHECKING
1216 /* we should not have a buffer with no TIDs to sort */
1217 Assert(buffer->items != NULL);
1218 Assert(buffer->nitems > 0);
1219
1220 for (int i = 0; i < buffer->nitems; i++)
1221 {
1222 Assert(ItemPointerIsValid(&buffer->items[i]));
1223
1224 /* don't check ordering for the first TID item */
1225 if (i == 0)
1226 continue;
1227
		/* strict ascending order - also implies there are no duplicate TIDs */
1228 Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
1229 }
1230#endif
1231}
1232
1233/*
1234 * GinBuffer checks
1235 *
1236 * Make sure the nitems/items fields are consistent (either the array is empty
1237 * or not empty, the fields need to agree). If there are items, check ordering.
1238 *
1239 * This is a no-op unless assertions are enabled (USE_ASSERT_CHECKING).
1238 */
1239static void
1241{
1242#ifdef USE_ASSERT_CHECKING
1243 /* if we have any items, the array must exist */
1244 Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
1245
1246 /*
1247 * The buffer may be empty, in which case we must not call the check of
1248 * item pointers, because that assumes non-emptiness.
1249 */
1250 if (buffer->nitems == 0)
1251 return;
1252
1253 /* Make sure the item pointers are valid and sorted. */
	/*
	 * NOTE(review): excerpt omits line 1254 here - presumably the call to
	 * AssertCheckItemPointers(buffer); confirm against the full source.
	 */
1255#endif
1256}
1257
1258/*
1259 * GinBufferInit
1260 * Initialize buffer to store tuples for a GIN index.
1261 *
1262 * Initialize the buffer used to accumulate TID for a single key at a time
1263 * (we process the data sorted), so we know when we received all data for
1264 * a given key.
1265 *
1266 * Initializes sort support procedures for all index attributes.
1267 */
1268static GinBuffer *
1270{
1272 int i,
1273 nKeys;
1275
1276 /*
1277 * How many items can we fit into the memory limit? We don't want to end
1278 * with too many TIDs. and 64kB seems more than enough. But maybe this
1279 * should be tied to maintenance_work_mem or something like that?
1280 */
1281 buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
1282
1284
1285 buffer->ssup = palloc0_array(SortSupportData, nKeys);
1286
1287 /*
1288 * Lookup ordering operator for the index key data type, and initialize
1289 * the sort support function.
1290 */
1291 for (i = 0; i < nKeys; i++)
1292 {
1293 Oid cmpFunc;
1294 SortSupport sortKey = &buffer->ssup[i];
1296
1297 sortKey->ssup_cxt = CurrentMemoryContext;
1298 sortKey->ssup_collation = index->rd_indcollation[i];
1299
		/* fall back to the default collation when the index specifies none */
1300 if (!OidIsValid(sortKey->ssup_collation))
1301 sortKey->ssup_collation = DEFAULT_COLLATION_OID;
1302
1303 sortKey->ssup_nulls_first = false;
1304 sortKey->ssup_attno = i + 1;
		/*
		 * No abbreviation - NOTE(review): presumably because full comparisons
		 * are needed to detect equal keys; confirm rationale.
		 */
1305 sortKey->abbreviate = false;
1306
1307 Assert(sortKey->ssup_attno != 0);
1308
1309 /*
1310 * If the compare proc isn't specified in the opclass definition, look
1311 * up the index key type's default btree comparator.
1312 */
1314 if (cmpFunc == InvalidOid)
1315 {
1316 TypeCacheEntry *typentry;
1317
1318 typentry = lookup_type_cache(att->atttypid,
1320 if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
1321 ereport(ERROR,
1323 errmsg("could not identify a comparison function for type %s",
1324 format_type_be(att->atttypid))));
1325
1326 cmpFunc = typentry->cmp_proc_finfo.fn_oid;
1327 }
1328
1330 }
1331
1332 return buffer;
1333}
1334
1335/* Is the buffer empty, i.e. has no TID values in the array? */
/* (an empty buffer holds no key either - see GinBufferReset/GinBufferStoreTuple) */
1336static bool
1338{
1339 return (buffer->nitems == 0);
1340}
1341
1342/*
1343 * GinBufferKeyEquals
1344 * Can the buffer store TIDs for the provided GIN tuple (same key)?
1345 *
1346 * Compare if the tuple matches the already accumulated data in the GIN
1347 * buffer. Compare scalar fields first, before the actual key.
1348 *
1349 * Returns true if the key matches, and the TID belongs to the buffer, or
1350 * false if the key does not match.
1351 */
1352static bool
1354{
1355 int r;
1356 Datum tupkey;
1357
1358 AssertCheckGinBuffer(buffer);
1359
1360 if (tup->attrnum != buffer->attnum)
1361 return false;
1362
1363 /* same attribute should have the same type info */
1364 Assert(tup->typbyval == buffer->typbyval);
1365 Assert(tup->typlen == buffer->typlen);
1366
1367 if (tup->category != buffer->category)
1368 return false;
1369
1370 /*
1371 * For NULL/empty keys, this means equality, for normal keys we need to
1372 * compare the actual key value.
1373 */
1374 if (buffer->category != GIN_CAT_NORM_KEY)
1375 return true;
1376
1377 /*
1378 * For the tuple, get either the first sizeof(Datum) bytes for byval
1379 * types, or a pointer to the beginning of the data array.
1380 */
1381 tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
1382
	/* collation-aware full comparison, using the attribute's sort support */
1383 r = ApplySortComparator(buffer->key, false,
1384 tupkey, false,
1385 &buffer->ssup[buffer->attnum - 1]);
1386
1387 return (r == 0);
1388}
1389
1390/*
1391 * GinBufferShouldTrim
1392 * Should we trim the list of item pointers?
1393 *
1394 * By trimming we understand writing out and removing the tuple IDs that
1395 * we know can't change by future merges. We can deduce the TID up to which
1396 * this is guaranteed from the "first" TID in each GIN tuple, which provides
1397 * a "horizon" (for a given key) thanks to the sort.
1398 *
1399 * We don't want to do this too often - compressing longer TID lists is more
1400 * efficient. But we also don't want to accumulate too many TIDs, for two
1401 * reasons. First, it consumes memory and we might exceed maintenance_work_mem
1402 * (or whatever limit applies), even if that's unlikely because TIDs are very
1403 * small so we can fit a lot of them. Second, and more importantly, long TID
1404 * lists are an issue if the scan wraps around, because a key may get a very
1405 * wide list (with min/max TID for that key), forcing "full" mergesorts for
1406 * every list merged into it (instead of the efficient append).
1407 *
1408 * So we look at two things when deciding if to trim - if the resulting list
1409 * (after adding TIDs from the new tuple) would be too long, and if there is
1410 * enough TIDs to trim (with values less than "first" TID from the new tuple),
1411 * we do the trim. By enough we mean at least 1024 TIDs (mostly an arbitrary
1412 * number).
1413 *
1414 * We try freezing TIDs at the beginning of the list first, before attempting
1415 * to trim the buffer. This may allow trimming the data earlier, reducing the
1416 * memory usage and excluding it from the mergesort.
1417 */
1418static bool
1420{
1421 /*
1422 * Check if the last TID in the current list is frozen. This is the case
1423 * when merging non-overlapping lists, e.g. in each parallel worker.
1424 */
1425 if ((buffer->nitems > 0) &&
1426 (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
1427 GinTupleGetFirst(tup)) == 0))
1428 buffer->nfrozen = buffer->nitems;
1429
1430 /*
1431 * Now find the last TID we know to be frozen, i.e. the last TID right
1432 * before the new GIN tuple.
1433 *
1434 * Start with the first not-yet-frozen tuple, and walk until we find the
1435 * first TID that's higher. If we already know the whole list is frozen
1436 * (i.e. nfrozen == nitems), this does nothing.
1437 *
1438 * XXX This might do a binary search for sufficiently long lists, but it
1439 * does not seem worth the complexity. Overlapping lists should be rare,
1440 * TID comparisons are cheap, and we should quickly freeze most of
1441 * the list.
1442 */
1443 for (int i = buffer->nfrozen; i < buffer->nitems; i++)
1444 {
1445 /* Is the TID after the first TID of the new tuple? Can't freeze. */
1446 if (ItemPointerCompare(&buffer->items[i],
1447 GinTupleGetFirst(tup)) > 0)
1448 break;
1449
1450 buffer->nfrozen++;
1451 }
1452
1453 /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
1454 if (buffer->nfrozen < 1024)
1455 return false;
1456
1457 /* no need to trim if we have not hit the memory limit yet */
1458 if ((buffer->nitems + tup->nitems) < buffer->maxitems)
1459 return false;
1460
1461 /*
1462 * OK, we have enough frozen TIDs to flush, and we have hit the memory
1463 * limit, so it's time to write it out.
1464 */
1465 return true;
1466}
1467
1468/*
1469 * GinBufferStoreTuple
1470 * Add data (especially TID list) from a GIN tuple to the buffer.
1471 *
1472 * The buffer is expected to be empty (in which case it's initialized), or
1473 * having the same key. The TID values from the tuple are combined with the
1474 * stored values using a merge sort.
1475 *
1476 * The tuples (for the same key) are expected to be sorted by first TID. But
1477 * this does not guarantee the lists do not overlap, especially in the leader,
1478 * because the workers process interleaving data. There should be no overlaps
1479 * in a single worker - it could happen when the parallel scan wraps around,
1480 * but we detect that and flush the data (see ginBuildCallbackParallel).
1481 *
1482 * By sorting the GinTuple not only by key, but also by the first TID, we make
1483 * it less likely the lists will overlap during merge. We merge them using
1484 * mergesort, but it's cheaper to just append one list to the other.
1485 *
1486 * How often can the lists overlap? There should be no overlaps in workers,
1487 * and in the leader we can see overlaps between lists built by different
1488 * workers. But the workers merge the items as much as possible, so there
1489 * should not be too many.
1490 */
1491static void
1493{
1495 Datum key;
1496
1497 AssertCheckGinBuffer(buffer);
1498
	/*
	 * NOTE(review): excerpt omits lines 1499-1501 here, which presumably
	 * extract "key" and the decompressed "items" TID array from the GinTuple
	 * - confirm against the full source.
	 */
1501
1502 /* if the buffer is empty, set the fields (and copy the key) */
1503 if (GinBufferIsEmpty(buffer))
1504 {
1505 buffer->category = tup->category;
1506 buffer->keylen = tup->keylen;
1507 buffer->attrnum = tup->attrnum;
1508
1509 buffer->typlen = tup->typlen;
1510 buffer->typbyval = tup->typbyval;
1511
1512 if (tup->category == GIN_CAT_NORM_KEY)
1513 buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
1514 else
1515 buffer->key = (Datum) 0;
1516 }
1517
1518 /* add the new TIDs into the buffer, combine using merge-sort */
1519 {
1520 int nnew;
1521 ItemPointer new;
1522
1523 /*
1524 * Resize the array - we do this first, because we'll dereference the
1525 * first unfrozen TID, which would fail if the array is NULL. We'll
1526 * still pass 0 as number of elements in that array though.
1527 */
1528 if (buffer->items == NULL)
1529 buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1530 else
1531 buffer->items = repalloc(buffer->items,
1532 (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1533
		/* merge only the unfrozen part - the frozen prefix cannot change */
1534 new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
1535 (buffer->nitems - buffer->nfrozen), /* num of unfrozen */
1536 items, tup->nitems, &nnew);
1537
1538 Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
1539
1540 memcpy(&buffer->items[buffer->nfrozen], new,
1541 nnew * sizeof(ItemPointerData));
1542
1543 pfree(new);
1544
1545 buffer->nitems += tup->nitems;
1546
1548 }
1549
1550 /* free the decompressed TID list */
1551 pfree(items);
1552}
1553
1554/*
1555 * GinBufferReset
1556 * Reset the buffer into a state as if it contains no data.
1557 */
1558static void
1560{
1561 Assert(!GinBufferIsEmpty(buffer));
1562
1563 /* release byref values, do nothing for by-val ones */
1564 if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1565 pfree(DatumGetPointer(buffer->key));
1566
1567 /*
1568 * Not required, but makes it more likely to trigger NULL dereference if
1569 * using the value incorrectly, etc.
1570 */
1571 buffer->key = (Datum) 0;
1572
	/* note: the items array itself is kept allocated, for reuse by the next key */
1573 buffer->attnum = 0;
1574 buffer->category = 0;
1575 buffer->keylen = 0;
1576 buffer->nitems = 0;
1577 buffer->nfrozen = 0;
1578
1579 buffer->typlen = 0;
1580 buffer->typbyval = 0;
1581}
1582
1583/*
1584 * GinBufferTrim
1585 * Discard the "frozen" part of the TID list (which should have been
1586 * written to disk/index before this call).
1587 */
1588static void
1589GinBufferTrim(GinBuffer *buffer)
1590{
1591 Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
1592
1593 memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
1594 sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
1595
1596 buffer->nitems -= buffer->nfrozen;
1597 buffer->nfrozen = 0;
1598}
1599
1600/*
1601 * GinBufferFree
1602 * Release memory associated with the GinBuffer (including TID array).
1603 */
1604static void
1605GinBufferFree(GinBuffer *buffer)
1606{
1607 if (buffer->items)
1608 pfree(buffer->items);
1609
1610 /* release byref values, do nothing for by-val ones */
1611 if (!GinBufferIsEmpty(buffer) &&
1612 (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1613 pfree(DatumGetPointer(buffer->key));
1614
1615 pfree(buffer);
1616}
1617
1618/*
1619 * GinBufferCanAddKey
1620 * Check if a given GIN tuple can be added to the current buffer.
1621 *
1622 * Returns true if the buffer is either empty or for the same index key.
1623 */
1624static bool
1626{
1627 /* empty buffer can accept data for any key */
	/* (GinBufferStoreTuple then initializes the key from the first tuple stored) */
1628 if (GinBufferIsEmpty(buffer))
1629 return true;
1630
1631 /* otherwise just data for the same key */
1632 return GinBufferKeyEquals(buffer, tup);
1633}
1634
1635/*
1636 * Within leader, wait for end of heap scan and merge per-worker results.
1637 *
1638 * After waiting for all workers to finish, merge the per-worker results into
1639 * the complete index. The results from each worker arrive through a shared
1640 * tuplesort, sorted by the index key; entries for the same key are
1641 * accumulated in a GinBuffer and inserted into the index at once.
1642 * (NOTE(review): the original comment mentioned "page ranges"/"summaries",
1643 * which matches BRIN, not the GIN code below - reworded; confirm.)
1644 *
1645 * Returns the total number of heap tuples scanned.
1645 */
1646static double
1648{
1649 GinTuple *tup;
1650 Size tuplen;
1651 double reltuples = 0;
1652 GinBuffer *buffer;
1653
1654 /* GIN tuples from workers, merged by leader */
1655 double numtuples = 0;
1656
1657 /* wait for workers to scan table and produce partial results */
1658 reltuples = _gin_parallel_heapscan(state);
1659
1660 /* Execute the sort */
1663
1664 /* do the actual sort in the leader */
1665 tuplesort_performsort(state->bs_sortstate);
1666
1667 /*
1668 * Initialize buffer to combine entries for the same key.
1669 *
1670 * The leader is allowed to use the whole maintenance_work_mem buffer to
1671 * combine data. The parallel workers already completed.
1672 */
1673 buffer = GinBufferInit(state->ginstate.index);
1674
1675 /*
1676 * Set the progress target for the next phase. Reset the block number
1677 * values set by table_index_build_scan
1678 */
1679 {
1680 const int progress_index[] = {
1685 };
1686 const int64 progress_vals[] = {
1688 state->bs_numtuples,
1689 0, 0
1690 };
1691
1693 }
1694
1695 /*
1696 * Read the GIN tuples from the shared tuplesort, sorted by category and
1697 * key. That probably gives us order matching how data is organized in the
1698 * index.
1699 *
1700 * We don't insert the GIN tuples right away, but instead accumulate as
1701 * many TIDs for the same key as possible, and then insert that at once.
1702 * This way we don't need to decompress/recompress the posting lists, etc.
1703 */
1704 while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
1705 {
1707
1709
1710 /*
1711 * If the buffer can accept the new GIN tuple, just store it there and
1712 * we're done. If it's a different key (or maybe too much data) flush
1713 * the current contents into the index first.
1714 */
1715 if (!GinBufferCanAddKey(buffer, tup))
1716 {
1717 /*
1718 * Buffer is not empty and it's storing a different key - flush
1719 * the data into the insert, and start a new entry for current
1720 * GinTuple.
1721 */
1723
1725
1726 ginEntryInsert(&state->ginstate,
1727 buffer->attnum, buffer->key, buffer->category,
1728 buffer->items, buffer->nitems, &state->buildStats);
1729
1731 MemoryContextReset(state->tmpCtx);
1732
1733 /* discard the existing data */
1734 GinBufferReset(buffer);
1735 }
1736
1737 /*
1738 * We're about to add a GIN tuple to the buffer - check the memory
1739 * limit first, and maybe write out some of the data into the index
1740 * first, if needed (and possible). We only flush the part of the TID
1741 * list that we know won't change, and only if there's enough data for
1742 * compression to work well.
1743 */
1744 if (GinBufferShouldTrim(buffer, tup))
1745 {
1746 Assert(buffer->nfrozen > 0);
1747
1748 /*
1749 * Same key, but the buffer hit the memory limit - write out the
1750 * frozen (immutable) part of the TID list, then trim it from the
1751 * buffer below.
1752 */
1754
1756
1757 ginEntryInsert(&state->ginstate,
1758 buffer->attnum, buffer->key, buffer->category,
1759 buffer->items, buffer->nfrozen, &state->buildStats);
1760
1762 MemoryContextReset(state->tmpCtx);
1763
1764 /* truncate the data we've just discarded */
1765 GinBufferTrim(buffer);
1766 }
1767
1768 /*
1769 * Remember data for the current tuple (either remember the new key,
1770 * or append it to the existing data).
1771 */
1772 GinBufferStoreTuple(buffer, tup);
1773
1774 /* Report progress */
1776 ++numtuples);
1777 }
1778
1779 /* flush data remaining in the buffer (for the last key) */
1780 if (!GinBufferIsEmpty(buffer))
1781 {
1783
1784 ginEntryInsert(&state->ginstate,
1785 buffer->attnum, buffer->key, buffer->category,
1786 buffer->items, buffer->nitems, &state->buildStats);
1787
1788 /* discard the existing data */
1789 GinBufferReset(buffer);
1790
1791 /* Report progress */
1793 ++numtuples);
1794 }
1795
1796 /* release all the memory */
1797 GinBufferFree(buffer);
1798
1799 tuplesort_end(state->bs_sortstate);
1800
1801 return reltuples;
1802}
1803
1804/*
1805 * Returns size of shared memory required to store state for a parallel
1806 * gin index build based on the snapshot its parallel scan will use.
1807 *
1808 * The layout is GinBuildShared followed by the parallel table scan
1809 * descriptor (see ParallelTableScanFromGinBuildShared).
1807 */
1808static Size
1810{
1811 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1812 return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
1813 table_parallelscan_estimate(heap, snapshot));
1814}
1815
1816/*
1817 * Within leader, participate as a parallel worker.
1818 */
1819static void
1821{
1822 GinLeader *ginleader = buildstate->bs_leader;
1823 int sortmem;
1824
1825 /*
1826 * Might as well use reliable figure when doling out maintenance_work_mem
1827 * (when requested number of workers were not launched, this will be
1828 * somewhat higher than it is for other workers).
1829 */
	/*
	 * NOTE(review): excerpt omits line 1830 here, which presumably computes
	 * sortmem from maintenance_work_mem / nparticipanttuplesorts - confirm.
	 */
1831
1832 /* Perform work common to all participants */
1834 ginleader->sharedsort, heap, index,
1835 sortmem, true);
1836}
1837
1838/*
1839 * _gin_process_worker_data
1840 * First phase of the key merging, happening in the worker.
1841 *
1842 * Depending on the number of distinct keys, the TID lists produced by the
1843 * callback may be very short (due to frequent evictions in the callback).
1844 * But combining many tiny lists is expensive, so we try to do as much as
1845 * possible in the workers and only then pass the results to the leader.
1846 *
1847 * We read the tuples sorted by the key, and merge them into larger lists.
1848 * At the moment there's no memory limit, so this will just produce one
1849 * huge (sorted) list per key in each worker. Which means the leader will
1850 * do a very limited number of mergesorts, which is good.
1851 */
1852static void
1854 bool progress)
1855{
1856 GinTuple *tup;
1857 Size tuplen;
1858
1859 GinBuffer *buffer;
1860
1861 /*
1862 * Initialize buffer to combine entries for the same key.
1863 *
1864 * The workers are limited to the same amount of memory as during the sort
1865 * in ginBuildCallbackParallel. But this probably should be the 32MB used
1866 * during planning, just like there.
1867 */
1868 buffer = GinBufferInit(state->ginstate.index);
1869
1870 /* sort the raw per-worker data */
1871 if (progress)
1874
1875 tuplesort_performsort(state->bs_worker_sort);
1876
1877 /* reset the number of GIN tuples produced by this worker */
1878 state->bs_numtuples = 0;
1879
1880 if (progress)
1883
1884 /*
1885 * Read the GIN tuples from the shared tuplesort, sorted by the key, and
1886 * merge them into larger chunks for the leader to combine.
1887 */
1888 while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
1889 {
1890
1892
1893 /*
1894 * If the buffer can accept the new GIN tuple, just store it there and
1895 * we're done. If it's a different key (or maybe too much data) flush
1896 * the current contents into the index first.
1897 */
1898 if (!GinBufferCanAddKey(buffer, tup))
1899 {
1900 GinTuple *ntup;
1901 Size ntuplen;
1902
1903 /*
1904 * Buffer is not empty and it's storing a different key - flush
1905 * the data into the insert, and start a new entry for current
1906 * GinTuple.
1907 */
1909
1910 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1911 buffer->key, buffer->typlen, buffer->typbyval,
1912 buffer->items, buffer->nitems, &ntuplen);
1913
1914 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1915 state->bs_numtuples++;
1916
1917 pfree(ntup);
1918
1919 /* discard the existing data */
1920 GinBufferReset(buffer);
1921 }
1922
1923 /*
1924 * We're about to add a GIN tuple to the buffer - check the memory
1925 * limit first, and maybe write out some of the data into the index
1926 * first, if needed (and possible). We only flush the part of the TID
1927 * list that we know won't change, and only if there's enough data for
1928 * compression to work well.
1929 */
1930 if (GinBufferShouldTrim(buffer, tup))
1931 {
1932 GinTuple *ntup;
1933 Size ntuplen;
1934
1935 Assert(buffer->nfrozen > 0);
1936
1937 /*
1938 * Same key, but the buffer hit the memory limit - write out the
1939 * frozen (immutable) part of the TID list, then trim it from the
1940 * buffer below.
1941 */
1943
1944 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1945 buffer->key, buffer->typlen, buffer->typbyval,
1946 buffer->items, buffer->nfrozen, &ntuplen);
1947
			/*
			 * NOTE(review): bs_numtuples is not incremented here, unlike the
			 * flush branch above - confirm that's intended.
			 */
1948 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1949
1950 pfree(ntup);
1951
1952 /* truncate the data we've just discarded */
1953 GinBufferTrim(buffer);
1954 }
1955
1956 /*
1957 * Remember data for the current tuple (either remember the new key,
1958 * or append it to the existing data).
1959 */
1960 GinBufferStoreTuple(buffer, tup);
1961 }
1962
1963 /* flush data remaining in the buffer (for the last key) */
1964 if (!GinBufferIsEmpty(buffer))
1965 {
1966 GinTuple *ntup;
1967 Size ntuplen;
1968
1970
1971 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1972 buffer->key, buffer->typlen, buffer->typbyval,
1973 buffer->items, buffer->nitems, &ntuplen);
1974
1975 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1976 state->bs_numtuples++;
1977
1978 pfree(ntup);
1979
1980 /* discard the existing data */
1981 GinBufferReset(buffer);
1982 }
1983
1984 /* release all the memory */
1985 GinBufferFree(buffer);
1986
1988}
1989
1990/*
1991 * Perform a worker's portion of a parallel GIN index build sort.
1992 *
1993 * This generates a tuplesort for the worker portion of the table.
1994 *
1995 * sortmem is the amount of working memory to use within each worker,
1996 * expressed in KBs.
1997 *
1998 * When this returns, workers are done, and need only release resources.
1999 *
2000 * Before feeding data into a shared tuplesort (for the leader process),
2001 * the workers process data in two phases.
2002 *
2003 * 1) A worker reads a portion of rows from the table, accumulates entries
2004 * in memory, and flushes them into a private tuplesort (e.g. because of
2005 * using too much memory).
2006 *
2007 * 2) The private tuplesort gets sorted (by key and TID), the worker reads
2008 * the data again, and combines the entries as much as possible. This has
2009 * to happen eventually, and this way it's done in workers in parallel.
2010 *
2011 * Finally, the combined entries are written into the shared tuplesort, so
2012 * that the leader can process them.
2013 *
2014 * How well this works (compared to just writing entries into the shared
2015 * tuplesort) depends on the data set. For large tables with many distinct
2016 * keys this helps a lot. With many distinct keys it's likely the buffers has
2017 * to be flushed often, generating many entries with the same key and short
2018 * TID lists. These entries need to be sorted and merged at some point,
2019 * before writing them to the index. The merging is quite expensive, it can
2020 * easily be ~50% of a serial build, and doing as much of it in the workers
2021 * means it's parallelized. The leader still has to merge results from the
2022 * workers, but it's much more efficient to merge few large entries than
2023 * many tiny ones.
2024 *
2025 * This also reduces the amount of data the workers pass to the leader through
2026 * the shared tuplesort. OTOH the workers need more space for the private sort,
2027 * possibly up to 2x of the data, if no entries be merged in a worker. But this
2028 * is very unlikely, and the only consequence is inefficiency, so we ignore it.
2029 */
2030static void
2032 GinBuildShared *ginshared, Sharedsort *sharedsort,
2033 Relation heap, Relation index,
2034 int sortmem, bool progress)
2035{
2037 TableScanDesc scan;
2038 double reltuples;
2039 IndexInfo *indexInfo;
2040
2041 /* Initialize local tuplesort coordination state */
2043 coordinate->isWorker = true;
2044 coordinate->nParticipants = -1;
2045 coordinate->sharedsort = sharedsort;
2046
2047 /* remember how much space is allowed for the accumulated entries */
	/* (half the budget each, for the two tuplesorts created below) */
2048 state->work_mem = (sortmem / 2);
2049
2050 /* remember how many workers participate in the build */
2051 state->bs_num_workers = ginshared->scantuplesortstates;
2052
2053 /* Begin "partial" tuplesort */
2054 state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
2055 state->work_mem,
2056 coordinate,
2058
2059 /* Local per-worker sort of raw-data */
2060 state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
2061 state->work_mem,
2062 NULL,
2064
2065 /* Join parallel scan */
2066 indexInfo = BuildIndexInfo(index);
2067 indexInfo->ii_Concurrent = ginshared->isconcurrent;
2068
2069 scan = table_beginscan_parallel(heap,
2071
2072 reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
2074
2075 /* write remaining accumulated entries */
2077
2078 /*
2079 * Do the first phase of in-worker processing - sort the data produced by
2080 * the callback, and combine them into much larger chunks and place that
2081 * into the shared tuplestore for leader to process.
2082 */
2083 _gin_process_worker_data(state, state->bs_worker_sort, progress);
2084
2085 /* sort the GIN tuples built by this worker */
2086 tuplesort_performsort(state->bs_sortstate);
2087
2088 state->bs_reltuples += reltuples;
2089
2090 /*
2091 * Done. Record ambuild statistics.
2092 */
2093 SpinLockAcquire(&ginshared->mutex);
2094 ginshared->nparticipantsdone++;
2095 ginshared->reltuples += state->bs_reltuples;
2096 ginshared->indtuples += state->bs_numtuples;
2097 SpinLockRelease(&ginshared->mutex);
2098
2099 /* Notify leader */
2101
2102 tuplesort_end(state->bs_sortstate);
2103}
2104
2105/*
2106 * Perform work within a launched parallel process.
2107 *
2108 * Entry point for parallel GIN build worker processes: attaches to the
2109 * shared state set up by the leader, runs the scan-and-build step, and
2110 * reports WAL/buffer usage back through the shared TOC.
2107 */
2108void
2110{
2111 char *sharedquery;
2112 GinBuildShared *ginshared;
2113 Sharedsort *sharedsort;
2115 Relation heapRel;
2116 Relation indexRel;
2119 WalUsage *walusage;
2120 BufferUsage *bufferusage;
2121 int sortmem;
2122
2123 /*
2124 * The only possible status flag that can be set to the parallel worker is
2125 * PROC_IN_SAFE_IC.
2126 */
2127 Assert((MyProc->statusFlags == 0) ||
2129
2130 /* Set debug_query_string for individual workers first */
2133
2134 /* Report the query string from leader */
2136
2137 /* Look up gin shared state */
2138 ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
2139
2140 /* Open relations using lock modes known to be obtained by index.c */
2141 if (!ginshared->isconcurrent)
2142 {
2145 }
2146 else
2147 {
2150 }
2151
2152 /* Open relations within worker */
2153 heapRel = table_open(ginshared->heaprelid, heapLockmode);
2154 indexRel = index_open(ginshared->indexrelid, indexLockmode);
2155
2156 /* initialize the GIN build state */
2157 initGinState(&buildstate.ginstate, indexRel);
2158 buildstate.indtuples = 0;
2159 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
2160 memset(&buildstate.tid, 0, sizeof(ItemPointerData)); /* zeroed TID = invalid */
2161
2162 /*
2163 * create a temporary memory context that is used to hold data not yet
2164 * dumped out to the index
2165 */
2167 "Gin build temporary context",
2169
2170 /*
2171 * create a temporary memory context that is used for calling
2172 * ginExtractEntries(), and can be reset after each tuple
2173 */
2175 "Gin build temporary context for user-defined function",
2177
2178 buildstate.accum.ginstate = &buildstate.ginstate;
2179 ginInitBA(&buildstate.accum);
2180
2181
2182 /* Look up shared state private to tuplesort.c */
2183 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2184 tuplesort_attach_shared(sharedsort, seg);
2185
2186 /* Prepare to track buffer usage during parallel execution */
2188
2189 /*
2190 * Might as well use reliable figure when doling out maintenance_work_mem
2191 * (when requested number of workers were not launched, this will be
2192 * somewhat higher than it is for other workers).
2193 */
2195
2196 _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
2197 heapRel, indexRel, sortmem, false);
2198
2199 /* Report WAL/buffer usage during parallel execution */
2200 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2201 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2203 &walusage[ParallelWorkerNumber]);
2204
2205 index_close(indexRel, indexLockmode);
2206 table_close(heapRel, heapLockmode);
2207}
2208
2209/*
2210 * Used to keep track of compressed TID lists when building a GIN tuple.
2211 */
2212typedef struct
2213{
2214 dlist_node node; /* linked list pointers */
2215 GinPostingList *seg; /* one compressed posting-list segment */
2217
2218/*
2219 * _gin_build_tuple
2220 * Serialize the state for an index key into a tuple for tuplesort.
2221 *
2222 * The tuple has a number of scalar fields (mostly matching the build state),
2223 * and then a data array that stores the key first, and then the TID list.
2224 *
2225 * For by-reference data types, we store the actual data. For by-val types
2226 * we simply copy the whole Datum, so that we don't have to care about stuff
2227 * like endianness etc. We could make it a little bit smaller, but it's not
2228 * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
2229 * start of the TID list anyway. So we wouldn't save anything. (This would
2230 * not be a good idea for the permanent in-index data, since we'd prefer
2231 * that that not depend on sizeof(Datum). But this is just a transient
2232 * representation to use while sorting the data.)
2233 *
2234 * The TID list is serialized as compressed - it's highly compressible, and
2235 * we already have ginCompressPostingList for this purpose. The list may be
2236 * pretty long, so we compress it into multiple segments and then copy all
2237 * of that into the GIN tuple.
2238 */
2239static GinTuple *
2240_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
2241 Datum key, int16 typlen, bool typbyval,
2243 Size *len)
2244{
2245 GinTuple *tuple;
2246 char *ptr;
2247
2248 Size tuplen;
2249 int keylen;
2250
2251 dlist_mutable_iter iter;
2252 dlist_head segments;
2253 int ncompressed;
2255
2256 /*
2257 * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
2258 * have actual non-empty key. We include varlena headers and \0 bytes for
2259 * strings, to make it easier to access the data in-line.
2260 *
2261 * For byval types we simply copy the whole Datum. We could store just the
2262 * necessary bytes, but this is simpler to work with and not worth the
2263 * extra complexity. Moreover we still need to do the MAXALIGN to allow
2264 * direct access to items pointers.
2265 *
2266 * XXX Note that for byval types we store the whole datum, no matter what
2267 * the typlen value is.
2268 */
2269 if (category != GIN_CAT_NORM_KEY)
2270 keylen = 0;
2271 else if (typbyval)
2272 keylen = sizeof(Datum);
2273 else if (typlen > 0)
2274 keylen = typlen;
2275 else if (typlen == -1)
2276 keylen = VARSIZE_ANY(DatumGetPointer(key));
2277 else if (typlen == -2)
2278 keylen = strlen(DatumGetPointer(key)) + 1;
2279 else
2280 elog(ERROR, "unexpected typlen value (%d)", typlen);
2281
2282 /* compress the item pointers */
2283 ncompressed = 0;
2284 compresslen = 0;
2285 dlist_init(&segments);
2286
2287 /* generate compressed segments of TID list chunks */
2288 while (ncompressed < nitems)
2289 {
2290 int cnt;
2292
2294 (nitems - ncompressed),
2295 UINT16_MAX,
2296 &cnt);
2297
2298 ncompressed += cnt;
2300
2301 dlist_push_tail(&segments, &seginfo->node);
2302 }
2303
2304 /*
2305 * Determine GIN tuple length with all the data included. Be careful about
2306 * alignment, to allow direct access to compressed segments (those require
2307 * only SHORTALIGN).
2308 */
2309 tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
2310
2311 *len = tuplen;
2312
2313 /*
2314 * Allocate space for the whole GIN tuple.
2315 *
2316 * The palloc0 is needed - writetup_index_gin will write the whole tuple
2317 * to disk, so we need to make sure the padding bytes are defined
2318 * (otherwise valgrind would report this).
2319 */
2320 tuple = palloc0(tuplen);
2321
2322 tuple->tuplen = tuplen;
2323 tuple->attrnum = attrnum;
2324 tuple->category = category;
2325 tuple->keylen = keylen;
2326 tuple->nitems = nitems;
2327
2328 /* key type info */
2329 tuple->typlen = typlen;
2330 tuple->typbyval = typbyval;
2331
2332 /*
2333 * Copy the key and items into the tuple. First the key value, which we
2334 * can simply copy right at the beginning of the data array.
2335 */
2336 if (category == GIN_CAT_NORM_KEY)
2337 {
2338 if (typbyval)
2339 {
2340 memcpy(tuple->data, &key, sizeof(Datum));
2341 }
2342 else if (typlen > 0) /* byref, fixed length */
2343 {
2344 memcpy(tuple->data, DatumGetPointer(key), typlen);
2345 }
2346 else if (typlen == -1)
2347 {
2348 memcpy(tuple->data, DatumGetPointer(key), keylen);
2349 }
2350 else if (typlen == -2)
2351 {
2352 memcpy(tuple->data, DatumGetPointer(key), keylen);
2353 }
2354 }
2355
2356 /* finally, copy the TIDs into the array */
2357 ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
2358
2359 /* copy in the compressed data, and free the segments */
2360 dlist_foreach_modify(iter, &segments)
2361 {
2363
2364 memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
2365
2366 ptr += SizeOfGinPostingList(seginfo->seg);
2367
2368 dlist_delete(&seginfo->node);
2369
2370 pfree(seginfo->seg);
2371 pfree(seginfo);
2372 }
2373
2374 return tuple;
2375}
2376
2377/*
2378 * _gin_parse_tuple_key
2379 * Return a Datum representing the key stored in the tuple.
2380 *
2381 * Most of the tuple fields are directly accessible, the only thing that
2382 * needs more care is the key and the TID list.
2383 *
2384 * For the key, this returns a regular Datum representing it. It's either the
2385 * actual key value, or a pointer to the beginning of the data array (which is
2386 * where the data was copied by _gin_build_tuple).
2387 */
2388static Datum
2390{
2391 Datum key;
2392
2393 if (a->category != GIN_CAT_NORM_KEY)
2394 return (Datum) 0;
2395
2396 if (a->typbyval)
2397 {
2398 memcpy(&key, a->data, a->keylen);
2399 return key;
2400 }
2401
2402 return PointerGetDatum(a->data);
2403}
2404
2405/*
2406* _gin_parse_tuple_items
2407 * Return a pointer to a palloc'd array of decompressed TID array.
2408 */
2409static ItemPointer
2411{
2412 int len;
2413 char *ptr;
2414 int ndecoded;
2416
2417 len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2418 ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2419
2421
2422 Assert(ndecoded == a->nitems);
2423
2424 return items;
2425}
2426
2427/*
2428 * _gin_compare_tuples
2429 * Compare GIN tuples, used by tuplesort during parallel index build.
2430 *
2431 * The scalar fields (attrnum, category) are compared first, the key value is
2432 * compared last. The comparisons are done using type-specific sort support
2433 * functions.
2434 *
2435 * If the key value matches, we compare the first TID value in the TID list,
2436 * which means the tuples are merged in an order in which they are most
2437 * likely to be simply concatenated. (This "first" TID will also allow us
2438 * to determine a point up to which the list is fully determined and can be
2439 * written into the index to enforce a memory limit etc.)
2440 */
2441int
2443{
2444 int r;
2445 Datum keya,
2446 keyb;
2447
2448 if (a->attrnum < b->attrnum)
2449 return -1;
2450
2451 if (a->attrnum > b->attrnum)
2452 return 1;
2453
2454 if (a->category < b->category)
2455 return -1;
2456
2457 if (a->category > b->category)
2458 return 1;
2459
2460 if (a->category == GIN_CAT_NORM_KEY)
2461 {
2464
2465 r = ApplySortComparator(keya, false,
2466 keyb, false,
2467 &ssup[a->attrnum - 1]);
2468
2469 /* if the key is the same, consider the first TID in the array */
2470 return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
2472 }
2473
2476}
int ParallelWorkerNumber
Definition parallel.c:117
void InitializeParallelDSM(ParallelContext *pcxt)
Definition parallel.c:213
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition parallel.c:805
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition parallel.c:583
void DestroyParallelContext(ParallelContext *pcxt)
Definition parallel.c:959
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition parallel.c:175
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition parallel.c:702
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
uint32 BlockNumber
Definition block.h:31
static Datum values[MAXATTR]
Definition bootstrap.c:188
int Buffer
Definition buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:974
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5522
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3063
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
@ EB_SKIP_EXTENSION_LOCK
Definition bufmgr.h:75
@ EB_LOCK_FIRST
Definition bufmgr.h:87
#define BMR_REL(p_rel)
Definition bufmgr.h:114
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
PageData * Page
Definition bufpage.h:81
#define Min(x, y)
Definition c.h:1093
#define BUFFERALIGN(LEN)
Definition c.h:900
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
int16_t int16
Definition c.h:613
#define SHORTALIGN(LEN)
Definition c.h:894
int32_t int32
Definition c.h:614
uint32_t uint32
Definition c.h:618
#define OidIsValid(objectId)
Definition c.h:860
size_t Size
Definition c.h:691
bool ConditionVariableCancelSleep(void)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void ConditionVariableSignal(ConditionVariable *cv)
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition datum.c:132
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define MaxAllocSize
Definition fe_memutils.h:22
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
char * format_type_be(Oid type_oid)
IndexUniqueCheck
Definition genam.h:124
#define GIN_COMPARE_PROC
Definition gin.h:24
#define PROGRESS_GIN_PHASE_PERFORMSORT_2
Definition gin.h:49
#define PROGRESS_GIN_PHASE_MERGE_1
Definition gin.h:48
#define PROGRESS_GIN_PHASE_PERFORMSORT_1
Definition gin.h:47
#define PROGRESS_GIN_PHASE_MERGE_2
Definition gin.h:50
#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN
Definition gin.h:46
#define GIN_UNLOCK
Definition gin_private.h:51
#define GinGetUseFastUpdate(relation)
Definition gin_private.h:36
static ItemPointer GinTupleGetFirst(GinTuple *tup)
Definition gin_tuple.h:35
#define GinIsPostingTree(itup)
Definition ginblock.h:231
#define GIN_CAT_NORM_KEY
Definition ginblock.h:208
#define SizeOfGinPostingList(plist)
Definition ginblock.h:342
#define GIN_LEAF
Definition ginblock.h:42
#define GinGetPostingTree(itup)
Definition ginblock.h:233
signed char GinNullCategory
Definition ginblock.h:206
#define GinSetPostingTree(itup, blkno)
Definition ginblock.h:232
#define GinMaxItemSize
Definition ginblock.h:248
void freeGinBtreeStack(GinBtreeStack *stack)
Definition ginbtree.c:198
void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats)
Definition ginbtree.c:816
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, bool rootConflictCheck)
Definition ginbtree.c:83
void ginBeginBAScan(BuildAccumulator *accum)
Definition ginbulk.c:256
ItemPointerData * ginGetBAEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *key, GinNullCategory *category, uint32 *n)
Definition ginbulk.c:267
void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum *entries, GinNullCategory *categories, int32 nentries)
Definition ginbulk.c:209
void ginInitBA(BuildAccumulator *accum)
Definition ginbulk.c:109
BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, GinStatsData *buildStats, Buffer entrybuffer)
void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, Pointer data, Size dataSize, int nipd, bool errorTooBig)
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
void ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, OffsetNumber attnum, Datum value, bool isNull, ItemPointer ht_ctid)
Definition ginfast.c:483
void ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Definition ginfast.c:219
static void ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition gininsert.c:570
#define PARALLEL_KEY_BUFFER_USAGE
Definition gininsert.c:49
static void AssertCheckItemPointers(GinBuffer *buffer)
Definition gininsert.c:1214
int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
Definition gininsert.c:2443
static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition gininsert.c:217
static bool GinBufferIsEmpty(GinBuffer *buffer)
Definition gininsert.c:1338
#define PARALLEL_KEY_GIN_SHARED
Definition gininsert.c:45
IndexBuildResult * ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition gininsert.c:618
static GinBuffer * GinBufferInit(Relation index)
Definition gininsert.c:1270
static IndexTuple buildFreshLeafTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition gininsert.c:299
static void GinBufferReset(GinBuffer *buffer)
Definition gininsert.c:1560
static void ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr)
Definition gininsert.c:428
static void AssertCheckGinBuffer(GinBuffer *buffer)
Definition gininsert.c:1241
static void _gin_parallel_scan_and_build(GinBuildState *state, GinBuildShared *ginshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
Definition gininsert.c:2032
void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
Definition gininsert.c:351
static void GinBufferTrim(GinBuffer *buffer)
Definition gininsert.c:1590
static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition gininsert.c:1810
static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
Definition gininsert.c:1116
static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
Definition gininsert.c:937
static void ginFlushBuildState(GinBuildState *buildstate, Relation index)
Definition gininsert.c:505
static void _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, bool progress)
Definition gininsert.c:1854
static bool GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
Definition gininsert.c:1354
static double _gin_parallel_merge(GinBuildState *state)
Definition gininsert.c:1648
static void ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer item)
Definition gininsert.c:843
static bool GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
Definition gininsert.c:1626
void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition gininsert.c:2110
void ginbuildempty(Relation index)
Definition gininsert.c:812
bool gininsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition gininsert.c:865
static void _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
Definition gininsert.c:1821
static GinTuple * _gin_build_tuple(OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len)
Definition gininsert.c:2241
static Datum _gin_parse_tuple_key(GinTuple *a)
Definition gininsert.c:2390
#define PARALLEL_KEY_TUPLESORT
Definition gininsert.c:46
static bool GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
Definition gininsert.c:1420
#define PARALLEL_KEY_QUERY_TEXT
Definition gininsert.c:47
static void GinBufferFree(GinBuffer *buffer)
Definition gininsert.c:1606
static ItemPointer _gin_parse_tuple_items(GinTuple *a)
Definition gininsert.c:2411
#define ParallelTableScanFromGinBuildShared(shared)
Definition gininsert.c:110
#define PARALLEL_KEY_WAL_USAGE
Definition gininsert.c:48
static void GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
Definition gininsert.c:1493
static void ginBuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition gininsert.c:452
static double _gin_parallel_heapscan(GinBuildState *state)
Definition gininsert.c:1147
ItemPointer ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
GinPostingList * ginCompressPostingList(const ItemPointerData *ipd, int nipd, int maxsize, int *nwritten)
ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb, int *nmerged)
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
Definition ginutil.c:233
Buffer GinNewBuffer(Relation index)
Definition ginutil.c:307
void GinInitBuffer(Buffer b, uint32 f)
Definition ginutil.c:357
Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories)
Definition ginutil.c:452
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Definition ginutil.c:266
void GinInitMetabuffer(Buffer b)
Definition ginutil.c:363
void initGinState(GinState *state, Relation index)
Definition ginutil.c:103
void ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
Definition ginutil.c:619
int maintenance_work_mem
Definition globals.c:133
int work_mem
Definition globals.c:131
static void dlist_init(dlist_head *head)
Definition ilist.h:314
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define nitems(x)
Definition indent.h:31
IndexInfo * BuildIndexInfo(Relation index)
Definition index.c:2429
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:177
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition indexam.c:883
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:133
static struct @174 value
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:219
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:209
void InstrStartParallelQuery(void)
Definition instrument.c:201
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
IndexTupleData * IndexTuple
Definition itup.h:53
int LOCKMODE
Definition lockdefs.h:26
#define AccessExclusiveLock
Definition lockdefs.h:43
#define ShareUpdateExclusiveLock
Definition lockdefs.h:39
#define ShareLock
Definition lockdefs.h:40
#define RowExclusiveLock
Definition lockdefs.h:38
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define END_CRIT_SECTION()
Definition miscadmin.h:152
static char * errmsg
uint16 OffsetNumber
Definition off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
int16 attnum
FormData_pg_attribute * Form_pg_attribute
const void size_t len
const void * data
static int progress
Definition pgbench.c:262
const char * debug_query_string
Definition postgres.c:91
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
#define InvalidOid
unsigned int Oid
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4345
static int fb(int x)
#define PROC_IN_SAFE_IC
Definition proc.h:60
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition progress.h:112
#define PROGRESS_SCAN_BLOCKS_DONE
Definition progress.h:148
#define PROGRESS_CREATEIDX_TUPLES_DONE
Definition progress.h:113
#define PROGRESS_CREATEIDX_SUBPHASE
Definition progress.h:111
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition progress.h:147
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition rel.h:533
@ MAIN_FORKNUM
Definition relpath.h:58
@ INIT_FORKNUM
Definition relpath.h:61
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:824
#define SnapshotAny
Definition snapmgr.h:33
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup)
Definition sortsupport.c:68
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
PGPROC * MyProc
Definition proc.c:68
Oid fn_oid
Definition fmgr.h:59
bool(* findItem)(GinBtree, GinBtreeStack *)
OffsetNumber off
Size keylen
Definition gininsert.c:1193
GinNullCategory category
Definition gininsert.c:1191
OffsetNumber attnum
Definition gininsert.c:1190
Datum key
Definition gininsert.c:1192
SortSupport ssup
Definition gininsert.c:1205
ItemPointerData * items
Definition gininsert.c:1206
int16 typlen
Definition gininsert.c:1196
bool typbyval
Definition gininsert.c:1197
double reltuples
Definition gininsert.c:94
int scantuplesortstates
Definition gininsert.c:65
double indtuples
Definition gininsert.c:95
bool isconcurrent
Definition gininsert.c:64
slock_t mutex
Definition gininsert.c:81
ConditionVariable workersdonecv
Definition gininsert.c:73
int nparticipantsdone
Definition gininsert.c:93
int nparticipanttuplesorts
Definition gininsert.c:127
TupleDesc origTupdesc
Definition gin_private.h:75
Relation index
Definition gin_private.h:61
int64 nEntries
Definition gin.h:61
char data[FLEXIBLE_ARRAY_MEMBER]
Definition gin_tuple.h:31
int nitems
Definition gin_tuple.h:30
int16 typlen
Definition gin_tuple.h:27
bool typbyval
Definition gin_tuple.h:28
signed char category
Definition gin_tuple.h:29
int tuplen
Definition gin_tuple.h:24
uint16 keylen
Definition gin_tuple.h:26
OffsetNumber attrnum
Definition gin_tuple.h:25
double heap_tuples
Definition genam.h:40
double index_tuples
Definition genam.h:41
void * ii_AmCache
Definition execnodes.h:234
int ii_ParallelWorkers
Definition execnodes.h:229
bool ii_Concurrent
Definition execnodes.h:221
MemoryContext ii_Context
Definition execnodes.h:237
uint8 statusFlags
Definition proc.h:207
dsm_segment * seg
Definition parallel.h:44
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
int nworkers_launched
Definition parallel.h:39
FmgrInfo cmp_proc_finfo
Definition typcache.h:77
dlist_node * cur
Definition ilist.h:200
Definition type.h:96
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition tableam.c:166
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition tableam.c:131
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition tableam.c:146
static double table_index_build_scan(Relation table_rel, Relation index_rel, IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition tableam.h:1765
static ItemArray items
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:178
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:193
void tuplesort_performsort(Tuplesortstate *state)
Definition tuplesort.c:1259
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition tuplesort.c:3210
Size tuplesort_estimate_shared(int nWorkers)
Definition tuplesort.c:3189
void tuplesort_end(Tuplesortstate *state)
Definition tuplesort.c:847
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition tuplesort.c:3233
#define TUPLESORT_NONE
Definition tuplesort.h:67
Tuplesortstate * tuplesort_begin_index_gin(Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, int sortopt)
GinTuple * tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:389
#define TYPECACHE_CMP_PROC_FINFO
Definition typcache.h:144
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
void ExitParallelMode(void)
Definition xact.c:1066
void EnterParallelMode(void)
Definition xact.c:1053
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

Typedef Documentation

◆ GinBuffer

◆ GinBuildShared

◆ GinLeader

Function Documentation

◆ _gin_begin_parallel()

/*
 * _gin_begin_parallel
 *		Attempt to launch a parallel GIN index build.
 *
 * On success, sets buildstate->bs_leader; on failure (no DSM segment or no
 * workers launched) it backs out cleanly and the caller falls back to a
 * serial build.
 */
static void
_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
					bool isconcurrent, int request)
{
	ParallelContext *pcxt;
	int			scantuplesortstates;
	Snapshot	snapshot;
	Size		estginshared;
	Size		estsort;
	GinBuildShared *ginshared;
	Sharedsort *sharedsort;
	GinLeader  *ginleader = palloc0_object(GinLeader);
	WalUsage   *walusage;
	BufferUsage *bufferusage;
	bool		leaderparticipates = true;
	int			querylen;

#ifdef DISABLE_LEADER_PARTICIPATION
	leaderparticipates = false;
#endif

	/*
	 * Enter parallel mode, and create context for parallel build of gin index
	 */
	EnterParallelMode();
	Assert(request > 0);
	pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
								 request);

	scantuplesortstates = leaderparticipates ? request + 1 : request;

	/*
	 * Prepare for scan of the base relation. In a normal index build, we use
	 * SnapshotAny because we must retrieve all tuples and do our own time
	 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
	 * concurrent build, we take a regular MVCC snapshot and index whatever's
	 * live according to that.
	 */
	if (!isconcurrent)
		snapshot = SnapshotAny;
	else
		snapshot = RegisterSnapshot(GetTransactionSnapshot());

	/*
	 * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
	 */
	estginshared = _gin_parallel_estimate_shared(heap, snapshot);
	shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
	estsort = tuplesort_estimate_shared(scantuplesortstates);
	shm_toc_estimate_chunk(&pcxt->estimator, estsort);

	shm_toc_estimate_keys(&pcxt->estimator, 2);

	/*
	 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
	 * and PARALLEL_KEY_BUFFER_USAGE.
	 *
	 * If there are no extensions loaded that care, we could skip this. We
	 * have no way of knowing whether anyone's looking at pgWalUsage or
	 * pgBufferUsage, so do it unconditionally.
	 */
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
	if (debug_query_string)
	{
		querylen = strlen(debug_query_string);
		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
		shm_toc_estimate_keys(&pcxt->estimator, 1);
	}
	else
		querylen = 0;			/* keep compiler quiet */

	/* Everyone's had a chance to ask for space, so now create the DSM */
	InitializeParallelDSM(pcxt);

	/* If no DSM segment was available, back out (do serial build) */
	if (pcxt->seg == NULL)
	{
		if (IsMVCCSnapshot(snapshot))
			UnregisterSnapshot(snapshot);
		DestroyParallelContext(pcxt);
		ExitParallelMode();
		return;
	}

	/* Store shared build state, for which we reserved space */
	ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
	/* Initialize immutable state */
	ginshared->heaprelid = RelationGetRelid(heap);
	ginshared->indexrelid = RelationGetRelid(index);
	ginshared->isconcurrent = isconcurrent;
	ginshared->scantuplesortstates = scantuplesortstates;

	ConditionVariableInit(&ginshared->workersdonecv);
	SpinLockInit(&ginshared->mutex);

	/* Initialize mutable state */
	ginshared->nparticipantsdone = 0;
	ginshared->reltuples = 0.0;
	ginshared->indtuples = 0.0;

	table_parallelscan_initialize(heap,
								  ParallelTableScanFromGinBuildShared(ginshared),
								  snapshot);

	/*
	 * Store shared tuplesort-private state, for which we reserved space.
	 * Then, initialize opaque state using tuplesort routine.
	 */
	sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
	tuplesort_initialize_shared(sharedsort, scantuplesortstates,
								pcxt->seg);

	shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
	shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);

	/* Store query string for workers */
	if (debug_query_string)
	{
		char	   *sharedquery;

		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
		memcpy(sharedquery, debug_query_string, querylen + 1);
		shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
	}

	/*
	 * Allocate space for each worker's WalUsage and BufferUsage; no need to
	 * initialize.
	 */
	walusage = shm_toc_allocate(pcxt->toc,
								mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
	bufferusage = shm_toc_allocate(pcxt->toc,
								   mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);

	/* Launch workers, saving status for leader/caller */
	LaunchParallelWorkers(pcxt);
	ginleader->pcxt = pcxt;
	ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
	if (leaderparticipates)
		ginleader->nparticipanttuplesorts++;
	ginleader->ginshared = ginshared;
	ginleader->sharedsort = sharedsort;
	ginleader->snapshot = snapshot;
	ginleader->walusage = walusage;
	ginleader->bufferusage = bufferusage;

	/* If no workers were successfully launched, back out (do serial build) */
	if (pcxt->nworkers_launched == 0)
	{
		_gin_end_parallel(ginleader, NULL);
		return;
	}

	/* Save leader state now that it's clear build will be parallel */
	buildstate->bs_leader = ginleader;

	/* Join heap scan ourselves */
	if (leaderparticipates)
		_gin_leader_participate_as_worker(buildstate, heap, index);

	/*
	 * Caller needs to wait for all launched workers when we return. Make
	 * sure that the failure-to-start case will not hang forever.
	 */
	WaitForParallelWorkersToAttach(pcxt);
}

References _gin_end_parallel(), _gin_leader_participate_as_worker(), _gin_parallel_estimate_shared(), Assert, ConditionVariableInit(), CreateParallelContext(), debug_query_string, DestroyParallelContext(), EnterParallelMode(), ParallelContext::estimator, ExitParallelMode(), fb(), GetTransactionSnapshot(), GinBuildShared::heaprelid, GinBuildShared::indexrelid, GinBuildShared::indtuples, InitializeParallelDSM(), GinBuildShared::isconcurrent, IsMVCCSnapshot, LaunchParallelWorkers(), mul_size(), GinBuildShared::mutex, GinBuildShared::nparticipantsdone, ParallelContext::nworkers, ParallelContext::nworkers_launched, palloc0_object, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_GIN_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_WAL_USAGE, ParallelTableScanFromGinBuildShared, RegisterSnapshot(), RelationGetRelid, GinBuildShared::reltuples, GinBuildShared::scantuplesortstates, ParallelContext::seg, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), SnapshotAny, SpinLockInit(), table_parallelscan_initialize(), ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), UnregisterSnapshot(), WaitForParallelWorkersToAttach(), and GinBuildShared::workersdonecv.

Referenced by ginbuild().

◆ _gin_build_tuple()

static GinTuple * _gin_build_tuple ( OffsetNumber  attrnum,
unsigned char  category,
Datum  key,
int16  typlen,
bool  typbyval,
ItemPointerData items,
uint32  nitems,
Size len 
)
static

Definition at line 2241 of file gininsert.c.

2245{
2246 GinTuple *tuple;
2247 char *ptr;
2248
2249 Size tuplen;
2250 int keylen;
2251
2252 dlist_mutable_iter iter;
2253 dlist_head segments;
2254 int ncompressed;
2256
2257 /*
2258 * Calculate how long the key value is. Only keys with GIN_CAT_NORM_KEY
2259 * have actual non-empty key. We include varlena headers and \0 bytes for
2260 * strings, to make it easier to access the data in-line.
2261 *
2262 * For byval types we simply copy the whole Datum. We could store just the
2263 * necessary bytes, but this is simpler to work with and not worth the
2264 * extra complexity. Moreover we still need to do the MAXALIGN to allow
2265 * direct access to items pointers.
2266 *
2267 * XXX Note that for byval types we store the whole datum, no matter what
2268 * the typlen value is.
2269 */
2270 if (category != GIN_CAT_NORM_KEY)
2271 keylen = 0;
2272 else if (typbyval)
2273 keylen = sizeof(Datum);
2274 else if (typlen > 0)
2275 keylen = typlen;
2276 else if (typlen == -1)
2277 keylen = VARSIZE_ANY(DatumGetPointer(key));
2278 else if (typlen == -2)
2279 keylen = strlen(DatumGetPointer(key)) + 1;
2280 else
2281 elog(ERROR, "unexpected typlen value (%d)", typlen);
2282
2283 /* compress the item pointers */
2284 ncompressed = 0;
2285 compresslen = 0;
2286 dlist_init(&segments);
2287
2288 /* generate compressed segments of TID list chunks */
2289 while (ncompressed < nitems)
2290 {
2291 int cnt;
2293
2295 (nitems - ncompressed),
2296 UINT16_MAX,
2297 &cnt);
2298
2299 ncompressed += cnt;
2301
2302 dlist_push_tail(&segments, &seginfo->node);
2303 }
2304
2305 /*
2306 * Determine GIN tuple length with all the data included. Be careful about
2307 * alignment, to allow direct access to compressed segments (those require
2308 * only SHORTALIGN).
2309 */
2310 tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
2311
2312 *len = tuplen;
2313
2314 /*
2315 * Allocate space for the whole GIN tuple.
2316 *
2317 * The palloc0 is needed - writetup_index_gin will write the whole tuple
2318 * to disk, so we need to make sure the padding bytes are defined
2319 * (otherwise valgrind would report this).
2320 */
2321 tuple = palloc0(tuplen);
2322
2323 tuple->tuplen = tuplen;
2324 tuple->attrnum = attrnum;
2325 tuple->category = category;
2326 tuple->keylen = keylen;
2327 tuple->nitems = nitems;
2328
2329 /* key type info */
2330 tuple->typlen = typlen;
2331 tuple->typbyval = typbyval;
2332
2333 /*
2334 * Copy the key and items into the tuple. First the key value, which we
2335 * can simply copy right at the beginning of the data array.
2336 */
2337 if (category == GIN_CAT_NORM_KEY)
2338 {
2339 if (typbyval)
2340 {
2341 memcpy(tuple->data, &key, sizeof(Datum));
2342 }
2343 else if (typlen > 0) /* byref, fixed length */
2344 {
2345 memcpy(tuple->data, DatumGetPointer(key), typlen);
2346 }
2347 else if (typlen == -1)
2348 {
2349 memcpy(tuple->data, DatumGetPointer(key), keylen);
2350 }
2351 else if (typlen == -2)
2352 {
2353 memcpy(tuple->data, DatumGetPointer(key), keylen);
2354 }
2355 }
2356
2357 /* finally, copy the TIDs into the array */
2358 ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
2359
2360 /* copy in the compressed data, and free the segments */
2361 dlist_foreach_modify(iter, &segments)
2362 {
2364
2365 memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
2366
2367 ptr += SizeOfGinPostingList(seginfo->seg);
2368
2369 dlist_delete(&seginfo->node);
2370
2371 pfree(seginfo->seg);
2372 pfree(seginfo);
2373 }
2374
2375 return tuple;
2376}

References GinTuple::attrnum, GinTuple::category, dlist_mutable_iter::cur, GinTuple::data, data, DatumGetPointer(), dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_push_tail(), elog, ERROR, fb(), GIN_CAT_NORM_KEY, ginCompressPostingList(), items, GinTuple::keylen, len, GinTuple::nitems, nitems, palloc0(), palloc_object, pfree(), SHORTALIGN, SizeOfGinPostingList, GinTuple::tuplen, GinTuple::typbyval, GinTuple::typlen, and VARSIZE_ANY().

Referenced by _gin_process_worker_data(), and ginFlushBuildState().

◆ _gin_compare_tuples()

int _gin_compare_tuples ( GinTuple a,
GinTuple b,
SortSupport  ssup 
)

Definition at line 2443 of file gininsert.c.

2444{
2445 int r;
2446 Datum keya,
2447 keyb;
2448
2449 if (a->attrnum < b->attrnum)
2450 return -1;
2451
2452 if (a->attrnum > b->attrnum)
2453 return 1;
2454
2455 if (a->category < b->category)
2456 return -1;
2457
2458 if (a->category > b->category)
2459 return 1;
2460
2461 if (a->category == GIN_CAT_NORM_KEY)
2462 {
2465
2466 r = ApplySortComparator(keya, false,
2467 keyb, false,
2468 &ssup[a->attrnum - 1]);
2469
2470 /* if the key is the same, consider the first TID in the array */
2471 return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
2473 }
2474
2477}

References _gin_parse_tuple_key(), a, ApplySortComparator(), b, fb(), GIN_CAT_NORM_KEY, GinTupleGetFirst(), and ItemPointerCompare().

Referenced by comparetup_index_gin().

◆ _gin_end_parallel()

static void _gin_end_parallel ( GinLeader ginleader,
GinBuildState state 
)
static

Definition at line 1116 of file gininsert.c.

1117{
1118 int i;
1119
1120 /* Shutdown worker processes */
1122
1123 /*
1124 * Next, accumulate WAL usage. (This must wait for the workers to finish,
1125 * or we might get incomplete data.)
1126 */
1127 for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
1128 InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
1129
1130 /* Free last reference to MVCC snapshot, if one was used */
1131 if (IsMVCCSnapshot(ginleader->snapshot))
1132 UnregisterSnapshot(ginleader->snapshot);
1135}

References DestroyParallelContext(), ExitParallelMode(), fb(), i, InstrAccumParallelQuery(), IsMVCCSnapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _gin_begin_parallel(), and ginbuild().

◆ _gin_leader_participate_as_worker()

static void _gin_leader_participate_as_worker ( GinBuildState buildstate,
Relation  heap,
Relation  index 
)
static

Definition at line 1821 of file gininsert.c.

1822{
1823 GinLeader *ginleader = buildstate->bs_leader;
1824 int sortmem;
1825
1826 /*
1827 * Might as well use reliable figure when doling out maintenance_work_mem
1828 * (when requested number of workers were not launched, this will be
1829 * somewhat higher than it is for other workers).
1830 */
1832
1833 /* Perform work common to all participants */
1835 ginleader->sharedsort, heap, index,
1836 sortmem, true);
1837}

References _gin_parallel_scan_and_build(), fb(), maintenance_work_mem, and GinLeader::nparticipanttuplesorts.

Referenced by _gin_begin_parallel().

◆ _gin_parallel_build_main()

void _gin_parallel_build_main ( dsm_segment seg,
shm_toc toc 
)

Definition at line 2110 of file gininsert.c.

2111{
2112 char *sharedquery;
2113 GinBuildShared *ginshared;
2114 Sharedsort *sharedsort;
2116 Relation heapRel;
2117 Relation indexRel;
2120 WalUsage *walusage;
2121 BufferUsage *bufferusage;
2122 int sortmem;
2123
2124 /*
2125 * The only possible status flag that can be set to the parallel worker is
2126 * PROC_IN_SAFE_IC.
2127 */
2128 Assert((MyProc->statusFlags == 0) ||
2130
2131 /* Set debug_query_string for individual workers first */
2134
2135 /* Report the query string from leader */
2137
2138 /* Look up gin shared state */
2139 ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
2140
2141 /* Open relations using lock modes known to be obtained by index.c */
2142 if (!ginshared->isconcurrent)
2143 {
2146 }
2147 else
2148 {
2151 }
2152
2153 /* Open relations within worker */
2154 heapRel = table_open(ginshared->heaprelid, heapLockmode);
2155 indexRel = index_open(ginshared->indexrelid, indexLockmode);
2156
2157 /* initialize the GIN build state */
2158 initGinState(&buildstate.ginstate, indexRel);
2159 buildstate.indtuples = 0;
2160 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
2161 memset(&buildstate.tid, 0, sizeof(ItemPointerData));
2162
2163 /*
2164 * create a temporary memory context that is used to hold data not yet
2165 * dumped out to the index
2166 */
2168 "Gin build temporary context",
2170
2171 /*
2172 * create a temporary memory context that is used for calling
2173 * ginExtractEntries(), and can be reset after each tuple
2174 */
2176 "Gin build temporary context for user-defined function",
2178
2179 buildstate.accum.ginstate = &buildstate.ginstate;
2180 ginInitBA(&buildstate.accum);
2181
2182
2183 /* Look up shared state private to tuplesort.c */
2184 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2185 tuplesort_attach_shared(sharedsort, seg);
2186
2187 /* Prepare to track buffer usage during parallel execution */
2189
2190 /*
2191 * Might as well use reliable figure when doling out maintenance_work_mem
2192 * (when requested number of workers were not launched, this will be
2193 * somewhat higher than it is for other workers).
2194 */
2196
2197 _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
2198 heapRel, indexRel, sortmem, false);
2199
2200 /* Report WAL/buffer usage during parallel execution */
2201 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2202 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2204 &walusage[ParallelWorkerNumber]);
2205
2206 index_close(indexRel, indexLockmode);
2207 table_close(heapRel, heapLockmode);
2208}

References _gin_parallel_scan_and_build(), AccessExclusiveLock, ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, CurrentMemoryContext, debug_query_string, fb(), ginInitBA(), GinBuildShared::heaprelid, index_close(), index_open(), GinBuildShared::indexrelid, initGinState(), InstrEndParallelQuery(), InstrStartParallelQuery(), GinBuildShared::isconcurrent, maintenance_work_mem, MyProc, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_GIN_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_WAL_USAGE, ParallelWorkerNumber, pgstat_report_activity(), PROC_IN_SAFE_IC, RowExclusiveLock, GinBuildShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), STATE_RUNNING, PGPROC::statusFlags, table_close(), table_open(), and tuplesort_attach_shared().

◆ _gin_parallel_estimate_shared()

static Size _gin_parallel_estimate_shared ( Relation  heap,
Snapshot  snapshot 
)
static

Definition at line 1810 of file gininsert.c.

1811{
1812 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1813 return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
1814 table_parallelscan_estimate(heap, snapshot));
1815}

References add_size(), BUFFERALIGN, and table_parallelscan_estimate().

Referenced by _gin_begin_parallel().

◆ _gin_parallel_heapscan()

static double _gin_parallel_heapscan ( GinBuildState state)
static

Definition at line 1147 of file gininsert.c.

1148{
1149 GinBuildShared *ginshared = state->bs_leader->ginshared;
1150 int nparticipanttuplesorts;
1151
1152 nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
1153 for (;;)
1154 {
1155 SpinLockAcquire(&ginshared->mutex);
1156 if (ginshared->nparticipantsdone == nparticipanttuplesorts)
1157 {
1158 /* copy the data into leader state */
1159 state->bs_reltuples = ginshared->reltuples;
1160 state->bs_numtuples = ginshared->indtuples;
1161
1162 SpinLockRelease(&ginshared->mutex);
1163 break;
1164 }
1165 SpinLockRelease(&ginshared->mutex);
1166
1169 }
1170
1172
1173 return state->bs_reltuples;
1174}

References ConditionVariableCancelSleep(), ConditionVariableSleep(), fb(), GinBuildShared::indtuples, GinBuildShared::mutex, GinBuildShared::nparticipantsdone, GinBuildShared::reltuples, SpinLockAcquire(), SpinLockRelease(), and GinBuildShared::workersdonecv.

Referenced by _gin_parallel_merge().

◆ _gin_parallel_merge()

static double _gin_parallel_merge ( GinBuildState state)
static

Definition at line 1648 of file gininsert.c.

1649{
1650 GinTuple *tup;
1651 Size tuplen;
1652 double reltuples = 0;
1653 GinBuffer *buffer;
1654
1655 /* GIN tuples from workers, merged by leader */
1656 double numtuples = 0;
1657
1658 /* wait for workers to scan table and produce partial results */
1659 reltuples = _gin_parallel_heapscan(state);
1660
1661 /* Execute the sort */
1664
1665 /* do the actual sort in the leader */
1666 tuplesort_performsort(state->bs_sortstate);
1667
1668 /*
1669 * Initialize buffer to combine entries for the same key.
1670 *
1671 * The leader is allowed to use the whole maintenance_work_mem buffer to
1672 * combine data. The parallel workers already completed.
1673 */
1674 buffer = GinBufferInit(state->ginstate.index);
1675
1676 /*
1677 * Set the progress target for the next phase. Reset the block number
1678 * values set by table_index_build_scan
1679 */
1680 {
1681 const int progress_index[] = {
1686 };
1687 const int64 progress_vals[] = {
1689 state->bs_numtuples,
1690 0, 0
1691 };
1692
1694 }
1695
1696 /*
1697 * Read the GIN tuples from the shared tuplesort, sorted by category and
1698 * key. That probably gives us order matching how data is organized in the
1699 * index.
1700 *
1701 * We don't insert the GIN tuples right away, but instead accumulate as
1702 * many TIDs for the same key as possible, and then insert that at once.
1703 * This way we don't need to decompress/recompress the posting lists, etc.
1704 */
1705 while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
1706 {
1708
1710
1711 /*
1712 * If the buffer can accept the new GIN tuple, just store it there and
1713 * we're done. If it's a different key (or maybe too much data) flush
1714 * the current contents into the index first.
1715 */
1716 if (!GinBufferCanAddKey(buffer, tup))
1717 {
1718 /*
1719 * Buffer is not empty and it's storing a different key - flush
1720 * the data into the index, and start a new entry for current
1721 * GinTuple.
1722 */
1724
1726
1727 ginEntryInsert(&state->ginstate,
1728 buffer->attnum, buffer->key, buffer->category,
1729 buffer->items, buffer->nitems, &state->buildStats);
1730
1732 MemoryContextReset(state->tmpCtx);
1733
1734 /* discard the existing data */
1735 GinBufferReset(buffer);
1736 }
1737
1738 /*
1739 * We're about to add a GIN tuple to the buffer - check the memory
1740 * limit first, and maybe write out some of the data into the index
1741 * first, if needed (and possible). We only flush the part of the TID
1742 * list that we know won't change, and only if there's enough data for
1743 * compression to work well.
1744 */
1745 if (GinBufferShouldTrim(buffer, tup))
1746 {
1747 Assert(buffer->nfrozen > 0);
1748
1749 /*
1750 * Buffer exceeds the memory limit - flush the frozen part of the
1751 * TID list into the index, and keep accumulating the rest of the
1752 * data for the current GinTuple.
1753 */
1755
1757
1758 ginEntryInsert(&state->ginstate,
1759 buffer->attnum, buffer->key, buffer->category,
1760 buffer->items, buffer->nfrozen, &state->buildStats);
1761
1763 MemoryContextReset(state->tmpCtx);
1764
1765 /* truncate the data we've just discarded */
1766 GinBufferTrim(buffer);
1767 }
1768
1769 /*
1770 * Remember data for the current tuple (either remember the new key,
1771 * or append it to the existing data).
1772 */
1773 GinBufferStoreTuple(buffer, tup);
1774
1775 /* Report progress */
1777 ++numtuples);
1778 }
1779
1780 /* flush data remaining in the buffer (for the last key) */
1781 if (!GinBufferIsEmpty(buffer))
1782 {
1784
1785 ginEntryInsert(&state->ginstate,
1786 buffer->attnum, buffer->key, buffer->category,
1787 buffer->items, buffer->nitems, &state->buildStats);
1788
1789 /* discard the existing data */
1790 GinBufferReset(buffer);
1791
1792 /* Report progress */
1794 ++numtuples);
1795 }
1796
1797 /* release all the memory */
1798 GinBufferFree(buffer);
1799
1800 tuplesort_end(state->bs_sortstate);
1801
1802 return reltuples;
1803}

References _gin_parallel_heapscan(), Assert, AssertCheckItemPointers(), GinBuffer::attnum, GinBuffer::category, CHECK_FOR_INTERRUPTS, fb(), GinBufferCanAddKey(), GinBufferFree(), GinBufferInit(), GinBufferIsEmpty(), GinBufferReset(), GinBufferShouldTrim(), GinBufferStoreTuple(), GinBufferTrim(), ginEntryInsert(), GinBuffer::items, GinBuffer::key, MemoryContextReset(), MemoryContextSwitchTo(), GinBuffer::nfrozen, GinBuffer::nitems, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_CREATEIDX_TUPLES_DONE, PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_GIN_PHASE_MERGE_2, PROGRESS_GIN_PHASE_PERFORMSORT_2, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, tuplesort_end(), tuplesort_getgintuple(), and tuplesort_performsort().

Referenced by ginbuild().

◆ _gin_parallel_scan_and_build()

static void _gin_parallel_scan_and_build ( GinBuildState state,
GinBuildShared ginshared,
Sharedsort sharedsort,
Relation  heap,
Relation  index,
int  sortmem,
bool  progress 
)
static

Definition at line 2032 of file gininsert.c.

2036{
2038 TableScanDesc scan;
2039 double reltuples;
2040 IndexInfo *indexInfo;
2041
2042 /* Initialize local tuplesort coordination state */
2044 coordinate->isWorker = true;
2045 coordinate->nParticipants = -1;
2046 coordinate->sharedsort = sharedsort;
2047
2048 /* remember how much space is allowed for the accumulated entries */
2049 state->work_mem = (sortmem / 2);
2050
2051 /* remember how many workers participate in the build */
2052 state->bs_num_workers = ginshared->scantuplesortstates;
2053
2054 /* Begin "partial" tuplesort */
2055 state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
2056 state->work_mem,
2057 coordinate,
2059
2060 /* Local per-worker sort of raw-data */
2061 state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
2062 state->work_mem,
2063 NULL,
2065
2066 /* Join parallel scan */
2067 indexInfo = BuildIndexInfo(index);
2068 indexInfo->ii_Concurrent = ginshared->isconcurrent;
2069
2070 scan = table_beginscan_parallel(heap,
2072
2073 reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
2075
2076 /* write remaining accumulated entries */
2078
2079 /*
2080 * Do the first phase of in-worker processing - sort the data produced by
2081 * the callback, and combine them into much larger chunks and place that
2082 * into the shared tuplestore for leader to process.
2083 */
2084 _gin_process_worker_data(state, state->bs_worker_sort, progress);
2085
2086 /* sort the GIN tuples built by this worker */
2087 tuplesort_performsort(state->bs_sortstate);
2088
2089 state->bs_reltuples += reltuples;
2090
2091 /*
2092 * Done. Record ambuild statistics.
2093 */
2094 SpinLockAcquire(&ginshared->mutex);
2095 ginshared->nparticipantsdone++;
2096 ginshared->reltuples += state->bs_reltuples;
2097 ginshared->indtuples += state->bs_numtuples;
2098 SpinLockRelease(&ginshared->mutex);
2099
2100 /* Notify leader */
2102
2103 tuplesort_end(state->bs_sortstate);
2104}

References _gin_process_worker_data(), BuildIndexInfo(), ConditionVariableSignal(), fb(), ginBuildCallbackParallel(), ginFlushBuildState(), IndexInfo::ii_Concurrent, GinBuildShared::indtuples, GinBuildShared::isconcurrent, GinBuildShared::mutex, GinBuildShared::nparticipantsdone, palloc0_object, ParallelTableScanFromGinBuildShared, progress, GinBuildShared::reltuples, GinBuildShared::scantuplesortstates, SpinLockAcquire(), SpinLockRelease(), table_beginscan_parallel(), table_index_build_scan(), tuplesort_begin_index_gin(), tuplesort_end(), TUPLESORT_NONE, tuplesort_performsort(), and GinBuildShared::workersdonecv.

Referenced by _gin_leader_participate_as_worker(), and _gin_parallel_build_main().

◆ _gin_parse_tuple_items()

static ItemPointer _gin_parse_tuple_items ( GinTuple a)
static

Definition at line 2411 of file gininsert.c.

2412{
2413 int len;
2414 char *ptr;
2415 int ndecoded;
2417
2418 len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2419 ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2420
2422
2423 Assert(ndecoded == a->nitems);
2424
2425 return items;
2426}

References a, Assert, data, fb(), ginPostingListDecodeAllSegments(), items, len, and SHORTALIGN.

Referenced by GinBufferStoreTuple().

◆ _gin_parse_tuple_key()

static Datum _gin_parse_tuple_key ( GinTuple a)
static

Definition at line 2390 of file gininsert.c.

2391{
2392 Datum key;
2393
2394 if (a->category != GIN_CAT_NORM_KEY)
2395 return (Datum) 0;
2396
2397 if (a->typbyval)
2398 {
2399 memcpy(&key, a->data, a->keylen);
2400 return key;
2401 }
2402
2403 return PointerGetDatum(a->data);
2404}

References a, fb(), GIN_CAT_NORM_KEY, and PointerGetDatum().

Referenced by _gin_compare_tuples(), and GinBufferStoreTuple().

◆ _gin_process_worker_data()

static void _gin_process_worker_data ( GinBuildState state,
Tuplesortstate worker_sort,
bool  progress 
)
static

Definition at line 1854 of file gininsert.c.

1856{
1857 GinTuple *tup;
1858 Size tuplen;
1859
1860 GinBuffer *buffer;
1861
1862 /*
1863 * Initialize buffer to combine entries for the same key.
1864 *
1865 * The workers are limited to the same amount of memory as during the sort
1866 * in ginBuildCallbackParallel. But this probably should be the 32MB used
1867 * during planning, just like there.
1868 */
1869 buffer = GinBufferInit(state->ginstate.index);
1870
1871 /* sort the raw per-worker data */
1872 if (progress)
1875
1876 tuplesort_performsort(state->bs_worker_sort);
1877
1878 /* reset the number of GIN tuples produced by this worker */
1879 state->bs_numtuples = 0;
1880
1881 if (progress)
1884
1885 /*
1886 * Read the GIN tuples from the shared tuplesort, sorted by the key, and
1887 * merge them into larger chunks for the leader to combine.
1888 */
1889 while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
1890 {
1891
1893
1894 /*
1895 * If the buffer can accept the new GIN tuple, just store it there and
1896 * we're done. If it's a different key (or maybe too much data) flush
1897 * the current contents into the index first.
1898 */
1899 if (!GinBufferCanAddKey(buffer, tup))
1900 {
1901 GinTuple *ntup;
1902 Size ntuplen;
1903
1904 /*
1905 * Buffer is not empty and it's storing a different key - flush
1906 * the data into the shared tuplesort, and start a new entry for current
1907 * GinTuple.
1908 */
1910
1911 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1912 buffer->key, buffer->typlen, buffer->typbyval,
1913 buffer->items, buffer->nitems, &ntuplen);
1914
1915 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1916 state->bs_numtuples++;
1917
1918 pfree(ntup);
1919
1920 /* discard the existing data */
1921 GinBufferReset(buffer);
1922 }
1923
1924 /*
1925 * We're about to add a GIN tuple to the buffer - check the memory
1926 * limit first, and maybe write out some of the data into the index
1927 * first, if needed (and possible). We only flush the part of the TID
1928 * list that we know won't change, and only if there's enough data for
1929 * compression to work well.
1930 */
1931 if (GinBufferShouldTrim(buffer, tup))
1932 {
1933 GinTuple *ntup;
1934 Size ntuplen;
1935
1936 Assert(buffer->nfrozen > 0);
1937
1938 /*
1939 * Buffer exceeds the memory limit - flush the frozen part of the
1940 * TID list into the shared tuplesort, and keep accumulating the
1941 * rest of the data for the current GinTuple.
1942 */
1944
1945 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1946 buffer->key, buffer->typlen, buffer->typbyval,
1947 buffer->items, buffer->nfrozen, &ntuplen);
1948
1949 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1950
1951 pfree(ntup);
1952
1953 /* truncate the data we've just discarded */
1954 GinBufferTrim(buffer);
1955 }
1956
1957 /*
1958 * Remember data for the current tuple (either remember the new key,
1959 * or append it to the existing data).
1960 */
1961 GinBufferStoreTuple(buffer, tup);
1962 }
1963
1964 /* flush data remaining in the buffer (for the last key) */
1965 if (!GinBufferIsEmpty(buffer))
1966 {
1967 GinTuple *ntup;
1968 Size ntuplen;
1969
1971
1972 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1973 buffer->key, buffer->typlen, buffer->typbyval,
1974 buffer->items, buffer->nitems, &ntuplen);
1975
1976 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1977 state->bs_numtuples++;
1978
1979 pfree(ntup);
1980
1981 /* discard the existing data */
1982 GinBufferReset(buffer);
1983 }
1984
1985 /* release all the memory */
1986 GinBufferFree(buffer);
1987
1989}

References _gin_build_tuple(), Assert, AssertCheckItemPointers(), GinBuffer::attnum, GinBuffer::category, CHECK_FOR_INTERRUPTS, fb(), GinBufferCanAddKey(), GinBufferFree(), GinBufferInit(), GinBufferIsEmpty(), GinBufferReset(), GinBufferShouldTrim(), GinBufferStoreTuple(), GinBufferTrim(), GinBuffer::items, GinBuffer::key, GinBuffer::nfrozen, GinBuffer::nitems, pfree(), pgstat_progress_update_param(), progress, PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_GIN_PHASE_MERGE_1, PROGRESS_GIN_PHASE_PERFORMSORT_1, tuplesort_end(), tuplesort_getgintuple(), tuplesort_performsort(), tuplesort_putgintuple(), GinBuffer::typbyval, and GinBuffer::typlen.

Referenced by _gin_parallel_scan_and_build().

◆ addItemPointersToLeafTuple()

static IndexTuple addItemPointersToLeafTuple ( GinState ginstate,
IndexTuple  old,
ItemPointerData items,
uint32  nitem,
GinStatsData buildStats,
Buffer  buffer 
)
static

Definition at line 217 of file gininsert.c.

221{
223 Datum key;
224 GinNullCategory category;
225 IndexTuple res;
227 *oldItems;
228 int oldNPosting,
230 nwritten;
232
234
235 attnum = gintuple_get_attrnum(ginstate, old);
236 key = gintuple_get_key(ginstate, old, &category);
237
238 /* merge the old and new posting lists */
240
243 &newNPosting);
244
245 /* Compress the posting list, and try to build a tuple with room for it */
246 res = NULL;
248 if (nwritten == newNPosting)
249 {
250 res = GinFormTuple(ginstate, attnum, key, category,
251 (char *) compressedList,
254 false);
255 }
256
259
260 if (!res)
261 {
262 /* posting list would be too big, convert to posting tree */
264
265 /*
266 * Initialize posting tree with the old tuple's posting list. It's
267 * surely small enough to fit on one posting-tree page, and should
268 * already be in order with no duplicates.
269 */
271 oldItems,
273 buildStats,
274 buffer);
275
276 /* Now insert the TIDs-to-be-added into the posting tree */
278 items, nitem,
279 buildStats);
280
281 /* And build a new posting-tree-only result tuple */
282 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
284 }
286
287 return res;
288}

References Assert, attnum, createPostingTree(), fb(), ginCompressPostingList(), GinFormTuple(), ginInsertItemPointers(), GinIsPostingTree, GinMaxItemSize, ginMergeItemPointers(), ginReadTuple(), GinSetPostingTree, gintuple_get_attrnum(), gintuple_get_key(), GinState::index, items, pfree(), and SizeOfGinPostingList.

Referenced by ginEntryInsert().

◆ AssertCheckGinBuffer()

static void AssertCheckGinBuffer ( GinBuffer buffer)
static

Definition at line 1241 of file gininsert.c.

1242{
1243#ifdef USE_ASSERT_CHECKING
1244 /* if we have any items, the array must exist */
1245 Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
1246
1247 /*
1248 * The buffer may be empty, in which case we must not call the check of
1249 * item pointers, because that assumes non-emptiness.
1250 */
1251 if (buffer->nitems == 0)
1252 return;
1253
1254 /* Make sure the item pointers are valid and sorted. */
1256#endif
1257}

References Assert, AssertCheckItemPointers(), fb(), GinBuffer::items, and GinBuffer::nitems.

Referenced by GinBufferKeyEquals(), and GinBufferStoreTuple().

◆ AssertCheckItemPointers()

static void AssertCheckItemPointers ( GinBuffer buffer)
static

Definition at line 1214 of file gininsert.c.

1215{
1216#ifdef USE_ASSERT_CHECKING
1217 /* we should not have a buffer with no TIDs to sort */
1218 Assert(buffer->items != NULL);
1219 Assert(buffer->nitems > 0);
1220
1221 for (int i = 0; i < buffer->nitems; i++)
1222 {
1223 Assert(ItemPointerIsValid(&buffer->items[i]));
1224
1225 /* don't check ordering for the first TID item */
1226 if (i == 0)
1227 continue;
1228
1229 Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
1230 }
1231#endif
1232}

References Assert, fb(), i, ItemPointerCompare(), ItemPointerIsValid(), GinBuffer::items, and GinBuffer::nitems.

Referenced by _gin_parallel_merge(), _gin_process_worker_data(), AssertCheckGinBuffer(), and GinBufferStoreTuple().

◆ buildFreshLeafTuple()

static IndexTuple buildFreshLeafTuple ( GinState ginstate,
OffsetNumber  attnum,
Datum  key,
GinNullCategory  category,
ItemPointerData items,
uint32  nitem,
GinStatsData buildStats,
Buffer  buffer 
)
static

Definition at line 299 of file gininsert.c.

303{
304 IndexTuple res = NULL;
306 int nwritten;
307
308 /* try to build a posting list tuple with all the items */
310 if (nwritten == nitem)
311 {
312 res = GinFormTuple(ginstate, attnum, key, category,
313 (char *) compressedList,
315 nitem, false);
316 }
318
319 if (!res)
320 {
321 /* posting list would be too big, build posting tree */
323
324 /*
325 * Build posting-tree-only result tuple. We do this first so as to
326 * fail quickly if the key is too big.
327 */
328 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
329
330 /*
331 * Initialize a new posting tree with the TIDs.
332 */
333 postingRoot = createPostingTree(ginstate->index, items, nitem,
334 buildStats, buffer);
335
336 /* And save the root link in the result tuple */
338 }
339
340 return res;
341}

References attnum, createPostingTree(), fb(), ginCompressPostingList(), GinFormTuple(), GinMaxItemSize, GinSetPostingTree, GinState::index, items, pfree(), and SizeOfGinPostingList.

Referenced by ginEntryInsert().

◆ GinBufferCanAddKey()

static bool GinBufferCanAddKey ( GinBuffer buffer,
GinTuple tup 
)
static

Definition at line 1626 of file gininsert.c.

1627{
1628 /* empty buffer can accept data for any key */
1629 if (GinBufferIsEmpty(buffer))
1630 return true;
1631
1632 /* otherwise just data for the same key */
1633 return GinBufferKeyEquals(buffer, tup);
1634}

References fb(), GinBufferIsEmpty(), and GinBufferKeyEquals().

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferFree()

static void GinBufferFree ( GinBuffer buffer)
static

Definition at line 1606 of file gininsert.c.

1607{
1608 if (buffer->items)
1609 pfree(buffer->items);
1610
1611 /* release byref values, do nothing for by-val ones */
1612 if (!GinBufferIsEmpty(buffer) &&
1613 (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1614 pfree(DatumGetPointer(buffer->key));
1615
1616 pfree(buffer);
1617}

References GinBuffer::category, DatumGetPointer(), GIN_CAT_NORM_KEY, GinBufferIsEmpty(), GinBuffer::items, GinBuffer::key, pfree(), and GinBuffer::typbyval.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferInit()

static GinBuffer * GinBufferInit ( Relation  index)
static

Definition at line 1270 of file gininsert.c.

1271{
1273 int i,
1274 nKeys;
1276
1277 /*
1278 * How many items can we fit into the memory limit? We don't want to end
1279 * with too many TIDs, and 64kB seems more than enough. But maybe this
1280 * should be tied to maintenance_work_mem or something like that?
1281 */
1282 buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
1283
1285
1286 buffer->ssup = palloc0_array(SortSupportData, nKeys);
1287
1288 /*
1289 * Lookup ordering operator for the index key data type, and initialize
1290 * the sort support function.
1291 */
1292 for (i = 0; i < nKeys; i++)
1293 {
1294 Oid cmpFunc;
1295 SortSupport sortKey = &buffer->ssup[i];
1297
1298 sortKey->ssup_cxt = CurrentMemoryContext;
1299 sortKey->ssup_collation = index->rd_indcollation[i];
1300
1301 if (!OidIsValid(sortKey->ssup_collation))
1302 sortKey->ssup_collation = DEFAULT_COLLATION_OID;
1303
1304 sortKey->ssup_nulls_first = false;
1305 sortKey->ssup_attno = i + 1;
1306 sortKey->abbreviate = false;
1307
1308 Assert(sortKey->ssup_attno != 0);
1309
1310 /*
1311 * If the compare proc isn't specified in the opclass definition, look
1312 * up the index key type's default btree comparator.
1313 */
1315 if (cmpFunc == InvalidOid)
1316 {
1317 TypeCacheEntry *typentry;
1318
1319 typentry = lookup_type_cache(att->atttypid,
1321 if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
1322 ereport(ERROR,
1324 errmsg("could not identify a comparison function for type %s",
1325 format_type_be(att->atttypid))));
1326
1327 cmpFunc = typentry->cmp_proc_finfo.fn_oid;
1328 }
1329
1331 }
1332
1333 return buffer;
1334}

References Assert, TypeCacheEntry::cmp_proc_finfo, CurrentMemoryContext, ereport, errcode(), errmsg, ERROR, fb(), FmgrInfo::fn_oid, format_type_be(), GIN_COMPARE_PROC, i, index_getprocid(), IndexRelationGetNumberOfKeyAttributes, InvalidOid, lookup_type_cache(), GinBuffer::maxitems, OidIsValid, palloc0_array, palloc0_object, PrepareSortSupportComparisonShim(), RelationGetDescr, GinBuffer::ssup, TupleDescAttr(), and TYPECACHE_CMP_PROC_FINFO.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferIsEmpty()

static bool GinBufferIsEmpty ( GinBuffer buffer)
static

Definition at line 1338 of file gininsert.c.

1339{
1340 return (buffer->nitems == 0);
1341}

References GinBuffer::nitems.

Referenced by _gin_parallel_merge(), _gin_process_worker_data(), GinBufferCanAddKey(), GinBufferFree(), GinBufferReset(), and GinBufferStoreTuple().

◆ GinBufferKeyEquals()

static bool GinBufferKeyEquals ( GinBuffer buffer,
GinTuple tup 
)
static

Definition at line 1354 of file gininsert.c.

1355{
1356 int r;
1357 Datum tupkey;
1358
1359 AssertCheckGinBuffer(buffer);
1360
1361 if (tup->attrnum != buffer->attnum)
1362 return false;
1363
1364 /* same attribute should have the same type info */
1365 Assert(tup->typbyval == buffer->typbyval);
1366 Assert(tup->typlen == buffer->typlen);
1367
1368 if (tup->category != buffer->category)
1369 return false;
1370
1371 /*
1372 * For NULL/empty keys, this means equality, for normal keys we need to
1373 * compare the actual key value.
1374 */
1375 if (buffer->category != GIN_CAT_NORM_KEY)
1376 return true;
1377
1378 /*
1379 * For the tuple, get either the first sizeof(Datum) bytes for byval
1380 * types, or a pointer to the beginning of the data array.
1381 */
1382 tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
1383
1384 r = ApplySortComparator(buffer->key, false,
1385 tupkey, false,
1386 &buffer->ssup[buffer->attnum - 1]);
1387
1388 return (r == 0);
1389}

References ApplySortComparator(), Assert, AssertCheckGinBuffer(), GinBuffer::attnum, GinBuffer::category, fb(), GIN_CAT_NORM_KEY, GinBuffer::key, PointerGetDatum(), GinBuffer::ssup, GinBuffer::typbyval, and GinBuffer::typlen.

Referenced by GinBufferCanAddKey().

◆ GinBufferReset()

static void GinBufferReset ( GinBuffer buffer)
static

Definition at line 1560 of file gininsert.c.

1561{
1562 Assert(!GinBufferIsEmpty(buffer));
1563
1564 /* release byref values, do nothing for by-val ones */
1565 if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1566 pfree(DatumGetPointer(buffer->key));
1567
1568 /*
1569 * Not required, but makes it more likely to trigger NULL dereference if
1570 * using the value incorrectly, etc.
1571 */
1572 buffer->key = (Datum) 0;
1573
1574 buffer->attnum = 0;
1575 buffer->category = 0;
1576 buffer->keylen = 0;
1577 buffer->nitems = 0;
1578 buffer->nfrozen = 0;
1579
1580 buffer->typlen = 0;
1581 buffer->typbyval = 0;
1582}

References Assert, GinBuffer::attnum, GinBuffer::category, DatumGetPointer(), GIN_CAT_NORM_KEY, GinBufferIsEmpty(), GinBuffer::key, GinBuffer::keylen, GinBuffer::nfrozen, GinBuffer::nitems, pfree(), GinBuffer::typbyval, and GinBuffer::typlen.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferShouldTrim()

static bool GinBufferShouldTrim ( GinBuffer buffer,
GinTuple tup 
)
static

Definition at line 1420 of file gininsert.c.

1421{
1422 /*
1423 * Check if the last TID in the current list is frozen. This is the case
1424 * when merging non-overlapping lists, e.g. in each parallel worker.
1425 */
1426 if ((buffer->nitems > 0) &&
1427 (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
1428 GinTupleGetFirst(tup)) == 0))
1429 buffer->nfrozen = buffer->nitems;
1430
1431 /*
1432 * Now find the last TID we know to be frozen, i.e. the last TID right
1433 * before the new GIN tuple.
1434 *
1435 * Start with the first not-yet-frozen tuple, and walk until we find the
1436 * first TID that's higher. If we already know the whole list is frozen
1437 * (i.e. nfrozen == nitems), this does nothing.
1438 *
1439 * XXX This might do a binary search for sufficiently long lists, but it
1440 * does not seem worth the complexity. Overlapping lists should be rare
1441 * common, TID comparisons are cheap, and we should quickly freeze most of
1442 * the list.
1443 */
1444 for (int i = buffer->nfrozen; i < buffer->nitems; i++)
1445 {
1446 /* Is the TID after the first TID of the new tuple? Can't freeze. */
1447 if (ItemPointerCompare(&buffer->items[i],
1448 GinTupleGetFirst(tup)) > 0)
1449 break;
1450
1451 buffer->nfrozen++;
1452 }
1453
1454 /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
1455 if (buffer->nfrozen < 1024)
1456 return false;
1457
1458 /* no need to trim if we have not hit the memory limit yet */
1459 if ((buffer->nitems + tup->nitems) < buffer->maxitems)
1460 return false;
1461
1462 /*
1463 * OK, we have enough frozen TIDs to flush, and we have hit the memory
1464 * limit, so it's time to write it out.
1465 */
1466 return true;
1467}

References fb(), GinTupleGetFirst(), i, ItemPointerCompare(), GinBuffer::items, GinBuffer::maxitems, GinBuffer::nfrozen, and GinBuffer::nitems.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferStoreTuple()

static void GinBufferStoreTuple ( GinBuffer buffer,
GinTuple tup 
)
static

Definition at line 1493 of file gininsert.c.

1494{
1496 Datum key;
1497
1498 AssertCheckGinBuffer(buffer);
1499
1502
1503 /* if the buffer is empty, set the fields (and copy the key) */
1504 if (GinBufferIsEmpty(buffer))
1505 {
1506 buffer->category = tup->category;
1507 buffer->keylen = tup->keylen;
1508 buffer->attnum = tup->attrnum;
1509
1510 buffer->typlen = tup->typlen;
1511 buffer->typbyval = tup->typbyval;
1512
1513 if (tup->category == GIN_CAT_NORM_KEY)
1514 buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
1515 else
1516 buffer->key = (Datum) 0;
1517 }
1518
1519 /* add the new TIDs into the buffer, combine using merge-sort */
1520 {
1521 int nnew;
1522 ItemPointer new;
1523
1524 /*
1525 * Resize the array - we do this first, because we'll dereference the
1526 * first unfrozen TID, which would fail if the array is NULL. We'll
1527 * still pass 0 as number of elements in that array though.
1528 */
1529 if (buffer->items == NULL)
1530 buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1531 else
1532 buffer->items = repalloc(buffer->items,
1533 (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1534
1535 new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
1536 (buffer->nitems - buffer->nfrozen), /* num of unfrozen */
1537 items, tup->nitems, &nnew);
1538
1539 Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
1540
1541 memcpy(&buffer->items[buffer->nfrozen], new,
1542 nnew * sizeof(ItemPointerData));
1543
1544 pfree(new);
1545
1546 buffer->nitems += tup->nitems;
1547
1549 }
1550
1551 /* free the decompressed TID list */
1552 pfree(items);
1553}

References _gin_parse_tuple_items(), _gin_parse_tuple_key(), Assert, AssertCheckGinBuffer(), AssertCheckItemPointers(), GinBuffer::attnum, GinBuffer::category, datumCopy(), fb(), GIN_CAT_NORM_KEY, GinBufferIsEmpty(), ginMergeItemPointers(), GinBuffer::items, items, GinBuffer::key, GinBuffer::keylen, GinBuffer::nfrozen, GinBuffer::nitems, palloc(), pfree(), repalloc(), GinBuffer::typbyval, and GinBuffer::typlen.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ GinBufferTrim()

static void GinBufferTrim ( GinBuffer buffer)
static

Definition at line 1590 of file gininsert.c.

1591{
1592 Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
1593
1594 memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
1595 sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
1596
1597 buffer->nitems -= buffer->nfrozen;
1598 buffer->nfrozen = 0;
1599}

References Assert, fb(), GinBuffer::items, GinBuffer::nfrozen, and GinBuffer::nitems.

Referenced by _gin_parallel_merge(), and _gin_process_worker_data().

◆ ginbuild()

IndexBuildResult * ginbuild ( Relation  heap,
Relation  index,
IndexInfo indexInfo 
)

Definition at line 618 of file gininsert.c.

619{
620 IndexBuildResult *result;
621 double reltuples;
627 Datum key;
628 GinNullCategory category;
629 uint32 nlist;
632
634 elog(ERROR, "index \"%s\" already contains data",
636
637 initGinState(&buildstate.ginstate, index);
638 buildstate.indtuples = 0;
639 memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
640
641 /* Initialize fields for parallel build too. */
642 buildstate.bs_numtuples = 0;
643 buildstate.bs_reltuples = 0;
644 buildstate.bs_leader = NULL;
645 memset(&buildstate.tid, 0, sizeof(ItemPointerData));
646
647 /* initialize the meta page */
649
650 /* initialize the root page */
652
658
659
663
664 /* count the root as first entry page */
665 buildstate.buildStats.nEntryPages++;
666
667 /*
668 * create a temporary memory context that is used to hold data not yet
669 * dumped out to the index
670 */
672 "Gin build temporary context",
674
675 /*
676 * create a temporary memory context that is used for calling
677 * ginExtractEntries(), and can be reset after each tuple
678 */
680 "Gin build temporary context for user-defined function",
682
683 buildstate.accum.ginstate = &buildstate.ginstate;
684 ginInitBA(&buildstate.accum);
685
686 /* Report table scan phase started */
689
690 /*
691 * Attempt to launch parallel worker scan when required
692 *
693 * XXX plan_create_index_workers makes the number of workers dependent on
694 * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
695 * reasonable too, because we sort the data just like btree. It does
696 * ignore the memory used to accumulate data in memory (set by work_mem),
697 * but there is no way to communicate that to plan_create_index_workers.
698 */
699 if (indexInfo->ii_ParallelWorkers > 0)
700 _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
701 indexInfo->ii_ParallelWorkers);
702
703 /*
704 * If parallel build requested and at least one worker process was
705 * successfully launched, set up coordination state, wait for workers to
706 * complete. Then read all tuples from the shared tuplesort and insert
707 * them into the index.
708 *
709 * In serial mode, simply scan the table and build the index one index
710 * tuple at a time.
711 */
712 if (state->bs_leader)
713 {
715
717 coordinate->isWorker = false;
718 coordinate->nParticipants =
719 state->bs_leader->nparticipanttuplesorts;
720 coordinate->sharedsort = state->bs_leader->sharedsort;
721
722 /*
723 * Begin leader tuplesort.
724 *
725 * In cases where parallelism is involved, the leader receives the
726 * same share of maintenance_work_mem as a serial sort (it is
727 * generally treated in the same way as a serial sort once we return).
728 * Parallel worker Tuplesortstates will have received only a fraction
729 * of maintenance_work_mem, though.
730 *
731 * We rely on the lifetime of the Leader Tuplesortstate almost not
732 * overlapping with any worker Tuplesortstate's lifetime. There may
733 * be some small overlap, but that's okay because we rely on leader
734 * Tuplesortstate only allocating a small, fixed amount of memory
735 * here. When its tuplesort_performsort() is called (by our caller),
736 * and significant amounts of memory are likely to be used, all
737 * workers must have already freed almost all memory held by their
738 * Tuplesortstates (they are about to go away completely, too). The
739 * overall effect is that maintenance_work_mem always represents an
740 * absolute high watermark on the amount of memory used by a CREATE
741 * INDEX operation, regardless of the use of parallelism or any other
742 * factor.
743 */
744 state->bs_sortstate =
748
749 /* scan the relation in parallel and merge per-worker results */
750 reltuples = _gin_parallel_merge(state);
751
752 _gin_end_parallel(state->bs_leader, state);
753 }
754 else /* no parallel index build */
755 {
756 /*
757 * Do the heap scan. We disallow sync scan here because
758 * dataPlaceToPage prefers to receive tuples in TID order.
759 */
760 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
762
763 /* dump remaining entries to the index */
766 while ((list = ginGetBAEntry(&buildstate.accum,
767 &attnum, &key, &category, &nlist)) != NULL)
768 {
769 /* there could be many entries, so be willing to abort here */
771 ginEntryInsert(&buildstate.ginstate, attnum, key, category,
772 list, nlist, &buildstate.buildStats);
773 }
775 }
776
779
780 /*
781 * Update metapage stats
782 */
783 buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
784 ginUpdateStats(index, &buildstate.buildStats, true);
785
786 /*
787 * We didn't write WAL records as we built the index, so if WAL-logging is
788 * required, write all pages to the WAL now.
789 */
791 {
794 true);
795 }
796
797 /*
798 * Return statistics
799 */
801
802 result->heap_tuples = reltuples;
803 result->index_tuples = buildstate.indtuples;
804
805 return result;
806}

References _gin_begin_parallel(), _gin_end_parallel(), _gin_parallel_merge(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, attnum, CHECK_FOR_INTERRUPTS, CurrentMemoryContext, elog, END_CRIT_SECTION, ERROR, fb(), GIN_LEAF, ginBeginBAScan(), ginBuildCallback(), ginEntryInsert(), ginGetBAEntry(), ginInitBA(), GinInitBuffer(), GinInitMetabuffer(), GinNewBuffer(), ginUpdateStats(), IndexBuildResult::heap_tuples, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexBuildResult::index_tuples, initGinState(), log_newpage_range(), MAIN_FORKNUM, maintenance_work_mem, MarkBufferDirty(), MemoryContextDelete(), MemoryContextSwitchTo(), palloc0_object, palloc_object, pgstat_progress_update_param(), PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, table_index_build_scan(), tuplesort_begin_index_gin(), TUPLESORT_NONE, and UnlockReleaseBuffer().

Referenced by ginhandler().

◆ ginBuildCallback()

static void ginBuildCallback ( Relation  index,
ItemPointer  tid,
Datum values,
bool isnull,
bool  tupleIsAlive,
void state 
)
static

Definition at line 452 of file gininsert.c.

454{
457 int i;
458
460
461 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
463 values[i], isnull[i], tid);
464
465 /* If we've maxed out our available memory, dump everything to the index */
466 if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
467 {
469 Datum key;
470 GinNullCategory category;
471 uint32 nlist;
473
474 ginBeginBAScan(&buildstate->accum);
475 while ((list = ginGetBAEntry(&buildstate->accum,
476 &attnum, &key, &category, &nlist)) != NULL)
477 {
478 /* there could be many entries, so be willing to abort here */
480 ginEntryInsert(&buildstate->ginstate, attnum, key, category,
481 list, nlist, &buildstate->buildStats);
482 }
483
485 ginInitBA(&buildstate->accum);
486 }
487
489}

References attnum, CHECK_FOR_INTERRUPTS, fb(), ginBeginBAScan(), ginEntryInsert(), ginGetBAEntry(), ginHeapTupleBulkInsert(), ginInitBA(), i, maintenance_work_mem, MemoryContextReset(), MemoryContextSwitchTo(), and values.

Referenced by ginbuild().

◆ ginBuildCallbackParallel()

static void ginBuildCallbackParallel ( Relation  index,
ItemPointer  tid,
Datum values,
bool isnull,
bool  tupleIsAlive,
void state 
)
static

Definition at line 570 of file gininsert.c.

572{
575 int i;
576
578
579 /*
580 * if scan wrapped around - flush accumulated entries and start anew
581 *
582 * With parallel scans, we don't have a guarantee the scan does not start
583 * half-way through the relation (serial builds disable sync scans and
584 * always start from block 0, parallel scans require allow_sync=true).
585 *
586 * Building the posting lists assumes the TIDs are monotonic and never go
587 * back, and the wrap around would break that. We handle that by detecting
588 * the wraparound, and flushing all entries. This means we'll later see
589 * two separate entries with non-overlapping TID lists (which can be
590 * combined by merge sort).
591 *
592 * To detect a wraparound, we remember the last TID seen by each worker
593 * (for any key). If the next TID seen by the worker is lower, the scan
594 * must have wrapped around.
595 */
596 if (ItemPointerCompare(tid, &buildstate->tid) < 0)
598
599 /* remember the TID we're about to process */
600 buildstate->tid = *tid;
601
602 for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
604 values[i], isnull[i], tid);
605
606 /*
607 * If we've maxed out our available memory, dump everything to the
608 * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
609 * the other half is used for the tuplesort.
610 */
611 if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
613
615}

References fb(), ginFlushBuildState(), ginHeapTupleBulkInsert(), i, ItemPointerCompare(), MemoryContextSwitchTo(), and values.

Referenced by _gin_parallel_scan_and_build().

◆ ginbuildempty()

◆ ginEntryInsert()

void ginEntryInsert ( GinState ginstate,
OffsetNumber  attnum,
Datum  key,
GinNullCategory  category,
ItemPointerData items,
uint32  nitem,
GinStatsData buildStats 
)

Definition at line 351 of file gininsert.c.

355{
356 GinBtreeData btree;
358 GinBtreeStack *stack;
359 IndexTuple itup;
360 Page page;
361
362 insertdata.isDelete = false;
363
364 ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
365 btree.isBuild = (buildStats != NULL);
366
367 stack = ginFindLeafPage(&btree, false, false);
368 page = BufferGetPage(stack->buffer);
369
370 if (btree.findItem(&btree, stack))
371 {
372 /* found pre-existing entry */
373 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
374
375 if (GinIsPostingTree(itup))
376 {
377 /* add entries to existing posting tree */
379
380 /* release all stack */
382 freeGinBtreeStack(stack);
383
384 /* insert into posting tree */
386 items, nitem,
387 buildStats);
388 return;
389 }
390
393 /* modify an existing leaf entry */
394 itup = addItemPointersToLeafTuple(ginstate, itup,
395 items, nitem, buildStats, stack->buffer);
396
397 insertdata.isDelete = true;
398 }
399 else
400 {
403 /* no match, so construct a new leaf entry */
404 itup = buildFreshLeafTuple(ginstate, attnum, key, category,
405 items, nitem, buildStats, stack->buffer);
406
407 /*
408 * nEntries counts leaf tuples, so increment it only when we make a
409 * new one.
410 */
411 if (buildStats)
412 buildStats->nEntries++;
413 }
414
415 /* Insert the new or modified leaf tuple */
416 insertdata.entry = itup;
417 ginInsertValue(&btree, stack, &insertdata, buildStats);
418 pfree(itup);
419}

References addItemPointersToLeafTuple(), attnum, GinBtreeStack::buffer, BufferGetBlockNumber(), BufferGetPage(), buildFreshLeafTuple(), CheckForSerializableConflictIn(), fb(), GinBtreeData::findItem, freeGinBtreeStack(), GIN_UNLOCK, ginFindLeafPage(), GinGetPostingTree, ginInsertItemPointers(), ginInsertValue(), GinIsPostingTree, ginPrepareEntryScan(), GinState::index, GinBtreeData::isBuild, items, LockBuffer(), GinStatsData::nEntries, GinBtreeStack::off, PageGetItem(), PageGetItemId(), and pfree().

Referenced by _gin_parallel_merge(), ginbuild(), ginBuildCallback(), ginHeapTupleInsert(), and ginInsertCleanup().

◆ ginFlushBuildState()

static void ginFlushBuildState ( GinBuildState buildstate,
Relation  index 
)
static

Definition at line 505 of file gininsert.c.

506{
508 Datum key;
509 GinNullCategory category;
510 uint32 nlist;
513 uint32 maxlen;
514
515 /* maximum number of TIDs per chunk (two chunks per worker) */
516 maxlen = MaxAllocSize / sizeof(ItemPointerData);
517 maxlen /= (2 * buildstate->bs_num_workers);
518
519 ginBeginBAScan(&buildstate->accum);
520 while ((list = ginGetBAEntry(&buildstate->accum,
521 &attnum, &key, &category, &nlist)) != NULL)
522 {
523 /* information about the key */
525
526 /* start of the chunk */
527 uint32 offset = 0;
528
529 /* split the entry into smaller chunk with up to maxlen items */
530 while (offset < nlist)
531 {
532 /* GIN tuple and tuple length */
533 GinTuple *tup;
534 Size tuplen;
535 uint32 len = Min(maxlen, nlist - offset);
536
537 /* there could be many entries, so be willing to abort here */
539
540 tup = _gin_build_tuple(attnum, category,
541 key, attr->attlen, attr->attbyval,
542 &list[offset], len,
543 &tuplen);
544
545 offset += len;
546
547 tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
548
549 pfree(tup);
550 }
551 }
552
554 ginInitBA(&buildstate->accum);
555}

References _gin_build_tuple(), CompactAttribute::attbyval, CompactAttribute::attlen, attnum, CHECK_FOR_INTERRUPTS, fb(), ginBeginBAScan(), ginGetBAEntry(), ginInitBA(), len, MaxAllocSize, MemoryContextReset(), Min, pfree(), RelationGetDescr, TupleDescCompactAttr(), and tuplesort_putgintuple().

Referenced by _gin_parallel_scan_and_build(), and ginBuildCallbackParallel().

◆ ginHeapTupleBulkInsert()

static void ginHeapTupleBulkInsert ( GinBuildState buildstate,
OffsetNumber  attnum,
Datum  value,
bool  isNull,
ItemPointer  heapptr 
)
static

Definition at line 428 of file gininsert.c.

431{
432 Datum *entries;
433 GinNullCategory *categories;
434 int32 nentries;
436
438 entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
439 value, isNull,
440 &nentries, &categories);
442
444 entries, categories, nentries);
445
446 buildstate->indtuples += nentries;
447
449}

References attnum, fb(), ginExtractEntries(), ginInsertBAEntries(), MemoryContextReset(), MemoryContextSwitchTo(), and value.

Referenced by ginBuildCallback(), and ginBuildCallbackParallel().

◆ ginHeapTupleInsert()

static void ginHeapTupleInsert ( GinState ginstate,
OffsetNumber  attnum,
Datum  value,
bool  isNull,
ItemPointer  item 
)
static

Definition at line 843 of file gininsert.c.

846{
847 Datum *entries;
848 GinNullCategory *categories;
849 int32 i,
850 nentries;
851
852 entries = ginExtractEntries(ginstate, attnum, value, isNull,
853 &nentries, &categories);
854
855 for (i = 0; i < nentries; i++)
856 {
857 /* there could be many entries, so be willing to abort here */
859 ginEntryInsert(ginstate, attnum, entries[i], categories[i],
860 item, 1, NULL);
861 }
862}

References attnum, CHECK_FOR_INTERRUPTS, fb(), ginEntryInsert(), ginExtractEntries(), i, and value.

Referenced by gininsert().

◆ gininsert()

bool gininsert ( Relation  index,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 865 of file gininsert.c.

870{
871 GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
874 int i;
875
876 /* Initialize GinState cache if first call in this statement */
877 if (ginstate == NULL)
878 {
880 ginstate = palloc_object(GinState);
881 initGinState(ginstate, index);
882 indexInfo->ii_AmCache = ginstate;
884 }
885
887 "Gin insert temporary context",
889
891
893 {
895
896 memset(&collector, 0, sizeof(GinTupleCollector));
897
898 for (i = 0; i < ginstate->origTupdesc->natts; i++)
900 (OffsetNumber) (i + 1),
901 values[i], isnull[i],
902 ht_ctid);
903
905 }
906 else
907 {
908 for (i = 0; i < ginstate->origTupdesc->natts; i++)
909 ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
910 values[i], isnull[i],
911 ht_ctid);
912 }
913
916
917 return false;
918}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, CurrentMemoryContext, fb(), GinGetUseFastUpdate, ginHeapTupleFastCollect(), ginHeapTupleFastInsert(), ginHeapTupleInsert(), i, IndexInfo::ii_AmCache, IndexInfo::ii_Context, initGinState(), MemoryContextDelete(), MemoryContextSwitchTo(), TupleDescData::natts, GinState::origTupdesc, palloc_object, and values.

Referenced by ginhandler().