PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nbtsort.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nbtsort.c
4 * Build a btree from sorted input by loading leaf pages sequentially.
5 *
6 * NOTES
7 *
8 * We use tuplesort.c to sort the given index tuples into order.
9 * Then we scan the index tuples in order and build the btree pages
10 * for each level. We load source tuples into leaf-level pages.
11 * Whenever we fill a page at one level, we add a link to it to its
12 * parent level (starting a new parent level if necessary). When
13 * done, we write out each final page on each level, adding it to
14 * its parent level. When we have only one page on a level, it must be
15 * the root -- it can be attached to the btree metapage and we are done.
16 *
17 * It is not wise to pack the pages entirely full, since then *any*
18 * insertion would cause a split (and not only of the leaf page; the need
19 * for a split would cascade right up the tree). The steady-state load
20 * factor for btrees is usually estimated at 70%. We choose to pack leaf
21 * pages to the user-controllable fill factor (default 90%) while upper pages
22 * are always packed to 70%. This gives us reasonable density (there aren't
23 * many upper pages if the keys are reasonable-size) without risking a lot of
24 * cascading splits during early insertions.
25 *
26 * We use the bulk smgr loading facility to bypass the buffer cache and
27 * WAL-log the pages efficiently.
28 *
29 * This code isn't concerned about the FSM at all. The caller is responsible
30 * for initializing that.
31 *
32 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
33 * Portions Copyright (c) 1994, Regents of the University of California
34 *
35 * IDENTIFICATION
36 * src/backend/access/nbtree/nbtsort.c
37 *
38 *-------------------------------------------------------------------------
39 */
40
41#include "postgres.h"
42
43#include "access/nbtree.h"
44#include "access/parallel.h"
45#include "access/relscan.h"
46#include "access/table.h"
47#include "access/tableam.h"
48#include "access/xact.h"
49#include "catalog/index.h"
50#include "commands/progress.h"
51#include "executor/instrument.h"
52#include "miscadmin.h"
53#include "pgstat.h"
54#include "storage/bulk_write.h"
55#include "storage/proc.h"
56#include "tcop/tcopprot.h"
57#include "utils/rel.h"
58#include "utils/sortsupport.h"
59#include "utils/tuplesort.h"
60
61
 62/* Magic numbers for parallel state sharing */
 62/* (shm_toc keys used to locate each piece of shared state in the DSM segment) */
 63#define PARALLEL_KEY_BTREE_SHARED UINT64CONST(0xA000000000000001) /* BTShared build-coordination state */
 64#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) /* tuplesort.c-private shared state for the main spool */
 65#define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003) /* same, for the dead-tuple spool (unique builds only) */
 66#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) /* query text shared with workers -- presumably for worker error reporting; confirm in _bt_begin_parallel */
 67#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xA000000000000005) /* per-worker WAL usage counters -- NOTE(review): likely instrumentation accumulated by leader; verify */
 68#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000006) /* per-worker buffer usage counters -- NOTE(review): likely instrumentation accumulated by leader; verify */
69
70/*
71 * DISABLE_LEADER_PARTICIPATION disables the leader's participation in
72 * parallel index builds. This may be useful as a debugging aid.
73 */
74/* #define DISABLE_LEADER_PARTICIPATION */
75
76/*
77 * Status record for spooling/sorting phase. (Note we may have two of
78 * these due to the special requirements for uniqueness-checking with
79 * dead tuples.)
80 */
81typedef struct BTSpool
82{
83 Tuplesortstate *sortstate; /* state data for tuplesort.c */
89
90/*
91 * Status for index builds performed in parallel. This is allocated in a
92 * dynamic shared memory segment. Note that there is a separate tuplesort TOC
93 * entry, private to tuplesort.c but allocated by this module on its behalf.
94 */
95typedef struct BTShared
96{
97 /*
98 * These fields are not modified during the sort. They primarily exist
99 * for the benefit of worker processes that need to create BTSpool state
100 * corresponding to that used by the leader.
101 */
108
109 /* Query ID, for report in worker processes */
111
112 /*
113 * workersdonecv is used to monitor the progress of workers. All parallel
114 * participants must indicate that they are done before leader can use
115 * mutable state that workers maintain during scan (and before leader can
116 * proceed to tuplesort_performsort()).
117 */
119
120 /*
121 * mutex protects all fields before heapdesc.
122 *
123 * These fields contain status information of interest to B-Tree index
124 * builds that must work just the same when an index is built in parallel.
125 */
127
128 /*
129 * Mutable state that is maintained by workers, and reported back to
130 * leader at end of parallel scan.
131 *
132 * nparticipantsdone is number of worker processes finished.
133 *
134 * reltuples is the total number of input heap tuples.
135 *
136 * havedead indicates if RECENTLY_DEAD tuples were encountered during
137 * build.
138 *
139 * indtuples is the total number of tuples that made it into the index.
140 *
141 * brokenhotchain indicates if any worker detected a broken HOT chain
142 * during build.
143 */
145 double reltuples;
147 double indtuples;
149
150 /*
151 * ParallelTableScanDescData data follows. Can't directly embed here, as
152 * implementations of the parallel table scan desc interface might need
153 * stronger alignment.
154 */
156
157/*
158 * Return pointer to a BTShared's parallel table scan.
159 *
160 * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
161 * MAXALIGN.
162 */
163#define ParallelTableScanFromBTShared(shared) \
164 (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))
165
166/*
167 * Status for leader in parallel index build.
168 */
169typedef struct BTLeader
170{
171 /* parallel context itself */
173
174 /*
175 * nparticipanttuplesorts is the exact number of worker processes
176 * successfully launched, plus one leader process if it participates as a
177 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
178 * participating as a worker).
179 */
181
182 /*
183 * Leader process convenience pointers to shared state (leader avoids TOC
184 * lookups).
185 *
186 * btshared is the shared state for entire build. sharedsort is the
187 * shared, tuplesort-managed state passed to each process tuplesort.
188 * sharedsort2 is the corresponding btspool2 shared state, used only when
189 * building unique indexes. snapshot is the snapshot used by the scan iff
190 * an MVCC snapshot is required.
191 */
199
200/*
201 * Working state for btbuild and its callback.
202 *
203 * When parallel CREATE INDEX is used, there is a BTBuildState for each
204 * participant.
205 */
206typedef struct BTBuildState
207{
213
214 /*
215 * spool2 is needed only when the index is a unique index. Dead tuples are
216 * put into spool2 instead of spool in order to avoid uniqueness check.
217 */
219 double indtuples;
220
221 /*
222 * btleader is only present when a parallel index build is performed, and
223 * only in the leader process. (Actually, only the leader has a
224 * BTBuildState. Workers have their own spool and spool2, though.)
225 */
228
229/*
230 * Status record for a btree page being built. We have one of these
231 * for each active tree level.
232 */
233typedef struct BTPageState
234{
235 BulkWriteBuffer btps_buf; /* workspace for page building */
236 BlockNumber btps_blkno; /* block # to write this page at */
237 IndexTuple btps_lowkey; /* page's strict lower bound pivot tuple */
238 OffsetNumber btps_lastoff; /* last item offset loaded */
239 Size btps_lastextra; /* last item's extra posting list space */
240 uint32 btps_level; /* tree level (0 = leaf) */
241 Size btps_full; /* "full" if less than this much free space */
242 struct BTPageState *btps_next; /* link to parent level, if any */
244
245/*
246 * Overall status record for index writing phase.
247 */
256
257
258static double _bt_spools_heapscan(Relation heap, Relation index,
259 BTBuildState *buildstate, IndexInfo *indexInfo);
260static void _bt_spooldestroy(BTSpool *btspool);
261static void _bt_spool(BTSpool *btspool, const ItemPointerData *self,
262 const Datum *values, const bool *isnull);
265 bool *isnull, bool tupleIsAlive, void *state);
268static void _bt_slideleft(Page rightmostpage);
269static void _bt_sortaddtup(Page page, Size itemsize,
271 bool newfirstdataitem);
278static void _bt_load(BTWriteState *wstate,
280static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent,
281 int request);
282static void _bt_end_parallel(BTLeader *btleader);
285 bool *brokenhotchain);
288 BTShared *btshared, Sharedsort *sharedsort,
289 Sharedsort *sharedsort2, int sortmem,
290 bool progress);
291
292
293/*
294 * btbuild() -- build a new btree index.
295 */
298{
299 IndexBuildResult *result;
301 double reltuples;
302
303#ifdef BTREE_BUILD_STATS
305 ResetUsage();
306#endif /* BTREE_BUILD_STATS */
307
308 buildstate.isunique = indexInfo->ii_Unique;
309 buildstate.nulls_not_distinct = indexInfo->ii_NullsNotDistinct;
310 buildstate.havedead = false;
311 buildstate.heap = heap;
312 buildstate.spool = NULL;
313 buildstate.spool2 = NULL;
314 buildstate.indtuples = 0;
315 buildstate.btleader = NULL;
316
317 /*
318 * We expect to be called exactly once for any index relation. If that's
319 * not the case, big trouble's what we have.
320 */
322 elog(ERROR, "index \"%s\" already contains data",
324
325 reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
326
327 /*
328 * Finish the build by (1) completing the sort of the spool file, (2)
329 * inserting the sorted tuples into btree pages and (3) building the upper
330 * levels. Finally, it may also be necessary to end use of parallelism.
331 */
332 _bt_leafbuild(buildstate.spool, buildstate.spool2);
334 if (buildstate.spool2)
336 if (buildstate.btleader)
338
340
341 result->heap_tuples = reltuples;
342 result->index_tuples = buildstate.indtuples;
343
344#ifdef BTREE_BUILD_STATS
346 {
347 ShowUsage("BTREE BUILD STATS");
348 ResetUsage();
349 }
350#endif /* BTREE_BUILD_STATS */
351
352 return result;
353}
354
355/*
356 * Create and initialize one or two spool structures, and save them in caller's
357 * buildstate argument. May also fill-in fields within indexInfo used by index
358 * builds.
359 *
360 * Scans the heap, possibly in parallel, filling spools with IndexTuples. This
361 * routine encapsulates all aspects of managing parallelism. Caller need only
362 * call _bt_end_parallel() in parallel case after it is done with spool/spool2.
363 *
364 * Returns the total number of heap tuples scanned.
365 */
366static double
368 IndexInfo *indexInfo)
369{
372 double reltuples = 0;
373
374 /*
375 * We size the sort area as maintenance_work_mem rather than work_mem to
376 * speed index creation. This should be OK since a single backend can't
377 * run multiple index creations in parallel (see also: notes on
378 * parallelism and maintenance_work_mem below).
379 */
380 btspool->heap = heap;
381 btspool->index = index;
382 btspool->isunique = indexInfo->ii_Unique;
383 btspool->nulls_not_distinct = indexInfo->ii_NullsNotDistinct;
384
385 /* Save as primary spool */
386 buildstate->spool = btspool;
387
388 /* Report table scan phase started */
391
392 /* Attempt to launch parallel worker scan when required */
393 if (indexInfo->ii_ParallelWorkers > 0)
395 indexInfo->ii_ParallelWorkers);
396
397 /*
398 * If parallel build requested and at least one worker process was
399 * successfully launched, set up coordination state
400 */
401 if (buildstate->btleader)
402 {
404 coordinate->isWorker = false;
405 coordinate->nParticipants =
406 buildstate->btleader->nparticipanttuplesorts;
407 coordinate->sharedsort = buildstate->btleader->sharedsort;
408 }
409
410 /*
411 * Begin serial/leader tuplesort.
412 *
413 * In cases where parallelism is involved, the leader receives the same
414 * share of maintenance_work_mem as a serial sort (it is generally treated
415 * in the same way as a serial sort once we return). Parallel worker
416 * Tuplesortstates will have received only a fraction of
417 * maintenance_work_mem, though.
418 *
419 * We rely on the lifetime of the Leader Tuplesortstate almost not
420 * overlapping with any worker Tuplesortstate's lifetime. There may be
421 * some small overlap, but that's okay because we rely on leader
422 * Tuplesortstate only allocating a small, fixed amount of memory here.
423 * When its tuplesort_performsort() is called (by our caller), and
424 * significant amounts of memory are likely to be used, all workers must
425 * have already freed almost all memory held by their Tuplesortstates
426 * (they are about to go away completely, too). The overall effect is
427 * that maintenance_work_mem always represents an absolute high watermark
428 * on the amount of memory used by a CREATE INDEX operation, regardless of
429 * the use of parallelism or any other factor.
430 */
431 buildstate->spool->sortstate =
433 buildstate->nulls_not_distinct,
436
437 /*
438 * If building a unique index, put dead tuples in a second spool to keep
439 * them out of the uniqueness check. We expect that the second spool (for
440 * dead tuples) won't get very full, so we give it only work_mem.
441 */
442 if (indexInfo->ii_Unique)
443 {
446
447 /* Initialize secondary spool */
448 btspool2->heap = heap;
449 btspool2->index = index;
450 btspool2->isunique = false;
451 /* Save as secondary spool */
452 buildstate->spool2 = btspool2;
453
454 if (buildstate->btleader)
455 {
456 /*
457 * Set up non-private state that is passed to
458 * tuplesort_begin_index_btree() about the basic high level
459 * coordination of a parallel sort.
460 */
462 coordinate2->isWorker = false;
463 coordinate2->nParticipants =
464 buildstate->btleader->nparticipanttuplesorts;
465 coordinate2->sharedsort = buildstate->btleader->sharedsort2;
466 }
467
468 /*
469 * We expect that the second one (for dead tuples) won't get very
470 * full, so we give it only work_mem
471 */
472 buildstate->spool2->sortstate =
473 tuplesort_begin_index_btree(heap, index, false, false, work_mem,
475 }
476
477 /* Fill spool using either serial or parallel heap scan */
478 if (!buildstate->btleader)
479 reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
481 NULL);
482 else
484 &indexInfo->ii_BrokenHotChain);
485
486 /*
487 * Set the progress target for the next phase. Reset the block number
488 * values set by table_index_build_scan
489 */
490 {
491 const int progress_index[] = {
495 };
496 const int64 progress_vals[] = {
497 buildstate->indtuples,
498 0, 0
499 };
500
502 }
503
504 /* okay, all heap tuples are spooled */
505 if (buildstate->spool2 && !buildstate->havedead)
506 {
507 /* spool2 turns out to be unnecessary */
509 buildstate->spool2 = NULL;
510 }
511
512 return reltuples;
513}
514
515/*
516 * clean up a spool structure and its substructures.
517 */
518static void
524
525/*
526 * spool an index entry into the sort file.
527 */
528static void
529_bt_spool(BTSpool *btspool, const ItemPointerData *self, const Datum *values, const bool *isnull)
530{
532 self, values, isnull);
533}
534
535/*
536 * given a spool loaded by successive calls to _bt_spool,
537 * create an entire btree.
538 */
539static void
541{
543
544#ifdef BTREE_BUILD_STATS
546 {
547 ShowUsage("BTREE BUILD (Spool) STATISTICS");
548 ResetUsage();
549 }
550#endif /* BTREE_BUILD_STATS */
551
552 /* Execute the sort */
555 tuplesort_performsort(btspool->sortstate);
556 if (btspool2)
557 {
561 }
562
563 wstate.heap = btspool->heap;
564 wstate.index = btspool->index;
565 wstate.inskey = _bt_mkscankey(wstate.index, NULL);
566 /* _bt_mkscankey() won't set allequalimage without metapage */
567 wstate.inskey->allequalimage = _bt_allequalimage(wstate.index, true);
568
569 /* reserve the metapage */
570 wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
571
575}
576
577/*
578 * Per-tuple callback for table_index_build_scan
579 */
580static void
582 ItemPointer tid,
583 Datum *values,
584 bool *isnull,
585 bool tupleIsAlive,
586 void *state)
587{
589
590 /*
591 * insert the index tuple into the appropriate spool file for subsequent
592 * processing
593 */
594 if (tupleIsAlive || buildstate->spool2 == NULL)
595 _bt_spool(buildstate->spool, tid, values, isnull);
596 else
597 {
598 /* dead tuples are put into spool2 */
599 buildstate->havedead = true;
600 _bt_spool(buildstate->spool2, tid, values, isnull);
601 }
602
603 buildstate->indtuples += 1;
604}
605
606/*
607 * allocate workspace for a new, clean btree page, not linked to any siblings.
608 */
609static BulkWriteBuffer
611{
613 Page page;
614 BTPageOpaque opaque;
615
616 buf = smgr_bulk_get_buf(wstate->bulkstate);
617 page = (Page) buf;
618
619 /* Zero the page and set up standard page header info */
620 _bt_pageinit(page, BLCKSZ);
621
622 /* Initialize BT opaque state */
623 opaque = BTPageGetOpaque(page);
624 opaque->btpo_prev = opaque->btpo_next = P_NONE;
625 opaque->btpo_level = level;
626 opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
627 opaque->btpo_cycleid = 0;
628
629 /* Make the P_HIKEY line pointer appear allocated */
630 ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
631
632 return buf;
633}
634
635/*
636 * emit a completed btree page, and release the working storage.
637 */
638static void
640{
641 smgr_bulk_write(wstate->bulkstate, blkno, buf, true);
642 /* smgr_bulk_write took ownership of 'buf' */
643}
644
645/*
646 * allocate and initialize a new BTPageState. the returned structure
647 * is suitable for immediate use by _bt_buildadd.
648 */
649static BTPageState *
651{
653
654 /* create initial page for level */
655 state->btps_buf = _bt_blnewpage(wstate, level);
656
657 /* and assign it a page position */
658 state->btps_blkno = wstate->btws_pages_alloced++;
659
660 state->btps_lowkey = NULL;
661 /* initialize lastoff so first item goes into P_FIRSTKEY */
662 state->btps_lastoff = P_HIKEY;
663 state->btps_lastextra = 0;
664 state->btps_level = level;
665 /* set "full" threshold based on level. See notes at head of file. */
666 if (level > 0)
667 state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
668 else
669 state->btps_full = BTGetTargetPageFreeSpace(wstate->index);
670
671 /* no parent level, yet */
672 state->btps_next = NULL;
673
674 return state;
675}
676
677/*
678 * Slide the array of ItemIds from the page back one slot (from P_FIRSTKEY to
679 * P_HIKEY, overwriting P_HIKEY).
680 *
681 * _bt_blnewpage() makes the P_HIKEY line pointer appear allocated, but the
682 * rightmost page on its level is not supposed to get a high key. Now that
683 * it's clear that this page is a rightmost page, remove the unneeded empty
684 * P_HIKEY line pointer space.
685 */
686static void
688{
689 OffsetNumber off;
690 OffsetNumber maxoff;
692
694 Assert(maxoff >= P_FIRSTKEY);
696 for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
697 {
699
700 *previi = *thisii;
701 previi = thisii;
702 }
703 ((PageHeader) rightmostpage)->pd_lower -= sizeof(ItemIdData);
704}
705
706/*
707 * Add an item to a page being built.
708 *
709 * This is very similar to nbtinsert.c's _bt_pgaddtup(), but this variant
710 * raises an error directly.
711 *
712 * Note that our nbtsort.c caller does not know yet if the page will be
713 * rightmost. Offset P_FIRSTKEY is always assumed to be the first data key by
714 * caller. Page that turns out to be the rightmost on its level is fixed by
715 * calling _bt_slideleft().
716 */
717static void
720 const IndexTupleData *itup,
722 bool newfirstdataitem)
723{
725
727 {
728 trunctuple = *itup;
730 BTreeTupleSetNAtts(&trunctuple, 0, false);
731 itup = &trunctuple;
732 itemsize = sizeof(IndexTupleData);
733 }
734
735 if (PageAddItem(page, itup, itemsize, itup_off, false, false) == InvalidOffsetNumber)
736 elog(ERROR, "failed to add item to the index page");
737}
738
739/*----------
740 * Add an item to a disk page from the sort output (or add a posting list
741 * item formed from the sort output).
742 *
743 * We must be careful to observe the page layout conventions of nbtsearch.c:
744 * - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY.
745 * - on non-leaf pages, the key portion of the first item need not be
746 * stored, we should store only the link.
747 *
748 * A leaf page being built looks like:
749 *
750 * +----------------+---------------------------------+
751 * | PageHeaderData | linp0 linp1 linp2 ... |
752 * +-----------+----+---------------------------------+
753 * | ... linpN | |
754 * +-----------+--------------------------------------+
755 * | ^ last |
756 * | |
757 * +-------------+------------------------------------+
758 * | | itemN ... |
759 * +-------------+------------------+-----------------+
760 * | ... item3 item2 item1 | "special space" |
761 * +--------------------------------+-----------------+
762 *
763 * Contrast this with the diagram in bufpage.h; note the mismatch
764 * between linps and items. This is because we reserve linp0 as a
765 * placeholder for the pointer to the "high key" item; when we have
766 * filled up the page, we will set linp0 to point to itemN and clear
767 * linpN. On the other hand, if we find this is the last (rightmost)
768 * page, we leave the items alone and slide the linp array over. If
769 * the high key is to be truncated, offset 1 is deleted, and we insert
770 * the truncated high key at offset 1.
771 *
772 * 'last' pointer indicates the last offset added to the page.
773 *
774 * 'truncextra' is the size of the posting list in itup, if any. This
775 * information is stashed for the next call here, when we may benefit
776 * from considering the impact of truncating away the posting list on
777 * the page before deciding to finish the page off. Posting lists are
778 * often relatively large, so it is worth going to the trouble of
779 * accounting for the saving from truncating away the posting list of
780 * the tuple that becomes the high key (that may be the only way to
781 * get close to target free space on the page). Note that this is
782 * only used for the soft fillfactor-wise limit, not the critical hard
783 * limit.
784 *----------
785 */
786static void
789{
791 Page npage;
795 Size pgspc;
796 Size itupsz;
797 bool isleaf;
798
799 /*
800 * This is a handy place to check for cancel interrupts during the btree
801 * load phase of index creation.
802 */
804
805 nbuf = state->btps_buf;
806 npage = (Page) nbuf;
807 nblkno = state->btps_blkno;
808 last_off = state->btps_lastoff;
809 last_truncextra = state->btps_lastextra;
810 state->btps_lastextra = truncextra;
811
812 pgspc = PageGetFreeSpace(npage);
813 itupsz = IndexTupleSize(itup);
815 /* Leaf case has slightly different rules due to suffix truncation */
816 isleaf = (state->btps_level == 0);
817
818 /*
819 * Check whether the new item can fit on a btree page on current level at
820 * all.
821 *
822 * Every newly built index will treat heap TID as part of the keyspace,
823 * which imposes the requirement that new high keys must occasionally have
824 * a heap TID appended within _bt_truncate(). That may leave a new pivot
825 * tuple one or two MAXALIGN() quantums larger than the original
826 * firstright tuple it's derived from. v4 deals with the problem by
827 * decreasing the limit on the size of tuples inserted on the leaf level
828 * by the same small amount. Enforce the new v4+ limit on the leaf level,
829 * and the old limit on internal levels, since pivot tuples may need to
830 * make use of the reserved space. This should never fail on internal
831 * pages.
832 */
834 _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage,
835 itup);
836
837 /*
838 * Check to see if current page will fit new item, with space left over to
839 * append a heap TID during suffix truncation when page is a leaf page.
840 *
841 * It is guaranteed that we can fit at least 2 non-pivot tuples plus a
842 * high key with heap TID when finishing off a leaf page, since we rely on
843 * _bt_check_third_page() rejecting oversized non-pivot tuples. On
844 * internal pages we can always fit 3 pivot tuples with larger internal
845 * page tuple limit (includes page high key).
846 *
847 * Most of the time, a page is only "full" in the sense that the soft
848 * fillfactor-wise limit has been exceeded. However, we must always leave
849 * at least two items plus a high key on each page before starting a new
850 * page. Disregard fillfactor and insert on "full" current page if we
851 * don't have the minimum number of items yet. (Note that we deliberately
852 * assume that suffix truncation neither enlarges nor shrinks new high key
853 * when applying soft limit, except when last tuple has a posting list.)
854 */
856 if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) ||
858 {
859 /*
860 * Finish off the page and write it out.
861 */
863 Page opage = npage;
865 ItemId ii;
866 ItemId hii;
868
869 /* Create new page of same level */
870 nbuf = _bt_blnewpage(wstate, state->btps_level);
871 npage = (Page) nbuf;
872
873 /* and assign it a page position */
874 nblkno = wstate->btws_pages_alloced++;
875
876 /*
877 * We copy the last item on the page into the new page, and then
878 * rearrange the old page so that the 'last item' becomes its high key
879 * rather than a true data item. There had better be at least two
880 * items on the page already, else the page would be empty of useful
881 * data.
882 */
887 !isleaf);
888
889 /*
890 * Move 'last' into the high key position on opage. _bt_blnewpage()
891 * allocated empty space for a line pointer when opage was first
892 * created, so this is a matter of rearranging already-allocated space
893 * on page, and initializing high key line pointer. (Actually, leaf
894 * pages must also swap oitup with a truncated version of oitup, which
895 * is sometimes larger than oitup, though never by more than the space
896 * needed to append a heap TID.)
897 */
899 *hii = *ii;
900 ItemIdSetUnused(ii); /* redundant */
901 ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
902
903 if (isleaf)
904 {
905 IndexTuple lastleft;
906 IndexTuple truncated;
907
908 /*
909 * Truncate away any unneeded attributes from high key on leaf
910 * level. This is only done at the leaf level because downlinks
911 * in internal pages are either negative infinity items, or get
912 * their contents from copying from one level down. See also:
913 * _bt_split().
914 *
915 * We don't try to bias our choice of split point to make it more
916 * likely that _bt_truncate() can truncate away more attributes,
917 * whereas the split point used within _bt_split() is chosen much
918 * more delicately. Even still, the lastleft and firstright
919 * tuples passed to _bt_truncate() here are at least not fully
920 * equal to each other when deduplication is used, unless there is
921 * a large group of duplicates (also, unique index builds usually
922 * have few or no spool2 duplicates). When the split point is
923 * between two unequal tuples, _bt_truncate() will avoid including
924 * a heap TID in the new high key, which is the most important
925 * benefit of suffix truncation.
926 *
927 * Overwrite the old item with new truncated high key directly.
928 * oitup is already located at the physical beginning of tuple
929 * space, so this should directly reuse the existing tuple space.
930 */
932 lastleft = (IndexTuple) PageGetItem(opage, ii);
933
935 truncated = _bt_truncate(wstate->index, lastleft, oitup,
936 wstate->inskey);
937 if (!PageIndexTupleOverwrite(opage, P_HIKEY, truncated, IndexTupleSize(truncated)))
938 elog(ERROR, "failed to add high key to the index page");
939 pfree(truncated);
940
941 /* oitup should continue to point to the page's high key */
944 }
945
946 /*
947 * Link the old page into its parent, using its low key. If we don't
948 * have a parent, we have to create one; this adds a new btree level.
949 */
950 if (state->btps_next == NULL)
951 state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
952
953 Assert((BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) <=
955 BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) > 0) ||
957 Assert(BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) == 0 ||
959 BTreeTupleSetDownLink(state->btps_lowkey, oblkno);
960 _bt_buildadd(wstate, state->btps_next, state->btps_lowkey, 0);
961 pfree(state->btps_lowkey);
962
963 /*
964 * Save a copy of the high key from the old page. It is also the low
965 * key for the new page.
966 */
967 state->btps_lowkey = CopyIndexTuple(oitup);
968
969 /*
970 * Set the sibling links for both pages.
971 */
972 {
975
976 oopaque->btpo_next = nblkno;
977 nopaque->btpo_prev = oblkno;
978 nopaque->btpo_next = P_NONE; /* redundant */
979 }
980
981 /*
982 * Write out the old page. _bt_blwritepage takes ownership of the
983 * 'opage' buffer.
984 */
986
987 /*
988 * Reset last_off to point to new page
989 */
991 }
992
993 /*
994 * By here, either original page is still the current page, or a new page
995 * was created that became the current page. Either way, the current page
996 * definitely has space for new item.
997 *
998 * If the new item is the first for its page, it must also be the first
999 * item on its entire level. On later same-level pages, a low key for a
1000 * page will be copied from the prior page in the code above. Generate a
1001 * minus infinity low key here instead.
1002 */
1003 if (last_off == P_HIKEY)
1004 {
1005 Assert(state->btps_lowkey == NULL);
1006 state->btps_lowkey = palloc0_object(IndexTupleData);
1007 state->btps_lowkey->t_info = sizeof(IndexTupleData);
1008 BTreeTupleSetNAtts(state->btps_lowkey, 0, false);
1009 }
1010
1011 /*
1012 * Add the new item into the current page.
1013 */
1015 _bt_sortaddtup(npage, itupsz, itup, last_off,
1016 !isleaf && last_off == P_FIRSTKEY);
1017
1018 state->btps_buf = nbuf;
1019 state->btps_blkno = nblkno;
1020 state->btps_lastoff = last_off;
1021}
1022
1023/*
1024 * Finalize pending posting list tuple, and add it to the index. Final tuple
1025 * is based on saved base tuple, and saved list of heap TIDs.
1026 *
1027 * This is almost like _bt_dedup_finish_pending(), but it adds a new tuple
1028 * using _bt_buildadd().
1029 */
1030static void
1033{
1034 Assert(dstate->nitems > 0);
1035
1036 if (dstate->nitems == 1)
1037 _bt_buildadd(wstate, state, dstate->base, 0);
1038 else
1039 {
1042
1043 /* form a tuple with a posting list */
1045 dstate->htids,
1046 dstate->nhtids);
1047 /* Calculate posting list overhead */
1050
1053 }
1054
1055 dstate->nmaxitems = 0;
1056 dstate->nhtids = 0;
1057 dstate->nitems = 0;
1058 dstate->phystupsize = 0;
1059}
1060
1061/*
1062 * Finish writing out the completed btree.
1063 */
1064static void
1066{
1067 BTPageState *s;
1069 uint32 rootlevel = 0;
1071
1072 /*
1073 * Each iteration of this loop completes one more level of the tree.
1074 */
1075 for (s = state; s != NULL; s = s->btps_next)
1076 {
1077 BlockNumber blkno;
1078 BTPageOpaque opaque;
1079
1080 blkno = s->btps_blkno;
1081 opaque = BTPageGetOpaque((Page) s->btps_buf);
1082
1083 /*
1084 * We have to link the last page on this level to somewhere.
1085 *
1086 * If we're at the top, it's the root, so attach it to the metapage.
1087 * Otherwise, add an entry for it to its parent using its low key.
1088 * This may cause the last page of the parent level to split, but
1089 * that's not a problem -- we haven't gotten to it yet.
1090 */
1091 if (s->btps_next == NULL)
1092 {
/* Topmost level: remember root location for the metapage written below */
1093 opaque->btpo_flags |= BTP_ROOT;
1094 rootblkno = blkno;
1095 rootlevel = s->btps_level;
1096 }
1097 else
1098 {
/*
 * Per the asserts: the low key is truncated to zero attributes if and
 * only if this page is the leftmost on its level.
 */
1101 BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) > 0) ||
1102 P_LEFTMOST(opaque));
1103 Assert(BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) == 0 ||
1104 !P_LEFTMOST(opaque));
1107 pfree(s->btps_lowkey);
1108 s->btps_lowkey = NULL;
1109 }
1110
1111 /*
1112 * This is the rightmost page, so the ItemId array needs to be slid
1113 * back one slot. Then we can dump out the page.
1114 */
1117 s->btps_buf = NULL; /* writepage took ownership of the buffer */
1118 }
1119
1120 /*
1121 * As the last step in the process, construct the metapage and make it
1122 * point to the new root (unless we had no data at all, in which case it's
1123 * set to point to "P_NONE"). This changes the index to the "valid" state
1124 * by filling in a valid magic number in the metapage.
1125 */
1126 metabuf = smgr_bulk_get_buf(wstate->bulkstate);
1127 _bt_initmetapage((Page) metabuf, rootblkno, rootlevel,
1128 wstate->inskey->allequalimage);
1130}
1131
1132/*
1133 * Read tuples in correct sort order from tuplesort, and load them into
1134 * btree leaves.
1135 */
1136static void
1138{
1140 bool merge = (btspool2 != NULL);
1141 IndexTuple itup,
1142 itup2 = NULL;
1143 bool load1;
1145 int i,
1147 SortSupport sortKeys;
1148 int64 tuples_done = 0;
1149 bool deduplicate;
1150
1151 wstate->bulkstate = smgr_bulk_start_rel(wstate->index, MAIN_FORKNUM);
1152
/*
 * Deduplication requires opclass support (allequalimage) and a non-unique
 * index.  NOTE(review): the elided continuation presumably also checks the
 * deduplicate_items reloption (BTGetDeduplicateItems) -- confirm upstream.
 */
1153 deduplicate = wstate->inskey->allequalimage && !btspool->isunique &&
1155
1156 if (merge)
1157 {
1158 /*
1159 * Another BTSpool for dead tuples exists. Now we have to merge
1160 * btspool and btspool2.
1161 */
1162
1163 /* the preparation of merge */
1164 itup = tuplesort_getindextuple(btspool->sortstate, true);
1165 itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1166
1167 /* Prepare SortSupport data for each column */
1168 sortKeys = palloc0_array(SortSupportData, keysz);
1169
1170 for (i = 0; i < keysz; i++)
1171 {
1172 SortSupport sortKey = sortKeys + i;
1173 ScanKey scanKey = wstate->inskey->scankeys + i;
1174 bool reverse;
1175
1176 sortKey->ssup_cxt = CurrentMemoryContext;
1177 sortKey->ssup_collation = scanKey->sk_collation;
1178 sortKey->ssup_nulls_first =
1179 (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1180 sortKey->ssup_attno = scanKey->sk_attno;
1181 /* Abbreviation is not supported here */
1182 sortKey->abbreviate = false;
1183
1184 Assert(sortKey->ssup_attno != 0);
1185
1186 reverse = (scanKey->sk_flags & SK_BT_DESC) != 0;
1187
1189 }
1190
/*
 * Standard two-way merge: on each iteration pick the smaller of the two
 * stream heads, emit it, and advance that stream.
 */
1191 for (;;)
1192 {
1193 load1 = true; /* load BTSpool next ? */
1194 if (itup2 == NULL)
1195 {
1196 if (itup == NULL)
1197 break;
1198 }
1199 else if (itup != NULL)
1200 {
1201 int32 compare = 0;
1202
1203 for (i = 1; i <= keysz; i++)
1204 {
1205 SortSupport entry;
1207 attrDatum2;
1208 bool isNull1,
1209 isNull2;
1210
1211 entry = sortKeys + i - 1;
1214
1217 entry);
1218 if (compare > 0)
1219 {
1220 load1 = false;
1221 break;
1222 }
1223 else if (compare < 0)
1224 break;
1225 }
1226
1227 /*
1228 * If key values are equal, we sort on ItemPointer. This is
1229 * required for btree indexes, since heap TID is treated as an
1230 * implicit last key attribute in order to ensure that all
1231 * keys in the index are physically unique.
1232 */
1233 if (compare == 0)
1234 {
1235 compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid);
1236 Assert(compare != 0);
1237 if (compare > 0)
1238 load1 = false;
1239 }
1240 }
1241 else
/* spool1 is exhausted; drain remaining tuples from spool2 */
1242 load1 = false;
1243
1244 /* When we see first tuple, create first index page */
1245 if (state == NULL)
1247
1248 if (load1)
1249 {
1250 _bt_buildadd(wstate, state, itup, 0);
1251 itup = tuplesort_getindextuple(btspool->sortstate, true);
1252 }
1253 else
1254 {
1256 itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1257 }
1258
1259 /* Report progress */
1261 ++tuples_done);
1262 }
1263 pfree(sortKeys);
1264 }
1265 else if (deduplicate)
1266 {
1267 /* merge is unnecessary, deduplicate into posting lists */
1269
1271 dstate->deduplicate = true; /* unused */
1272 dstate->nmaxitems = 0; /* unused */
1273 dstate->maxpostingsize = 0; /* set later */
1274 /* Metadata about base tuple of current pending posting list */
1275 dstate->base = NULL;
1276 dstate->baseoff = InvalidOffsetNumber; /* unused */
1277 dstate->basetupsize = 0;
1278 /* Metadata about current pending posting list TIDs */
1279 dstate->htids = NULL;
1280 dstate->nhtids = 0;
1281 dstate->nitems = 0;
1282 dstate->phystupsize = 0; /* unused */
1283 dstate->nintervals = 0; /* unused */
1284
1285 while ((itup = tuplesort_getindextuple(btspool->sortstate,
1286 true)) != NULL)
1287 {
1288 /* When we see first tuple, create first index page */
1289 if (state == NULL)
1290 {
1292
1293 /*
1294 * Limit size of posting list tuples to 1/10 space we want to
1295 * leave behind on the page, plus space for final item's line
1296 * pointer. This is equal to the space that we'd like to
1297 * leave behind on each leaf page when fillfactor is 90,
1298 * allowing us to get close to fillfactor% space utilization
1299 * when there happen to be a great many duplicates. (This
1300 * makes higher leaf fillfactor settings ineffective when
1301 * building indexes that have many duplicates, but packing
1302 * leaf pages full with few very large tuples doesn't seem
1303 * like a useful goal.)
1304 */
1305 dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) -
1306 sizeof(ItemIdData);
1307 Assert(dstate->maxpostingsize <= BTMaxItemSize &&
1308 dstate->maxpostingsize <= INDEX_SIZE_MASK);
1309 dstate->htids = palloc(dstate->maxpostingsize);
1310
1311 /* start new pending posting list with itup copy */
1314 }
1315 else if (_bt_keep_natts_fast(wstate->index, dstate->base,
1316 itup) > keysz &&
1318 {
1319 /*
1320 * Tuple is equal to base tuple of pending posting list. Heap
1321 * TID from itup has been saved in state.
1322 */
1323 }
1324 else
1325 {
1326 /*
1327 * Tuple is not equal to pending posting list tuple, or
1328 * _bt_dedup_save_htid() opted to not merge current item into
1329 * pending posting list.
1330 */
1332 pfree(dstate->base);
1333
1334 /* start new pending posting list with itup copy */
1337 }
1338
1339 /* Report progress */
1341 ++tuples_done);
1342 }
1343
1344 if (state)
1345 {
1346 /*
1347 * Handle the last item (there must be a last item when the
1348 * tuplesort returned one or more tuples)
1349 */
1351 pfree(dstate->base);
1352 pfree(dstate->htids);
1353 }
1354
1355 pfree(dstate);
1356 }
1357 else
1358 {
1359 /* merging and deduplication are both unnecessary */
1360 while ((itup = tuplesort_getindextuple(btspool->sortstate,
1361 true)) != NULL)
1362 {
1363 /* When we see first tuple, create first index page */
1364 if (state == NULL)
1366
1367 _bt_buildadd(wstate, state, itup, 0);
1368
1369 /* Report progress */
1371 ++tuples_done);
1372 }
1373 }
1374
1375 /* Close down final pages and write the metapage */
1377 smgr_bulk_finish(wstate->bulkstate);
1378}
1379
1380/*
1381 * Create parallel context, and launch workers for leader.
1382 *
1383 * buildstate argument should be initialized (with the exception of the
1384 * tuplesort state in spools, which may later be created based on shared
1385 * state initially set up here).
1386 *
1387 * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
1388 *
1389 * request is the target number of parallel worker processes to launch.
1390 *
1391 * Sets buildstate's BTLeader, which caller must use to shut down parallel
1392 * mode by passing it to _bt_end_parallel() at the very end of its index
1393 * build. If not even a single worker process can be launched, this is
1394 * never set, and caller should proceed with a serial index build.
1395 */
1396static void
1398{
1399 ParallelContext *pcxt;
1400 int scantuplesortstates;
1401 Snapshot snapshot;
1403 Size estsort;
1404 BTShared *btshared;
1405 Sharedsort *sharedsort;
1406 Sharedsort *sharedsort2;
1407 BTSpool *btspool = buildstate->spool;
1408 BTLeader *btleader = palloc0_object(BTLeader);
1409 WalUsage *walusage;
1410 BufferUsage *bufferusage;
1411 bool leaderparticipates = true;
1412 int querylen;
1413
1414#ifdef DISABLE_LEADER_PARTICIPATION
1415 leaderparticipates = false;
1416#endif
1417
1418 /*
1419 * Enter parallel mode, and create context for parallel build of btree
1420 * index
1421 */
1423 Assert(request > 0);
1424 pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1425 request);
1426
/* The leader counts as an extra sorting participant unless disabled above */
1427 scantuplesortstates = leaderparticipates ? request + 1 : request;
1428
1429 /*
1430 * Prepare for scan of the base relation. In a normal index build, we use
1431 * SnapshotAny because we must retrieve all tuples and do our own time
1432 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1433 * concurrent build, we take a regular MVCC snapshot and index whatever's
1434 * live according to that.
1435 */
1436 if (!isconcurrent)
1437 snapshot = SnapshotAny;
1438 else
1440
1441 /*
1442 * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and
1443 * PARALLEL_KEY_TUPLESORT tuplesort workspace
1444 */
1447 estsort = tuplesort_estimate_shared(scantuplesortstates);
1449
1450 /*
1451 * Unique case requires a second spool, and so we may have to account for
1452 * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
1453 */
1454 if (!btspool->isunique)
1456 else
1457 {
1460 }
1461
1462 /*
1463 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
1464 * and PARALLEL_KEY_BUFFER_USAGE.
1465 *
1466 * If there are no extensions loaded that care, we could skip this. We
1467 * have no way of knowing whether anyone's looking at pgWalUsage or
1468 * pgBufferUsage, so do it unconditionally.
1469 */
1471 mul_size(sizeof(WalUsage), pcxt->nworkers));
1474 mul_size(sizeof(BufferUsage), pcxt->nworkers));
1476
1477 /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1479 {
1483 }
1484 else
1485 querylen = 0; /* keep compiler quiet */
1486
1487 /* Everyone's had a chance to ask for space, so now create the DSM */
1489
1490 /* If no DSM segment was available, back out (do serial build) */
1491 if (pcxt->seg == NULL)
1492 {
1493 if (IsMVCCSnapshot(snapshot))
1494 UnregisterSnapshot(snapshot);
1497 return;
1498 }
1499
1500 /* Store shared build state, for which we reserved space */
1501 btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1502 /* Initialize immutable state */
1503 btshared->heaprelid = RelationGetRelid(btspool->heap);
1504 btshared->indexrelid = RelationGetRelid(btspool->index);
1505 btshared->isunique = btspool->isunique;
1506 btshared->nulls_not_distinct = btspool->nulls_not_distinct;
1507 btshared->isconcurrent = isconcurrent;
1508 btshared->scantuplesortstates = scantuplesortstates;
1509 btshared->queryid = pgstat_get_my_query_id();
1511 SpinLockInit(&btshared->mutex);
1512 /* Initialize mutable state */
1513 btshared->nparticipantsdone = 0;
1514 btshared->reltuples = 0.0;
1515 btshared->havedead = false;
1516 btshared->indtuples = 0.0;
1517 btshared->brokenhotchain = false;
1520 snapshot);
1521
1522 /*
1523 * Store shared tuplesort-private state, for which we reserved space.
1524 * Then, initialize opaque state using tuplesort routine.
1525 */
1526 sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1527 tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1528 pcxt->seg);
1529
1531 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1532
1533 /* Unique case requires a second spool, and associated shared state */
1534 if (!btspool->isunique)
1535 sharedsort2 = NULL;
1536 else
1537 {
1538 /*
1539 * Store additional shared tuplesort-private state, for which we
1540 * reserved space. Then, initialize opaque state using tuplesort
1541 * routine.
1542 */
1543 sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1544 tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1545 pcxt->seg);
1546
1548 }
1549
1550 /* Store query string for workers */
1552 {
1553 char *sharedquery;
1554
1555 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1558 }
1559
1560 /*
1561 * Allocate space for each worker's WalUsage and BufferUsage; no need to
1562 * initialize.
1563 */
1564 walusage = shm_toc_allocate(pcxt->toc,
1565 mul_size(sizeof(WalUsage), pcxt->nworkers));
1566 shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1567 bufferusage = shm_toc_allocate(pcxt->toc,
1568 mul_size(sizeof(BufferUsage), pcxt->nworkers));
1569 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
1570
1571 /* Launch workers, saving status for leader/caller */
1573 btleader->pcxt = pcxt;
1574 btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1576 btleader->nparticipanttuplesorts++;
1577 btleader->btshared = btshared;
1578 btleader->sharedsort = sharedsort;
1579 btleader->sharedsort2 = sharedsort2;
1580 btleader->snapshot = snapshot;
1581 btleader->walusage = walusage;
1582 btleader->bufferusage = bufferusage;
1583
1584 /* If no workers were successfully launched, back out (do serial build) */
1585 if (pcxt->nworkers_launched == 0)
1586 {
1587 _bt_end_parallel(btleader);
1588 return;
1589 }
1590
1591 /* Save leader state now that it's clear build will be parallel */
1592 buildstate->btleader = btleader;
1593
1594 /* Join heap scan ourselves */
1597
1598 /*
1599 * Caller needs to wait for all launched workers when we return. Make
1600 * sure that the failure-to-start case will not hang forever.
1601 */
1603}
1604
1605/*
1606 * Shut down workers, destroy parallel context, and end parallel mode.
1607 */
1608static void
1610{
1611 int i;
1612
1613 /* Shutdown worker processes */
1615
1616 /*
1617 * Next, accumulate WAL usage. (This must wait for the workers to finish,
1618 * or we might get incomplete data.)
1619 */
1620 for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
1621 InstrAccumParallelQuery(&btleader->bufferusage[i], &btleader->walusage[i]);
1622
1623 /* Free last reference to MVCC snapshot, if one was used */
/*
 * SnapshotAny (used for non-concurrent builds) is not an MVCC snapshot,
 * so this only fires for CREATE INDEX CONCURRENTLY.
 */
1624 if (IsMVCCSnapshot(btleader->snapshot))
1625 UnregisterSnapshot(btleader->snapshot);
1626 DestroyParallelContext(btleader->pcxt);
1628}
1629
1630/*
1631 * Returns size of shared memory required to store state for a parallel
1632 * btree index build based on the snapshot its parallel scan will use.
1633 */
1634static Size
1636{
1637 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
/*
 * BTShared comes first; the parallel table scan descriptor (whose size
 * depends on the snapshot) is stored immediately after it.
 */
1638 return add_size(BUFFERALIGN(sizeof(BTShared)),
1639 table_parallelscan_estimate(heap, snapshot));
1640}
1641
1642/*
1643 * Within leader, wait for end of heap scan.
1644 *
1645 * When called, parallel heap scan started by _bt_begin_parallel() will
1646 * already be underway within worker processes (when leader participates
1647 * as a worker, we should end up here just as workers are finishing).
1648 *
1649 * Fills in fields needed for ambuild statistics, and lets caller set
1650 * field indicating that some worker encountered a broken HOT chain.
1651 *
1652 * Returns the total number of heap tuples scanned.
1653 */
1654static double
1656{
1657 BTShared *btshared = buildstate->btleader->btshared;
1658 int nparticipanttuplesorts;
1659 double reltuples;
1660
1661 nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
/*
 * Loop until every participant (workers, plus the leader if it joined as
 * a worker) has bumped nparticipantsdone; the shared counters are only
 * read consistently under the spinlock.
 */
1662 for (;;)
1663 {
1664 SpinLockAcquire(&btshared->mutex);
1665 if (btshared->nparticipantsdone == nparticipanttuplesorts)
1666 {
1667 buildstate->havedead = btshared->havedead;
1668 buildstate->indtuples = btshared->indtuples;
1669 *brokenhotchain = btshared->brokenhotchain;
1670 reltuples = btshared->reltuples;
1671 SpinLockRelease(&btshared->mutex);
1672 break;
1673 }
1674 SpinLockRelease(&btshared->mutex);
1675
1678 }
1679
1681
1682 return reltuples;
1683}
1684
1685/*
1686 * Within leader, participate as a parallel worker.
1687 */
1688static void
1690{
1691 BTLeader *btleader = buildstate->btleader;
1694 int sortmem;
1695
1696 /* Allocate memory and initialize private spool */
1698 leaderworker->heap = buildstate->spool->heap;
1699 leaderworker->index = buildstate->spool->index;
1700 leaderworker->isunique = buildstate->spool->isunique;
1701 leaderworker->nulls_not_distinct = buildstate->spool->nulls_not_distinct;
1702
1703 /* Initialize second spool, if required */
1704 if (!btleader->btshared->isunique)
1706 else
1707 {
1708 /* Allocate memory for worker's own private secondary spool */
1710
1711 /* Initialize worker's own secondary spool */
1712 leaderworker2->heap = leaderworker->heap;
1713 leaderworker2->index = leaderworker->index;
/* spool2 collects dead tuples only; it never enforces uniqueness */
1714 leaderworker2->isunique = false;
1715 }
1716
1717 /*
1718 * Might as well use reliable figure when doling out maintenance_work_mem
1719 * (when requested number of workers were not launched, this will be
1720 * somewhat higher than it is for other workers).
1721 */
1723
1724 /* Perform work common to all participants */
1726 btleader->sharedsort, btleader->sharedsort2,
1727 sortmem, true);
1728
1729#ifdef BTREE_BUILD_STATS
1731 {
1732 ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1733 ResetUsage();
1734 }
1735#endif /* BTREE_BUILD_STATS */
1736}
1737
1738/*
1739 * Perform work within a launched parallel process.
1740 */
1741void
1743{
1744 char *sharedquery;
1747 BTShared *btshared;
1748 Sharedsort *sharedsort;
1749 Sharedsort *sharedsort2;
1750 Relation heapRel;
1751 Relation indexRel;
1754 WalUsage *walusage;
1755 BufferUsage *bufferusage;
1756 int sortmem;
1757
1758#ifdef BTREE_BUILD_STATS
1760 ResetUsage();
1761#endif /* BTREE_BUILD_STATS */
1762
1763 /*
1764 * The only possible status flag that can be set to the parallel worker is
1765 * PROC_IN_SAFE_IC.
1766 */
1767 Assert((MyProc->statusFlags == 0) ||
1769
1770 /* Set debug_query_string for individual workers first */
1773
1774 /* Report the query string from leader */
1776
1777 /* Look up nbtree shared state */
1778 btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1779
1780 /* Open relations using lock modes known to be obtained by index.c */
1781 if (!btshared->isconcurrent)
1782 {
1785 }
1786 else
1787 {
1790 }
1791
1792 /* Track query ID */
1793 pgstat_report_query_id(btshared->queryid, false);
1794
1795 /* Open relations within worker */
1796 heapRel = table_open(btshared->heaprelid, heapLockmode);
1797 indexRel = index_open(btshared->indexrelid, indexLockmode);
1798
1799 /* Initialize worker's own spool */
1801 btspool->heap = heapRel;
1802 btspool->index = indexRel;
1803 btspool->isunique = btshared->isunique;
1804 btspool->nulls_not_distinct = btshared->nulls_not_distinct;
1805
1806 /* Look up shared state private to tuplesort.c */
1807 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
/* Attach this worker to the leader-initialized shared sort state */
1808 tuplesort_attach_shared(sharedsort, seg);
1809 if (!btshared->isunique)
1810 {
1811 btspool2 = NULL;
1812 sharedsort2 = NULL;
1813 }
1814 else
1815 {
1816 /* Allocate memory for worker's own private secondary spool */
1818
1819 /* Initialize worker's own secondary spool */
1820 btspool2->heap = btspool->heap;
1821 btspool2->index = btspool->index;
1822 btspool2->isunique = false;
1823 /* Look up shared state private to tuplesort.c */
1824 sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1825 tuplesort_attach_shared(sharedsort2, seg);
1826 }
1827
1828 /* Prepare to track buffer usage during parallel execution */
1830
1831 /* Perform sorting of spool, and possibly a spool2 */
1833 _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1834 sharedsort2, sortmem, false);
1835
1836 /* Report WAL/buffer usage during parallel execution */
1837 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
1838 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
1840 &walusage[ParallelWorkerNumber]);
1841
1842#ifdef BTREE_BUILD_STATS
1844 {
1845 ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1846 ResetUsage();
1847 }
1848#endif /* BTREE_BUILD_STATS */
1849
/* Close relations with the same lock modes they were opened with */
1850 index_close(indexRel, indexLockmode);
1851 table_close(heapRel, heapLockmode);
1852}
1853
1854/*
1855 * Perform a worker's portion of a parallel sort.
1856 *
1857 * This generates a tuplesort for passed btspool, and a second tuplesort
1858 * state if a second btspool is need (i.e. for unique index builds). All
1859 * other spool fields should already be set when this is called.
1860 *
1861 * sortmem is the amount of working memory to use within each worker,
1862 * expressed in KBs.
1863 *
1864 * When this returns, workers are done, and need only release resources.
1865 */
1866static void
1868 BTShared *btshared, Sharedsort *sharedsort,
1869 Sharedsort *sharedsort2, int sortmem, bool progress)
1870{
1873 TableScanDesc scan;
1874 double reltuples;
1875 IndexInfo *indexInfo;
1876
1877 /* Initialize local tuplesort coordination state */
1879 coordinate->isWorker = true;
/* nParticipants is only meaningful for the leader; -1 marks a worker */
1880 coordinate->nParticipants = -1;
1881 coordinate->sharedsort = sharedsort;
1882
1883 /* Begin "partial" tuplesort */
1884 btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1885 btspool->index,
1886 btspool->isunique,
1887 btspool->nulls_not_distinct,
1890
1891 /*
1892 * Just as with serial case, there may be a second spool. If so, a
1893 * second, dedicated spool2 partial tuplesort is required.
1894 */
1895 if (btspool2)
1896 {
1898
1899 /*
1900 * We expect that the second one (for dead tuples) won't get very
1901 * full, so we give it only work_mem (unless sortmem is less for
1902 * worker). Worker processes are generally permitted to allocate
1903 * work_mem independently.
1904 */
1906 coordinate2->isWorker = true;
1907 coordinate2->nParticipants = -1;
1908 coordinate2->sharedsort = sharedsort2;
1909 btspool2->sortstate =
1910 tuplesort_begin_index_btree(btspool->heap, btspool->index, false, false,
1912 false);
1913 }
1914
1915 /* Fill in buildstate for _bt_build_callback() */
1916 buildstate.isunique = btshared->isunique;
1917 buildstate.nulls_not_distinct = btshared->nulls_not_distinct;
1918 buildstate.havedead = false;
1919 buildstate.heap = btspool->heap;
1920 buildstate.spool = btspool;
1921 buildstate.spool2 = btspool2;
1922 buildstate.indtuples = 0;
1923 buildstate.btleader = NULL;
1924
1925 /* Join parallel scan */
1926 indexInfo = BuildIndexInfo(btspool->index);
1927 indexInfo->ii_Concurrent = btshared->isconcurrent;
1928 scan = table_beginscan_parallel(btspool->heap,
1930 reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
1932 &buildstate, scan);
1933
1934 /* Execute this worker's part of the sort */
1935 if (progress)
1938 tuplesort_performsort(btspool->sortstate);
1939 if (btspool2)
1940 {
1941 if (progress)
1944 tuplesort_performsort(btspool2->sortstate);
1945 }
1946
1947 /*
1948 * Done. Record ambuild statistics, and whether we encountered a broken
1949 * HOT chain.
1950 */
1951 SpinLockAcquire(&btshared->mutex);
1952 btshared->nparticipantsdone++;
1953 btshared->reltuples += reltuples;
1954 if (buildstate.havedead)
1955 btshared->havedead = true;
1956 btshared->indtuples += buildstate.indtuples;
1957 if (indexInfo->ii_BrokenHotChain)
1958 btshared->brokenhotchain = true;
1959 SpinLockRelease(&btshared->mutex);
1960
1961 /* Notify leader */
1963
1964 /* We can end tuplesorts immediately */
1965 tuplesort_end(btspool->sortstate);
1966 if (btspool2)
1967 tuplesort_end(btspool2->sortstate);
1968}
int ParallelWorkerNumber
Definition parallel.c:116
void InitializeParallelDSM(ParallelContext *pcxt)
Definition parallel.c:212
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition parallel.c:804
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition parallel.c:582
void DestroyParallelContext(ParallelContext *pcxt)
Definition parallel.c:958
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition parallel.c:174
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition parallel.c:701
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_report_query_id(int64 query_id, bool force)
int64 pgstat_get_my_query_id(void)
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
uint32 BlockNumber
Definition block.h:31
static Datum values[MAXATTR]
Definition bootstrap.c:187
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
Size PageGetFreeSpace(const PageData *page)
Definition bufpage.c:906
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, const void *newtup, Size newsize)
Definition bufpage.c:1404
PageHeaderData * PageHeader
Definition bufpage.h:173
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
PageData * Page
Definition bufpage.h:81
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition bufpage.h:477
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition bulk_write.c:87
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition bulk_write.c:347
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition bulk_write.c:130
#define MAXALIGN_DOWN(LEN)
Definition c.h:871
#define Min(x, y)
Definition c.h:1040
#define MAXALIGN(LEN)
Definition c.h:859
#define BUFFERALIGN(LEN)
Definition c.h:861
#define Assert(condition)
Definition c.h:906
int64_t int64
Definition c.h:576
int32_t int32
Definition c.h:575
#define unlikely(x)
Definition c.h:424
uint32_t uint32
Definition c.h:579
size_t Size
Definition c.h:652
bool ConditionVariableCancelSleep(void)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void ConditionVariableSignal(ConditionVariable *cv)
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
static int compare(const void *arg1, const void *arg2)
Definition geqo_pool.c:144
int maintenance_work_mem
Definition globals.c:133
int work_mem
Definition globals.c:131
bool log_btree_build_stats
Definition guc_tables.c:535
IndexInfo * BuildIndexInfo(Relation index)
Definition index.c:2426
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:133
IndexTuple CopyIndexTuple(IndexTuple source)
Definition indextuple.c:547
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:219
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:209
void InstrStartParallelQuery(void)
Definition instrument.c:201
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
IndexTupleData * IndexTuple
Definition itup.h:53
static Datum index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
Definition itup.h:131
static Size IndexTupleSize(const IndexTupleData *itup)
Definition itup.h:71
#define INDEX_SIZE_MASK
Definition itup.h:65
int LOCKMODE
Definition lockdefs.h:26
#define AccessExclusiveLock
Definition lockdefs.h:43
#define ShareUpdateExclusiveLock
Definition lockdefs.h:39
#define ShareLock
Definition lockdefs.h:40
#define RowExclusiveLock
Definition lockdefs.h:38
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition nbtdedup.c:484
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition nbtdedup.c:433
IndexTuple _bt_form_posting(IndexTuple base, const ItemPointerData *htids, int nhtids)
Definition nbtdedup.c:862
void _bt_pageinit(Page page, Size size)
Definition nbtpage.c:1130
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition nbtpage.c:68
#define BTGetDeduplicateItems(relation)
Definition nbtree.h:1135
#define BTGetTargetPageFreeSpace(relation)
Definition nbtree.h:1133
#define BTP_LEAF
Definition nbtree.h:77
#define P_HIKEY
Definition nbtree.h:368
#define PROGRESS_BTREE_PHASE_PERFORMSORT_2
Definition nbtree.h:1148
#define PROGRESS_BTREE_PHASE_LEAF_LOAD
Definition nbtree.h:1149
#define P_LEFTMOST(opaque)
Definition nbtree.h:219
#define BTPageGetOpaque(page)
Definition nbtree.h:74
#define BTP_ROOT
Definition nbtree.h:78
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition nbtree.h:563
#define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN
Definition nbtree.h:1146
#define PROGRESS_BTREE_PHASE_PERFORMSORT_1
Definition nbtree.h:1147
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition nbtree.h:530
#define P_NONE
Definition nbtree.h:213
#define SK_BT_NULLS_FIRST
Definition nbtree.h:1117
#define BTREE_METAPAGE
Definition nbtree.h:149
#define SK_BT_DESC
Definition nbtree.h:1116
#define P_FIRSTKEY
Definition nbtree.h:369
static void BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid)
Definition nbtree.h:596
#define BTMaxItemSize
Definition nbtree.h:165
#define BTreeTupleGetNAtts(itup, rel)
Definition nbtree.h:578
#define BTREE_NONLEAF_FILLFACTOR
Definition nbtree.h:202
#define PARALLEL_KEY_BUFFER_USAGE
Definition nbtsort.c:68
#define ParallelTableScanFromBTShared(shared)
Definition nbtsort.c:163
static void _bt_blwritepage(BTWriteState *wstate, BulkWriteBuffer buf, BlockNumber blkno)
Definition nbtsort.c:639
static void _bt_slideleft(Page rightmostpage)
Definition nbtsort.c:687
static BTPageState * _bt_pagestate(BTWriteState *wstate, uint32 level)
Definition nbtsort.c:650
static void _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
Definition nbtsort.c:1137
static void _bt_end_parallel(BTLeader *btleader)
Definition nbtsort.c:1609
#define PARALLEL_KEY_TUPLESORT_SPOOL2
Definition nbtsort.c:65
static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
Definition nbtsort.c:1867
static Size _bt_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition nbtsort.c:1635
static void _bt_sort_dedup_finish_pending(BTWriteState *wstate, BTPageState *state, BTDedupState dstate)
Definition nbtsort.c:1031
static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain)
Definition nbtsort.c:1655
static void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
Definition nbtsort.c:540
#define PARALLEL_KEY_BTREE_SHARED
Definition nbtsort.c:63
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition nbtsort.c:297
static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Definition nbtsort.c:1397
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
Definition nbtsort.c:787
void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition nbtsort.c:1742
static void _bt_build_callback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition nbtsort.c:581
static double _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
Definition nbtsort.c:367
static void _bt_spooldestroy(BTSpool *btspool)
Definition nbtsort.c:519
static void _bt_spool(BTSpool *btspool, const ItemPointerData *self, const Datum *values, const bool *isnull)
Definition nbtsort.c:529
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
Definition nbtsort.c:1065
#define PARALLEL_KEY_TUPLESORT
Definition nbtsort.c:64
#define PARALLEL_KEY_QUERY_TEXT
Definition nbtsort.c:66
#define PARALLEL_KEY_WAL_USAGE
Definition nbtsort.c:67
static BulkWriteBuffer _bt_blnewpage(BTWriteState *wstate, uint32 level)
Definition nbtsort.c:610
static void _bt_leader_participate_as_worker(BTBuildState *buildstate)
Definition nbtsort.c:1689
static void _bt_sortaddtup(Page page, Size itemsize, const IndexTupleData *itup, OffsetNumber itup_off, bool newfirstdataitem)
Definition nbtsort.c:718
void _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, Page page, IndexTuple newtup)
Definition nbtutils.c:1124
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition nbtutils.c:59
IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, BTScanInsert itup_key)
Definition nbtutils.c:698
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition nbtutils.c:917
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition nbtutils.c:1181
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define OffsetNumberPrev(offsetNumber)
Definition off.h:54
static pairingheap_node * merge(pairingheap *heap, pairingheap_node *a, pairingheap_node *b)
Definition pairingheap.c:93
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int progress
Definition pgbench.c:262
const char * debug_query_string
Definition postgres.c:90
void ShowUsage(const char *title)
Definition postgres.c:5102
void ResetUsage(void)
Definition postgres.c:5095
uint64_t Datum
Definition postgres.h:70
unsigned int Oid
static int fb(int x)
#define PROC_IN_SAFE_IC
Definition proc.h:60
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition progress.h:106
#define PROGRESS_SCAN_BLOCKS_DONE
Definition progress.h:142
#define PROGRESS_CREATEIDX_TUPLES_DONE
Definition progress.h:107
#define PROGRESS_CREATEIDX_SUBPHASE
Definition progress.h:105
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition progress.h:141
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition rel.h:533
@ MAIN_FORKNUM
Definition relpath.h:58
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:482
Size mul_size(Size s1, Size s2)
Definition shmem.c:497
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:824
#define SnapshotAny
Definition snapmgr.h:33
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:55
void PrepareSortSupportFromIndexRel(Relation indexRel, bool reverse, SortSupport ssup)
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
PGPROC * MyProc
Definition proc.c:67
bool isunique
Definition nbtsort.c:208
BTSpool * spool
Definition nbtsort.c:212
BTLeader * btleader
Definition nbtsort.c:226
bool nulls_not_distinct
Definition nbtsort.c:209
bool havedead
Definition nbtsort.c:210
Relation heap
Definition nbtsort.c:211
BTSpool * spool2
Definition nbtsort.c:218
double indtuples
Definition nbtsort.c:219
ParallelContext * pcxt
Definition nbtsort.c:172
BTShared * btshared
Definition nbtsort.c:192
Sharedsort * sharedsort
Definition nbtsort.c:193
Sharedsort * sharedsort2
Definition nbtsort.c:194
int nparticipanttuplesorts
Definition nbtsort.c:180
BufferUsage * bufferusage
Definition nbtsort.c:197
Snapshot snapshot
Definition nbtsort.c:195
WalUsage * walusage
Definition nbtsort.c:196
BlockNumber btpo_next
Definition nbtree.h:66
BlockNumber btpo_prev
Definition nbtree.h:65
uint16 btpo_flags
Definition nbtree.h:68
uint32 btpo_level
Definition nbtree.h:67
BTCycleId btpo_cycleid
Definition nbtree.h:69
IndexTuple btps_lowkey
Definition nbtsort.c:237
Size btps_full
Definition nbtsort.c:241
BulkWriteBuffer btps_buf
Definition nbtsort.c:235
OffsetNumber btps_lastoff
Definition nbtsort.c:238
Size btps_lastextra
Definition nbtsort.c:239
BlockNumber btps_blkno
Definition nbtsort.c:236
struct BTPageState * btps_next
Definition nbtsort.c:242
uint32 btps_level
Definition nbtsort.c:240
slock_t mutex
Definition nbtsort.c:126
bool isconcurrent
Definition nbtsort.c:106
double indtuples
Definition nbtsort.c:147
double reltuples
Definition nbtsort.c:145
Oid heaprelid
Definition nbtsort.c:102
bool brokenhotchain
Definition nbtsort.c:148
int64 queryid
Definition nbtsort.c:110
bool isunique
Definition nbtsort.c:104
int nparticipantsdone
Definition nbtsort.c:144
ConditionVariable workersdonecv
Definition nbtsort.c:118
int scantuplesortstates
Definition nbtsort.c:107
Oid indexrelid
Definition nbtsort.c:103
bool havedead
Definition nbtsort.c:146
bool nulls_not_distinct
Definition nbtsort.c:105
bool isunique
Definition nbtsort.c:86
bool nulls_not_distinct
Definition nbtsort.c:87
Relation heap
Definition nbtsort.c:84
Relation index
Definition nbtsort.c:85
Tuplesortstate * sortstate
Definition nbtsort.c:83
BulkWriteState * bulkstate
Definition nbtsort.c:252
Relation heap
Definition nbtsort.c:250
Relation index
Definition nbtsort.c:251
BlockNumber btws_pages_alloced
Definition nbtsort.c:254
BTScanInsert inskey
Definition nbtsort.c:253
double heap_tuples
Definition genam.h:38
double index_tuples
Definition genam.h:39
bool ii_Unique
Definition execnodes.h:202
bool ii_BrokenHotChain
Definition execnodes.h:214
bool ii_NullsNotDistinct
Definition execnodes.h:204
int ii_ParallelWorkers
Definition execnodes.h:220
bool ii_Concurrent
Definition execnodes.h:212
ItemPointerData t_tid
Definition itup.h:37
unsigned short t_info
Definition itup.h:49
uint8 statusFlags
Definition proc.h:202
dsm_segment * seg
Definition parallel.h:44
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
int nworkers_launched
Definition parallel.h:39
Definition type.h:96
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition tableam.c:166
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition tableam.c:131
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition tableam.c:146
static double table_index_build_scan(Relation table_rel, Relation index_rel, IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition tableam.h:1765
void tuplesort_performsort(Tuplesortstate *state)
Definition tuplesort.c:1259
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition tuplesort.c:3210
Size tuplesort_estimate_shared(int nWorkers)
Definition tuplesort.c:3189
void tuplesort_end(Tuplesortstate *state)
Definition tuplesort.c:847
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition tuplesort.c:3233
#define TUPLESORT_NONE
Definition tuplesort.h:67
IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward)
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, bool uniqueNullsNotDistinct, int workMem, SortCoordinate coordinate, int sortopt)
void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, const ItemPointerData *self, const Datum *values, const bool *isnull)
void ExitParallelMode(void)
Definition xact.c:1065
void EnterParallelMode(void)
Definition xact.c:1052