PostgreSQL Source Code  git master
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/relscan.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"

Data Structures

struct  BTSpool
 
struct  BTShared
 
struct  BTLeader
 
struct  BTBuildState
 
struct  BTPageState
 
struct  BTWriteState
 

Macros

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)
 
#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)
 

Typedefs

typedef struct BTSpool BTSpool
 
typedef struct BTShared BTShared
 
typedef struct BTLeader BTLeader
 
typedef struct BTBuildState BTBuildState
 
typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static double _bt_spools_heapscan (Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
 
static void _bt_spooldestroy (BTSpool *btspool)
 
static void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
static void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_build_callback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static Page _bt_blnewpage (uint32 level)
 
static BTPageState * _bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page page)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_begin_parallel (BTBuildState *buildstate, bool isconcurrent, int request)
 
static void _bt_end_parallel (BTLeader *btleader)
 
static Size _bt_parallel_estimate_shared (Snapshot snapshot)
 
static double _bt_parallel_heapscan (BTBuildState *buildstate, bool *brokenhotchain)
 
static void _bt_leader_participate_as_worker (BTBuildState *buildstate)
 
static void _bt_parallel_scan_and_sort (BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BTREE_SHARED

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)

Definition at line 86 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)

Definition at line 87 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT_SPOOL2

#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)

Definition at line 88 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().
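
These three keys need only be distinct within the build's shared table of contents; any backend attached to the parallel context can then retrieve the same shared structures by key. Below is a minimal, self-contained sketch of the idea in plain C. toc_insert() and toc_lookup() are hypothetical stand-ins for shm_toc_insert() and shm_toc_lookup(); nothing here is PostgreSQL code.

#include <stdint.h>
#include <stdio.h>

#define KEY_BTREE_SHARED UINT64_C(0xA000000000000001)
#define KEY_TUPLESORT    UINT64_C(0xA000000000000002)

typedef struct { uint64_t key; void *addr; } TocEntry;

static TocEntry toc[8];
static int ntoc;

static void toc_insert(uint64_t key, void *addr)
{
    toc[ntoc].key = key;
    toc[ntoc].addr = addr;
    ntoc++;
}

static void *toc_lookup(uint64_t key)
{
    for (int i = 0; i < ntoc; i++)
        if (toc[i].key == key)
            return toc[i].addr;
    return NULL;            /* cf. shm_toc_lookup()'s noError case */
}

int main(void)
{
    char shared_state[] = "btree build state";

    toc_insert(KEY_BTREE_SHARED, shared_state);
    /* another process attached to the same segment does the lookup */
    printf("%s\n", (char *) toc_lookup(KEY_BTREE_SHARED));
    return 0;
}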

Typedef Documentation

◆ BTBuildState

◆ BTLeader

◆ BTPageState

◆ BTShared

◆ BTSpool

◆ BTWriteState

Function Documentation

◆ _bt_begin_parallel()

static void _bt_begin_parallel ( BTBuildState *  buildstate,
bool  isconcurrent,
int  request 
)
static

Definition at line 1185 of file nbtsort.c.

References _bt_end_parallel(), _bt_leader_participate_as_worker(), _bt_parallel_estimate_shared(), Assert, BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableInit(), CreateParallelContext(), EnterParallelMode(), ParallelContext::estimator, GetTransactionSnapshot(), BTShared::havedead, BTSpool::heap, heap_parallelscan_initialize(), BTShared::heapdesc, BTShared::heaprelid, BTSpool::index, BTShared::indexrelid, BTShared::indtuples, InitializeParallelDSM(), BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, LaunchParallelWorkers(), BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, ParallelContext::nworkers_launched, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, BTLeader::pcxt, RegisterSnapshot(), RelationGetRelid, BTShared::reltuples, BTShared::scantuplesortstates, ParallelContext::seg, BTLeader::sharedsort, BTLeader::sharedsort2, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), BTLeader::snapshot, SnapshotAny, SpinLockInit, BTBuildState::spool, ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), WaitForParallelWorkersToAttach(), and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1186 {
1187  ParallelContext *pcxt;
1188  int scantuplesortstates;
1189  Snapshot snapshot;
1190  Size estbtshared;
1191  Size estsort;
1192  BTShared *btshared;
1193  Sharedsort *sharedsort;
1194  Sharedsort *sharedsort2;
1195  BTSpool *btspool = buildstate->spool;
1196  BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1197  bool leaderparticipates = true;
1198 
1199 #ifdef DISABLE_LEADER_PARTICIPATION
1200  leaderparticipates = false;
1201 #endif
1202 
1203  /*
1204  * Enter parallel mode, and create context for parallel build of btree
1205  * index
1206  */
1207  EnterParallelMode();
1208  Assert(request > 0);
1209  pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1210  request, true);
1211  scantuplesortstates = leaderparticipates ? request + 1 : request;
1212 
1213  /*
1214  * Prepare for scan of the base relation. In a normal index build, we use
1215  * SnapshotAny because we must retrieve all tuples and do our own time
1216  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1217  * concurrent build, we take a regular MVCC snapshot and index whatever's
1218  * live according to that.
1219  */
1220  if (!isconcurrent)
1221  snapshot = SnapshotAny;
1222  else
1223  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1224 
1225  /*
1226  * Estimate size for at least two keys -- our own
1227  * PARALLEL_KEY_BTREE_SHARED workspace, and PARALLEL_KEY_TUPLESORT
1228  * tuplesort workspace
1229  */
1230  estbtshared = _bt_parallel_estimate_shared(snapshot);
1231  shm_toc_estimate_chunk(&pcxt->estimator, estbtshared);
1232  estsort = tuplesort_estimate_shared(scantuplesortstates);
1233  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1234 
1235  /*
1236  * Unique case requires a second spool, and so we may have to account for
1237  * a third shared workspace -- PARALLEL_KEY_TUPLESORT_SPOOL2
1238  */
1239  if (!btspool->isunique)
1240  shm_toc_estimate_keys(&pcxt->estimator, 2);
1241  else
1242  {
1243  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1244  shm_toc_estimate_keys(&pcxt->estimator, 3);
1245  }
1246 
1247  /* Everyone's had a chance to ask for space, so now create the DSM */
1248  InitializeParallelDSM(pcxt);
1249 
1250  /* Store shared build state, for which we reserved space */
1251  btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1252  /* Initialize immutable state */
1253  btshared->heaprelid = RelationGetRelid(btspool->heap);
1254  btshared->indexrelid = RelationGetRelid(btspool->index);
1255  btshared->isunique = btspool->isunique;
1256  btshared->isconcurrent = isconcurrent;
1257  btshared->scantuplesortstates = scantuplesortstates;
1258  ConditionVariableInit(&btshared->workersdonecv);
1259  SpinLockInit(&btshared->mutex);
1260  /* Initialize mutable state */
1261  btshared->nparticipantsdone = 0;
1262  btshared->reltuples = 0.0;
1263  btshared->havedead = false;
1264  btshared->indtuples = 0.0;
1265  btshared->brokenhotchain = false;
1266  heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot);
1267 
1268  /*
1269  * Store shared tuplesort-private state, for which we reserved space.
1270  * Then, initialize opaque state using tuplesort routine.
1271  */
1272  sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1273  tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1274  pcxt->seg);
1275 
1276  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared);
1277  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1278 
1279  /* Unique case requires a second spool, and associated shared state */
1280  if (!btspool->isunique)
1281  sharedsort2 = NULL;
1282  else
1283  {
1284  /*
1285  * Store additional shared tuplesort-private state, for which we
1286  * reserved space. Then, initialize opaque state using tuplesort
1287  * routine.
1288  */
1289  sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1290  tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1291  pcxt->seg);
1292 
1293  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
1294  }
1295 
1296  /* Launch workers, saving status for leader/caller */
1297  LaunchParallelWorkers(pcxt);
1298  btleader->pcxt = pcxt;
1299  btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1300  if (leaderparticipates)
1301  btleader->nparticipanttuplesorts++;
1302  btleader->btshared = btshared;
1303  btleader->sharedsort = sharedsort;
1304  btleader->sharedsort2 = sharedsort2;
1305  btleader->snapshot = snapshot;
1306 
1307  /* If no workers were successfully launched, back out (do serial build) */
1308  if (pcxt->nworkers_launched == 0)
1309  {
1310  _bt_end_parallel(btleader);
1311  return;
1312  }
1313 
1314  /* Save leader state now that it's clear build will be parallel */
1315  buildstate->btleader = btleader;
1316 
1317  /* Join heap scan ourselves */
1318  if (leaderparticipates)
1319  _bt_leader_participate_as_worker(buildstate);
1320 
1321  /*
1322  * Caller needs to wait for all launched workers when we return. Make
1323  * sure that the failure-to-start case will not hang forever.
1324  */
1325  WaitForParallelWorkersToAttach(pcxt);
1326 }
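
The function follows the usual two-phase parallel-DSM protocol: every chunk is sized with shm_toc_estimate_chunk() before InitializeParallelDSM() creates the one segment, and the same sizes are then carved out of it with shm_toc_allocate(). A self-contained sketch of that protocol; estimator, region_create and region_alloc are illustrative stand-ins, not the parallel.c API.

#include <stdio.h>
#include <stdlib.h>

typedef struct { size_t total; } estimator;

static void estimate_chunk(estimator *e, size_t sz) { e->total += sz; }

typedef struct { char *base; size_t used, size; } region;

static region *region_create(size_t size)
{
    region *r = malloc(sizeof(region));
    r->base = malloc(size);
    r->used = 0;
    r->size = size;
    return r;
}

static void *region_alloc(region *r, size_t sz)
{
    void *p = r->base + r->used;
    r->used += sz;
    return r->used <= r->size ? p : NULL;
}

int main(void)
{
    estimator e = {0};

    /* Phase 1: everyone asks for space (cf. shm_toc_estimate_chunk) */
    estimate_chunk(&e, 128);   /* stands in for estbtshared */
    estimate_chunk(&e, 256);   /* stands in for estsort */

    /* Phase 2: create the region, then carve it up (cf. shm_toc_allocate) */
    region *r = region_create(e.total);
    void *btshared = region_alloc(r, 128);
    void *sharedsort = region_alloc(r, 256);

    printf("allocated at offsets %ld and %ld\n",
           (long) ((char *) btshared - r->base),
           (long) ((char *) sharedsort - r->base));
    free(r->base);
    free(r);
    return 0;
}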

◆ _bt_blnewpage()

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 589 of file nbtsort.c.

References _bt_pageinit(), BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageOpaqueData::level, P_NONE, PageGetSpecialPointer, and palloc().

Referenced by _bt_buildadd(), and _bt_pagestate().

590 {
591  Page page;
592  BTPageOpaque opaque;
593 
594  page = (Page) palloc(BLCKSZ);
595 
596  /* Zero the page and set up standard page header info */
597  _bt_pageinit(page, BLCKSZ);
598 
599  /* Initialize BT opaque state */
600  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
601  opaque->btpo_prev = opaque->btpo_next = P_NONE;
602  opaque->btpo.level = level;
603  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
604  opaque->btpo_cycleid = 0;
605 
606  /* Make the P_HIKEY line pointer appear allocated */
607  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
608 
609  return page;
610 }
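
The btree-specific metadata lives in the page's "special space" at the end of the block, and advancing pd_lower by one line pointer makes the P_HIKEY slot appear allocated before any real item is inserted. A simplified, self-contained sketch of that layout; the struct definitions are illustrative only, not bufpage.h's or nbtree.h's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLCKSZ 8192

typedef struct { uint16_t pd_lower, pd_upper, pd_special; } PageHeaderSketch;
typedef struct { uint32_t btpo_prev, btpo_next, level; uint16_t flags; } BTOpaqueSketch;

int main(void)
{
    char page[BLCKSZ];
    PageHeaderSketch *hdr = (PageHeaderSketch *) page;
    BTOpaqueSketch *opaque;

    memset(page, 0, BLCKSZ);                    /* cf. _bt_pageinit() */
    hdr->pd_special = BLCKSZ - sizeof(BTOpaqueSketch);
    hdr->pd_lower = sizeof(PageHeaderSketch);
    hdr->pd_upper = hdr->pd_special;

    /* btree's opaque data sits in the special space at the page's end */
    opaque = (BTOpaqueSketch *) (page + hdr->pd_special);
    opaque->level = 0;                          /* leaf level */

    /* reserving one 4-byte line pointer makes the P_HIKEY slot "allocated" */
    hdr->pd_lower += 4;

    printf("special space at %u, pd_lower now %u\n",
           (unsigned) hdr->pd_special, (unsigned) hdr->pd_lower);
    return 0;
}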

◆ _bt_blwritepage()

static void _bt_blwritepage ( BTWriteState *  wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 616 of file nbtsort.c.

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, PageSetChecksumInplace(), palloc0(), pfree(), RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

617 {
618  /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
619  RelationOpenSmgr(wstate->index);
620 
621  /* XLOG stuff */
622  if (wstate->btws_use_wal)
623  {
624  /* We use the heap NEWPAGE record type for this */
625  log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
626  }
627 
628  /*
629  * If we have to write pages nonsequentially, fill in the space with
630  * zeroes until we come back and overwrite. This is not logically
631  * necessary on standard Unix filesystems (unwritten space will read as
632  * zeroes anyway), but it should help to avoid fragmentation. The dummy
633  * pages aren't WAL-logged though.
634  */
635  while (blkno > wstate->btws_pages_written)
636  {
637  if (!wstate->btws_zeropage)
638  wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
639  /* don't set checksum for all-zero page */
640  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
641  wstate->btws_pages_written++,
642  (char *) wstate->btws_zeropage,
643  true);
644  }
645 
646  PageSetChecksumInplace(page, blkno);
647 
648  /*
649  * Now write the page. There's no need for smgr to schedule an fsync for
650  * this write; we'll do it ourselves before ending the build.
651  */
652  if (blkno == wstate->btws_pages_written)
653  {
654  /* extending the file... */
655  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
656  (char *) page, true);
657  wstate->btws_pages_written++;
658  }
659  else
660  {
661  /* overwriting a block we zero-filled before */
662  smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
663  (char *) page, true);
664  }
665 
666  pfree(page);
667 }
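
A stdio-based analogue of the same write discipline, with smgrextend()/smgrwrite() replaced by plain file I/O (not PostgreSQL code): blocks are normally appended in order, but a block past the current end of file forces zero pages to be written first, and the real content later overwrites the placeholder.

#include <stdio.h>
#include <string.h>

#define BLCKSZ 8192

static long pages_written = 0;

static void write_block(FILE *f, long blkno, const char *page)
{
    static char zeropage[BLCKSZ];   /* zero-initialized */

    /* zero-fill any gap, mirroring the btws_pages_written loop */
    while (blkno > pages_written)
    {
        fseek(f, pages_written * BLCKSZ, SEEK_SET);
        fwrite(zeropage, 1, BLCKSZ, f);
        pages_written++;
    }

    fseek(f, blkno * BLCKSZ, SEEK_SET);
    fwrite(page, 1, BLCKSZ, f);
    if (blkno == pages_written)
        pages_written++;            /* extended the file */
}

int main(void)
{
    FILE *f = tmpfile();
    char page[BLCKSZ];

    memset(page, 'x', BLCKSZ);
    write_block(f, 0, page);   /* sequential extend */
    write_block(f, 3, page);   /* forces zero-fill of blocks 1 and 2 */
    write_block(f, 1, page);   /* overwrites a zero-filled block */
    printf("pages written so far: %ld\n", pages_written);
    fclose(f);
    return 0;
}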

◆ _bt_build_callback()

static void _bt_build_callback ( Relation  index,
HeapTuple  htup,
Datum *  values,
bool *  isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 560 of file nbtsort.c.

References _bt_spool(), BTBuildState::havedead, BTBuildState::indtuples, BTBuildState::spool, BTBuildState::spool2, and HeapTupleData::t_self.

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().

566 {
567  BTBuildState *buildstate = (BTBuildState *) state;
568 
569  /*
570  * insert the index tuple into the appropriate spool file for subsequent
571  * processing
572  */
573  if (tupleIsAlive || buildstate->spool2 == NULL)
574  _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
575  else
576  {
577  /* dead tuples are put into spool2 */
578  buildstate->havedead = true;
579  _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
580  }
581 
582  buildstate->indtuples += 1;
583 }

◆ _bt_buildadd()

static void _bt_buildadd ( BTWriteState *  wstate,
BTPageState *  state,
IndexTuple  itup 
)
static

Definition at line 797 of file nbtsort.c.

References _bt_blnewpage(), _bt_blwritepage(), _bt_pagestate(), _bt_sortaddtup(), Assert, BTMaxItemSize, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageState::btps_blkno, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), ERROR, errtableconstraint(), BTWriteState::heap, BTWriteState::index, IndexTupleDSize, ItemIdGetLength, ItemIdSetUnused, ItemPointerSet, MAXALIGN, OffsetNumberNext, P_FIRSTKEY, P_HIKEY, P_NONE, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetSpecialPointer, pfree(), RelationGetRelationName, and IndexTupleData::t_tid.

Referenced by _bt_load(), and _bt_uppershutdown().

798 {
799  Page npage;
800  BlockNumber nblkno;
801  OffsetNumber last_off;
802  Size pgspc;
803  Size itupsz;
804 
805  /*
806  * This is a handy place to check for cancel interrupts during the btree
807  * load phase of index creation.
808  */
810 
811  npage = state->btps_page;
812  nblkno = state->btps_blkno;
813  last_off = state->btps_lastoff;
814 
815  pgspc = PageGetFreeSpace(npage);
816  itupsz = IndexTupleDSize(*itup);
817  itupsz = MAXALIGN(itupsz);
818 
819  /*
820  * Check whether the item can fit on a btree page at all. (Eventually, we
821  * ought to try to apply TOAST methods if not.) We actually need to be
822  * able to fit three items on every page, so restrict any one item to 1/3
823  * the per-page available space. Note that at this point, itupsz doesn't
824  * include the ItemId.
825  *
826  * NOTE: similar code appears in _bt_insertonpg() to defend against
827  * oversize items being inserted into an already-existing index. But
828  * during creation of an index, we don't go through there.
829  */
830  if (itupsz > BTMaxItemSize(npage))
831  ereport(ERROR,
832  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
833  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
834  itupsz, BTMaxItemSize(npage),
835  RelationGetRelationName(wstate->index)),
836  errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
837  "Consider a function index of an MD5 hash of the value, "
838  "or use full text indexing."),
839  errtableconstraint(wstate->heap,
840  RelationGetRelationName(wstate->index))));
841 
842  /*
843  * Check to see if page is "full". It's definitely full if the item won't
844  * fit. Otherwise, compare to the target freespace derived from the
845  * fillfactor. However, we must put at least two items on each page, so
846  * disregard fillfactor if we don't have that many.
847  */
848  if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
849  {
850  /*
851  * Finish off the page and write it out.
852  */
853  Page opage = npage;
854  BlockNumber oblkno = nblkno;
855  ItemId ii;
856  ItemId hii;
857  IndexTuple oitup;
858 
859  /* Create new page of same level */
860  npage = _bt_blnewpage(state->btps_level);
861 
862  /* and assign it a page position */
863  nblkno = wstate->btws_pages_alloced++;
864 
865  /*
866  * We copy the last item on the page into the new page, and then
867  * rearrange the old page so that the 'last item' becomes its high key
868  * rather than a true data item. There had better be at least two
869  * items on the page already, else the page would be empty of useful
870  * data.
871  */
872  Assert(last_off > P_FIRSTKEY);
873  ii = PageGetItemId(opage, last_off);
874  oitup = (IndexTuple) PageGetItem(opage, ii);
875  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
876 
877  /*
878  * Move 'last' into the high key position on opage
879  */
880  hii = PageGetItemId(opage, P_HIKEY);
881  *hii = *ii;
882  ItemIdSetUnused(ii); /* redundant */
883  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
884 
885  /*
886  * Link the old page into its parent, using its minimum key. If we
887  * don't have a parent, we have to create one; this adds a new btree
888  * level.
889  */
890  if (state->btps_next == NULL)
891  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
892 
893  Assert(state->btps_minkey != NULL);
894  ItemPointerSet(&(state->btps_minkey->t_tid), oblkno, P_HIKEY);
895  _bt_buildadd(wstate, state->btps_next, state->btps_minkey);
896  pfree(state->btps_minkey);
897 
898  /*
899  * Save a copy of the minimum key for the new page. We have to copy
900  * it off the old page, not the new one, in case we are not at leaf
901  * level.
902  */
903  state->btps_minkey = CopyIndexTuple(oitup);
904 
905  /*
906  * Set the sibling links for both pages.
907  */
908  {
909  BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
910  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
911 
912  oopaque->btpo_next = nblkno;
913  nopaque->btpo_prev = oblkno;
914  nopaque->btpo_next = P_NONE; /* redundant */
915  }
916 
917  /*
918  * Write out the old page. We never need to touch it again, so we can
919  * free the opage workspace too.
920  */
921  _bt_blwritepage(wstate, opage, oblkno);
922 
923  /*
924  * Reset last_off to point to new page
925  */
926  last_off = P_FIRSTKEY;
927  }
928 
929  /*
930  * If the new item is the first for its page, stash a copy for later. Note
931  * this will only happen for the first item on a level; on later pages,
932  * the first item for a page is copied from the prior page in the code
933  * above.
934  */
935  if (last_off == P_HIKEY)
936  {
937  Assert(state->btps_minkey == NULL);
938  state->btps_minkey = CopyIndexTuple(itup);
939  }
940 
941  /*
942  * Add the new item into the current page.
943  */
944  last_off = OffsetNumberNext(last_off);
945  _bt_sortaddtup(npage, itupsz, itup, last_off);
946 
947  state->btps_page = npage;
948  state->btps_blkno = nblkno;
949  state->btps_lastoff = last_off;
950 }
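
With the stock 8 kB block size, the 1/3-page limit checked above works out to the familiar 2712 bytes on this PostgreSQL version. A small self-contained computation; the 24-byte page header, 4-byte line pointers and 16-byte btree special area are assumed from the standard layout rather than taken from bufpage.h.

#include <stdio.h>

#define BLCKSZ 8192
#define MAXALIGN(x) (((x) + 7) & ~(size_t) 7)

int main(void)
{
    size_t page_header = 24;            /* SizeOfPageHeaderData */
    size_t three_itemids = 3 * 4;       /* room for 3 ItemIdData */
    size_t special = MAXALIGN(16);      /* BTPageOpaqueData */
    size_t avail = BLCKSZ - MAXALIGN(page_header + three_itemids) - special;

    /* each of three items may use a third, rounded down to alignment */
    printf("max btree item size: %zu bytes\n", (avail / 3) & ~(size_t) 7);
    return 0;
}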

◆ _bt_end_parallel()

static void _bt_end_parallel ( BTLeader *  btleader)
static

Definition at line 1332 of file nbtsort.c.

References DestroyParallelContext(), ExitParallelMode(), IsMVCCSnapshot, BTLeader::pcxt, BTLeader::snapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _bt_begin_parallel(), and btbuild().

1333 {
1334  /* Shutdown worker processes */
1336  /* Free last reference to MVCC snapshot, if one was used */
1337  if (IsMVCCSnapshot(btleader->snapshot))
1338  UnregisterSnapshot(btleader->snapshot);
1339  DestroyParallelContext(btleader->pcxt);
1340  ExitParallelMode();
1341 }

◆ _bt_leader_participate_as_worker()

static void _bt_leader_participate_as_worker ( BTBuildState *  buildstate)
static

Definition at line 1408 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), BTBuildState::btleader, BTLeader::btshared, BTSpool::heap, BTSpool::index, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, BTLeader::nparticipanttuplesorts, palloc0(), ResetUsage(), BTLeader::sharedsort, BTLeader::sharedsort2, ShowUsage(), and BTBuildState::spool.

Referenced by _bt_begin_parallel().

1409 {
1410  BTLeader *btleader = buildstate->btleader;
1411  BTSpool *leaderworker;
1412  BTSpool *leaderworker2;
1413  int sortmem;
1414 
1415  /* Allocate memory and initialize private spool */
1416  leaderworker = (BTSpool *) palloc0(sizeof(BTSpool));
1417  leaderworker->heap = buildstate->spool->heap;
1418  leaderworker->index = buildstate->spool->index;
1419  leaderworker->isunique = buildstate->spool->isunique;
1420 
1421  /* Initialize second spool, if required */
1422  if (!btleader->btshared->isunique)
1423  leaderworker2 = NULL;
1424  else
1425  {
1426  /* Allocate memory for worker's own private secondary spool */
1427  leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool));
1428 
1429  /* Initialize worker's own secondary spool */
1430  leaderworker2->heap = leaderworker->heap;
1431  leaderworker2->index = leaderworker->index;
1432  leaderworker2->isunique = false;
1433  }
1434 
1435  /*
1436  * Might as well use reliable figure when doling out maintenance_work_mem
1437  * (when requested number of workers were not launched, this will be
1438  * somewhat higher than it is for other workers).
1439  */
1440  sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;
1441 
1442  /* Perform work common to all participants */
1443  _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
1444  btleader->sharedsort, btleader->sharedsort2,
1445  sortmem);
1446 
1447 #ifdef BTREE_BUILD_STATS
1448  if (log_btree_build_stats)
1449  {
1450  ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1451  ResetUsage();
1452  }
1453 #endif /* BTREE_BUILD_STATS */
1454 }
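
A worked example of the memory split above, using assumed (hypothetical) settings. The leader divides by nparticipanttuplesorts, the participant count actually recorded at launch, while workers divide by the request-based scantuplesortstates; that is why the leader's share can be somewhat higher when fewer workers launch than were requested.

#include <stdio.h>

int main(void)
{
    int maintenance_work_mem = 65536;           /* kB, i.e. 64 MB (assumed) */
    int request = 2;                            /* workers asked for */
    int nparticipanttuplesorts = request + 1;   /* leader participates */

    printf("each participant sorts with %d kB\n",
           maintenance_work_mem / nparticipanttuplesorts);
    return 0;
}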

◆ _bt_leafbuild()

static void _bt_leafbuild ( BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 523 of file nbtsort.c.

References _bt_load(), BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, log_btree_build_stats, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, tuplesort_performsort(), and XLogIsNeeded.

Referenced by btbuild().

524 {
525  BTWriteState wstate;
526 
527 #ifdef BTREE_BUILD_STATS
528  if (log_btree_build_stats)
529  {
530  ShowUsage("BTREE BUILD (Spool) STATISTICS");
531  ResetUsage();
532  }
533 #endif /* BTREE_BUILD_STATS */
534 
535  tuplesort_performsort(btspool->sortstate);
536  if (btspool2)
537  tuplesort_performsort(btspool2->sortstate);
538 
539  wstate.heap = btspool->heap;
540  wstate.index = btspool->index;
541 
542  /*
543  * We need to log index creation in WAL iff WAL archiving/streaming is
544  * enabled UNLESS the index isn't WAL-logged anyway.
545  */
546  wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
547 
548  /* reserve the metapage */
549  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
550  wstate.btws_pages_written = 0;
551  wstate.btws_zeropage = NULL; /* until needed */
552 
553  _bt_load(&wstate, btspool, btspool2);
554 }

◆ _bt_load()

static void _bt_load ( BTWriteState *  wstate,
BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 1022 of file nbtsort.c.

References _bt_buildadd(), _bt_freeskey(), _bt_mkscankey_nodata(), _bt_pagestate(), _bt_uppershutdown(), SortSupportData::abbreviate, ApplySortComparator(), AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, compare(), CurrentMemoryContext, i, BTWriteState::index, index_getattr, MAIN_FORKNUM, merge(), palloc0(), pfree(), PrepareSortSupportFromIndexRel(), RelationData::rd_smgr, RelationGetDescr, RelationGetNumberOfAttributes, RelationNeedsWAL, RelationOpenSmgr, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

1023 {
1024  BTPageState *state = NULL;
1025  bool merge = (btspool2 != NULL);
1026  IndexTuple itup,
1027  itup2 = NULL;
1028  bool load1;
1029  TupleDesc tupdes = RelationGetDescr(wstate->index);
1030  int i,
1031  keysz = RelationGetNumberOfAttributes(wstate->index);
1032  ScanKey indexScanKey = NULL;
1033  SortSupport sortKeys;
1034 
1035  if (merge)
1036  {
1037  /*
1038  * Another BTSpool for dead tuples exists. Now we have to merge
1039  * btspool and btspool2.
1040  */
1041 
1042  /* the preparation of merge */
1043  itup = tuplesort_getindextuple(btspool->sortstate, true);
1044  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1045  indexScanKey = _bt_mkscankey_nodata(wstate->index);
1046 
1047  /* Prepare SortSupport data for each column */
1048  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
1049 
1050  for (i = 0; i < keysz; i++)
1051  {
1052  SortSupport sortKey = sortKeys + i;
1053  ScanKey scanKey = indexScanKey + i;
1054  int16 strategy;
1055 
1056  sortKey->ssup_cxt = CurrentMemoryContext;
1057  sortKey->ssup_collation = scanKey->sk_collation;
1058  sortKey->ssup_nulls_first =
1059  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1060  sortKey->ssup_attno = scanKey->sk_attno;
1061  /* Abbreviation is not supported here */
1062  sortKey->abbreviate = false;
1063 
1064  AssertState(sortKey->ssup_attno != 0);
1065 
1066  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1067  BTGreaterStrategyNumber : BTLessStrategyNumber;
1068 
1069  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
1070  }
1071 
1072  _bt_freeskey(indexScanKey);
1073 
1074  for (;;)
1075  {
1076  load1 = true; /* load BTSpool next ? */
1077  if (itup2 == NULL)
1078  {
1079  if (itup == NULL)
1080  break;
1081  }
1082  else if (itup != NULL)
1083  {
1084  for (i = 1; i <= keysz; i++)
1085  {
1086  SortSupport entry;
1087  Datum attrDatum1,
1088  attrDatum2;
1089  bool isNull1,
1090  isNull2;
1091  int32 compare;
1092 
1093  entry = sortKeys + i - 1;
1094  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
1095  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
1096 
1097  compare = ApplySortComparator(attrDatum1, isNull1,
1098  attrDatum2, isNull2,
1099  entry);
1100  if (compare > 0)
1101  {
1102  load1 = false;
1103  break;
1104  }
1105  else if (compare < 0)
1106  break;
1107  }
1108  }
1109  else
1110  load1 = false;
1111 
1112  /* When we see first tuple, create first index page */
1113  if (state == NULL)
1114  state = _bt_pagestate(wstate, 0);
1115 
1116  if (load1)
1117  {
1118  _bt_buildadd(wstate, state, itup);
1119  itup = tuplesort_getindextuple(btspool->sortstate, true);
1120  }
1121  else
1122  {
1123  _bt_buildadd(wstate, state, itup2);
1124  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1125  }
1126  }
1127  pfree(sortKeys);
1128  }
1129  else
1130  {
1131  /* merge is unnecessary */
1132  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1133  true)) != NULL)
1134  {
1135  /* When we see first tuple, create first index page */
1136  if (state == NULL)
1137  state = _bt_pagestate(wstate, 0);
1138 
1139  _bt_buildadd(wstate, state, itup);
1140  }
1141  }
1142 
1143  /* Close down final pages and write the metapage */
1144  _bt_uppershutdown(wstate, state);
1145 
1146  /*
1147  * If the index is WAL-logged, we must fsync it down to disk before it's
1148  * safe to commit the transaction. (For a non-WAL-logged index we don't
1149  * care since the index will be uninteresting after a crash anyway.)
1150  *
1151  * It's obvious that we must do this when not WAL-logging the build. It's
1152  * less obvious that we have to do it even if we did WAL-log the index
1153  * pages. The reason is that since we're building outside shared buffers,
1154  * a CHECKPOINT occurring during the build has no way to flush the
1155  * previously written data to disk (indeed it won't know the index even
1156  * exists). A crash later on would replay WAL from the checkpoint,
1157  * therefore it wouldn't replay our earlier WAL entries. If we do not
1158  * fsync those pages here, they might still not be on disk when the crash
1159  * occurs.
1160  */
1161  if (RelationNeedsWAL(wstate->index))
1162  {
1163  RelationOpenSmgr(wstate->index);
1164  smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
1165  }
1166 }
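
The merge loop prefers spool 1 whenever keys are fully equal: load1 only becomes false when some column of itup2 sorts strictly earlier. A self-contained integer analogue of that loop, with two sorted arrays standing in for the two tuplesorts (not PostgreSQL code).

#include <stdio.h>

int main(void)
{
    int a[] = {1, 3, 3, 7};         /* spool 1 (live tuples) */
    int b[] = {2, 3, 8};            /* spool 2 (dead tuples) */
    int na = 4, nb = 3, i = 0, j = 0;

    while (i < na || j < nb)
    {
        int load1;

        if (j >= nb)
            load1 = 1;              /* spool 2 exhausted */
        else if (i >= na)
            load1 = 0;              /* spool 1 exhausted */
        else
            load1 = a[i] <= b[j];   /* ties prefer spool 1 */

        if (load1)
            printf("%d (spool1)\n", a[i++]);
        else
            printf("%d (spool2)\n", b[j++]);
    }
    return 0;
}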

◆ _bt_pagestate()

static BTPageState * _bt_pagestate ( BTWriteState *  wstate,
uint32  level 
)
static

Definition at line 674 of file nbtsort.c.

References _bt_blnewpage(), BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_DEFAULT_FILLFACTOR, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, P_HIKEY, palloc0(), and RelationGetTargetPageFreeSpace.

Referenced by _bt_buildadd(), and _bt_load().

675 {
676  BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
677 
678  /* create initial page for level */
679  state->btps_page = _bt_blnewpage(level);
680 
681  /* and assign it a page position */
682  state->btps_blkno = wstate->btws_pages_alloced++;
683 
684  state->btps_minkey = NULL;
685  /* initialize lastoff so first item goes into P_FIRSTKEY */
686  state->btps_lastoff = P_HIKEY;
687  state->btps_level = level;
688  /* set "full" threshold based on level. See notes at head of file. */
689  if (level > 0)
690  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
691  else
692  state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
693  BTREE_DEFAULT_FILLFACTOR);
694  /* no parent level, yet */
695  state->btps_next = NULL;
696 
697  return state;
698 }
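
A worked example of the thresholds set above, assuming an 8 kB block, the default leaf fillfactor of 90 (BTREE_DEFAULT_FILLFACTOR, overridable per index) and the fixed internal-page fillfactor of 70 (BTREE_NONLEAF_FILLFACTOR). A page is treated as full once its remaining free space drops below btps_full.

#include <stdio.h>

int main(void)
{
    int blcksz = 8192;

    printf("leaf btps_full:     %d bytes free\n", blcksz * (100 - 90) / 100);
    printf("non-leaf btps_full: %d bytes free\n", blcksz * (100 - 70) / 100);
    return 0;
}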

◆ _bt_parallel_build_main()

void _bt_parallel_build_main ( dsm_segment *  seg,
shm_toc *  toc 
)

Definition at line 1460 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), AccessExclusiveLock, BTSpool::heap, heap_close, heap_open(), BTShared::heaprelid, BTSpool::index, index_close(), index_open(), BTShared::indexrelid, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, ResetUsage(), RowExclusiveLock, BTShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), ShowUsage(), and tuplesort_attach_shared().

1461 {
1462  BTSpool *btspool;
1463  BTSpool *btspool2;
1464  BTShared *btshared;
1465  Sharedsort *sharedsort;
1466  Sharedsort *sharedsort2;
1467  Relation heapRel;
1468  Relation indexRel;
1469  LOCKMODE heapLockmode;
1470  LOCKMODE indexLockmode;
1471  int sortmem;
1472 
1473 #ifdef BTREE_BUILD_STATS
1474  if (log_btree_build_stats)
1475  ResetUsage();
1476 #endif /* BTREE_BUILD_STATS */
1477 
1478  /* Look up shared state */
1479  btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1480 
1481  /* Open relations using lock modes known to be obtained by index.c */
1482  if (!btshared->isconcurrent)
1483  {
1484  heapLockmode = ShareLock;
1485  indexLockmode = AccessExclusiveLock;
1486  }
1487  else
1488  {
1489  heapLockmode = ShareUpdateExclusiveLock;
1490  indexLockmode = RowExclusiveLock;
1491  }
1492 
1493  /* Open relations within worker */
1494  heapRel = heap_open(btshared->heaprelid, heapLockmode);
1495  indexRel = index_open(btshared->indexrelid, indexLockmode);
1496 
1497  /* Initialize worker's own spool */
1498  btspool = (BTSpool *) palloc0(sizeof(BTSpool));
1499  btspool->heap = heapRel;
1500  btspool->index = indexRel;
1501  btspool->isunique = btshared->isunique;
1502 
1503  /* Look up shared state private to tuplesort.c */
1504  sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
1505  tuplesort_attach_shared(sharedsort, seg);
1506  if (!btshared->isunique)
1507  {
1508  btspool2 = NULL;
1509  sharedsort2 = NULL;
1510  }
1511  else
1512  {
1513  /* Allocate memory for worker's own private secondary spool */
1514  btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
1515 
1516  /* Initialize worker's own secondary spool */
1517  btspool2->heap = btspool->heap;
1518  btspool2->index = btspool->index;
1519  btspool2->isunique = false;
1520  /* Look up shared state private to tuplesort.c */
1521  sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1522  tuplesort_attach_shared(sharedsort2, seg);
1523  }
1524 
1525  /* Perform sorting of spool, and possibly a spool2 */
1526  sortmem = maintenance_work_mem / btshared->scantuplesortstates;
1527  _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1528  sharedsort2, sortmem);
1529 
1530 #ifdef BTREE_BUILD_STATS
1531  if (log_btree_build_stats)
1532  {
1533  ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1534  ResetUsage();
1535  }
1536 #endif /* BTREE_BUILD_STATS */
1537 
1538  index_close(indexRel, indexLockmode);
1539  heap_close(heapRel, heapLockmode);
1540 }

◆ _bt_parallel_estimate_shared()

static Size _bt_parallel_estimate_shared ( Snapshot  snapshot)
static

Definition at line 1348 of file nbtsort.c.

References add_size(), Assert, EstimateSnapshotSpace(), IsMVCCSnapshot, offsetof, and SnapshotAny.

Referenced by _bt_begin_parallel().

1349 {
1350  if (!IsMVCCSnapshot(snapshot))
1351  {
1352  Assert(snapshot == SnapshotAny);
1353  return sizeof(BTShared);
1354  }
1355 
1356  return add_size(offsetof(BTShared, heapdesc) +
1357  offsetof(ParallelHeapScanDescData, phs_snapshot_data),
1358  EstimateSnapshotSpace(snapshot));
1359 }
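
The size is computed with offsetof() because the snapshot is serialized into a variable-length tail: the real code chains two offsets (BTShared's embedded heapdesc, then its phs_snapshot_data flexible array) and adds EstimateSnapshotSpace(). A simplified single-struct sketch of the same sizing trick; SharedSketch is a hypothetical stand-in, not BTShared.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct SharedSketch
{
    int    fixed_state;       /* fixed-size fields come first */
    double reltuples;
    char   snapshot_data[];   /* variable-length tail, like phs_snapshot_data */
} SharedSketch;

int main(void)
{
    size_t snapshot_space = 128;   /* stands in for EstimateSnapshotSpace() */
    size_t total = offsetof(SharedSketch, snapshot_data) + snapshot_space;
    SharedSketch *shared = malloc(total);

    printf("allocated %zu bytes for shared state\n", total);
    free(shared);
    return 0;
}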

◆ _bt_parallel_heapscan()

static double _bt_parallel_heapscan ( BTBuildState *  buildstate,
bool *  brokenhotchain 
)
static

Definition at line 1374 of file nbtsort.c.

References BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTShared::havedead, BTBuildState::havedead, BTShared::indtuples, BTBuildState::indtuples, BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, BTShared::reltuples, SpinLockAcquire, SpinLockRelease, WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1375 {
1376  BTShared *btshared = buildstate->btleader->btshared;
1377  int nparticipanttuplesorts;
1378  double reltuples;
1379 
1380  nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
1381  for (;;)
1382  {
1383  SpinLockAcquire(&btshared->mutex);
1384  if (btshared->nparticipantsdone == nparticipanttuplesorts)
1385  {
1386  buildstate->havedead = btshared->havedead;
1387  buildstate->indtuples = btshared->indtuples;
1388  *brokenhotchain = btshared->brokenhotchain;
1389  reltuples = btshared->reltuples;
1390  SpinLockRelease(&btshared->mutex);
1391  break;
1392  }
1393  SpinLockRelease(&btshared->mutex);
1394 
1395  ConditionVariableSleep(&btshared->workersdonecv,
1396  WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1397  }
1398 
1399  ConditionVariableCancelSleep();
1400 
1401  return reltuples;
1402 }
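
A pthreads analogue of the wait loop above (PostgreSQL's ConditionVariableSleep() works differently internally, but the shape is the same): the leader rechecks the shared counter under the lock and sleeps on the condition variable until workers signal completion. This is an illustrative sketch, not backend code; build with -pthread.

#include <pthread.h>
#include <stdio.h>

#define NWORKERS 3

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t workersdone = PTHREAD_COND_INITIALIZER;
static int nparticipantsdone = 0;

static void *worker(void *arg)
{
    (void) arg;
    /* ... the scan-and-sort work would happen here ... */
    pthread_mutex_lock(&mutex);
    nparticipantsdone++;
    pthread_mutex_unlock(&mutex);
    pthread_cond_signal(&workersdone);   /* cf. ConditionVariableSignal() */
    return NULL;
}

int main(void)
{
    pthread_t tids[NWORKERS];

    for (int i = 0; i < NWORKERS; i++)
        pthread_create(&tids[i], NULL, worker, NULL);

    /* leader: wait until every participant reports done */
    pthread_mutex_lock(&mutex);
    while (nparticipantsdone < NWORKERS)
        pthread_cond_wait(&workersdone, &mutex);
    pthread_mutex_unlock(&mutex);

    printf("all %d participants done\n", nparticipantsdone);
    for (int i = 0; i < NWORKERS; i++)
        pthread_join(tids[i], NULL);
    return 0;
}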

◆ _bt_parallel_scan_and_sort()

static void _bt_parallel_scan_and_sort ( BTSpool *  btspool,
BTSpool *  btspool2,
BTShared *  btshared,
Sharedsort *  sharedsort,
Sharedsort *  sharedsort2,
int  sortmem 
)
static

Definition at line 1555 of file nbtsort.c.

References _bt_build_callback(), BTShared::brokenhotchain, BTBuildState::btleader, BuildIndexInfo(), ConditionVariableSignal(), BTShared::havedead, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, heap_beginscan_parallel(), BTShared::heapdesc, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, BTSpool::index, IndexBuildHeapScan(), BTShared::indtuples, BTBuildState::indtuples, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, Min, BTShared::mutex, SortCoordinateData::nParticipants, BTShared::nparticipantsdone, palloc0(), BTShared::reltuples, SortCoordinateData::sharedsort, BTSpool::sortstate, SpinLockAcquire, SpinLockRelease, BTBuildState::spool, BTBuildState::spool2, tuplesort_begin_index_btree(), tuplesort_end(), tuplesort_performsort(), work_mem, and BTShared::workersdonecv.

Referenced by _bt_leader_participate_as_worker(), and _bt_parallel_build_main().

1558 {
1559  SortCoordinate coordinate;
1560  BTBuildState buildstate;
1561  HeapScanDesc scan;
1562  double reltuples;
1563  IndexInfo *indexInfo;
1564 
1565  /* Initialize local tuplesort coordination state */
1566  coordinate = palloc0(sizeof(SortCoordinateData));
1567  coordinate->isWorker = true;
1568  coordinate->nParticipants = -1;
1569  coordinate->sharedsort = sharedsort;
1570 
1571  /* Begin "partial" tuplesort */
1572  btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1573  btspool->index,
1574  btspool->isunique,
1575  sortmem, coordinate,
1576  false);
1577 
1578  /*
1579  * Just as with serial case, there may be a second spool. If so, a
1580  * second, dedicated spool2 partial tuplesort is required.
1581  */
1582  if (btspool2)
1583  {
1584  SortCoordinate coordinate2;
1585 
1586  /*
1587  * We expect that the second one (for dead tuples) won't get very
1588  * full, so we give it only work_mem (unless sortmem is less for
1589  * worker). Worker processes are generally permitted to allocate
1590  * work_mem independently.
1591  */
1592  coordinate2 = palloc0(sizeof(SortCoordinateData));
1593  coordinate2->isWorker = true;
1594  coordinate2->nParticipants = -1;
1595  coordinate2->sharedsort = sharedsort2;
1596  btspool2->sortstate =
1597  tuplesort_begin_index_btree(btspool->heap, btspool->index, false,
1598  Min(sortmem, work_mem), coordinate2,
1599  false);
1600  }
1601 
1602  /* Fill in buildstate for _bt_build_callback() */
1603  buildstate.isunique = btshared->isunique;
1604  buildstate.havedead = false;
1605  buildstate.heap = btspool->heap;
1606  buildstate.spool = btspool;
1607  buildstate.spool2 = btspool2;
1608  buildstate.indtuples = 0;
1609  buildstate.btleader = NULL;
1610 
1611  /* Join parallel scan */
1612  indexInfo = BuildIndexInfo(btspool->index);
1613  indexInfo->ii_Concurrent = btshared->isconcurrent;
1614  scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc);
1615  reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo,
1616  true, _bt_build_callback,
1617  (void *) &buildstate, scan);
1618 
1619  /*
1620  * Execute this worker's part of the sort.
1621  *
1622  * Unlike leader and serial cases, we cannot avoid calling
1623  * tuplesort_performsort() for spool2 if it ends up containing no dead
1624  * tuples (this is disallowed for workers by tuplesort).
1625  */
1626  tuplesort_performsort(btspool->sortstate);
1627  if (btspool2)
1628  tuplesort_performsort(btspool2->sortstate);
1629 
1630  /*
1631  * Done. Record ambuild statistics, and whether we encountered a broken
1632  * HOT chain.
1633  */
1634  SpinLockAcquire(&btshared->mutex);
1635  btshared->nparticipantsdone++;
1636  btshared->reltuples += reltuples;
1637  if (buildstate.havedead)
1638  btshared->havedead = true;
1639  btshared->indtuples += buildstate.indtuples;
1640  if (indexInfo->ii_BrokenHotChain)
1641  btshared->brokenhotchain = true;
1642  SpinLockRelease(&btshared->mutex);
1643 
1644  /* Notify leader */
1645  ConditionVariableSignal(&btshared->workersdonecv);
1646 
1647  /* We can end tuplesorts immediately */
1648  tuplesort_end(btspool->sortstate);
1649  if (btspool2)
1650  tuplesort_end(btspool2->sortstate);
1651 }

◆ _bt_slideleft()

static void _bt_slideleft ( Page  page)
static

Definition at line 707 of file nbtsort.c.

References OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId, PageGetMaxOffsetNumber, and PageIsEmpty.

Referenced by _bt_uppershutdown().

708 {
709  OffsetNumber off;
710  OffsetNumber maxoff;
711  ItemId previi;
712  ItemId thisii;
713 
714  if (!PageIsEmpty(page))
715  {
716  maxoff = PageGetMaxOffsetNumber(page);
717  previi = PageGetItemId(page, P_HIKEY);
718  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
719  {
720  thisii = PageGetItemId(page, off);
721  *previi = *thisii;
722  previi = thisii;
723  }
724  ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
725  }
726 }
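The loop copies each line pointer one slot to the left, overwriting the P_HIKEY slot, then shrinks pd_lower by one ItemIdData. The rightmost page on a level has no high key, so the slot reserved for one during the build must be squeezed out before the page is written. Schematically (line-pointer array only; the tuple data itself does not move):

    before:  [1 -> high key] [2 -> k1] [3 -> k2] [4 -> k3]
    after:   [1 -> k1] [2 -> k2] [3 -> k3]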

◆ _bt_sortaddtup()

static void _bt_sortaddtup ( Page  page,
Size  itemsize,
IndexTuple  itup,
OffsetNumber  itup_off 
)
static

Definition at line 742 of file nbtsort.c.

References elog, ERROR, InvalidOffsetNumber, P_FIRSTKEY, P_ISLEAF, PageAddItem, PageGetSpecialPointer, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

746 {
747  BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
748  IndexTupleData trunctuple;
749 
750  if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
751  {
752  trunctuple = *itup;
753  trunctuple.t_info = sizeof(IndexTupleData);
754  itup = &trunctuple;
755  itemsize = sizeof(IndexTupleData);
756  }
757 
758  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
759  false, false) == InvalidOffsetNumber)
760  elog(ERROR, "failed to add item to the index page");
761 }
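The truncation branch applies only to the first real item (P_FIRSTKEY) on a non-leaf page: only the IndexTupleData header (including the downlink t_tid) is stored, producing a key-less "minus infinity" sentinel. The search side honors this convention by never comparing that item's key; a sketch of the consumer (cf. _bt_compare() in nbtsearch.c):

    /* first data item on an internal page compares below any scankey */
    if (!P_ISLEAF(opaque) && offnum == P_FIRSTKEY)
        return 1;    /* i.e. scankey > "minus infinity" */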

◆ _bt_spool()

static void _bt_spool ( BTSpool btspool,
ItemPointer  self,
Datum values,
bool isnull 
)
static

Definition at line 512 of file nbtsort.c.

References BTSpool::index, BTSpool::sortstate, and tuplesort_putindextuplevalues().

Referenced by _bt_build_callback().

513 {
514  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
515  self, values, isnull);
516 }
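_bt_spool() is driven by _bt_build_callback() (definition at line 560 of this file), which decides which spool each heap tuple belongs in. A condensed sketch of that routing, using the BTBuildState fields documented above:

    /* condensed from _bt_build_callback(): live tuples go to the primary
     * spool; dead tuples, when a spool2 exists, are diverted so they do
     * not participate in the uniqueness check */
    if (tupleIsAlive || buildstate->spool2 == NULL)
        _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
    else
    {
        buildstate->havedead = true;
        _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
    }
    buildstate->indtuples += 1;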

◆ _bt_spooldestroy()

static void _bt_spooldestroy ( BTSpool btspool)
static

Definition at line 502 of file nbtsort.c.

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by _bt_spools_heapscan(), and btbuild().

503 {
504  tuplesort_end(btspool->sortstate);
505  pfree(btspool);
506 }

◆ _bt_spools_heapscan()

static double _bt_spools_heapscan ( Relation  heap,
Relation  index,
BTBuildState buildstate,
IndexInfo indexInfo 
)
static

Definition at line 374 of file nbtsort.c.

References _bt_begin_parallel(), _bt_build_callback(), _bt_parallel_heapscan(), _bt_spooldestroy(), BTBuildState::btleader, BTSpool::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexInfo::ii_Unique, BTSpool::index, IndexBuildHeapScan(), BTSpool::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, maintenance_work_mem, SortCoordinateData::nParticipants, BTLeader::nparticipanttuplesorts, palloc0(), SortCoordinateData::sharedsort, BTLeader::sharedsort, BTLeader::sharedsort2, BTSpool::sortstate, BTBuildState::spool, BTBuildState::spool2, tuplesort_begin_index_btree(), and work_mem.

Referenced by btbuild().

376 {
377  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
378  SortCoordinate coordinate = NULL;
379  double reltuples = 0;
380 
381  /*
382  * We size the sort area as maintenance_work_mem rather than work_mem to
383  * speed index creation. This should be OK since a single backend can't
384  * run multiple index creations in parallel (see also: notes on
385  * parallelism and maintenance_work_mem below).
386  */
387  btspool->heap = heap;
388  btspool->index = index;
389  btspool->isunique = indexInfo->ii_Unique;
390 
391  /* Save as primary spool */
392  buildstate->spool = btspool;
393 
394  /* Attempt to launch parallel worker scan when required */
395  if (indexInfo->ii_ParallelWorkers > 0)
396  _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
397  indexInfo->ii_ParallelWorkers);
398 
399  /*
400  * If parallel build requested and at least one worker process was
401  * successfully launched, set up coordination state
402  */
403  if (buildstate->btleader)
404  {
405  coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
406  coordinate->isWorker = false;
407  coordinate->nParticipants =
408  buildstate->btleader->nparticipanttuplesorts;
409  coordinate->sharedsort = buildstate->btleader->sharedsort;
410  }
411 
412  /*
413  * Begin serial/leader tuplesort.
414  *
415  * In cases where parallelism is involved, the leader receives the same
416  * share of maintenance_work_mem as a serial sort (it is generally treated
417  * in the same way as a serial sort once we return). Parallel worker
418  * Tuplesortstates will have received only a fraction of
419  * maintenance_work_mem, though.
420  *
421  * We rely on the leader Tuplesortstate's lifetime almost never
422  * overlapping with any worker Tuplesortstate's lifetime. There may be
423  * some small overlap, but that's okay because we rely on the leader
424  * Tuplesortstate allocating only a small, fixed amount of memory here.
425  * When its tuplesort_performsort() is called (by our caller), and
426  * significant amounts of memory are likely to be used, all workers must
427  * have already freed almost all memory held by their Tuplesortstates
428  * (they are about to go away completely, too). The overall effect is
429  * that maintenance_work_mem always represents an absolute high watermark
430  * on the amount of memory used by a CREATE INDEX operation, regardless of
431  * the use of parallelism or any other factor.
432  */
433  buildstate->spool->sortstate =
434  tuplesort_begin_index_btree(heap, index, buildstate->isunique,
435  maintenance_work_mem, coordinate,
436  false);
437 
438  /*
439  * If building a unique index, put dead tuples in a second spool to keep
440  * them out of the uniqueness check. We expect that the second spool (for
441  * dead tuples) won't get very full, so we give it only work_mem.
442  */
443  if (indexInfo->ii_Unique)
444  {
445  BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
446  SortCoordinate coordinate2 = NULL;
447 
448  /* Initialize secondary spool */
449  btspool2->heap = heap;
450  btspool2->index = index;
451  btspool2->isunique = false;
452  /* Save as secondary spool */
453  buildstate->spool2 = btspool2;
454 
455  if (buildstate->btleader)
456  {
457  /*
458  * Set up the shared coordination state that is passed to
459  * tuplesort_begin_index_btree() to describe the high-level
460  * coordination of a parallel sort.
461  */
462  coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
463  coordinate2->isWorker = false;
464  coordinate2->nParticipants =
465  buildstate->btleader->nparticipanttuplesorts;
466  coordinate2->sharedsort = buildstate->btleader->sharedsort2;
467  }
468 
469  /*
470  * We expect that the second one (for dead tuples) won't get very
471  * full, so we give it only work_mem.
472  */
473  buildstate->spool2->sortstate =
474  tuplesort_begin_index_btree(heap, index, false, work_mem,
475  coordinate2, false);
476  }
477 
478  /* Fill spool using either serial or parallel heap scan */
479  if (!buildstate->btleader)
480  reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
481  _bt_build_callback, (void *) buildstate,
482  NULL);
483  else
484  reltuples = _bt_parallel_heapscan(buildstate,
485  &indexInfo->ii_BrokenHotChain);
486 
487  /* okay, all heap tuples are spooled */
488  if (buildstate->spool2 && !buildstate->havedead)
489  {
490  /* spool2 turns out to be unnecessary */
491  _bt_spooldestroy(buildstate->spool2);
492  buildstate->spool2 = NULL;
493  }
494 
495  return reltuples;
496 }
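The comments above distinguish the leader's full maintenance_work_mem budget from the fraction each parallel participant receives. A sketch of how that fraction is derived when workers are launched (cf. _bt_leader_participate_as_worker() and _bt_parallel_build_main(); the participants' slices sum to roughly maintenance_work_mem):

    /* each of the nparticipanttuplesorts participants sorts with an
     * equal slice of the maintenance budget */
    int sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;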

◆ _bt_uppershutdown()

static void _bt_uppershutdown ( BTWriteState wstate,
BTPageState state 
)
static

Definition at line 956 of file nbtsort.c.

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), Assert, BTP_ROOT, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, ItemPointerSet, P_HIKEY, P_NONE, PageGetSpecialPointer, palloc(), pfree(), and IndexTupleData::t_tid.

Referenced by _bt_load().

957 {
958  BTPageState *s;
959  BlockNumber rootblkno = P_NONE;
960  uint32 rootlevel = 0;
961  Page metapage;
962 
963  /*
964  * Each iteration of this loop completes one more level of the tree.
965  */
966  for (s = state; s != NULL; s = s->btps_next)
967  {
968  BlockNumber blkno;
969  BTPageOpaque opaque;
970 
971  blkno = s->btps_blkno;
972  opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
973 
974  /*
975  * We have to link the last page on this level to somewhere.
976  *
977  * If we're at the top, it's the root, so attach it to the metapage.
978  * Otherwise, add an entry for it to its parent using its minimum key.
979  * This may cause the last page of the parent level to split, but
980  * that's not a problem -- we haven't gotten to it yet.
981  */
982  if (s->btps_next == NULL)
983  {
984  opaque->btpo_flags |= BTP_ROOT;
985  rootblkno = blkno;
986  rootlevel = s->btps_level;
987  }
988  else
989  {
990  Assert(s->btps_minkey != NULL);
991  ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY);
992  _bt_buildadd(wstate, s->btps_next, s->btps_minkey);
993  pfree(s->btps_minkey);
994  s->btps_minkey = NULL;
995  }
996 
997  /*
998  * This is the rightmost page, so the ItemId array needs to be slid
999  * back one slot. Then we can dump out the page.
1000  */
1001  _bt_slideleft(s->btps_page);
1002  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1002  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1003  s->btps_page = NULL; /* writepage freed the workspace */
1004  }
1005 
1006  /*
1007  * As the last step in the process, construct the metapage and make it
1008  * point to the new root (unless we had no data at all, in which case it's
1009  * set to point to "P_NONE"). This changes the index to the "valid" state
1010  * by filling in a valid magic number in the metapage.
1011  */
1012  metapage = (Page) palloc(BLCKSZ);
1013  _bt_initmetapage(metapage, rootblkno, rootlevel);
1014  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
1015 }
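Each BTPageState on the btps_next chain is the still-open rightmost page of one level, leaves first, so the loop finalizes the index bottom-up and the metapage is written last. Schematically, for a three-level build (a sketch of the linkage, not the on-disk layout):

    level 2:  [R]                      <- BTP_ROOT set; metapage points here
    level 1:  [i1] -> [i2]             <- i2's minimum key added to R
    level 0:  [l1] -> [l2] -> [l3]     <- l3's minimum key added to i2
    block 0:  metapage = (root R, level 2)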

◆ btbuild()

IndexBuildResult* btbuild ( Relation  heap,
Relation  index,
IndexInfo indexInfo 
)

Definition at line 305 of file nbtsort.c.

References _bt_end_parallel(), _bt_leafbuild(), _bt_spooldestroy(), _bt_spools_heapscan(), BTBuildState::btleader, elog, ERROR, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexBuildResult::heap_tuples, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, BTBuildState::indtuples, BTBuildState::isunique, log_btree_build_stats, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

306 {
307  IndexBuildResult *result;
308  BTBuildState buildstate;
309  double reltuples;
310 
311 #ifdef BTREE_BUILD_STATS
312  if (log_btree_build_stats)
313  ResetUsage();
314 #endif /* BTREE_BUILD_STATS */
315 
316  buildstate.isunique = indexInfo->ii_Unique;
317  buildstate.havedead = false;
318  buildstate.heap = heap;
319  buildstate.spool = NULL;
320  buildstate.spool2 = NULL;
321  buildstate.indtuples = 0;
322  buildstate.btleader = NULL;
323 
324  /*
325  * We expect to be called exactly once for any index relation. If that's
326  * not the case, big trouble's what we have.
327  */
328  if (RelationGetNumberOfBlocks(index) != 0)
329  elog(ERROR, "index \"%s\" already contains data",
330  RelationGetRelationName(index));
331 
332  reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
333 
334  /*
335  * Finish the build by (1) completing the sort of the spool file, (2)
336  * inserting the sorted tuples into btree pages and (3) building the upper
337  * levels. Finally, it may also be necessary to end use of parallelism.
338  */
339  _bt_leafbuild(buildstate.spool, buildstate.spool2);
340  _bt_spooldestroy(buildstate.spool);
341  if (buildstate.spool2)
342  _bt_spooldestroy(buildstate.spool2);
343  if (buildstate.btleader)
344  _bt_end_parallel(buildstate.btleader);
345 
346  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
347 
348  result->heap_tuples = reltuples;
349  result->index_tuples = buildstate.indtuples;
350 
351 #ifdef BTREE_BUILD_STATS
352  if (log_btree_build_stats)
353  {
354  ShowUsage("BTREE BUILD STATS");
355  ResetUsage();
356  }
357 #endif /* BTREE_BUILD_STATS */
358 
359  return result;
360 }
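btbuild() is not called directly by core code; bthandler() installs it as the access method's ambuild entry point (hence "Referenced by bthandler()" above). An abridged sketch of that wiring in nbtree.c:

    Datum
    bthandler(PG_FUNCTION_ARGS)
    {
        IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

        amroutine->ambuild = btbuild;
        /* ... the remaining entry points are filled in the same way ... */

        PG_RETURN_POINTER(amroutine);
    }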