PostgreSQL Source Code  git master
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/relscan.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
Include dependency graph for nbtsort.c:


Data Structures

struct  BTSpool
 
struct  BTShared
 
struct  BTLeader
 
struct  BTBuildState
 
struct  BTPageState
 
struct  BTWriteState
 

Macros

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)
 
#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)
 

Typedefs

typedef struct BTSpool BTSpool
 
typedef struct BTShared BTShared
 
typedef struct BTLeader BTLeader
 
typedef struct BTBuildState BTBuildState
 
typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static double _bt_spools_heapscan (Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
 
static void _bt_spooldestroy (BTSpool *btspool)
 
static void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
static void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_build_callback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static Page _bt_blnewpage (uint32 level)
 
static BTPageState * _bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page page)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_begin_parallel (BTBuildState *buildstate, bool isconcurrent, int request)
 
static void _bt_end_parallel (BTLeader *btleader)
 
static Size _bt_parallel_estimate_shared (Snapshot snapshot)
 
static double _bt_parallel_heapscan (BTBuildState *buildstate, bool *brokenhotchain)
 
static void _bt_leader_participate_as_worker (BTBuildState *buildstate)
 
static void _bt_parallel_scan_and_sort (BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BTREE_SHARED

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)

Definition at line 86 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)

Definition at line 89 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)

Definition at line 87 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT_SPOOL2

#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)

Definition at line 88 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().
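
These four keys are only meaningful within one parallel CREATE INDEX operation: the leader allocates a chunk in the dynamic shared memory segment, publishes it in the table of contents under a key, and each worker fetches it back with the same key. A minimal sketch of that handshake, using a hypothetical key and payload struct rather than the real ones above:

#include "postgres.h"
#include "access/parallel.h"
#include "storage/shm_toc.h"

#define DEMO_KEY_STATE		UINT64CONST(0xB000000000000001) /* hypothetical */

typedef struct DemoShared
{
	int			nworkers;
} DemoShared;

/* Leader side: allocate from the DSM segment, then publish under the key */
static DemoShared *
demo_publish(ParallelContext *pcxt)
{
	DemoShared *state;

	state = (DemoShared *) shm_toc_allocate(pcxt->toc, sizeof(DemoShared));
	state->nworkers = pcxt->nworkers;
	shm_toc_insert(pcxt->toc, DEMO_KEY_STATE, state);
	return state;
}

/* Worker side: look the same key up; noError = false raises an error if absent */
static DemoShared *
demo_attach(shm_toc *toc)
{
	return (DemoShared *) shm_toc_lookup(toc, DEMO_KEY_STATE, false);
}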

Typedef Documentation

◆ BTBuildState

◆ BTLeader

◆ BTPageState

◆ BTShared

◆ BTSpool

◆ BTWriteState

Function Documentation

◆ _bt_begin_parallel()

static void _bt_begin_parallel ( BTBuildState *  buildstate,
bool  isconcurrent,
int  request 
)
static

Definition at line 1242 of file nbtsort.c.

References _bt_end_parallel(), _bt_leader_participate_as_worker(), _bt_parallel_estimate_shared(), Assert, BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableInit(), CreateParallelContext(), debug_query_string, EnterParallelMode(), ParallelContext::estimator, GetTransactionSnapshot(), BTShared::havedead, BTSpool::heap, heap_parallelscan_initialize(), BTShared::heapdesc, BTShared::heaprelid, BTSpool::index, BTShared::indexrelid, BTShared::indtuples, InitializeParallelDSM(), BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, LaunchParallelWorkers(), BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, ParallelContext::nworkers_launched, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, BTLeader::pcxt, RegisterSnapshot(), RelationGetRelid, BTShared::reltuples, BTShared::scantuplesortstates, ParallelContext::seg, BTLeader::sharedsort, BTLeader::sharedsort2, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), BTLeader::snapshot, SnapshotAny, SpinLockInit, BTBuildState::spool, ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), WaitForParallelWorkersToAttach(), and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1243 {
1244  ParallelContext *pcxt;
1245  int scantuplesortstates;
1246  Snapshot snapshot;
1247  Size estbtshared;
1248  Size estsort;
1249  BTShared *btshared;
1250  Sharedsort *sharedsort;
1251  Sharedsort *sharedsort2;
1252  BTSpool *btspool = buildstate->spool;
1253  BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1254  bool leaderparticipates = true;
1255  char *sharedquery;
1256  int querylen;
1257 
1258 #ifdef DISABLE_LEADER_PARTICIPATION
1259  leaderparticipates = false;
1260 #endif
1261 
1262  /*
1263  * Enter parallel mode, and create context for parallel build of btree
1264  * index
1265  */
1266  EnterParallelMode();
1267  Assert(request > 0);
1268  pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1269  request, true);
1270  scantuplesortstates = leaderparticipates ? request + 1 : request;
1271 
1272  /*
1273  * Prepare for scan of the base relation. In a normal index build, we use
1274  * SnapshotAny because we must retrieve all tuples and do our own time
1275  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1276  * concurrent build, we take a regular MVCC snapshot and index whatever's
1277  * live according to that.
1278  */
1279  if (!isconcurrent)
1280  snapshot = SnapshotAny;
1281  else
1282  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1283 
1284  /*
1285  * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and
1286  * PARALLEL_KEY_TUPLESORT tuplesort workspace
1287  */
1288  estbtshared = _bt_parallel_estimate_shared(snapshot);
1289  shm_toc_estimate_chunk(&pcxt->estimator, estbtshared);
1290  estsort = tuplesort_estimate_shared(scantuplesortstates);
1291  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1292 
1293  /*
1294  * Unique case requires a second spool, and so we may have to account for
1295  * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
1296  */
1297  if (!btspool->isunique)
1298  shm_toc_estimate_keys(&pcxt->estimator, 2);
1299  else
1300  {
1301  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1302  shm_toc_estimate_keys(&pcxt->estimator, 3);
1303  }
1304 
1305  /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1306  querylen = strlen(debug_query_string);
1307  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
1308  shm_toc_estimate_keys(&pcxt->estimator, 1);
1309 
1310  /* Everyone's had a chance to ask for space, so now create the DSM */
1311  InitializeParallelDSM(pcxt);
1312 
1313  /* Store shared build state, for which we reserved space */
1314  btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1315  /* Initialize immutable state */
1316  btshared->heaprelid = RelationGetRelid(btspool->heap);
1317  btshared->indexrelid = RelationGetRelid(btspool->index);
1318  btshared->isunique = btspool->isunique;
1319  btshared->isconcurrent = isconcurrent;
1320  btshared->scantuplesortstates = scantuplesortstates;
1321  ConditionVariableInit(&btshared->workersdonecv);
1322  SpinLockInit(&btshared->mutex);
1323  /* Initialize mutable state */
1324  btshared->nparticipantsdone = 0;
1325  btshared->reltuples = 0.0;
1326  btshared->havedead = false;
1327  btshared->indtuples = 0.0;
1328  btshared->brokenhotchain = false;
1329  heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot);
1330 
1331  /*
1332  * Store shared tuplesort-private state, for which we reserved space.
1333  * Then, initialize opaque state using tuplesort routine.
1334  */
1335  sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1336  tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1337  pcxt->seg);
1338 
1339  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared);
1340  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1341 
1342  /* Unique case requires a second spool, and associated shared state */
1343  if (!btspool->isunique)
1344  sharedsort2 = NULL;
1345  else
1346  {
1347  /*
1348  * Store additional shared tuplesort-private state, for which we
1349  * reserved space. Then, initialize opaque state using tuplesort
1350  * routine.
1351  */
1352  sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1353  tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1354  pcxt->seg);
1355 
1356  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
1357  }
1358 
1359  /* Store query string for workers */
1360  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1361  memcpy(sharedquery, debug_query_string, querylen + 1);
1362  shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1363 
1364  /* Launch workers, saving status for leader/caller */
1365  LaunchParallelWorkers(pcxt);
1366  btleader->pcxt = pcxt;
1367  btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1368  if (leaderparticipates)
1369  btleader->nparticipanttuplesorts++;
1370  btleader->btshared = btshared;
1371  btleader->sharedsort = sharedsort;
1372  btleader->sharedsort2 = sharedsort2;
1373  btleader->snapshot = snapshot;
1374 
1375  /* If no workers were successfully launched, back out (do serial build) */
1376  if (pcxt->nworkers_launched == 0)
1377  {
1378  _bt_end_parallel(btleader);
1379  return;
1380  }
1381 
1382  /* Save leader state now that it's clear build will be parallel */
1383  buildstate->btleader = btleader;
1384 
1385  /* Join heap scan ourselves */
1386  if (leaderparticipates)
1387  _bt_leader_participate_as_worker(buildstate);
1388 
1389  /*
1390  * Caller needs to wait for all launched workers when we return. Make
1391  * sure that the failure-to-start case will not hang forever.
1392  */
1393  WaitForParallelWorkersToAttach(pcxt);
1394 }
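
The function above is an instance of the standard ParallelContext lifecycle: estimate space, create the DSM segment, publish shared state, launch workers, and eventually tear everything down. A condensed, hypothetical sketch of that skeleton (real payloads, keys, and error handling omitted):

#include "postgres.h"
#include "access/parallel.h"
#include "access/xact.h"
#include "storage/shm_toc.h"

static void
parallel_lifecycle_sketch(int nworkers)
{
	ParallelContext *pcxt;
	char	   *chunk;

	EnterParallelMode();
	pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
								 nworkers, true);

	/* 1. every participant estimates its space and key slots... */
	shm_toc_estimate_chunk(&pcxt->estimator, 1024);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* 2. ...then the DSM segment is created exactly once */
	InitializeParallelDSM(pcxt);

	/* 3. allocate from the segment and publish under a TOC key */
	chunk = (char *) shm_toc_allocate(pcxt->toc, 1024);
	shm_toc_insert(pcxt->toc, UINT64CONST(0xB000000000000001), chunk);

	/* 4. start workers; nworkers_launched may be fewer than requested */
	LaunchParallelWorkers(pcxt);

	/* 5. wait for completion, then tear down and leave parallel mode */
	WaitForParallelWorkersToFinish(pcxt);
	DestroyParallelContext(pcxt);
	ExitParallelMode();
}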

◆ _bt_blnewpage()

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 590 of file nbtsort.c.

References _bt_pageinit(), BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageOpaqueData::level, P_NONE, PageGetSpecialPointer, and palloc().

Referenced by _bt_buildadd(), and _bt_pagestate().

591 {
592  Page page;
593  BTPageOpaque opaque;
594 
595  page = (Page) palloc(BLCKSZ);
596 
597  /* Zero the page and set up standard page header info */
598  _bt_pageinit(page, BLCKSZ);
599 
600  /* Initialize BT opaque state */
601  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
602  opaque->btpo_prev = opaque->btpo_next = P_NONE;
603  opaque->btpo.level = level;
604  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
605  opaque->btpo_cycleid = 0;
606 
607  /* Make the P_HIKEY line pointer appear allocated */
608  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
609 
610  return page;
611 }
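
Callers can read this initialization back out of the page's special space; a minimal sketch, assuming the page came from _bt_blnewpage():

#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufpage.h"

/* Report whether a freshly built page is a leaf (created with level == 0) */
static bool
demo_page_is_leaf(Page page)
{
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/* _bt_blnewpage() sets BTP_LEAF only when level == 0 */
	return P_ISLEAF(opaque);
}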

◆ _bt_blwritepage()

static void _bt_blwritepage ( BTWriteState *  wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 617 of file nbtsort.c.

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, PageSetChecksumInplace(), palloc0(), pfree(), RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

618 {
619  /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
620  RelationOpenSmgr(wstate->index);
621 
622  /* XLOG stuff */
623  if (wstate->btws_use_wal)
624  {
625  /* We use the heap NEWPAGE record type for this */
626  log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
627  }
628 
629  /*
630  * If we have to write pages nonsequentially, fill in the space with
631  * zeroes until we come back and overwrite. This is not logically
632  * necessary on standard Unix filesystems (unwritten space will read as
633  * zeroes anyway), but it should help to avoid fragmentation. The dummy
634  * pages aren't WAL-logged though.
635  */
636  while (blkno > wstate->btws_pages_written)
637  {
638  if (!wstate->btws_zeropage)
639  wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
640  /* don't set checksum for all-zero page */
641  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
642  wstate->btws_pages_written++,
643  (char *) wstate->btws_zeropage,
644  true);
645  }
646 
647  PageSetChecksumInplace(page, blkno);
648 
649  /*
650  * Now write the page. There's no need for smgr to schedule an fsync for
651  * this write; we'll do it ourselves before ending the build.
652  */
653  if (blkno == wstate->btws_pages_written)
654  {
655  /* extending the file... */
656  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
657  (char *) page, true);
658  wstate->btws_pages_written++;
659  }
660  else
661  {
662  /* overwriting a block we zero-filled before */
663  smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
664  (char *) page, true);
665  }
666 
667  pfree(page);
668 }
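
The interplay between the zero-fill loop and the extend/overwrite branch can be studied in isolation; a standalone sketch with the smgr calls reduced to comments (pages_written mirrors btws_pages_written):

#include "postgres.h"
#include "storage/block.h"

static void
demo_write_block(BlockNumber blkno, BlockNumber *pages_written)
{
	/* pad with zero pages until the target block is reachable */
	while (blkno > *pages_written)
	{
		/* smgrextend() an all-zeroes page at block *pages_written */
		(*pages_written)++;
	}

	if (blkno == *pages_written)
	{
		/* appending past the current end: smgrextend() the real page */
		(*pages_written)++;
	}
	else
	{
		/* target was zero-filled by an earlier call: smgrwrite() over it */
	}
}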

◆ _bt_build_callback()

static void _bt_build_callback ( Relation  index,
HeapTuple  htup,
Datum *  values,
bool *  isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 561 of file nbtsort.c.

References _bt_spool(), BTBuildState::havedead, BTBuildState::indtuples, BTBuildState::spool, BTBuildState::spool2, and HeapTupleData::t_self.

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().

567 {
568  BTBuildState *buildstate = (BTBuildState *) state;
569 
570  /*
571  * insert the index tuple into the appropriate spool file for subsequent
572  * processing
573  */
574  if (tupleIsAlive || buildstate->spool2 == NULL)
575  _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
576  else
577  {
578  /* dead tuples are put into spool2 */
579  buildstate->havedead = true;
580  _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
581  }
582 
583  buildstate->indtuples += 1;
584 }
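
This callback is never called directly; it is handed to IndexBuildHeapScan(), which invokes it once per heap tuple (the parallel case appears in _bt_parallel_scan_and_sort() below). A hedged sketch of the serial hookup, with all setup assumed to be done by the caller:

#include "postgres.h"
#include "catalog/index.h"
#include "utils/rel.h"

static double
demo_drive_callback(Relation heapRel, Relation indexRel,
					IndexInfo *indexInfo, void *buildstate)
{
	/* passing scan = NULL lets IndexBuildHeapScan() start its own serial scan */
	return IndexBuildHeapScan(heapRel, indexRel, indexInfo,
							  true,		/* allow_sync */
							  _bt_build_callback,
							  buildstate,
							  NULL);
}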

◆ _bt_buildadd()

static void _bt_buildadd ( BTWriteState *  wstate,
BTPageState *  state,
IndexTuple  itup 
)
static

Definition at line 801 of file nbtsort.c.

References _bt_blnewpage(), _bt_blwritepage(), _bt_nonkey_truncate(), _bt_pagestate(), _bt_sortaddtup(), Assert, BTMaxItemSize, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageState::btps_blkno, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTreeInnerTupleSetDownLink, BTreeTupleGetNAtts, BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), ERROR, errtableconstraint(), BTWriteState::heap, BTWriteState::index, IndexRelationGetNumberOfAttributes, IndexRelationGetNumberOfKeyAttributes, IndexTupleSize, ItemIdGetLength, ItemIdSetUnused, MAXALIGN, OffsetNumberNext, P_FIRSTKEY, P_HIKEY, P_ISLEAF, P_NONE, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetSpecialPointer, PageIndexTupleDelete(), pfree(), and RelationGetRelationName.

Referenced by _bt_load(), and _bt_uppershutdown().

802 {
803  Page npage;
804  BlockNumber nblkno;
805  OffsetNumber last_off;
806  Size pgspc;
807  Size itupsz;
808  int indnatts = IndexRelationGetNumberOfAttributes(wstate->index);
809  int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(wstate->index);
810 
811  /*
812  * This is a handy place to check for cancel interrupts during the btree
813  * load phase of index creation.
814  */
815  CHECK_FOR_INTERRUPTS();
816 
817  npage = state->btps_page;
818  nblkno = state->btps_blkno;
819  last_off = state->btps_lastoff;
820 
821  pgspc = PageGetFreeSpace(npage);
822  itupsz = IndexTupleSize(itup);
823  itupsz = MAXALIGN(itupsz);
824 
825  /*
826  * Check whether the item can fit on a btree page at all. (Eventually, we
827  * ought to try to apply TOAST methods if not.) We actually need to be
828  * able to fit three items on every page, so restrict any one item to 1/3
829  * the per-page available space. Note that at this point, itupsz doesn't
830  * include the ItemId.
831  *
832  * NOTE: similar code appears in _bt_insertonpg() to defend against
833  * oversize items being inserted into an already-existing index. But
834  * during creation of an index, we don't go through there.
835  */
836  if (itupsz > BTMaxItemSize(npage))
837  ereport(ERROR,
838  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
839  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
840  itupsz, BTMaxItemSize(npage),
841  RelationGetRelationName(wstate->index)),
842  errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
843  "Consider a function index of an MD5 hash of the value, "
844  "or use full text indexing."),
845  errtableconstraint(wstate->heap,
846  RelationGetRelationName(wstate->index))));
847 
848  /*
849  * Check to see if page is "full". It's definitely full if the item won't
850  * fit. Otherwise, compare to the target freespace derived from the
851  * fillfactor. However, we must put at least two items on each page, so
852  * disregard fillfactor if we don't have that many.
853  */
854  if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
855  {
856  /*
857  * Finish off the page and write it out.
858  */
859  Page opage = npage;
860  BlockNumber oblkno = nblkno;
861  ItemId ii;
862  ItemId hii;
863  IndexTuple oitup;
864  BTPageOpaque opageop = (BTPageOpaque) PageGetSpecialPointer(opage);
865 
866  /* Create new page of same level */
867  npage = _bt_blnewpage(state->btps_level);
868 
869  /* and assign it a page position */
870  nblkno = wstate->btws_pages_alloced++;
871 
872  /*
873  * We copy the last item on the page into the new page, and then
874  * rearrange the old page so that the 'last item' becomes its high key
875  * rather than a true data item. There had better be at least two
876  * items on the page already, else the page would be empty of useful
877  * data.
878  */
879  Assert(last_off > P_FIRSTKEY);
880  ii = PageGetItemId(opage, last_off);
881  oitup = (IndexTuple) PageGetItem(opage, ii);
882  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
883 
884  /*
885  * Move 'last' into the high key position on opage
886  */
887  hii = PageGetItemId(opage, P_HIKEY);
888  *hii = *ii;
889  ItemIdSetUnused(ii); /* redundant */
890  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
891 
892  if (indnkeyatts != indnatts && P_ISLEAF(opageop))
893  {
894  IndexTuple truncated;
895  Size truncsz;
896 
897  /*
898  * Truncate any non-key attributes from high key on leaf level
899  * (i.e. truncate on leaf level if we're building an INCLUDE
900  * index). This is only done at the leaf level because
901  * downlinks in internal pages are either negative infinity
902  * items, or get their contents from copying from one level
903  * down. See also: _bt_split().
904  *
905  * Since the truncated tuple is probably smaller than the
906  * original, it cannot just be copied in place (besides, we want
907  * to actually save space on the leaf page). We delete the
908  * original high key, and add our own truncated high key at the
909  * same offset.
910  *
911  * Note that the page layout won't be changed very much. oitup
912  * is already located at the physical beginning of tuple space,
913  * so we only shift the line pointer array back and forth, and
914  * overwrite the latter portion of the space occupied by the
915  * original tuple. This is fairly cheap.
916  */
917  truncated = _bt_nonkey_truncate(wstate->index, oitup);
918  truncsz = IndexTupleSize(truncated);
919  PageIndexTupleDelete(opage, P_HIKEY);
920  _bt_sortaddtup(opage, truncsz, truncated, P_HIKEY);
921  pfree(truncated);
922 
923  /* oitup should continue to point to the page's high key */
924  hii = PageGetItemId(opage, P_HIKEY);
925  oitup = (IndexTuple) PageGetItem(opage, hii);
926  }
927 
928  /*
929  * Link the old page into its parent, using its minimum key. If we
930  * don't have a parent, we have to create one; this adds a new btree
931  * level.
932  */
933  if (state->btps_next == NULL)
934  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
935 
936  Assert(BTreeTupleGetNAtts(state->btps_minkey, wstate->index) ==
937  IndexRelationGetNumberOfKeyAttributes(wstate->index));
938  BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno);
939  _bt_buildadd(wstate, state->btps_next, state->btps_minkey);
940  pfree(state->btps_minkey);
941 
942  /*
943  * Save a copy of the minimum key for the new page. We have to copy
944  * it off the old page, not the new one, in case we are not at leaf
945  * level.
946  */
947  state->btps_minkey = CopyIndexTuple(oitup);
948 
949  /*
950  * Set the sibling links for both pages.
951  */
952  {
953  BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
954  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
955 
956  oopaque->btpo_next = nblkno;
957  nopaque->btpo_prev = oblkno;
958  nopaque->btpo_next = P_NONE; /* redundant */
959  }
960 
961  /*
962  * Write out the old page. We never need to touch it again, so we can
963  * free the opage workspace too.
964  */
965  _bt_blwritepage(wstate, opage, oblkno);
966 
967  /*
968  * Reset last_off to point to new page
969  */
970  last_off = P_FIRSTKEY;
971  }
972 
973  /*
974  * If the new item is the first for its page, stash a copy for later. Note
975  * this will only happen for the first item on a level; on later pages,
976  * the first item for a page is copied from the prior page in the code
977  * above.
978  */
979  if (last_off == P_HIKEY)
980  {
981  BTPageOpaque npageop;
982 
983  Assert(state->btps_minkey == NULL);
984 
985  npageop = (BTPageOpaque) PageGetSpecialPointer(npage);
986 
987  /*
988  * Truncate included attributes of the tuple that we're going to
989  * insert into the parent page as a downlink
990  */
991  if (indnkeyatts != indnatts && P_ISLEAF(npageop))
992  state->btps_minkey = _bt_nonkey_truncate(wstate->index, itup);
993  else
994  state->btps_minkey = CopyIndexTuple(itup);
995  }
996 
997  /*
998  * Add the new item into the current page.
999  */
1000  last_off = OffsetNumberNext(last_off);
1001  _bt_sortaddtup(npage, itupsz, itup, last_off);
1002 
1003  state->btps_page = npage;
1004  state->btps_blkno = nblkno;
1005  state->btps_lastoff = last_off;
1006 }
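
The one-third-of-a-page ceiling tested above can be computed directly; a minimal sketch, assuming the default 8 kB BLCKSZ, under which BTMaxItemSize() works out to 2712 bytes:

#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufpage.h"

static Size
demo_btree_max_item_size(void)
{
	Page		page = (Page) palloc(BLCKSZ);
	Size		limit;

	/* a throwaway page with btree-sized special space */
	PageInit(page, BLCKSZ, sizeof(BTPageOpaqueData));
	limit = BTMaxItemSize(page);	/* 2712 with 8 kB pages */
	pfree(page);
	return limit;
}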

◆ _bt_end_parallel()

static void _bt_end_parallel ( BTLeader *  btleader)
static

Definition at line 1400 of file nbtsort.c.

References DestroyParallelContext(), ExitParallelMode(), IsMVCCSnapshot, BTLeader::pcxt, BTLeader::snapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _bt_begin_parallel(), and btbuild().

1401 {
1402  /* Shutdown worker processes */
1403  WaitForParallelWorkersToFinish(btleader->pcxt);
1404  /* Free last reference to MVCC snapshot, if one was used */
1405  if (IsMVCCSnapshot(btleader->snapshot))
1406  UnregisterSnapshot(btleader->snapshot);
1407  DestroyParallelContext(btleader->pcxt);
1408  ExitParallelMode();
1409 }

◆ _bt_leader_participate_as_worker()

static void _bt_leader_participate_as_worker ( BTBuildState *  buildstate)
static

Definition at line 1476 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), BTBuildState::btleader, BTLeader::btshared, BTSpool::heap, BTSpool::index, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, BTLeader::nparticipanttuplesorts, palloc0(), ResetUsage(), BTLeader::sharedsort, BTLeader::sharedsort2, ShowUsage(), and BTBuildState::spool.

Referenced by _bt_begin_parallel().

1477 {
1478  BTLeader *btleader = buildstate->btleader;
1479  BTSpool *leaderworker;
1480  BTSpool *leaderworker2;
1481  int sortmem;
1482 
1483  /* Allocate memory and initialize private spool */
1484  leaderworker = (BTSpool *) palloc0(sizeof(BTSpool));
1485  leaderworker->heap = buildstate->spool->heap;
1486  leaderworker->index = buildstate->spool->index;
1487  leaderworker->isunique = buildstate->spool->isunique;
1488 
1489  /* Initialize second spool, if required */
1490  if (!btleader->btshared->isunique)
1491  leaderworker2 = NULL;
1492  else
1493  {
1494  /* Allocate memory for worker's own private secondary spool */
1495  leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool));
1496 
1497  /* Initialize worker's own secondary spool */
1498  leaderworker2->heap = leaderworker->heap;
1499  leaderworker2->index = leaderworker->index;
1500  leaderworker2->isunique = false;
1501  }
1502 
1503  /*
1504  * Might as well use reliable figure when doling out maintenance_work_mem
1505  * (when requested number of workers were not launched, this will be
1506  * somewhat higher than it is for other workers).
1507  */
1508  sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;
1509 
1510  /* Perform work common to all participants */
1511  _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
1512  btleader->sharedsort, btleader->sharedsort2,
1513  sortmem);
1514 
1515 #ifdef BTREE_BUILD_STATS
1516  if (log_btree_build_stats)
1517  {
1518  ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1519  ResetUsage();
1520  }
1521 #endif /* BTREE_BUILD_STATS */
1522 }
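
As a worked example of the memory split above: assuming the default maintenance_work_mem of 64 MB (65536 kB) and two launched workers plus the participating leader, nparticipanttuplesorts is 3 and each participant's tuplesort is started with sortmem = 65536 / 3 = 21845 kB.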

◆ _bt_leafbuild()

static void _bt_leafbuild ( BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 524 of file nbtsort.c.

References _bt_load(), BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, log_btree_build_stats, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, tuplesort_performsort(), and XLogIsNeeded.

Referenced by btbuild().

525 {
526  BTWriteState wstate;
527 
528 #ifdef BTREE_BUILD_STATS
529  if (log_btree_build_stats)
530  {
531  ShowUsage("BTREE BUILD (Spool) STATISTICS");
532  ResetUsage();
533  }
534 #endif /* BTREE_BUILD_STATS */
535 
536  tuplesort_performsort(btspool->sortstate);
537  if (btspool2)
538  tuplesort_performsort(btspool2->sortstate);
539 
540  wstate.heap = btspool->heap;
541  wstate.index = btspool->index;
542 
543  /*
544  * We need to log index creation in WAL iff WAL archiving/streaming is
545  * enabled UNLESS the index isn't WAL-logged anyway.
546  */
547  wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
548 
549  /* reserve the metapage */
550  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
551  wstate.btws_pages_written = 0;
552  wstate.btws_zeropage = NULL; /* until needed */
553 
554  _bt_load(&wstate, btspool, btspool2);
555 }

◆ _bt_load()

static void _bt_load ( BTWriteState *  wstate,
BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 1079 of file nbtsort.c.

References _bt_buildadd(), _bt_freeskey(), _bt_mkscankey_nodata(), _bt_pagestate(), _bt_uppershutdown(), SortSupportData::abbreviate, ApplySortComparator(), AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, compare(), CurrentMemoryContext, i, BTWriteState::index, index_getattr, IndexRelationGetNumberOfKeyAttributes, MAIN_FORKNUM, merge(), palloc0(), pfree(), PrepareSortSupportFromIndexRel(), RelationData::rd_smgr, RelationGetDescr, RelationNeedsWAL, RelationOpenSmgr, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

1080 {
1081  BTPageState *state = NULL;
1082  bool merge = (btspool2 != NULL);
1083  IndexTuple itup,
1084  itup2 = NULL;
1085  bool load1;
1086  TupleDesc tupdes = RelationGetDescr(wstate->index);
1087  int i,
1088  keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index);
1089  ScanKey indexScanKey = NULL;
1090  SortSupport sortKeys;
1091 
1092  if (merge)
1093  {
1094  /*
1095  * Another BTSpool for dead tuples exists. Now we have to merge
1096  * btspool and btspool2.
1097  */
1098 
1099  /* the preparation of merge */
1100  itup = tuplesort_getindextuple(btspool->sortstate, true);
1101  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1102  indexScanKey = _bt_mkscankey_nodata(wstate->index);
1103 
1104  /* Prepare SortSupport data for each column */
1105  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
1106 
1107  for (i = 0; i < keysz; i++)
1108  {
1109  SortSupport sortKey = sortKeys + i;
1110  ScanKey scanKey = indexScanKey + i;
1111  int16 strategy;
1112 
1113  sortKey->ssup_cxt = CurrentMemoryContext;
1114  sortKey->ssup_collation = scanKey->sk_collation;
1115  sortKey->ssup_nulls_first =
1116  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1117  sortKey->ssup_attno = scanKey->sk_attno;
1118  /* Abbreviation is not supported here */
1119  sortKey->abbreviate = false;
1120 
1121  AssertState(sortKey->ssup_attno != 0);
1122 
1123  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1124  BTGreaterStrategyNumber : BTLessStrategyNumber;
1125 
1126  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
1127  }
1128 
1129  _bt_freeskey(indexScanKey);
1130 
1131  for (;;)
1132  {
1133  load1 = true; /* load BTSpool next ? */
1134  if (itup2 == NULL)
1135  {
1136  if (itup == NULL)
1137  break;
1138  }
1139  else if (itup != NULL)
1140  {
1141  for (i = 1; i <= keysz; i++)
1142  {
1143  SortSupport entry;
1144  Datum attrDatum1,
1145  attrDatum2;
1146  bool isNull1,
1147  isNull2;
1148  int32 compare;
1149 
1150  entry = sortKeys + i - 1;
1151  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
1152  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
1153 
1154  compare = ApplySortComparator(attrDatum1, isNull1,
1155  attrDatum2, isNull2,
1156  entry);
1157  if (compare > 0)
1158  {
1159  load1 = false;
1160  break;
1161  }
1162  else if (compare < 0)
1163  break;
1164  }
1165  }
1166  else
1167  load1 = false;
1168 
1169  /* When we see first tuple, create first index page */
1170  if (state == NULL)
1171  state = _bt_pagestate(wstate, 0);
1172 
1173  if (load1)
1174  {
1175  _bt_buildadd(wstate, state, itup);
1176  itup = tuplesort_getindextuple(btspool->sortstate, true);
1177  }
1178  else
1179  {
1180  _bt_buildadd(wstate, state, itup2);
1181  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1182  }
1183  }
1184  pfree(sortKeys);
1185  }
1186  else
1187  {
1188  /* merge is unnecessary */
1189  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1190  true)) != NULL)
1191  {
1192  /* When we see first tuple, create first index page */
1193  if (state == NULL)
1194  state = _bt_pagestate(wstate, 0);
1195 
1196  _bt_buildadd(wstate, state, itup);
1197  }
1198  }
1199 
1200  /* Close down final pages and write the metapage */
1201  _bt_uppershutdown(wstate, state);
1202 
1203  /*
1204  * If the index is WAL-logged, we must fsync it down to disk before it's
1205  * safe to commit the transaction. (For a non-WAL-logged index we don't
1206  * care since the index will be uninteresting after a crash anyway.)
1207  *
1208  * It's obvious that we must do this when not WAL-logging the build. It's
1209  * less obvious that we have to do it even if we did WAL-log the index
1210  * pages. The reason is that since we're building outside shared buffers,
1211  * a CHECKPOINT occurring during the build has no way to flush the
1212  * previously written data to disk (indeed it won't know the index even
1213  * exists). A crash later on would replay WAL from the checkpoint,
1214  * therefore it wouldn't replay our earlier WAL entries. If we do not
1215  * fsync those pages here, they might still not be on disk when the crash
1216  * occurs.
1217  */
1218  if (RelationNeedsWAL(wstate->index))
1219  {
1220  RelationOpenSmgr(wstate->index);
1221  smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
1222  }
1223 }
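
The merge loop above is a classic two-way merge keyed by ApplySortComparator() over each column in turn; ties fall through with load1 still true, so the main spool is preferred and the merge stays stable. A minimal single-column sketch over two pre-sorted Datum arrays (hypothetical names; the SortSupport entry is assumed to be prepared as above):

#include "postgres.h"
#include "utils/sortsupport.h"

static void
demo_merge(Datum *a, bool *anull, int alen,
		   Datum *b, bool *bnull, int blen,
		   SortSupport ssup)
{
	int			i = 0,
				j = 0;

	while (i < alen || j < blen)
	{
		bool		take_a;

		if (j >= blen)
			take_a = true;
		else if (i >= alen)
			take_a = false;
		else
		{
			/* <= 0 keeps the merge stable, preferring stream a on ties */
			take_a = ApplySortComparator(a[i], anull[i],
										 b[j], bnull[j], ssup) <= 0;
		}

		if (take_a)
			i++;				/* emit a[i] here */
		else
			j++;				/* emit b[j] here */
	}
}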

◆ _bt_pagestate()

static BTPageState * _bt_pagestate ( BTWriteState *  wstate,
uint32  level 
)
static

Definition at line 675 of file nbtsort.c.

References _bt_blnewpage(), BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_DEFAULT_FILLFACTOR, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, P_HIKEY, palloc0(), and RelationGetTargetPageFreeSpace.

Referenced by _bt_buildadd(), and _bt_load().

676 {
677  BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
678 
679  /* create initial page for level */
680  state->btps_page = _bt_blnewpage(level);
681 
682  /* and assign it a page position */
683  state->btps_blkno = wstate->btws_pages_alloced++;
684 
685  state->btps_minkey = NULL;
686  /* initialize lastoff so first item goes into P_FIRSTKEY */
687  state->btps_lastoff = P_HIKEY;
688  state->btps_level = level;
689  /* set "full" threshold based on level. See notes at head of file. */
690  if (level > 0)
691  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
692  else
693  state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
694  BTREE_DEFAULT_FILLFACTOR);
695  /* no parent level, yet */
696  state->btps_next = NULL;
697 
698  return state;
699 }
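
Worked numbers, assuming the default 8 kB BLCKSZ: with BTREE_NONLEAF_FILLFACTOR = 70, a nonleaf page is treated as full once its free space drops below 8192 * (100 - 70) / 100 = 2457 bytes; a leaf page at the default fillfactor of 90 (BTREE_DEFAULT_FILLFACTOR) reaches its threshold at 8192 * 10 / 100 = 819 bytes.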

◆ _bt_parallel_build_main()

void _bt_parallel_build_main ( dsm_segment *  seg,
shm_toc *  toc 
)

Definition at line 1528 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), AccessExclusiveLock, debug_query_string, BTSpool::heap, heap_close, heap_open(), BTShared::heaprelid, BTSpool::index, index_close(), index_open(), BTShared::indexrelid, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, pgstat_report_activity(), ResetUsage(), RowExclusiveLock, BTShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), ShowUsage(), STATE_RUNNING, and tuplesort_attach_shared().

1529 {
1530  char *sharedquery;
1531  BTSpool *btspool;
1532  BTSpool *btspool2;
1533  BTShared *btshared;
1534  Sharedsort *sharedsort;
1535  Sharedsort *sharedsort2;
1536  Relation heapRel;
1537  Relation indexRel;
1538  LOCKMODE heapLockmode;
1539  LOCKMODE indexLockmode;
1540  int sortmem;
1541 
1542 #ifdef BTREE_BUILD_STATS
1543  if (log_btree_build_stats)
1544  ResetUsage();
1545 #endif /* BTREE_BUILD_STATS */
1546 
1547  /* Set debug_query_string for individual workers first */
1548  sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
1549  debug_query_string = sharedquery;
1550 
1551  /* Report the query string from leader */
1552  pgstat_report_activity(STATE_RUNNING, debug_query_string);
1553 
1554  /* Look up nbtree shared state */
1555  btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1556 
1557  /* Open relations using lock modes known to be obtained by index.c */
1558  if (!btshared->isconcurrent)
1559  {
1560  heapLockmode = ShareLock;
1561  indexLockmode = AccessExclusiveLock;
1562  }
1563  else
1564  {
1565  heapLockmode = ShareUpdateExclusiveLock;
1566  indexLockmode = RowExclusiveLock;
1567  }
1568 
1569  /* Open relations within worker */
1570  heapRel = heap_open(btshared->heaprelid, heapLockmode);
1571  indexRel = index_open(btshared->indexrelid, indexLockmode);
1572 
1573  /* Initialize worker's own spool */
1574  btspool = (BTSpool *) palloc0(sizeof(BTSpool));
1575  btspool->heap = heapRel;
1576  btspool->index = indexRel;
1577  btspool->isunique = btshared->isunique;
1578 
1579  /* Look up shared state private to tuplesort.c */
1580  sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
1581  tuplesort_attach_shared(sharedsort, seg);
1582  if (!btshared->isunique)
1583  {
1584  btspool2 = NULL;
1585  sharedsort2 = NULL;
1586  }
1587  else
1588  {
1589  /* Allocate memory for worker's own private secondary spool */
1590  btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
1591 
1592  /* Initialize worker's own secondary spool */
1593  btspool2->heap = btspool->heap;
1594  btspool2->index = btspool->index;
1595  btspool2->isunique = false;
1596  /* Look up shared state private to tuplesort.c */
1597  sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1598  tuplesort_attach_shared(sharedsort2, seg);
1599  }
1600 
1601  /* Perform sorting of spool, and possibly a spool2 */
1602  sortmem = maintenance_work_mem / btshared->scantuplesortstates;
1603  _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1604  sharedsort2, sortmem);
1605 
1606 #ifdef BTREE_BUILD_STATS
1607  if (log_btree_build_stats)
1608  {
1609  ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1610  ResetUsage();
1611  }
1612 #endif /* BTREE_BUILD_STATS */
1613 
1614  index_close(indexRel, indexLockmode);
1615  heap_close(heapRel, heapLockmode);
1616 }

◆ _bt_parallel_estimate_shared()

static Size _bt_parallel_estimate_shared ( Snapshot  snapshot)
static

Definition at line 1416 of file nbtsort.c.

References add_size(), Assert, EstimateSnapshotSpace(), IsMVCCSnapshot, offsetof, and SnapshotAny.

Referenced by _bt_begin_parallel().

1417 {
1418  if (!IsMVCCSnapshot(snapshot))
1419  {
1420  Assert(snapshot == SnapshotAny);
1421  return sizeof(BTShared);
1422  }
1423 
1424  return add_size(offsetof(BTShared, heapdesc) +
1425  offsetof(ParallelHeapScanDescData, phs_snapshot_data),
1426  EstimateSnapshotSpace(snapshot));
1427 }
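
In the MVCC-snapshot case the estimate is the offset of the heapdesc field within BTShared, plus the offset of the flexible phs_snapshot_data array within ParallelHeapScanDescData, plus EstimateSnapshotSpace(); that leaves exactly enough room for heap_parallelscan_initialize() to serialize the snapshot into the tail of the shared struct.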

◆ _bt_parallel_heapscan()

static double _bt_parallel_heapscan ( BTBuildState *  buildstate,
bool *  brokenhotchain 
)
static

Definition at line 1442 of file nbtsort.c.

References BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTShared::havedead, BTBuildState::havedead, BTShared::indtuples, BTBuildState::indtuples, BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, reltuples, BTShared::reltuples, SpinLockAcquire, SpinLockRelease, WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1443 {
1444  BTShared *btshared = buildstate->btleader->btshared;
1445  int nparticipanttuplesorts;
1446  double reltuples;
1447 
1448  nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
1449  for (;;)
1450  {
1451  SpinLockAcquire(&btshared->mutex);
1452  if (btshared->nparticipantsdone == nparticipanttuplesorts)
1453  {
1454  buildstate->havedead = btshared->havedead;
1455  buildstate->indtuples = btshared->indtuples;
1456  *brokenhotchain = btshared->brokenhotchain;
1457  reltuples = btshared->reltuples;
1458  SpinLockRelease(&btshared->mutex);
1459  break;
1460  }
1461  SpinLockRelease(&btshared->mutex);
1462 
1463  ConditionVariableSleep(&btshared->workersdonecv,
1464  WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1465  }
1466 
1467  ConditionVariableCancelSleep();
1468 
1469  return reltuples;
1470 }
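
The loop above is the standard recheck-then-sleep pattern for condition variables guarding spinlock-protected state: test the predicate under the lock, sleep if it does not hold yet, and cancel any pending sleep on the way out. A self-contained sketch with a hypothetical shared counter:

#include "postgres.h"
#include "storage/condition_variable.h"
#include "storage/spin.h"

typedef struct DemoShared
{
	slock_t		mutex;
	int			ndone;
	ConditionVariable done_cv;
} DemoShared;

static void
demo_wait_for(DemoShared *shared, int target, uint32 wait_event_info)
{
	for (;;)
	{
		bool		done;

		SpinLockAcquire(&shared->mutex);
		done = (shared->ndone == target);
		SpinLockRelease(&shared->mutex);

		if (done)
			break;

		/* spurious wakeups are fine; the predicate is rechecked each pass */
		ConditionVariableSleep(&shared->done_cv, wait_event_info);
	}

	ConditionVariableCancelSleep();
}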

◆ _bt_parallel_scan_and_sort()

static void _bt_parallel_scan_and_sort ( BTSpool *  btspool,
BTSpool *  btspool2,
BTShared *  btshared,
Sharedsort *  sharedsort,
Sharedsort *  sharedsort2,
int  sortmem 
)
static

Definition at line 1631 of file nbtsort.c.

References _bt_build_callback(), BTShared::brokenhotchain, BTBuildState::btleader, BuildIndexInfo(), ConditionVariableSignal(), BTShared::havedead, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, heap_beginscan_parallel(), BTShared::heapdesc, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, BTSpool::index, IndexBuildHeapScan(), BTShared::indtuples, BTBuildState::indtuples, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, Min, BTShared::mutex, SortCoordinateData::nParticipants, BTShared::nparticipantsdone, palloc0(), reltuples, BTShared::reltuples, SortCoordinateData::sharedsort, BTSpool::sortstate, SpinLockAcquire, SpinLockRelease, BTBuildState::spool, BTBuildState::spool2, tuplesort_begin_index_btree(), tuplesort_end(), tuplesort_performsort(), work_mem, and BTShared::workersdonecv.

Referenced by _bt_leader_participate_as_worker(), and _bt_parallel_build_main().

1634 {
1635  SortCoordinate coordinate;
1636  BTBuildState buildstate;
1637  HeapScanDesc scan;
1638  double reltuples;
1639  IndexInfo *indexInfo;
1640 
1641  /* Initialize local tuplesort coordination state */
1642  coordinate = palloc0(sizeof(SortCoordinateData));
1643  coordinate->isWorker = true;
1644  coordinate->nParticipants = -1;
1645  coordinate->sharedsort = sharedsort;
1646 
1647  /* Begin "partial" tuplesort */
1648  btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1649  btspool->index,
1650  btspool->isunique,
1651  sortmem, coordinate,
1652  false);
1653 
1654  /*
1655  * Just as with serial case, there may be a second spool. If so, a
1656  * second, dedicated spool2 partial tuplesort is required.
1657  */
1658  if (btspool2)
1659  {
1660  SortCoordinate coordinate2;
1661 
1662  /*
1663  * We expect that the second one (for dead tuples) won't get very
1664  * full, so we give it only work_mem (unless sortmem is less for
1665  * worker). Worker processes are generally permitted to allocate
1666  * work_mem independently.
1667  */
1668  coordinate2 = palloc0(sizeof(SortCoordinateData));
1669  coordinate2->isWorker = true;
1670  coordinate2->nParticipants = -1;
1671  coordinate2->sharedsort = sharedsort2;
1672  btspool2->sortstate =
1673  tuplesort_begin_index_btree(btspool->heap, btspool->index, false,
1674  Min(sortmem, work_mem), coordinate2,
1675  false);
1676  }
1677 
1678  /* Fill in buildstate for _bt_build_callback() */
1679  buildstate.isunique = btshared->isunique;
1680  buildstate.havedead = false;
1681  buildstate.heap = btspool->heap;
1682  buildstate.spool = btspool;
1683  buildstate.spool2 = btspool2;
1684  buildstate.indtuples = 0;
1685  buildstate.btleader = NULL;
1686 
1687  /* Join parallel scan */
1688  indexInfo = BuildIndexInfo(btspool->index);
1689  indexInfo->ii_Concurrent = btshared->isconcurrent;
1690  scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc);
1691  reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo,
1692  true, _bt_build_callback,
1693  (void *) &buildstate, scan);
1694 
1695  /*
1696  * Execute this worker's part of the sort.
1697  *
1698  * Unlike leader and serial cases, we cannot avoid calling
1699  * tuplesort_performsort() for spool2 if it ends up containing no dead
1700  * tuples (this is disallowed for workers by tuplesort).
1701  */
1702  tuplesort_performsort(btspool->sortstate);
1703  if (btspool2)
1704  tuplesort_performsort(btspool2->sortstate);
1705 
1706  /*
1707  * Done. Record ambuild statistics, and whether we encountered a broken
1708  * HOT chain.
1709  */
1710  SpinLockAcquire(&btshared->mutex);
1711  btshared->nparticipantsdone++;
1712  btshared->reltuples += reltuples;
1713  if (buildstate.havedead)
1714  btshared->havedead = true;
1715  btshared->indtuples += buildstate.indtuples;
1716  if (indexInfo->ii_BrokenHotChain)
1717  btshared->brokenhotchain = true;
1718  SpinLockRelease(&btshared->mutex);
1719 
1720  /* Notify leader */
1721  ConditionVariableSignal(&btshared->workersdonecv);
1722 
1723  /* We can end tuplesorts immediately */
1724  tuplesort_end(btspool->sortstate);
1725  if (btspool2)
1726  tuplesort_end(btspool2->sortstate);
1727 }
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1790
Relation heap
Definition: nbtsort.c:216
int nparticipantsdone
Definition: nbtsort.c:160
#define Min(x, y)
Definition: c.h:857
bool havedead
Definition: nbtsort.c:215
Sharedsort * sharedsort
Definition: tuplesort.h:56
bool isunique
Definition: nbtsort.c:124
bool isconcurrent
Definition: nbtsort.c:125
Tuplesortstate * sortstate
Definition: nbtsort.c:104
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:1745
BTSpool * spool
Definition: nbtsort.c:217
ParallelHeapScanDescData heapdesc
Definition: nbtsort.c:171
Relation heap
Definition: nbtsort.c:105
#define SpinLockAcquire(lock)
Definition: spin.h:62
static void _bt_build_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: nbtsort.c:561
double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, IndexBuildCallback callback, void *callback_state, HeapScanDesc scan)
Definition: index.c:2418
bool isunique
Definition: nbtsort.c:107
void ConditionVariableSignal(ConditionVariable *cv)
BTSpool * spool2
Definition: nbtsort.c:223
double indtuples
Definition: nbtsort.c:224
bool isunique
Definition: nbtsort.c:214
bool ii_BrokenHotChain
Definition: execnodes.h:167
bool brokenhotchain
Definition: nbtsort.c:164
Relation index
Definition: nbtsort.c:106
#define SpinLockRelease(lock)
Definition: spin.h:64
void * palloc0(Size size)
Definition: mcxt.c:955
BTLeader * btleader
Definition: nbtsort.c:231
int work_mem
Definition: globals.c:122
double reltuples
Definition: nbtsort.c:161
double indtuples
Definition: nbtsort.c:163
slock_t mutex
Definition: nbtsort.c:142
bool havedead
Definition: nbtsort.c:162
bool ii_Concurrent
Definition: execnodes.h:166
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:975
ConditionVariable workersdonecv
Definition: nbtsort.c:134
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1235
HeapScanDesc heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan)
Definition: heapam.c:1666
float4 reltuples
Definition: pg_class.h:44
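
Two worker-side details above are easy to miss: the SortCoordinate is marked isWorker = true with nParticipants = -1 (that field matters only to the leader), and sortmem is a slice of maintenance_work_mem rather than the whole budget. A sketch of that setup, assuming a hypothetical helper in which nworkers stands in for the participant count that the parallel-build entry point reads from shared memory:

#include "postgres.h"
#include "miscadmin.h"
#include "utils/rel.h"
#include "utils/tuplesort.h"

static Tuplesortstate *
demo_worker_begin_sort(Relation heap, Relation index, bool isunique,
                       Sharedsort *sharedsort, int nworkers)
{
    SortCoordinate coordinate = palloc0(sizeof(SortCoordinateData));

    /* each participant gets an equal slice of the overall budget */
    int         sortmem = maintenance_work_mem / nworkers;

    coordinate->isWorker = true;    /* workers produce runs; the leader merges */
    coordinate->nParticipants = -1; /* only meaningful on the leader side */
    coordinate->sharedsort = sharedsort;

    return tuplesort_begin_index_btree(heap, index, isunique,
                                       sortmem, coordinate, false);
}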

◆ _bt_slideleft()

static void _bt_slideleft ( Page page )
static

Definition at line 708 of file nbtsort.c.

References OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId, PageGetMaxOffsetNumber, and PageIsEmpty.

Referenced by _bt_uppershutdown().

709 {
710  OffsetNumber off;
711  OffsetNumber maxoff;
712  ItemId previi;
713  ItemId thisii;
714 
715  if (!PageIsEmpty(page))
716  {
717  maxoff = PageGetMaxOffsetNumber(page);
718  previi = PageGetItemId(page, P_HIKEY);
719  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
720  {
721  thisii = PageGetItemId(page, off);
722  *previi = *thisii;
723  previi = thisii;
724  }
725  ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
726  }
727 }
#define PageIsEmpty(page)
Definition: bufpage.h:218
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
uint16 OffsetNumber
Definition: off.h:24
#define P_FIRSTKEY
Definition: nbtree.h:186
struct ItemIdData ItemIdData
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define P_HIKEY
Definition: nbtree.h:185
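
The effect is easiest to see as a before-and-after picture of the line pointer array (a sketch; offsets are 1-based, with P_HIKEY = 1 and P_FIRSTKEY = 2 per nbtree.h):

/*
 * Rightmost page of a level: no high key is needed, so the slot that was
 * reserved for it is reclaimed by copying every ItemId one place left.
 *
 *   before:  [1: reserved hikey slot][2: item A][3: item B][4: item C]
 *   after:   [1: item A][2: item B][3: item C]
 *
 * Shrinking pd_lower by sizeof(ItemIdData) then drops the now-duplicate
 * last slot.
 */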

◆ _bt_sortaddtup()

static void _bt_sortaddtup ( Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off )
static

Definition at line 743 of file nbtsort.c.

References BTreeTupleSetNAtts, elog, ERROR, InvalidOffsetNumber, P_FIRSTKEY, P_ISLEAF, PageAddItem, PageGetSpecialPointer, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

747 {
748  BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
749  IndexTupleData trunctuple;
750 
751  if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
752  {
753  trunctuple = *itup;
754  trunctuple.t_info = sizeof(IndexTupleData);
755  BTreeTupleSetNAtts(&trunctuple, 0);
756  itup = &trunctuple;
757  itemsize = sizeof(IndexTupleData);
758  }
759 
760  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
761  false, false) == InvalidOffsetNumber)
762  elog(ERROR, "failed to add item to the index page");
763 }
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
#define BTreeTupleSetNAtts(itup, n)
Definition: nbtree.h:243
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define ERROR
Definition: elog.h:43
#define P_FIRSTKEY
Definition: nbtree.h:186
struct IndexTupleData IndexTupleData
#define InvalidOffsetNumber
Definition: off.h:26
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define elog
Definition: elog.h:219
unsigned short t_info
Definition: itup.h:49
#define P_ISLEAF(opaque)
Definition: nbtree.h:158
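
_bt_sortaddtup() lets page images be filled strictly left to right while enforcing the "minus infinity" convention for the first key of an internal page in one place. A hypothetical sketch of how a caller inside nbtsort.c (such as _bt_buildadd()) drives it; demo_fill_page, tuples, and ntuples are illustrative names, and the nbtsort.c internal declarations are assumed to be in scope:

static void
demo_fill_page(Page page, IndexTuple *tuples, int ntuples)
{
    OffsetNumber off = P_FIRSTKEY;
    int         i;

    for (i = 0; i < ntuples; i++)
    {
        /* offsets must be assigned in strictly ascending order */
        _bt_sortaddtup(page, IndexTupleSize(tuples[i]), tuples[i], off);
        off = OffsetNumberNext(off);
    }
}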

◆ _bt_spool()

static void _bt_spool ( BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull )
static

Definition at line 513 of file nbtsort.c.

References BTSpool::index, BTSpool::sortstate, and tuplesort_putindextuplevalues().

Referenced by _bt_build_callback().

514 {
515  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
516  self, values, isnull);
517 }
Tuplesortstate * sortstate
Definition: nbtsort.c:104
Relation index
Definition: nbtsort.c:106
void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, ItemPointer self, Datum *values, bool *isnull)
Definition: tuplesort.c:1477
static Datum values[MAXATTR]
Definition: bootstrap.c:164
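
_bt_spool() is called once per heap tuple from _bt_build_callback() (see the References above). A simplified sketch of that caller's routing logic: live tuples go to the primary spool, while dead tuples are diverted to spool2 so they stay out of the uniqueness check. demo_build_callback is an illustrative stand-in, not the actual function body, and it assumes nbtsort.c's internal declarations are in scope:

static void
demo_build_callback(Relation index, HeapTuple htup, Datum *values,
                    bool *isnull, bool tupleIsAlive, void *state)
{
    BTBuildState *buildstate = (BTBuildState *) state;

    /* live tuples go to the primary spool, dead ones to spool2 (if any) */
    if (tupleIsAlive || buildstate->spool2 == NULL)
        _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
    else
    {
        _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
        buildstate->havedead = true;
    }

    buildstate->indtuples += 1;
}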

◆ _bt_spooldestroy()

static void _bt_spooldestroy ( BTSpool *btspool )
static

Definition at line 503 of file nbtsort.c.

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by _bt_spools_heapscan(), and btbuild().

504 {
505  tuplesort_end(btspool->sortstate);
506  pfree(btspool);
507 }
Tuplesortstate * sortstate
Definition: nbtsort.c:104
void pfree(void *pointer)
Definition: mcxt.c:1031
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1235

◆ _bt_spools_heapscan()

static double _bt_spools_heapscan ( Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo )
static

Definition at line 375 of file nbtsort.c.

References _bt_begin_parallel(), _bt_build_callback(), _bt_parallel_heapscan(), _bt_spooldestroy(), BTBuildState::btleader, BTSpool::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexInfo::ii_Unique, BTSpool::index, IndexBuildHeapScan(), BTSpool::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, maintenance_work_mem, SortCoordinateData::nParticipants, BTLeader::nparticipanttuplesorts, palloc0(), reltuples, SortCoordinateData::sharedsort, BTLeader::sharedsort, BTLeader::sharedsort2, BTSpool::sortstate, BTBuildState::spool, BTBuildState::spool2, tuplesort_begin_index_btree(), and work_mem.

Referenced by btbuild().

377 {
378  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
379  SortCoordinate coordinate = NULL;
380  double reltuples = 0;
381 
382  /*
383  * We size the sort area as maintenance_work_mem rather than work_mem to
384  * speed index creation. This should be OK since a single backend can't
385  * run multiple index creations in parallel (see also: notes on
386  * parallelism and maintenance_work_mem below).
387  */
388  btspool->heap = heap;
389  btspool->index = index;
390  btspool->isunique = indexInfo->ii_Unique;
391 
392  /* Save as primary spool */
393  buildstate->spool = btspool;
394 
395  /* Attempt to launch parallel worker scan when required */
396  if (indexInfo->ii_ParallelWorkers > 0)
397  _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
398  indexInfo->ii_ParallelWorkers);
399 
400  /*
401  * If parallel build requested and at least one worker process was
402  * successfully launched, set up coordination state
403  */
404  if (buildstate->btleader)
405  {
406  coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
407  coordinate->isWorker = false;
408  coordinate->nParticipants =
409  buildstate->btleader->nparticipanttuplesorts;
410  coordinate->sharedsort = buildstate->btleader->sharedsort;
411  }
412 
413  /*
414  * Begin serial/leader tuplesort.
415  *
416  * In cases where parallelism is involved, the leader receives the same
417  * share of maintenance_work_mem as a serial sort (it is generally treated
418  * in the same way as a serial sort once we return). Parallel worker
419  * Tuplesortstates will have received only a fraction of
420  * maintenance_work_mem, though.
421  *
422  * We rely on the lifetime of the Leader Tuplesortstate almost not
423  * overlapping with any worker Tuplesortstate's lifetime. There may be
424  * some small overlap, but that's okay because we rely on leader
425  * Tuplesortstate only allocating a small, fixed amount of memory here.
426  * When its tuplesort_performsort() is called (by our caller), and
427  * significant amounts of memory are likely to be used, all workers must
428  * have already freed almost all memory held by their Tuplesortstates
429  * (they are about to go away completely, too). The overall effect is
430  * that maintenance_work_mem always represents an absolute high watermark
431  * on the amount of memory used by a CREATE INDEX operation, regardless of
432  * the use of parallelism or any other factor.
433  */
434  buildstate->spool->sortstate =
435  tuplesort_begin_index_btree(heap, index, buildstate->isunique,
436  maintenance_work_mem, coordinate,
437  false);
438 
439  /*
440  * If building a unique index, put dead tuples in a second spool to keep
441  * them out of the uniqueness check. We expect that the second spool (for
442  * dead tuples) won't get very full, so we give it only work_mem.
443  */
444  if (indexInfo->ii_Unique)
445  {
446  BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
447  SortCoordinate coordinate2 = NULL;
448 
449  /* Initialize secondary spool */
450  btspool2->heap = heap;
451  btspool2->index = index;
452  btspool2->isunique = false;
453  /* Save as secondary spool */
454  buildstate->spool2 = btspool2;
455 
456  if (buildstate->btleader)
457  {
458  /*
459  * Set up non-private state that is passed to
460  * tuplesort_begin_index_btree() about the basic high level
461  * coordination of a parallel sort.
462  */
463  coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
464  coordinate2->isWorker = false;
465  coordinate2->nParticipants =
466  buildstate->btleader->nparticipanttuplesorts;
467  coordinate2->sharedsort = buildstate->btleader->sharedsort2;
468  }
469 
470  /*
471  * We expect that the second one (for dead tuples) won't get very
472  * full, so we give it only work_mem
473  */
474  buildstate->spool2->sortstate =
475  tuplesort_begin_index_btree(heap, index, false, work_mem,
476  coordinate2, false);
477  }
478 
479  /* Fill spool using either serial or parallel heap scan */
480  if (!buildstate->btleader)
481  reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
482  _bt_build_callback, (void *) buildstate,
483  NULL);
484  else
485  reltuples = _bt_parallel_heapscan(buildstate,
486  &indexInfo->ii_BrokenHotChain);
487 
488  /* okay, all heap tuples are spooled */
489  if (buildstate->spool2 && !buildstate->havedead)
490  {
491  /* spool2 turns out to be unnecessary */
492  _bt_spooldestroy(buildstate->spool2);
493  buildstate->spool2 = NULL;
494  }
495 
496  return reltuples;
497 }
static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Definition: nbtsort.c:1242
Sharedsort * sharedsort2
Definition: nbtsort.c:202
Sharedsort * sharedsort
Definition: nbtsort.c:201
bool havedead
Definition: nbtsort.c:215
Sharedsort * sharedsort
Definition: tuplesort.h:56
Tuplesortstate * sortstate
Definition: nbtsort.c:104
BTSpool * spool
Definition: nbtsort.c:217
Relation heap
Definition: nbtsort.c:105
static void _bt_build_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: nbtsort.c:561
static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain)
Definition: nbtsort.c:1442
double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, IndexBuildCallback callback, void *callback_state, HeapScanDesc scan)
Definition: index.c:2418
bool isunique
Definition: nbtsort.c:107
BTSpool * spool2
Definition: nbtsort.c:223
bool isunique
Definition: nbtsort.c:214
bool ii_BrokenHotChain
Definition: execnodes.h:167
Relation index
Definition: nbtsort.c:106
void * palloc0(Size size)
Definition: mcxt.c:955
struct SortCoordinateData * SortCoordinate
Definition: tuplesort.h:59
BTLeader * btleader
Definition: nbtsort.c:231
int work_mem
Definition: globals.c:122
int maintenance_work_mem
Definition: globals.c:123
bool ii_Unique
Definition: execnodes.h:164
int nparticipanttuplesorts
Definition: nbtsort.c:188
int ii_ParallelWorkers
Definition: execnodes.h:168
bool ii_Concurrent
Definition: execnodes.h:166
static void _bt_spooldestroy(BTSpool *btspool)
Definition: nbtsort.c:503
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:975
float4 reltuples
Definition: pg_class.h:44
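
A concrete example of the memory budgeting described in the comment above, using illustrative numbers (maintenance_work_mem = 64MB, three participant tuplesorts):

/*
 * scan phase:   each participant tuplesort gets ~64MB / 3, i.e. about
 *               21MB, while it builds its sorted runs
 * merge phase:  by the time the leader's tuplesort_performsort() needs
 *               significant memory, the workers have freed theirs, so
 *               the leader may use the full 64MB
 *
 * Net effect: maintenance_work_mem stays the high watermark for the
 * whole CREATE INDEX, with or without parallelism.
 */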

◆ _bt_uppershutdown()

static void _bt_uppershutdown ( BTWriteState *wstate, BTPageState *state )
static

Definition at line 1012 of file nbtsort.c.

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), Assert, BTP_ROOT, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, BTreeInnerTupleSetDownLink, BTreeTupleGetNAtts, BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, P_NONE, PageGetSpecialPointer, palloc(), and pfree().

Referenced by _bt_load().

1013 {
1014  BTPageState *s;
1015  BlockNumber rootblkno = P_NONE;
1016  uint32 rootlevel = 0;
1017  Page metapage;
1018 
1019  /*
1020  * Each iteration of this loop completes one more level of the tree.
1021  */
1022  for (s = state; s != NULL; s = s->btps_next)
1023  {
1024  BlockNumber blkno;
1025  BTPageOpaque opaque;
1026 
1027  blkno = s->btps_blkno;
1028  opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
1029 
1030  /*
1031  * We have to link the last page on this level to somewhere.
1032  *
1033  * If we're at the top, it's the root, so attach it to the metapage.
1034  * Otherwise, add an entry for it to its parent using its minimum key.
1035  * This may cause the last page of the parent level to split, but
1036  * that's not a problem -- we haven't gotten to it yet.
1037  */
1038  if (s->btps_next == NULL)
1039  {
1040  opaque->btpo_flags |= BTP_ROOT;
1041  rootblkno = blkno;
1042  rootlevel = s->btps_level;
1043  }
1044  else
1045  {
1046  Assert(BTreeTupleGetNAtts(s->btps_minkey, wstate->index) ==
1047  IndexRelationGetNumberOfKeyAttributes(wstate->index));
1048  BTreeInnerTupleSetDownLink(s->btps_minkey, blkno);
1049  _bt_buildadd(wstate, s->btps_next, s->btps_minkey);
1050  pfree(s->btps_minkey);
1051  s->btps_minkey = NULL;
1052  }
1053 
1054  /*
1055  * This is the rightmost page, so the ItemId array needs to be slid
1056  * back one slot. Then we can dump out the page.
1057  */
1058  _bt_slideleft(s->btps_page);
1059  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1060  s->btps_page = NULL; /* writepage freed the workspace */
1061  }
1062 
1063  /*
1064  * As the last step in the process, construct the metapage and make it
1065  * point to the new root (unless we had no data at all, in which case it's
1066  * set to point to "P_NONE"). This changes the index to the "valid" state
1067  * by filling in a valid magic number in the metapage.
1068  */
1069  metapage = (Page) palloc(BLCKSZ);
1070  _bt_initmetapage(metapage, rootblkno, rootlevel);
1071  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
1072 }
#define BTP_ROOT
Definition: nbtree.h:72
#define P_NONE
Definition: nbtree.h:150
uint32 BlockNumber
Definition: block.h:31
BlockNumber btps_blkno
Definition: nbtsort.c:250
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:233
static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
Definition: nbtsort.c:617
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
IndexTuple btps_minkey
Definition: nbtsort.c:251
Page btps_page
Definition: nbtsort.c:249
void pfree(void *pointer)
Definition: mcxt.c:1031
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
Definition: nbtsort.c:801
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
Definition: nbtpage.c:50
unsigned int uint32
Definition: c.h:325
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define BTREE_METAPAGE
Definition: nbtree.h:115
Relation index
Definition: nbtsort.c:264
uint32 btps_level
Definition: nbtsort.c:253
struct BTPageState * btps_next
Definition: nbtsort.c:255
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BTreeInnerTupleSetDownLink(itup, blkno)
Definition: nbtree.h:226
static void _bt_slideleft(Page page)
Definition: nbtsort.c:708
void * palloc(Size size)
Definition: mcxt.c:924
uint16 btpo_flags
Definition: nbtree.h:64
Pointer Page
Definition: bufpage.h:74
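
_bt_uppershutdown() is the last step of the bottom-up build that _bt_load() drives. A simplified sketch of that flow, with the unique-index/spool2 merge path omitted; demo_load_leaf_level is an illustrative wrapper, not the actual _bt_load() body, and it assumes nbtsort.c's internal declarations are in scope:

static void
demo_load_leaf_level(BTWriteState *wstate, BTSpool *btspool)
{
    BTPageState *state = NULL;
    IndexTuple  itup;

    while ((itup = tuplesort_getindextuple(btspool->sortstate, true)) != NULL)
    {
        /* the first tuple creates the leaf-level page state (level 0) */
        if (state == NULL)
            state = _bt_pagestate(wstate, 0);

        _bt_buildadd(wstate, state, itup);  /* may recurse into parents */
    }

    /* close out every level, flag the root, and write the metapage */
    _bt_uppershutdown(wstate, state);
}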

◆ btbuild()

IndexBuildResult* btbuild ( Relation heap, Relation index, IndexInfo *indexInfo )

Definition at line 306 of file nbtsort.c.

References _bt_end_parallel(), _bt_leafbuild(), _bt_spooldestroy(), _bt_spools_heapscan(), BTBuildState::btleader, elog, ERROR, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexBuildResult::heap_tuples, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, BTBuildState::indtuples, BTBuildState::isunique, log_btree_build_stats, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, reltuples, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

307 {
308  IndexBuildResult *result;
309  BTBuildState buildstate;
310  double reltuples;
311 
312 #ifdef BTREE_BUILD_STATS
313  if (log_btree_build_stats)
314  ResetUsage();
315 #endif /* BTREE_BUILD_STATS */
316 
317  buildstate.isunique = indexInfo->ii_Unique;
318  buildstate.havedead = false;
319  buildstate.heap = heap;
320  buildstate.spool = NULL;
321  buildstate.spool2 = NULL;
322  buildstate.indtuples = 0;
323  buildstate.btleader = NULL;
324 
325  /*
326  * We expect to be called exactly once for any index relation. If that's
327  * not the case, big trouble's what we have.
328  */
329  if (RelationGetNumberOfBlocks(index) != 0)
330  elog(ERROR, "index \"%s\" already contains data",
331  RelationGetRelationName(index));
332 
333  reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
334 
335  /*
336  * Finish the build by (1) completing the sort of the spool file, (2)
337  * inserting the sorted tuples into btree pages and (3) building the upper
338  * levels. Finally, it may also be necessary to end use of parallelism.
339  */
340  _bt_leafbuild(buildstate.spool, buildstate.spool2);
341  _bt_spooldestroy(buildstate.spool);
342  if (buildstate.spool2)
343  _bt_spooldestroy(buildstate.spool2);
344  if (buildstate.btleader)
345  _bt_end_parallel(buildstate.btleader);
346 
347  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
348 
349  result->heap_tuples = reltuples;
350  result->index_tuples = buildstate.indtuples;
351 
352 #ifdef BTREE_BUILD_STATS
353  if (log_btree_build_stats)
354  {
355  ShowUsage("BTREE BUILD STATS");
356  ResetUsage();
357  }
358 #endif /* BTREE_BUILD_STATS */
359 
360  return result;
361 }
Relation heap
Definition: nbtsort.c:216
void ShowUsage(const char *title)
Definition: postgres.c:4453
bool havedead
Definition: nbtsort.c:215
static void _bt_end_parallel(BTLeader *btleader)
Definition: nbtsort.c:1400
static void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
Definition: nbtsort.c:524
BTSpool * spool
Definition: nbtsort.c:217
void ResetUsage(void)
Definition: postgres.c:4446
#define ERROR
Definition: elog.h:43
BTSpool * spool2
Definition: nbtsort.c:223
double indtuples
Definition: nbtsort.c:224
bool isunique
Definition: nbtsort.c:214
#define RelationGetRelationName(relation)
Definition: rel.h:441
static double _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
Definition: nbtsort.c:375
bool log_btree_build_stats
Definition: guc.c:445
BTLeader * btleader
Definition: nbtsort.c:231
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:199
bool ii_Unique
Definition: execnodes.h:164
static void _bt_spooldestroy(BTSpool *btspool)
Definition: nbtsort.c:503
void * palloc(Size size)
Definition: mcxt.c:924
#define elog
Definition: elog.h:219
float4 reltuples
Definition: pg_class.h:44
double index_tuples
Definition: genam.h:33
double heap_tuples
Definition: genam.h:32
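
btbuild() reaches the executor through the index access method interface: bthandler() returns an IndexAmRoutine whose ambuild field points at it. A condensed sketch of that registration (the many other am* callbacks are elided here):

#include "postgres.h"
#include "access/amapi.h"
#include "fmgr.h"
#include "nodes/nodes.h"

Datum
bthandler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    amroutine->ambuild = btbuild;
    /* ... the remaining am* callbacks are filled in here ... */

    PG_RETURN_POINTER(amroutine);
}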