PostgreSQL Source Code  git master
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "commands/progress.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
Include dependency graph for nbtsort.c:

Go to the source code of this file.

Data Structures

struct  BTSpool
 
struct  BTShared
 
struct  BTLeader
 
struct  BTBuildState
 
struct  BTPageState
 
struct  BTWriteState
 

Macros

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)
 
#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)
 
#define ParallelTableScanFromBTShared(shared)   (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))
 

Typedefs

typedef struct BTSpool BTSpool
 
typedef struct BTShared BTShared
 
typedef struct BTLeader BTLeader
 
typedef struct BTBuildState BTBuildState
 
typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static double _bt_spools_heapscan (Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
 
static void _bt_spooldestroy (BTSpool *btspool)
 
static void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
static void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_build_callback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static Page _bt_blnewpage (uint32 level)
 
static BTPageState * _bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page page)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_begin_parallel (BTBuildState *buildstate, bool isconcurrent, int request)
 
static void _bt_end_parallel (BTLeader *btleader)
 
static Size _bt_parallel_estimate_shared (Relation heap, Snapshot snapshot)
 
static double _bt_parallel_heapscan (BTBuildState *buildstate, bool *brokenhotchain)
 
static void _bt_leader_participate_as_worker (BTBuildState *buildstate)
 
static void _bt_parallel_scan_and_sort (BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BTREE_SHARED

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)

Definition at line 80 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)

Definition at line 83 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)

Definition at line 81 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT_SPOOL2

#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)

Definition at line 82 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ ParallelTableScanFromBTShared

#define ParallelTableScanFromBTShared(shared)   (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))

Definition at line 173 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_scan_and_sort().
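The macro lays both structures out in one shm_toc chunk: the fixed-size BTShared header comes first, and the variable-size parallel table scan descriptor starts at the next buffer-aligned offset after it. Below is a minimal standalone sketch of the same pointer arithmetic, using toy stand-in types and an assumed 32-byte alignment; the real BUFFERALIGN and struct definitions live in c.h and nbtsort.c.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for BTShared and the parallel scan descriptor */
typedef struct ToyBTShared
{
    unsigned    heaprelid;
    unsigned    indexrelid;
    int         scantuplesortstates;
} ToyBTShared;

typedef struct ToyParallelScan
{
    uint64_t    phs_nblocks;
} ToyParallelScan;

/* Assumed 32-byte alignment; the real macro uses ALIGNOF_BUFFER */
#define TOY_BUFFERALIGN(LEN) \
    (((uintptr_t) (LEN) + 31) & ~(uintptr_t) 31)

/* Same shape as ParallelTableScanFromBTShared */
#define ToyScanFromShared(shared) \
    ((ToyParallelScan *) ((char *) (shared) + TOY_BUFFERALIGN(sizeof(ToyBTShared))))

int
main(void)
{
    /* One allocation covers both parts, as shm_toc_allocate would */
    size_t      sz = TOY_BUFFERALIGN(sizeof(ToyBTShared)) + sizeof(ToyParallelScan);
    ToyBTShared *shared = calloc(1, sz);
    ToyParallelScan *pscan = ToyScanFromShared(shared);

    pscan->phs_nblocks = 1234;
    printf("scan descriptor at offset %zu (header is %zu bytes)\n",
           (size_t) ((char *) pscan - (char *) shared),
           sizeof(ToyBTShared));
    free(shared);
    return 0;
}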

Typedef Documentation

◆ BTBuildState

typedef struct BTBuildState BTBuildState

◆ BTLeader

typedef struct BTLeader BTLeader

◆ BTPageState

typedef struct BTPageState BTPageState

◆ BTShared

typedef struct BTShared BTShared

◆ BTSpool

typedef struct BTSpool BTSpool

◆ BTWriteState

typedef struct BTWriteState BTWriteState

Function Documentation

◆ _bt_begin_parallel()

static void _bt_begin_parallel ( BTBuildState *buildstate,
bool  isconcurrent,
int  request 
)
static

Definition at line 1318 of file nbtsort.c.

References _bt_end_parallel(), _bt_leader_participate_as_worker(), _bt_parallel_estimate_shared(), Assert, BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableInit(), CreateParallelContext(), debug_query_string, EnterParallelMode(), ParallelContext::estimator, GetTransactionSnapshot(), BTShared::havedead, BTSpool::heap, BTShared::heaprelid, BTSpool::index, BTShared::indexrelid, BTShared::indtuples, InitializeParallelDSM(), BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, LaunchParallelWorkers(), BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, ParallelContext::nworkers_launched, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, ParallelTableScanFromBTShared, BTLeader::pcxt, RegisterSnapshot(), RelationGetRelid, BTShared::reltuples, BTShared::scantuplesortstates, ParallelContext::seg, BTLeader::sharedsort, BTLeader::sharedsort2, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), BTLeader::snapshot, SnapshotAny, SpinLockInit, BTBuildState::spool, table_parallelscan_initialize(), ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), WaitForParallelWorkersToAttach(), and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1319 {
1320  ParallelContext *pcxt;
1321  int scantuplesortstates;
1322  Snapshot snapshot;
1323  Size estbtshared;
1324  Size estsort;
1325  BTShared *btshared;
1326  Sharedsort *sharedsort;
1327  Sharedsort *sharedsort2;
1328  BTSpool *btspool = buildstate->spool;
1329  BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1330  bool leaderparticipates = true;
1331  char *sharedquery;
1332  int querylen;
1333 
1334 #ifdef DISABLE_LEADER_PARTICIPATION
1335  leaderparticipates = false;
1336 #endif
1337 
1338  /*
1339  * Enter parallel mode, and create context for parallel build of btree
1340  * index
1341  */
1342  EnterParallelMode();
1343  Assert(request > 0);
1344  pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1345  request);
1346  scantuplesortstates = leaderparticipates ? request + 1 : request;
1347 
1348  /*
1349  * Prepare for scan of the base relation. In a normal index build, we use
1350  * SnapshotAny because we must retrieve all tuples and do our own time
1351  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1352  * concurrent build, we take a regular MVCC snapshot and index whatever's
1353  * live according to that.
1354  */
1355  if (!isconcurrent)
1356  snapshot = SnapshotAny;
1357  else
1358  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1359 
1360  /*
1361  * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and
1362  * PARALLEL_KEY_TUPLESORT tuplesort workspace
1363  */
1364  estbtshared = _bt_parallel_estimate_shared(btspool->heap, snapshot);
1365  shm_toc_estimate_chunk(&pcxt->estimator, estbtshared);
1366  estsort = tuplesort_estimate_shared(scantuplesortstates);
1367  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1368 
1369  /*
1370  * Unique case requires a second spool, and so we may have to account for
1371  * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
1372  */
1373  if (!btspool->isunique)
1374  shm_toc_estimate_keys(&pcxt->estimator, 2);
1375  else
1376  {
1377  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1378  shm_toc_estimate_keys(&pcxt->estimator, 3);
1379  }
1380 
1381  /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1382  querylen = strlen(debug_query_string);
1383  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
1384  shm_toc_estimate_keys(&pcxt->estimator, 1);
1385 
1386  /* Everyone's had a chance to ask for space, so now create the DSM */
1387  InitializeParallelDSM(pcxt);
1388 
1389  /* Store shared build state, for which we reserved space */
1390  btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1391  /* Initialize immutable state */
1392  btshared->heaprelid = RelationGetRelid(btspool->heap);
1393  btshared->indexrelid = RelationGetRelid(btspool->index);
1394  btshared->isunique = btspool->isunique;
1395  btshared->isconcurrent = isconcurrent;
1396  btshared->scantuplesortstates = scantuplesortstates;
1397  ConditionVariableInit(&btshared->workersdonecv);
1398  SpinLockInit(&btshared->mutex);
1399  /* Initialize mutable state */
1400  btshared->nparticipantsdone = 0;
1401  btshared->reltuples = 0.0;
1402  btshared->havedead = false;
1403  btshared->indtuples = 0.0;
1404  btshared->brokenhotchain = false;
1405  table_parallelscan_initialize(btspool->heap,
1406  ParallelTableScanFromBTShared(btshared),
1407  snapshot);
1408 
1409  /*
1410  * Store shared tuplesort-private state, for which we reserved space.
1411  * Then, initialize opaque state using tuplesort routine.
1412  */
1413  sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1414  tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1415  pcxt->seg);
1416 
1417  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared);
1418  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1419 
1420  /* Unique case requires a second spool, and associated shared state */
1421  if (!btspool->isunique)
1422  sharedsort2 = NULL;
1423  else
1424  {
1425  /*
1426  * Store additional shared tuplesort-private state, for which we
1427  * reserved space. Then, initialize opaque state using tuplesort
1428  * routine.
1429  */
1430  sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1431  tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1432  pcxt->seg);
1433 
1434  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
1435  }
1436 
1437  /* Store query string for workers */
1438  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1439  memcpy(sharedquery, debug_query_string, querylen + 1);
1440  shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1441 
1442  /* Launch workers, saving status for leader/caller */
1443  LaunchParallelWorkers(pcxt);
1444  btleader->pcxt = pcxt;
1445  btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1446  if (leaderparticipates)
1447  btleader->nparticipanttuplesorts++;
1448  btleader->btshared = btshared;
1449  btleader->sharedsort = sharedsort;
1450  btleader->sharedsort2 = sharedsort2;
1451  btleader->snapshot = snapshot;
1452 
1453  /* If no workers were successfully launched, back out (do serial build) */
1454  if (pcxt->nworkers_launched == 0)
1455  {
1456  _bt_end_parallel(btleader);
1457  return;
1458  }
1459 
1460  /* Save leader state now that it's clear build will be parallel */
1461  buildstate->btleader = btleader;
1462 
1463  /* Join heap scan ourselves */
1464  if (leaderparticipates)
1465  _bt_leader_participate_as_worker(buildstate);
1466 
1467  /*
1468  * Caller needs to wait for all launched workers when we return. Make
1469  * sure that the failure-to-start case will not hang forever.
1470  */
1471  WaitForParallelWorkersToAttach(pcxt);
1472 }

◆ _bt_blnewpage()

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 625 of file nbtsort.c.

References _bt_pageinit(), BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageOpaqueData::level, P_NONE, PageGetSpecialPointer, and palloc().

Referenced by _bt_buildadd(), and _bt_pagestate().

626 {
627  Page page;
628  BTPageOpaque opaque;
629 
630  page = (Page) palloc(BLCKSZ);
631 
632  /* Zero the page and set up standard page header info */
633  _bt_pageinit(page, BLCKSZ);
634 
635  /* Initialize BT opaque state */
636  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
637  opaque->btpo_prev = opaque->btpo_next = P_NONE;
638  opaque->btpo.level = level;
639  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
640  opaque->btpo_cycleid = 0;
641 
642  /* Make the P_HIKEY line pointer appear allocated */
643  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
644 
645  return page;
646 }
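_bt_blnewpage() builds on the standard PostgreSQL page layout: a header at the front, line pointers growing forward from it, and a btree-specific BTPageOpaqueData kept in the "special space" at the very end of the block, which PageGetSpecialPointer() locates through the header's pd_special offset. A standalone sketch of that arithmetic, with toy types and an assumed 8kB page:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_BLCKSZ 8192         /* assumed page size */

/* Simplified stand-ins for PageHeaderData and BTPageOpaqueData */
typedef struct ToyPageHeader
{
    uint16_t    pd_lower;       /* end of line pointer array */
    uint16_t    pd_upper;       /* start of free space */
    uint16_t    pd_special;     /* offset of special space */
} ToyPageHeader;

typedef struct ToyBTOpaque
{
    uint32_t    btpo_prev;
    uint32_t    btpo_next;
    uint32_t    level;
    uint16_t    flags;
} ToyBTOpaque;

int
main(void)
{
    char       *page = calloc(1, TOY_BLCKSZ);
    ToyPageHeader *hdr = (ToyPageHeader *) page;
    ToyBTOpaque *opaque;

    /* like PageInit: reserve the opaque struct at the very end */
    hdr->pd_special = TOY_BLCKSZ - sizeof(ToyBTOpaque);
    hdr->pd_lower = sizeof(ToyPageHeader);
    hdr->pd_upper = hdr->pd_special;

    /* like PageGetSpecialPointer: special space sits at pd_special */
    opaque = (ToyBTOpaque *) (page + hdr->pd_special);
    opaque->btpo_prev = opaque->btpo_next = 0;  /* P_NONE analogue */
    opaque->level = 0;                          /* leaf level */

    printf("opaque data at offset %d of %d\n",
           (int) hdr->pd_special, TOY_BLCKSZ);
    free(page);
    return 0;
}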

◆ _bt_blwritepage()

static void _bt_blwritepage ( BTWriteState *wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 652 of file nbtsort.c.

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, PageSetChecksumInplace(), palloc0(), pfree(), RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

653 {
654  /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
655  RelationOpenSmgr(wstate->index);
656 
657  /* XLOG stuff */
658  if (wstate->btws_use_wal)
659  {
660  /* We use the XLOG_FPI record type for this */
661  log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
662  }
663 
664  /*
665  * If we have to write pages nonsequentially, fill in the space with
666  * zeroes until we come back and overwrite. This is not logically
667  * necessary on standard Unix filesystems (unwritten space will read as
668  * zeroes anyway), but it should help to avoid fragmentation. The dummy
669  * pages aren't WAL-logged though.
670  */
671  while (blkno > wstate->btws_pages_written)
672  {
673  if (!wstate->btws_zeropage)
674  wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
675  /* don't set checksum for all-zero page */
676  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
677  wstate->btws_pages_written++,
678  (char *) wstate->btws_zeropage,
679  true);
680  }
681 
682  PageSetChecksumInplace(page, blkno);
683 
684  /*
685  * Now write the page. There's no need for smgr to schedule an fsync for
686  * this write; we'll do it ourselves before ending the build.
687  */
688  if (blkno == wstate->btws_pages_written)
689  {
690  /* extending the file... */
691  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
692  (char *) page, true);
693  wstate->btws_pages_written++;
694  }
695  else
696  {
697  /* overwriting a block we zero-filled before */
698  smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
699  (char *) page, true);
700  }
701 
702  pfree(page);
703 }
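Because block numbers are handed out as pages are started (btws_pages_alloced) but pages are finished and written in a different order, _bt_blwritepage() may be asked to write block N of a file that is only M < N blocks long; it zero-fills the gap first and overwrites later. A standalone POSIX analogy of that extend-then-overwrite shape, using plain pwrite() on an ordinary file instead of smgrextend()/smgrwrite(), with error handling omitted:

#define _XOPEN_SOURCE 700       /* for pwrite */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define BLKSZ 8192

static long pages_written = 0;  /* like btws_pages_written */

static void
write_block(int fd, long blkno, const char *page)
{
    static const char zeropage[BLKSZ];  /* like btws_zeropage: all zeroes */

    /* Fill any hole before blkno so the file has no unwritten gap */
    while (blkno > pages_written)
    {
        (void) pwrite(fd, zeropage, BLKSZ, (off_t) pages_written * BLKSZ);
        pages_written++;
    }

    if (blkno == pages_written)
        pages_written++;        /* extending the file */
    /* else: overwriting a block that was zero-filled earlier */
    (void) pwrite(fd, page, BLKSZ, (off_t) blkno * BLKSZ);
}

int
main(void)
{
    int     fd = open("toyindex.bin", O_CREAT | O_RDWR | O_TRUNC, 0644);
    char    page[BLKSZ];

    memset(page, 'x', BLKSZ);
    write_block(fd, 3, page);   /* blocks 0..2 are zero-filled first */
    write_block(fd, 1, page);   /* overwrite a previously zeroed block */
    close(fd);
    return 0;
}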

◆ _bt_build_callback()

static void _bt_build_callback ( Relation  index,
HeapTuple  htup,
Datum *values,
bool *isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 596 of file nbtsort.c.

References _bt_spool(), BTBuildState::havedead, BTBuildState::indtuples, BTBuildState::spool, BTBuildState::spool2, and HeapTupleData::t_self.

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().

602 {
603  BTBuildState *buildstate = (BTBuildState *) state;
604 
605  /*
606  * insert the index tuple into the appropriate spool file for subsequent
607  * processing
608  */
609  if (tupleIsAlive || buildstate->spool2 == NULL)
610  _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
611  else
612  {
613  /* dead tuples are put into spool2 */
614  buildstate->havedead = true;
615  _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
616  }
617 
618  buildstate->indtuples += 1;
619 }

◆ _bt_buildadd()

static void _bt_buildadd ( BTWriteState *wstate,
BTPageState *state,
IndexTuple  itup 
)
static

Definition at line 837 of file nbtsort.c.

References _bt_blnewpage(), _bt_blwritepage(), _bt_check_third_page(), _bt_pagestate(), _bt_sortaddtup(), _bt_truncate(), Assert, BTMaxItemSize, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTreeInnerTupleSetDownLink, BTreeTupleGetNAtts, BTreeTupleSetNAtts, BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), elog, ERROR, BTWriteState::heap, BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, IndexTupleSize, BTWriteState::inskey, ItemIdGetLength, ItemIdSetUnused, MAXALIGN, OffsetNumberNext, OffsetNumberPrev, P_FIRSTKEY, P_HIKEY, P_LEFTMOST, P_NONE, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetSpecialPointer, PageIndexTupleOverwrite(), pfree(), and unlikely.

Referenced by _bt_load(), and _bt_uppershutdown().

838 {
839  Page npage;
840  BlockNumber nblkno;
841  OffsetNumber last_off;
842  Size pgspc;
843  Size itupsz;
844  bool isleaf;
845 
846  /*
847  * This is a handy place to check for cancel interrupts during the btree
848  * load phase of index creation.
849  */
850  CHECK_FOR_INTERRUPTS();
851 
852  npage = state->btps_page;
853  nblkno = state->btps_blkno;
854  last_off = state->btps_lastoff;
855 
856  pgspc = PageGetFreeSpace(npage);
857  itupsz = IndexTupleSize(itup);
858  itupsz = MAXALIGN(itupsz);
859  /* Leaf case has slightly different rules due to suffix truncation */
860  isleaf = (state->btps_level == 0);
861 
862  /*
863  * Check whether the new item can fit on a btree page on current level at
864  * all.
865  *
866  * Every newly built index will treat heap TID as part of the keyspace,
867  * which imposes the requirement that new high keys must occasionally have
868  * a heap TID appended within _bt_truncate(). That may leave a new pivot
869  * tuple one or two MAXALIGN() quantums larger than the original first
870  * right tuple it's derived from. v4 deals with the problem by decreasing
871  * the limit on the size of tuples inserted on the leaf level by the same
872  * small amount. Enforce the new v4+ limit on the leaf level, and the old
873  * limit on internal levels, since pivot tuples may need to make use of
874  * the reserved space. This should never fail on internal pages.
875  */
876  if (unlikely(itupsz > BTMaxItemSize(npage)))
877  _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage,
878  itup);
879 
880  /*
881  * Check to see if current page will fit new item, with space left over to
882  * append a heap TID during suffix truncation when page is a leaf page.
883  *
884  * It is guaranteed that we can fit at least 2 non-pivot tuples plus a
885  * high key with heap TID when finishing off a leaf page, since we rely on
886  * _bt_check_third_page() rejecting oversized non-pivot tuples. On
887  * internal pages we can always fit 3 pivot tuples with larger internal
888  * page tuple limit (includes page high key).
889  *
890  * Most of the time, a page is only "full" in the sense that the soft
891  * fillfactor-wise limit has been exceeded. However, we must always leave
892  * at least two items plus a high key on each page before starting a new
893  * page. Disregard fillfactor and insert on "full" current page if we
894  * don't have the minimum number of items yet. (Note that we deliberately
895  * assume that suffix truncation neither enlarges nor shrinks new high key
896  * when applying soft limit.)
897  */
898  if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) ||
899  (pgspc < state->btps_full && last_off > P_FIRSTKEY))
900  {
901  /*
902  * Finish off the page and write it out.
903  */
904  Page opage = npage;
905  BlockNumber oblkno = nblkno;
906  ItemId ii;
907  ItemId hii;
908  IndexTuple oitup;
909 
910  /* Create new page of same level */
911  npage = _bt_blnewpage(state->btps_level);
912 
913  /* and assign it a page position */
914  nblkno = wstate->btws_pages_alloced++;
915 
916  /*
917  * We copy the last item on the page into the new page, and then
918  * rearrange the old page so that the 'last item' becomes its high key
919  * rather than a true data item. There had better be at least two
920  * items on the page already, else the page would be empty of useful
921  * data.
922  */
923  Assert(last_off > P_FIRSTKEY);
924  ii = PageGetItemId(opage, last_off);
925  oitup = (IndexTuple) PageGetItem(opage, ii);
926  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
927 
928  /*
929  * Move 'last' into the high key position on opage. _bt_blnewpage()
930  * allocated empty space for a line pointer when opage was first
931  * created, so this is a matter of rearranging already-allocated space
932  * on page, and initializing high key line pointer. (Actually, leaf
933  * pages must also swap oitup with a truncated version of oitup, which
934  * is sometimes larger than oitup, though never by more than the space
935  * needed to append a heap TID.)
936  */
937  hii = PageGetItemId(opage, P_HIKEY);
938  *hii = *ii;
939  ItemIdSetUnused(ii); /* redundant */
940  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
941 
942  if (isleaf)
943  {
944  IndexTuple lastleft;
945  IndexTuple truncated;
946 
947  /*
948  * Truncate away any unneeded attributes from high key on leaf
949  * level. This is only done at the leaf level because downlinks
950  * in internal pages are either negative infinity items, or get
951  * their contents from copying from one level down. See also:
952  * _bt_split().
953  *
954  * We don't try to bias our choice of split point to make it more
955  * likely that _bt_truncate() can truncate away more attributes,
956  * whereas the split point used within _bt_split() is chosen much
957  * more delicately. Suffix truncation is mostly useful because it
958  * improves space utilization for workloads with random
959  * insertions. It doesn't seem worthwhile to add logic for
960  * choosing a split point here for a benefit that is bound to be
961  * much smaller.
962  *
963  * Overwrite the old item with new truncated high key directly.
964  * oitup is already located at the physical beginning of tuple
965  * space, so this should directly reuse the existing tuple space.
966  */
967  ii = PageGetItemId(opage, OffsetNumberPrev(last_off));
968  lastleft = (IndexTuple) PageGetItem(opage, ii);
969 
970  truncated = _bt_truncate(wstate->index, lastleft, oitup,
971  wstate->inskey);
972  if (!PageIndexTupleOverwrite(opage, P_HIKEY, (Item) truncated,
973  IndexTupleSize(truncated)))
974  elog(ERROR, "failed to add high key to the index page");
975  pfree(truncated);
976 
977  /* oitup should continue to point to the page's high key */
978  hii = PageGetItemId(opage, P_HIKEY);
979  oitup = (IndexTuple) PageGetItem(opage, hii);
980  }
981 
982  /*
983  * Link the old page into its parent, using its minimum key. If we
984  * don't have a parent, we have to create one; this adds a new btree
985  * level.
986  */
987  if (state->btps_next == NULL)
988  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
989 
990  Assert((BTreeTupleGetNAtts(state->btps_minkey, wstate->index) <=
991  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
992  BTreeTupleGetNAtts(state->btps_minkey, wstate->index) > 0) ||
993  P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage)));
994  Assert(BTreeTupleGetNAtts(state->btps_minkey, wstate->index) == 0 ||
995  !P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage)));
996  BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno);
997  _bt_buildadd(wstate, state->btps_next, state->btps_minkey);
998  pfree(state->btps_minkey);
999 
1000  /*
1001  * Save a copy of the high key from the old page. It is also used as
1002  * the minimum key for the new page.
1003  */
1004  state->btps_minkey = CopyIndexTuple(oitup);
1005 
1006  /*
1007  * Set the sibling links for both pages.
1008  */
1009  {
1010  BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
1011  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
1012 
1013  oopaque->btpo_next = nblkno;
1014  nopaque->btpo_prev = oblkno;
1015  nopaque->btpo_next = P_NONE; /* redundant */
1016  }
1017 
1018  /*
1019  * Write out the old page. We never need to touch it again, so we can
1020  * free the opage workspace too.
1021  */
1022  _bt_blwritepage(wstate, opage, oblkno);
1023 
1024  /*
1025  * Reset last_off to point to new page
1026  */
1027  last_off = P_FIRSTKEY;
1028  }
1029 
1030  /*
1031  * By here, either original page is still the current page, or a new page
1032  * was created that became the current page. Either way, the current page
1033  * definitely has space for new item.
1034  *
1035  * If the new item is the first for its page, stash a copy for later. Note
1036  * this will only happen for the first item on a level; on later pages,
1037  * the first item for a page is copied from the prior page in the code
1038  * above. The minimum key for an entire level is nothing more than a
1039  * minus infinity (downlink only) pivot tuple placeholder.
1040  */
1041  if (last_off == P_HIKEY)
1042  {
1043  Assert(state->btps_minkey == NULL);
1044  state->btps_minkey = CopyIndexTuple(itup);
1045  /* _bt_sortaddtup() will perform full truncation later */
1046  BTreeTupleSetNAtts(state->btps_minkey, 0);
1047  }
1048 
1049  /*
1050  * Add the new item into the current page.
1051  */
1052  last_off = OffsetNumberNext(last_off);
1053  _bt_sortaddtup(npage, itupsz, itup, last_off);
1054 
1055  state->btps_page = npage;
1056  state->btps_blkno = nblkno;
1057  state->btps_lastoff = last_off;
1058 }
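Taken together, this is the bottom-up build: each level keeps exactly one open page, and finishing a page feeds its minimum key into the parent level, creating that level the first time it is needed. A heavily simplified standalone sketch of just that recursion, with integer keys, an assumed fanout of 4, and none of the high-key or suffix-truncation handling above:

#include <stdio.h>
#include <stdlib.h>

#define FANOUT 4                /* assumed toy fanout */

typedef struct Level
{
    int         keys[FANOUT];
    int         nkeys;
    int         depth;
    struct Level *parent;       /* like btps_next */
} Level;

static void add_key(Level **lvl, int depth, int key);

/* Emit the open page and push its minimum key one level up, like
 * writing opage and recursively _bt_buildadd()ing btps_minkey */
static void
flush_page(Level *lvl)
{
    printf("level %d page:", lvl->depth);
    for (int i = 0; i < lvl->nkeys; i++)
        printf(" %d", lvl->keys[i]);
    printf("\n");
    add_key(&lvl->parent, lvl->depth + 1, lvl->keys[0]);
    lvl->nkeys = 0;
}

static void
add_key(Level **lvl, int depth, int key)
{
    if (*lvl == NULL)
    {
        /* first item on a new level, like _bt_pagestate() */
        *lvl = calloc(1, sizeof(Level));
        (*lvl)->depth = depth;
    }
    if ((*lvl)->nkeys == FANOUT)
        flush_page(*lvl);
    (*lvl)->keys[(*lvl)->nkeys++] = key;
}

int
main(void)
{
    Level      *leaf = NULL;

    for (int k = 1; k <= 10; k++)   /* sorted input, as from tuplesort */
        add_key(&leaf, 0, k);

    /* close out open pages bottom-up, like _bt_uppershutdown() */
    for (Level *l = leaf; l != NULL; l = l->parent)
    {
        if (l->parent != NULL)
            flush_page(l);
        else
        {
            /* topmost open page becomes the root; nothing to push up */
            printf("root (level %d):", l->depth);
            for (int i = 0; i < l->nkeys; i++)
                printf(" %d", l->keys[i]);
            printf("\n");
        }
    }
    return 0;
}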

◆ _bt_end_parallel()

static void _bt_end_parallel ( BTLeader *btleader)
static

Definition at line 1478 of file nbtsort.c.

References DestroyParallelContext(), ExitParallelMode(), IsMVCCSnapshot, BTLeader::pcxt, BTLeader::snapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _bt_begin_parallel(), and btbuild().

1479 {
1480  /* Shutdown worker processes */
1481  WaitForParallelWorkersToFinish(btleader->pcxt);
1482  /* Free last reference to MVCC snapshot, if one was used */
1483  if (IsMVCCSnapshot(btleader->snapshot))
1484  UnregisterSnapshot(btleader->snapshot);
1485  DestroyParallelContext(btleader->pcxt);
1486  ExitParallelMode();
1487 }

◆ _bt_leader_participate_as_worker()

static void _bt_leader_participate_as_worker ( BTBuildState *buildstate)
static

Definition at line 1548 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), BTBuildState::btleader, BTLeader::btshared, BTSpool::heap, BTSpool::index, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, BTLeader::nparticipanttuplesorts, palloc0(), ResetUsage(), BTLeader::sharedsort, BTLeader::sharedsort2, ShowUsage(), and BTBuildState::spool.

Referenced by _bt_begin_parallel().

1549 {
1550  BTLeader *btleader = buildstate->btleader;
1551  BTSpool *leaderworker;
1552  BTSpool *leaderworker2;
1553  int sortmem;
1554 
1555  /* Allocate memory and initialize private spool */
1556  leaderworker = (BTSpool *) palloc0(sizeof(BTSpool));
1557  leaderworker->heap = buildstate->spool->heap;
1558  leaderworker->index = buildstate->spool->index;
1559  leaderworker->isunique = buildstate->spool->isunique;
1560 
1561  /* Initialize second spool, if required */
1562  if (!btleader->btshared->isunique)
1563  leaderworker2 = NULL;
1564  else
1565  {
1566  /* Allocate memory for worker's own private secondary spool */
1567  leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool));
1568 
1569  /* Initialize worker's own secondary spool */
1570  leaderworker2->heap = leaderworker->heap;
1571  leaderworker2->index = leaderworker->index;
1572  leaderworker2->isunique = false;
1573  }
1574 
1575  /*
1576  * Might as well use reliable figure when doling out maintenance_work_mem
1577  * (when requested number of workers were not launched, this will be
1578  * somewhat higher than it is for other workers).
1579  */
1580  sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;
1581 
1582  /* Perform work common to all participants */
1583  _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
1584  btleader->sharedsort, btleader->sharedsort2,
1585  sortmem, true);
1586 
1587 #ifdef BTREE_BUILD_STATS
1588  if (log_btree_build_stats)
1589  {
1590  ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1591  ResetUsage();
1592  }
1593 #endif /* BTREE_BUILD_STATS */
1594 }
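A worked example of the memory split (assuming the documented default maintenance_work_mem of 64MB and two launched workers): with the leader participating there are three tuplesorts, so each participant sorts within 65536/3 = 21845kB.

#include <stdio.h>

int
main(void)
{
    int     maintenance_work_mem = 65536;   /* kB; the documented default */
    int     nworkers_launched = 2;          /* assumed */
    int     leaderparticipates = 1;
    int     nparticipanttuplesorts = nworkers_launched + leaderparticipates;

    printf("sortmem per participant = %d kB\n",
           maintenance_work_mem / nparticipanttuplesorts);
    return 0;
}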

◆ _bt_leafbuild()

static void _bt_leafbuild ( BTSpool *btspool,
BTSpool *btspool2 
)
static

Definition at line 550 of file nbtsort.c.

References _bt_load(), _bt_mkscankey(), BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, BTWriteState::inskey, log_btree_build_stats, pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_LEAF_LOAD, PROGRESS_BTREE_PHASE_PERFORMSORT_1, PROGRESS_BTREE_PHASE_PERFORMSORT_2, PROGRESS_CREATEIDX_SUBPHASE, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, tuplesort_performsort(), and XLogIsNeeded.

Referenced by btbuild().

551 {
552  BTWriteState wstate;
553 
554 #ifdef BTREE_BUILD_STATS
555  if (log_btree_build_stats)
556  {
557  ShowUsage("BTREE BUILD (Spool) STATISTICS");
558  ResetUsage();
559  }
560 #endif /* BTREE_BUILD_STATS */
561 
562  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
563  PROGRESS_BTREE_PHASE_PERFORMSORT_1);
564  tuplesort_performsort(btspool->sortstate);
565  if (btspool2)
566  {
567  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
568  PROGRESS_BTREE_PHASE_PERFORMSORT_2);
569  tuplesort_performsort(btspool2->sortstate);
570  }
571 
572  wstate.heap = btspool->heap;
573  wstate.index = btspool->index;
574  wstate.inskey = _bt_mkscankey(wstate.index, NULL);
575 
576  /*
577  * We need to log index creation in WAL iff WAL archiving/streaming is
578  * enabled UNLESS the index isn't WAL-logged anyway.
579  */
580  wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
581 
582  /* reserve the metapage */
583  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
584  wstate.btws_pages_written = 0;
585  wstate.btws_zeropage = NULL; /* until needed */
586 
587  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
588  PROGRESS_BTREE_PHASE_LEAF_LOAD);
589  _bt_load(&wstate, btspool, btspool2);
590 }

◆ _bt_load()

static void _bt_load ( BTWriteState *wstate,
BTSpool *btspool,
BTSpool *btspool2 
)
static

Definition at line 1135 of file nbtsort.c.

References _bt_buildadd(), _bt_pagestate(), _bt_uppershutdown(), SortSupportData::abbreviate, ApplySortComparator(), Assert, AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, compare(), CurrentMemoryContext, i, BTWriteState::index, index_getattr, IndexRelationGetNumberOfKeyAttributes, BTWriteState::inskey, ItemPointerCompare(), MAIN_FORKNUM, merge(), palloc0(), pfree(), pgstat_progress_update_param(), PrepareSortSupportFromIndexRel(), PROGRESS_CREATEIDX_TUPLES_DONE, RelationData::rd_smgr, RelationGetDescr, RelationNeedsWAL, RelationOpenSmgr, BTScanInsertData::scankeys, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, IndexTupleData::t_tid, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

1136 {
1137  BTPageState *state = NULL;
1138  bool merge = (btspool2 != NULL);
1139  IndexTuple itup,
1140  itup2 = NULL;
1141  bool load1;
1142  TupleDesc tupdes = RelationGetDescr(wstate->index);
1143  int i,
1144  keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index);
1145  SortSupport sortKeys;
1146  int64 tuples_done = 0;
1147 
1148  if (merge)
1149  {
1150  /*
1151  * Another BTSpool for dead tuples exists. Now we have to merge
1152  * btspool and btspool2.
1153  */
1154 
1155  /* the preparation of merge */
1156  itup = tuplesort_getindextuple(btspool->sortstate, true);
1157  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1158 
1159  /* Prepare SortSupport data for each column */
1160  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
1161 
1162  for (i = 0; i < keysz; i++)
1163  {
1164  SortSupport sortKey = sortKeys + i;
1165  ScanKey scanKey = wstate->inskey->scankeys + i;
1166  int16 strategy;
1167 
1168  sortKey->ssup_cxt = CurrentMemoryContext;
1169  sortKey->ssup_collation = scanKey->sk_collation;
1170  sortKey->ssup_nulls_first =
1171  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1172  sortKey->ssup_attno = scanKey->sk_attno;
1173  /* Abbreviation is not supported here */
1174  sortKey->abbreviate = false;
1175 
1176  AssertState(sortKey->ssup_attno != 0);
1177 
1178  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1179  BTGreaterStrategyNumber : BTLessStrategyNumber;
1180 
1181  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
1182  }
1183 
1184  for (;;)
1185  {
1186  load1 = true; /* load BTSpool next ? */
1187  if (itup2 == NULL)
1188  {
1189  if (itup == NULL)
1190  break;
1191  }
1192  else if (itup != NULL)
1193  {
1194  int32 compare = 0;
1195 
1196  for (i = 1; i <= keysz; i++)
1197  {
1198  SortSupport entry;
1199  Datum attrDatum1,
1200  attrDatum2;
1201  bool isNull1,
1202  isNull2;
1203 
1204  entry = sortKeys + i - 1;
1205  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
1206  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
1207 
1208  compare = ApplySortComparator(attrDatum1, isNull1,
1209  attrDatum2, isNull2,
1210  entry);
1211  if (compare > 0)
1212  {
1213  load1 = false;
1214  break;
1215  }
1216  else if (compare < 0)
1217  break;
1218  }
1219 
1220  /*
1221  * If key values are equal, we sort on ItemPointer. This is
1222  * required for btree indexes, since heap TID is treated as an
1223  * implicit last key attribute in order to ensure that all
1224  * keys in the index are physically unique.
1225  */
1226  if (compare == 0)
1227  {
1228  compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid);
1229  Assert(compare != 0);
1230  if (compare > 0)
1231  load1 = false;
1232  }
1233  }
1234  else
1235  load1 = false;
1236 
1237  /* When we see first tuple, create first index page */
1238  if (state == NULL)
1239  state = _bt_pagestate(wstate, 0);
1240 
1241  if (load1)
1242  {
1243  _bt_buildadd(wstate, state, itup);
1244  itup = tuplesort_getindextuple(btspool->sortstate, true);
1245  }
1246  else
1247  {
1248  _bt_buildadd(wstate, state, itup2);
1249  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1250  }
1251 
1252  /* Report progress */
1253  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1254  ++tuples_done);
1255  }
1256  pfree(sortKeys);
1257  }
1258  else
1259  {
1260  /* merge is unnecessary */
1261  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1262  true)) != NULL)
1263  {
1264  /* When we see first tuple, create first index page */
1265  if (state == NULL)
1266  state = _bt_pagestate(wstate, 0);
1267 
1268  _bt_buildadd(wstate, state, itup);
1269 
1270  /* Report progress */
1271  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1272  ++tuples_done);
1273  }
1274  }
1275 
1276  /* Close down final pages and write the metapage */
1277  _bt_uppershutdown(wstate, state);
1278 
1279  /*
1280  * If the index is WAL-logged, we must fsync it down to disk before it's
1281  * safe to commit the transaction. (For a non-WAL-logged index we don't
1282  * care since the index will be uninteresting after a crash anyway.)
1283  *
1284  * It's obvious that we must do this when not WAL-logging the build. It's
1285  * less obvious that we have to do it even if we did WAL-log the index
1286  * pages. The reason is that since we're building outside shared buffers,
1287  * a CHECKPOINT occurring during the build has no way to flush the
1288  * previously written data to disk (indeed it won't know the index even
1289  * exists). A crash later on would replay WAL from the checkpoint,
1290  * therefore it wouldn't replay our earlier WAL entries. If we do not
1291  * fsync those pages here, they might still not be on disk when the crash
1292  * occurs.
1293  */
1294  if (RelationNeedsWAL(wstate->index))
1295  {
1296  RelationOpenSmgr(wstate->index);
1297  smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
1298  }
1299 }
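With two spools present, the loop above is a plain two-way merge of two already-sorted streams: compare key columns first, and on an exact key tie fall back to the heap TID, which is unique, so the merge never has to treat two tuples as equal. A standalone sketch with toy tuples standing in for tuplesort output:

#include <stdio.h>

/* Toy index tuples: a key column plus a unique tiebreaker ("heap TID") */
typedef struct ToyTup
{
    int     key;
    int     tid;
} ToyTup;

int
main(void)
{
    ToyTup  a[] = {{1, 10}, {3, 12}, {3, 40}};  /* spool 1, sorted */
    ToyTup  b[] = {{2, 11}, {3, 20}};           /* spool 2, sorted */
    int     na = 3, nb = 2, i = 0, j = 0;

    while (i < na || j < nb)
    {
        int     load1;          /* load from spool 1 next? */

        if (j >= nb)
            load1 = 1;          /* spool 2 exhausted */
        else if (i >= na)
            load1 = 0;          /* spool 1 exhausted */
        else if (a[i].key != b[j].key)
            load1 = a[i].key < b[j].key;
        else
            load1 = a[i].tid < b[j].tid;    /* key tie: compare "TID" */

        if (load1)
        {
            printf("emit key=%d tid=%d (spool 1)\n", a[i].key, a[i].tid);
            i++;
        }
        else
        {
            printf("emit key=%d tid=%d (spool 2)\n", b[j].key, b[j].tid);
            j++;
        }
    }
    return 0;
}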

◆ _bt_pagestate()

static BTPageState * _bt_pagestate ( BTWriteState *wstate,
uint32  level 
)
static

Definition at line 710 of file nbtsort.c.

References _bt_blnewpage(), BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_DEFAULT_FILLFACTOR, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, P_HIKEY, palloc0(), and RelationGetTargetPageFreeSpace.

Referenced by _bt_buildadd(), and _bt_load().

711 {
712  BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
713 
714  /* create initial page for level */
715  state->btps_page = _bt_blnewpage(level);
716 
717  /* and assign it a page position */
718  state->btps_blkno = wstate->btws_pages_alloced++;
719 
720  state->btps_minkey = NULL;
721  /* initialize lastoff so first item goes into P_FIRSTKEY */
722  state->btps_lastoff = P_HIKEY;
723  state->btps_level = level;
724  /* set "full" threshold based on level. See notes at head of file. */
725  if (level > 0)
726  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
727  else
728  state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
729  BTREE_DEFAULT_FILLFACTOR);
730  /* no parent level, yet */
731  state->btps_next = NULL;
732 
733  return state;
734 }
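A worked example of the "full" thresholds (assuming BLCKSZ of 8192, BTREE_NONLEAF_FILLFACTOR of 70, and the default leaf fillfactor of 90 that RelationGetTargetPageFreeSpace() falls back to): btps_full is the amount of free space below which the page counts as full.

#include <stdio.h>

int
main(void)
{
    int     blcksz = 8192;      /* assumed BLCKSZ */

    /* internal pages: full once less than 30% of the block is free */
    printf("nonleaf btps_full = %d bytes\n", blcksz * (100 - 70) / 100);

    /* leaf pages: default fillfactor 90 leaves a 10% free-space floor */
    printf("leaf btps_full    = %d bytes\n", blcksz * (100 - 90) / 100);
    return 0;
}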

◆ _bt_parallel_build_main()

void _bt_parallel_build_main ( dsm_segment *seg,
shm_toc *toc 
)

Definition at line 1600 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), AccessExclusiveLock, debug_query_string, BTSpool::heap, BTShared::heaprelid, BTSpool::index, index_close(), index_open(), BTShared::indexrelid, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, pgstat_report_activity(), ResetUsage(), RowExclusiveLock, BTShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), ShowUsage(), STATE_RUNNING, table_close(), table_open(), and tuplesort_attach_shared().

1601 {
1602  char *sharedquery;
1603  BTSpool *btspool;
1604  BTSpool *btspool2;
1605  BTShared *btshared;
1606  Sharedsort *sharedsort;
1607  Sharedsort *sharedsort2;
1608  Relation heapRel;
1609  Relation indexRel;
1610  LOCKMODE heapLockmode;
1611  LOCKMODE indexLockmode;
1612  int sortmem;
1613 
1614 #ifdef BTREE_BUILD_STATS
1615  if (log_btree_build_stats)
1616  ResetUsage();
1617 #endif /* BTREE_BUILD_STATS */
1618 
1619  /* Set debug_query_string for individual workers first */
1620  sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
1621  debug_query_string = sharedquery;
1622 
1623  /* Report the query string from leader */
1624  pgstat_report_activity(STATE_RUNNING, debug_query_string);
1625 
1626  /* Look up nbtree shared state */
1627  btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1628 
1629  /* Open relations using lock modes known to be obtained by index.c */
1630  if (!btshared->isconcurrent)
1631  {
1632  heapLockmode = ShareLock;
1633  indexLockmode = AccessExclusiveLock;
1634  }
1635  else
1636  {
1637  heapLockmode = ShareUpdateExclusiveLock;
1638  indexLockmode = RowExclusiveLock;
1639  }
1640 
1641  /* Open relations within worker */
1642  heapRel = table_open(btshared->heaprelid, heapLockmode);
1643  indexRel = index_open(btshared->indexrelid, indexLockmode);
1644 
1645  /* Initialize worker's own spool */
1646  btspool = (BTSpool *) palloc0(sizeof(BTSpool));
1647  btspool->heap = heapRel;
1648  btspool->index = indexRel;
1649  btspool->isunique = btshared->isunique;
1650 
1651  /* Look up shared state private to tuplesort.c */
1652  sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
1653  tuplesort_attach_shared(sharedsort, seg);
1654  if (!btshared->isunique)
1655  {
1656  btspool2 = NULL;
1657  sharedsort2 = NULL;
1658  }
1659  else
1660  {
1661  /* Allocate memory for worker's own private secondary spool */
1662  btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
1663 
1664  /* Initialize worker's own secondary spool */
1665  btspool2->heap = btspool->heap;
1666  btspool2->index = btspool->index;
1667  btspool2->isunique = false;
1668  /* Look up shared state private to tuplesort.c */
1669  sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1670  tuplesort_attach_shared(sharedsort2, seg);
1671  }
1672 
1673  /* Perform sorting of spool, and possibly a spool2 */
1674  sortmem = maintenance_work_mem / btshared->scantuplesortstates;
1675  _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1676  sharedsort2, sortmem, false);
1677 
1678 #ifdef BTREE_BUILD_STATS
1679  if (log_btree_build_stats)
1680  {
1681  ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1682  ResetUsage();
1683  }
1684 #endif /* BTREE_BUILD_STATS */
1685 
1686  index_close(indexRel, indexLockmode);
1687  table_close(heapRel, heapLockmode);
1688 }

◆ _bt_parallel_estimate_shared()

static Size _bt_parallel_estimate_shared(Relation heap, Snapshot snapshot)

Definition at line 1494 of file nbtsort.c.

References add_size(), BUFFERALIGN, and table_parallelscan_estimate().

Referenced by _bt_begin_parallel().

1495 {
1496  /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1497  return add_size(BUFFERALIGN(sizeof(BTShared)),
1498  table_parallelscan_estimate(heap, snapshot));
1499 }
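shm_toc_allocate() hands out buffer-aligned chunks, so padding sizeof(BTShared) with BUFFERALIGN keeps the ParallelTableScanDesc that follows it in the same chunk correctly aligned. A toy version of the rounding (the ALIGNVAL of 8 is an assumption for illustration; the backend derives the real value at configure time):

#include <stdio.h>
#include <stddef.h>

/* Toy TYPEALIGN-style rounding; ALIGNVAL here is hypothetical. */
#define ALIGNVAL	8
#define MY_BUFFERALIGN(len) \
	(((size_t) (len) + (ALIGNVAL - 1)) & ~((size_t) (ALIGNVAL - 1)))

int
main(void)
{
	size_t	rawsize = 121;		/* stand-in for sizeof(BTShared) */

	/* 121 rounds up to 128, so the scan descriptor starts aligned */
	printf("%zu -> %zu\n", rawsize, MY_BUFFERALIGN(rawsize));
	return 0;
}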

◆ _bt_parallel_heapscan()

static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain)

Definition at line 1514 of file nbtsort.c.

References BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTShared::havedead, BTBuildState::havedead, BTShared::indtuples, BTBuildState::indtuples, BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, reltuples, BTShared::reltuples, SpinLockAcquire, SpinLockRelease, WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1515 {
1516  BTShared *btshared = buildstate->btleader->btshared;
1517  int nparticipanttuplesorts;
1518  double reltuples;
1519 
1520  nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
1521  for (;;)
1522  {
1523  SpinLockAcquire(&btshared->mutex);
1524  if (btshared->nparticipantsdone == nparticipanttuplesorts)
1525  {
1526  buildstate->havedead = btshared->havedead;
1527  buildstate->indtuples = btshared->indtuples;
1528  *brokenhotchain = btshared->brokenhotchain;
1529  reltuples = btshared->reltuples;
1530  SpinLockRelease(&btshared->mutex);
1531  break;
1532  }
1533  SpinLockRelease(&btshared->mutex);
1534 
1535  ConditionVariableSleep(&btshared->workersdonecv,
1536  WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1537  }
1538 
1539  ConditionVariableCancelSleep();
1540 
1541  return reltuples;
1542 }
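The loop is the standard condition-variable idiom adapted to spinlocks: a spinlock must not be held across a sleep, so the leader releases it before ConditionVariableSleep() and rechecks nparticipantsdone after each wakeup. For comparison, here is the same leader/worker handshake in plain POSIX threads (a standalone analogue, not backend code), where the mutex is instead handed to the condition wait:

#include <pthread.h>
#include <stdio.h>

typedef struct
{
	pthread_mutex_t mutex;
	pthread_cond_t	workersdonecv;
	int		nparticipantsdone;
} SharedState;

static SharedState st = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0};

#define NPARTICIPANTS 3

static void *
worker(void *arg)
{
	/* a real worker would scan and sort here */
	pthread_mutex_lock(&st.mutex);
	st.nparticipantsdone++;		/* cf. btshared->nparticipantsdone++ */
	pthread_mutex_unlock(&st.mutex);
	pthread_cond_signal(&st.workersdonecv);	/* cf. ConditionVariableSignal() */
	return NULL;
}

int
main(void)
{
	pthread_t	tid[NPARTICIPANTS];

	for (int i = 0; i < NPARTICIPANTS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);

	/* leader: sleep until every participant has reported in */
	pthread_mutex_lock(&st.mutex);
	while (st.nparticipantsdone < NPARTICIPANTS)
		pthread_cond_wait(&st.workersdonecv, &st.mutex);
	pthread_mutex_unlock(&st.mutex);

	for (int i = 0; i < NPARTICIPANTS; i++)
		pthread_join(tid[i], NULL);
	printf("all %d participants done\n", NPARTICIPANTS);
	return 0;
}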

◆ _bt_parallel_scan_and_sort()

static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)

Definition at line 1703 of file nbtsort.c.

References _bt_build_callback(), BTShared::brokenhotchain, BTBuildState::btleader, BuildIndexInfo(), ConditionVariableSignal(), BTShared::havedead, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, BTSpool::index, BTShared::indtuples, BTBuildState::indtuples, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, Min, BTShared::mutex, SortCoordinateData::nParticipants, BTShared::nparticipantsdone, palloc0(), ParallelTableScanFromBTShared, reltuples, BTShared::reltuples, SortCoordinateData::sharedsort, BTSpool::sortstate, SpinLockAcquire, SpinLockRelease, BTBuildState::spool, BTBuildState::spool2, table_beginscan_parallel(), table_index_build_scan(), tuplesort_begin_index_btree(), tuplesort_end(), tuplesort_performsort(), work_mem, and BTShared::workersdonecv.

Referenced by _bt_leader_participate_as_worker(), and _bt_parallel_build_main().

1706 {
1707  SortCoordinate coordinate;
1708  BTBuildState buildstate;
1709  TableScanDesc scan;
1710  double reltuples;
1711  IndexInfo *indexInfo;
1712 
1713  /* Initialize local tuplesort coordination state */
1714  coordinate = palloc0(sizeof(SortCoordinateData));
1715  coordinate->isWorker = true;
1716  coordinate->nParticipants = -1;
1717  coordinate->sharedsort = sharedsort;
1718 
1719  /* Begin "partial" tuplesort */
1720  btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1721  btspool->index,
1722  btspool->isunique,
1723  sortmem, coordinate,
1724  false);
1725 
1726  /*
1727  * Just as with serial case, there may be a second spool. If so, a
1728  * second, dedicated spool2 partial tuplesort is required.
1729  */
1730  if (btspool2)
1731  {
1732  SortCoordinate coordinate2;
1733 
1734  /*
1735  * We expect that the second one (for dead tuples) won't get very
1736  * full, so we give it only work_mem (unless sortmem is less for
1737  * worker). Worker processes are generally permitted to allocate
1738  * work_mem independently.
1739  */
1740  coordinate2 = palloc0(sizeof(SortCoordinateData));
1741  coordinate2->isWorker = true;
1742  coordinate2->nParticipants = -1;
1743  coordinate2->sharedsort = sharedsort2;
1744  btspool2->sortstate =
1745  tuplesort_begin_index_btree(btspool->heap, btspool->index, false,
1746  Min(sortmem, work_mem), coordinate2,
1747  false);
1748  }
1749 
1750  /* Fill in buildstate for _bt_build_callback() */
1751  buildstate.isunique = btshared->isunique;
1752  buildstate.havedead = false;
1753  buildstate.heap = btspool->heap;
1754  buildstate.spool = btspool;
1755  buildstate.spool2 = btspool2;
1756  buildstate.indtuples = 0;
1757  buildstate.btleader = NULL;
1758 
1759  /* Join parallel scan */
1760  indexInfo = BuildIndexInfo(btspool->index);
1761  indexInfo->ii_Concurrent = btshared->isconcurrent;
1762  scan = table_beginscan_parallel(btspool->heap,
1763  ParallelTableScanFromBTShared(btshared));
1764  reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
1765  true, progress, _bt_build_callback,
1766  (void *) &buildstate, scan);
1767 
1768  /*
1769  * Execute this worker's part of the sort.
1770  *
1771  * Unlike leader and serial cases, we cannot avoid calling
1772  * tuplesort_performsort() for spool2 if it ends up containing no dead
1773  * tuples (this is disallowed for workers by tuplesort).
1774  */
1775  tuplesort_performsort(btspool->sortstate);
1776  if (btspool2)
1777  tuplesort_performsort(btspool2->sortstate);
1778 
1779  /*
1780  * Done. Record ambuild statistics, and whether we encountered a broken
1781  * HOT chain.
1782  */
1783  SpinLockAcquire(&btshared->mutex);
1784  btshared->nparticipantsdone++;
1785  btshared->reltuples += reltuples;
1786  if (buildstate.havedead)
1787  btshared->havedead = true;
1788  btshared->indtuples += buildstate.indtuples;
1789  if (indexInfo->ii_BrokenHotChain)
1790  btshared->brokenhotchain = true;
1791  SpinLockRelease(&btshared->mutex);
1792 
1793  /* Notify leader */
1794  ConditionVariableSignal(&btshared->workersdonecv);
1795 
1796  /* We can end tuplesorts immediately */
1797  tuplesort_end(btspool->sortstate);
1798  if (btspool2)
1799  tuplesort_end(btspool2->sortstate);
1800 }
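Note the Min(sortmem, work_mem) budget for spool2 above: the dead-tuple sort never claims more than an ordinary work_mem allocation, and never more than this worker's overall share. A worked example with assumed settings:

#include <stdio.h>

#define Min(x, y)	((x) < (y) ? (x) : (y))

int
main(void)
{
	int		work_mem = 4096;		/* e.g. the 4MB default, in kB */
	int		sortmem = 65536 / 4;	/* e.g. a 16384 kB participant share */

	/* spool2 is expected to stay small, so it gets the smaller budget */
	printf("spool2 budget: %d kB\n", Min(sortmem, work_mem));	/* 4096 */
	return 0;
}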

◆ _bt_slideleft()

static void _bt_slideleft(Page page)

Definition at line 743 of file nbtsort.c.

References OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId, PageGetMaxOffsetNumber, and PageIsEmpty.

Referenced by _bt_uppershutdown().

744 {
745  OffsetNumber off;
746  OffsetNumber maxoff;
747  ItemId previi;
748  ItemId thisii;
749 
750  if (!PageIsEmpty(page))
751  {
752  maxoff = PageGetMaxOffsetNumber(page);
753  previi = PageGetItemId(page, P_HIKEY);
754  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
755  {
756  thisii = PageGetItemId(page, off);
757  *previi = *thisii;
758  previi = thisii;
759  }
760  ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
761  }
762 }
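The copy loop amounts to shifting every line pointer one slot to the left: the first data item overwrites the P_HIKEY slot, and pd_lower shrinks by one ItemId. A toy rendering of the shift with plain arrays (no bufpage.h types involved):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* slot 0 plays the P_HIKEY role; 11, 22, 33 are the data items */
	int		itemids[] = {100, 11, 22, 33};
	int		nitems = 4;

	/* equivalent of the copy loop: move every entry one slot left */
	memmove(&itemids[0], &itemids[1], (nitems - 1) * sizeof(int));
	nitems--;					/* cf. pd_lower -= sizeof(ItemIdData) */

	for (int i = 0; i < nitems; i++)
		printf("%d ", itemids[i]);	/* prints: 11 22 33 */
	printf("\n");
	return 0;
}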

◆ _bt_sortaddtup()

static void _bt_sortaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)

Definition at line 778 of file nbtsort.c.

References BTreeTupleSetNAtts, elog, ERROR, InvalidOffsetNumber, P_FIRSTKEY, P_ISLEAF, PageAddItem, PageGetSpecialPointer, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

782 {
783  BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
784  IndexTupleData trunctuple;
785 
786  if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
787  {
788  trunctuple = *itup;
789  trunctuple.t_info = sizeof(IndexTupleData);
790  /* Deliberately zero INDEX_ALT_TID_MASK bits */
791  BTreeTupleSetNAtts(&trunctuple, 0);
792  itup = &trunctuple;
793  itemsize = sizeof(IndexTupleData);
794  }
795 
796  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
797  false, false) == InvalidOffsetNumber)
798  elog(ERROR, "failed to add item to the index page");
799 }

◆ _bt_spool()

static void _bt_spool(BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)

Definition at line 539 of file nbtsort.c.

References BTSpool::index, BTSpool::sortstate, and tuplesort_putindextuplevalues().

Referenced by _bt_build_callback().

540 {
541  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
542  self, values, isnull);
543 }

◆ _bt_spooldestroy()

static void _bt_spooldestroy(BTSpool *btspool)

Definition at line 529 of file nbtsort.c.

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by _bt_spools_heapscan(), and btbuild().

530 {
531  tuplesort_end(btspool->sortstate);
532  pfree(btspool);
533 }

◆ _bt_spools_heapscan()

static double _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)

Definition at line 379 of file nbtsort.c.

References _bt_begin_parallel(), _bt_build_callback(), _bt_parallel_heapscan(), _bt_spooldestroy(), BTBuildState::btleader, BTSpool::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexInfo::ii_Unique, BTSpool::index, BTSpool::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, maintenance_work_mem, SortCoordinateData::nParticipants, BTLeader::nparticipanttuplesorts, palloc0(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN, PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, reltuples, SortCoordinateData::sharedsort, BTLeader::sharedsort, BTLeader::sharedsort2, BTSpool::sortstate, BTBuildState::spool, BTBuildState::spool2, table_index_build_scan(), tuplesort_begin_index_btree(), val, and work_mem.

Referenced by btbuild().

381 {
382  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
383  SortCoordinate coordinate = NULL;
384  double reltuples = 0;
385 
386  /*
387  * We size the sort area as maintenance_work_mem rather than work_mem to
388  * speed index creation. This should be OK since a single backend can't
389  * run multiple index creations in parallel (see also: notes on
390  * parallelism and maintenance_work_mem below).
391  */
392  btspool->heap = heap;
393  btspool->index = index;
394  btspool->isunique = indexInfo->ii_Unique;
395 
396  /* Save as primary spool */
397  buildstate->spool = btspool;
398 
399  /* Report table scan phase started */
400  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
401  PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN);
402 
403  /* Attempt to launch parallel worker scan when required */
404  if (indexInfo->ii_ParallelWorkers > 0)
405  _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
406  indexInfo->ii_ParallelWorkers);
407 
408  /*
409  * If parallel build requested and at least one worker process was
410  * successfully launched, set up coordination state
411  */
412  if (buildstate->btleader)
413  {
414  coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
415  coordinate->isWorker = false;
416  coordinate->nParticipants =
417  buildstate->btleader->nparticipanttuplesorts;
418  coordinate->sharedsort = buildstate->btleader->sharedsort;
419  }
420 
421  /*
422  * Begin serial/leader tuplesort.
423  *
424  * In cases where parallelism is involved, the leader receives the same
425  * share of maintenance_work_mem as a serial sort (it is generally treated
426  * in the same way as a serial sort once we return). Parallel worker
427  * Tuplesortstates will have received only a fraction of
428  * maintenance_work_mem, though.
429  *
430  * We rely on the lifetime of the Leader Tuplesortstate almost not
431  * overlapping with any worker Tuplesortstate's lifetime. There may be
432  * some small overlap, but that's okay because we rely on leader
433  * Tuplesortstate only allocating a small, fixed amount of memory here.
434  * When its tuplesort_performsort() is called (by our caller), and
435  * significant amounts of memory are likely to be used, all workers must
436  * have already freed almost all memory held by their Tuplesortstates
437  * (they are about to go away completely, too). The overall effect is
438  * that maintenance_work_mem always represents an absolute high watermark
439  * on the amount of memory used by a CREATE INDEX operation, regardless of
440  * the use of parallelism or any other factor.
441  */
442  buildstate->spool->sortstate =
443  tuplesort_begin_index_btree(heap, index, buildstate->isunique,
444  maintenance_work_mem, coordinate,
445  false);
446 
447  /*
448  * If building a unique index, put dead tuples in a second spool to keep
449  * them out of the uniqueness check. We expect that the second spool (for
450  * dead tuples) won't get very full, so we give it only work_mem.
451  */
452  if (indexInfo->ii_Unique)
453  {
454  BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
455  SortCoordinate coordinate2 = NULL;
456 
457  /* Initialize secondary spool */
458  btspool2->heap = heap;
459  btspool2->index = index;
460  btspool2->isunique = false;
461  /* Save as secondary spool */
462  buildstate->spool2 = btspool2;
463 
464  if (buildstate->btleader)
465  {
466  /*
467  * Set up non-private state that is passed to
468  * tuplesort_begin_index_btree() about the basic high level
469  * coordination of a parallel sort.
470  */
471  coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
472  coordinate2->isWorker = false;
473  coordinate2->nParticipants =
474  buildstate->btleader->nparticipanttuplesorts;
475  coordinate2->sharedsort = buildstate->btleader->sharedsort2;
476  }
477 
478  /*
479  * We expect that the second one (for dead tuples) won't get very
480  * full, so we give it only work_mem
481  */
482  buildstate->spool2->sortstate =
483  tuplesort_begin_index_btree(heap, index, false, work_mem,
484  coordinate2, false);
485  }
486 
487  /* Fill spool using either serial or parallel heap scan */
488  if (!buildstate->btleader)
489  reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
490  _bt_build_callback, (void *) buildstate,
491  NULL);
492  else
493  reltuples = _bt_parallel_heapscan(buildstate,
494  &indexInfo->ii_BrokenHotChain);
495 
496  /*
497  * Set the progress target for the next phase. Reset the block number
498  * values set by table_index_build_scan
499  */
500  {
501  const int index[] = {
502  PROGRESS_CREATEIDX_TUPLES_TOTAL,
503  PROGRESS_SCAN_BLOCKS_TOTAL,
504  PROGRESS_SCAN_BLOCKS_DONE
505  };
506  const int64 val[] = {
507  buildstate->indtuples,
508  0, 0
509  };
510 
511  pgstat_progress_update_multi_param(3, index, val);
512  }
513 
514  /* okay, all heap tuples are spooled */
515  if (buildstate->spool2 && !buildstate->havedead)
516  {
517  /* spool2 turns out to be unnecessary */
518  _bt_spooldestroy(buildstate->spool2);
519  buildstate->spool2 = NULL;
520  }
521 
522  return reltuples;
523 }
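The long comment before the leader tuplesort makes a quantitative claim: maintenance_work_mem is a high watermark for the whole build, because worker slices are freed before the leader's merge needs significant memory. A back-of-envelope sketch of that accounting (illustrative numbers; assumes the lifetimes barely overlap, as the comment argues):

#include <stdio.h>

int
main(void)
{
	int		mwm = 65536;		/* maintenance_work_mem, in kB */
	int		nparticipants = 4;

	/* scan/sort phase: every participant holds its slice at once */
	int		phase1_peak = nparticipants * (mwm / nparticipants);

	/* leader merge phase: workers have already freed their slices */
	int		phase2_peak = mwm;

	/* neither phase exceeds maintenance_work_mem */
	printf("peaks: %d kB, then %d kB\n", phase1_peak, phase2_peak);
	return 0;
}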

◆ _bt_uppershutdown()

static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)

Definition at line 1064 of file nbtsort.c.

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), Assert, BTP_ROOT, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, BTreeInnerTupleSetDownLink, BTreeTupleGetNAtts, BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, P_LEFTMOST, P_NONE, PageGetSpecialPointer, palloc(), and pfree().

Referenced by _bt_load().

1065 {
1066  BTPageState *s;
1067  BlockNumber rootblkno = P_NONE;
1068  uint32 rootlevel = 0;
1069  Page metapage;
1070 
1071  /*
1072  * Each iteration of this loop completes one more level of the tree.
1073  */
1074  for (s = state; s != NULL; s = s->btps_next)
1075  {
1076  BlockNumber blkno;
1077  BTPageOpaque opaque;
1078 
1079  blkno = s->btps_blkno;
1080  opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
1081 
1082  /*
1083  * We have to link the last page on this level to somewhere.
1084  *
1085  * If we're at the top, it's the root, so attach it to the metapage.
1086  * Otherwise, add an entry for it to its parent using its minimum key.
1087  * This may cause the last page of the parent level to split, but
1088  * that's not a problem -- we haven't gotten to it yet.
1089  */
1090  if (s->btps_next == NULL)
1091  {
1092  opaque->btpo_flags |= BTP_ROOT;
1093  rootblkno = blkno;
1094  rootlevel = s->btps_level;
1095  }
1096  else
1097  {
1098  Assert((BTreeTupleGetNAtts(s->btps_minkey, wstate->index) <=
1099  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
1100  BTreeTupleGetNAtts(s->btps_minkey, wstate->index) > 0) ||
1101  P_LEFTMOST(opaque));
1102  Assert(BTreeTupleGetNAtts(s->btps_minkey, wstate->index) == 0 ||
1103  !P_LEFTMOST(opaque));
1104  BTreeInnerTupleSetDownLink(s->btps_minkey, blkno);
1105  _bt_buildadd(wstate, s->btps_next, s->btps_minkey);
1106  pfree(s->btps_minkey);
1107  s->btps_minkey = NULL;
1108  }
1109 
1110  /*
1111  * This is the rightmost page, so the ItemId array needs to be slid
1112  * back one slot. Then we can dump out the page.
1113  */
1114  _bt_slideleft(s->btps_page);
1115  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1116  s->btps_page = NULL; /* writepage freed the workspace */
1117  }
1118 
1119  /*
1120  * As the last step in the process, construct the metapage and make it
1121  * point to the new root (unless we had no data at all, in which case it's
1122  * set to point to "P_NONE"). This changes the index to the "valid" state
1123  * by filling in a valid magic number in the metapage.
1124  */
1125  metapage = (Page) palloc(BLCKSZ);
1126  _bt_initmetapage(metapage, rootblkno, rootlevel);
1127  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
1128 }
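As a rough sense of how much work this loop does: the number of levels above the leaves grows with the logarithm of the leaf page count, base fanout. A back-of-envelope sketch (the fanout figure is a hypothetical assumption, not something this file computes; link with -lm):

#include <stdio.h>
#include <math.h>

int
main(void)
{
	double	leafpages = 1000000.0;	/* hypothetical leaf page count */
	double	fanout = 300.0;			/* hypothetical pivot tuples per page */
	int		levels = (int) ceil(log(leafpages) / log(fanout));

	printf("~%d level(s) above the leaves\n", levels);	/* ~3 */
	return 0;
}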

◆ btbuild()

IndexBuildResult *btbuild(Relation heap, Relation index, IndexInfo *indexInfo)

Definition at line 310 of file nbtsort.c.

References _bt_end_parallel(), _bt_leafbuild(), _bt_spooldestroy(), _bt_spools_heapscan(), BTBuildState::btleader, elog, ERROR, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexBuildResult::heap_tuples, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, BTBuildState::indtuples, BTBuildState::isunique, log_btree_build_stats, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, reltuples, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

311 {
312  IndexBuildResult *result;
313  BTBuildState buildstate;
314  double reltuples;
315 
316 #ifdef BTREE_BUILD_STATS
317  if (log_btree_build_stats)
318  ResetUsage();
319 #endif /* BTREE_BUILD_STATS */
320 
321  buildstate.isunique = indexInfo->ii_Unique;
322  buildstate.havedead = false;
323  buildstate.heap = heap;
324  buildstate.spool = NULL;
325  buildstate.spool2 = NULL;
326  buildstate.indtuples = 0;
327  buildstate.btleader = NULL;
328 
329  /*
330  * We expect to be called exactly once for any index relation. If that's
331  * not the case, big trouble's what we have.
332  */
333  if (RelationGetNumberOfBlocks(index) != 0)
334  elog(ERROR, "index \"%s\" already contains data",
335  RelationGetRelationName(index));
336 
337  reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
338 
339  /*
340  * Finish the build by (1) completing the sort of the spool file, (2)
341  * inserting the sorted tuples into btree pages and (3) building the upper
342  * levels. Finally, it may also be necessary to end use of parallelism.
343  */
344  _bt_leafbuild(buildstate.spool, buildstate.spool2);
345  _bt_spooldestroy(buildstate.spool);
346  if (buildstate.spool2)
347  _bt_spooldestroy(buildstate.spool2);
348  if (buildstate.btleader)
349  _bt_end_parallel(buildstate.btleader);
350 
351  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
352 
353  result->heap_tuples = reltuples;
354  result->index_tuples = buildstate.indtuples;
355 
356 #ifdef BTREE_BUILD_STATS
357  if (log_btree_build_stats)
358  {
359  ShowUsage("BTREE BUILD STATS");
360  ResetUsage();
361  }
362 #endif /* BTREE_BUILD_STATS */
363 
364  return result;
365 }
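btbuild() is not called directly: the btree handler installs it as the IndexAmRoutine's ambuild callback (hence the "Referenced by bthandler()" note above), and CREATE INDEX reaches it through index_build(). An abridged sketch of that registration, following the shape of bthandler() in nbtree.c (fragment only, not a standalone program):

Datum
bthandler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	amroutine->ambuild = btbuild;	/* the entry point documented above */
	/* the real handler fills in every other am* callback as well */

	PG_RETURN_POINTER(amroutine);
}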