PostgreSQL Source Code (git master)
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "commands/progress.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
Include dependency graph for nbtsort.c:


Data Structures

struct  BTSpool
 
struct  BTShared
 
struct  BTLeader
 
struct  BTBuildState
 
struct  BTPageState
 
struct  BTWriteState
 

Macros

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)
 
#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)
 
#define ParallelTableScanFromBTShared(shared)   (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))
 

Typedefs

typedef struct BTSpool BTSpool
 
typedef struct BTShared BTShared
 
typedef struct BTLeader BTLeader
 
typedef struct BTBuildState BTBuildState
 
typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static double _bt_spools_heapscan (Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
 
static void _bt_spooldestroy (BTSpool *btspool)
 
static void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
static void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_build_callback (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static Page _bt_blnewpage (uint32 level)
 
static BTPageState * _bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page page)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
 
static void _bt_sort_dedup_finish_pending (BTWriteState *wstate, BTPageState *state, BTDedupState dstate)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_begin_parallel (BTBuildState *buildstate, bool isconcurrent, int request)
 
static void _bt_end_parallel (BTLeader *btleader)
 
static Size _bt_parallel_estimate_shared (Relation heap, Snapshot snapshot)
 
static double _bt_parallel_heapscan (BTBuildState *buildstate, bool *brokenhotchain)
 
static void _bt_leader_participate_as_worker (BTBuildState *buildstate)
 
static void _bt_parallel_scan_and_sort (BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BTREE_SHARED

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)

Definition at line 80 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)

Definition at line 83 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)

Definition at line 81 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().

◆ PARALLEL_KEY_TUPLESORT_SPOOL2

#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)

Definition at line 82 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_build_main().
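
All four PARALLEL_KEY_* constants are shm_toc keys identifying the chunks that a parallel btree build stores in its dynamic shared memory (DSM) segment. The fragment below is a hedged sketch of the estimate-then-insert protocol that _bt_begin_parallel() follows for each key; the key value and function name are hypothetical, but the shm_toc calls are the same APIs used in the listings further down.

#include "postgres.h"
#include "access/parallel.h"
#include "storage/shm_toc.h"

/* hypothetical key, for illustration only */
#define PARALLEL_KEY_DEMO   UINT64CONST(0xB000000000000001)

static void
demo_publish_chunk(ParallelContext *pcxt, Size size)
{
    void       *chunk;

    /* before InitializeParallelDSM(): count the space and the TOC key */
    shm_toc_estimate_chunk(&pcxt->estimator, size);
    shm_toc_estimate_keys(&pcxt->estimator, 1);

    /* create the DSM segment; sizes are fixed from this point on */
    InitializeParallelDSM(pcxt);

    /* carve out the chunk and publish it under its key */
    chunk = shm_toc_allocate(pcxt->toc, size);
    shm_toc_insert(pcxt->toc, PARALLEL_KEY_DEMO, chunk);
}

Workers retrieve each chunk with shm_toc_lookup(), as _bt_parallel_build_main() does below.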

◆ ParallelTableScanFromBTShared

#define ParallelTableScanFromBTShared(shared)   (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))

Definition at line 173 of file nbtsort.c.

Referenced by _bt_begin_parallel(), and _bt_parallel_scan_and_sort().
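
ParallelTableScanFromBTShared() relies on a single shared chunk laid out as a fixed-size BTShared immediately followed by the variable-size parallel table scan descriptor, with the header padded out to buffer alignment. Below is a standalone sketch of the same layout idiom, using stand-in names and an assumed 32-byte alignment rather than PostgreSQL's BUFFERALIGN():

#include <stddef.h>

/* stand-in for BUFFERALIGN(); the 32-byte alignment is assumed */
#define DEMO_BUFFERALIGN(len)  (((size_t) (len) + 31) & ~((size_t) 31))

typedef struct DemoShared
{
    int         nparticipants;  /* fixed-size state comes first */
    /* variable-size scan descriptor is stored right after the struct */
} DemoShared;

/* mirrors ParallelTableScanFromBTShared(): skip the aligned header */
#define DemoScanFromShared(shared) \
    ((void *) ((char *) (shared) + DEMO_BUFFERALIGN(sizeof(DemoShared))))

This layout is why _bt_parallel_estimate_shared() sizes the chunk as BUFFERALIGN(sizeof(BTShared)) plus the scan descriptor's own estimate.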

Typedef Documentation

◆ BTBuildState

typedef struct BTBuildState BTBuildState

◆ BTLeader

typedef struct BTLeader BTLeader

◆ BTPageState

typedef struct BTPageState BTPageState

◆ BTShared

typedef struct BTShared BTShared

◆ BTSpool

typedef struct BTSpool BTSpool

◆ BTWriteState

typedef struct BTWriteState BTWriteState

Function Documentation

◆ _bt_begin_parallel()

static void _bt_begin_parallel ( BTBuildState *  buildstate,
bool  isconcurrent,
int  request 
)
static

Definition at line 1467 of file nbtsort.c.

References _bt_end_parallel(), _bt_leader_participate_as_worker(), _bt_parallel_estimate_shared(), Assert, BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableInit(), CreateParallelContext(), debug_query_string, DestroyParallelContext(), EnterParallelMode(), ParallelContext::estimator, ExitParallelMode(), GetTransactionSnapshot(), BTShared::havedead, BTSpool::heap, BTShared::heaprelid, BTSpool::index, BTShared::indexrelid, BTShared::indtuples, InitializeParallelDSM(), BTShared::isconcurrent, IsMVCCSnapshot, BTSpool::isunique, BTShared::isunique, LaunchParallelWorkers(), BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, ParallelContext::nworkers_launched, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, ParallelTableScanFromBTShared, BTLeader::pcxt, RegisterSnapshot(), RelationGetRelid, BTShared::reltuples, BTShared::scantuplesortstates, ParallelContext::seg, BTLeader::sharedsort, BTLeader::sharedsort2, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), BTLeader::snapshot, SnapshotAny, SpinLockInit, BTBuildState::spool, table_parallelscan_initialize(), ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), UnregisterSnapshot(), WaitForParallelWorkersToAttach(), and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1468 {
1469  ParallelContext *pcxt;
1470  int scantuplesortstates;
1471  Snapshot snapshot;
1472  Size estbtshared;
1473  Size estsort;
1474  BTShared *btshared;
1475  Sharedsort *sharedsort;
1476  Sharedsort *sharedsort2;
1477  BTSpool *btspool = buildstate->spool;
1478  BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1479  bool leaderparticipates = true;
1480  char *sharedquery;
1481  int querylen;
1482 
1483 #ifdef DISABLE_LEADER_PARTICIPATION
1484  leaderparticipates = false;
1485 #endif
1486 
1487  /*
1488  * Enter parallel mode, and create context for parallel build of btree
1489  * index
1490  */
1491  EnterParallelMode();
1492  Assert(request > 0);
1493  pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1494  request);
1495 
1496  scantuplesortstates = leaderparticipates ? request + 1 : request;
1497 
1498  /*
1499  * Prepare for scan of the base relation. In a normal index build, we use
1500  * SnapshotAny because we must retrieve all tuples and do our own time
1501  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1502  * concurrent build, we take a regular MVCC snapshot and index whatever's
1503  * live according to that.
1504  */
1505  if (!isconcurrent)
1506  snapshot = SnapshotAny;
1507  else
1508  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1509 
1510  /*
1511  * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and
1512  * PARALLEL_KEY_TUPLESORT tuplesort workspace
1513  */
1514  estbtshared = _bt_parallel_estimate_shared(btspool->heap, snapshot);
1515  shm_toc_estimate_chunk(&pcxt->estimator, estbtshared);
1516  estsort = tuplesort_estimate_shared(scantuplesortstates);
1517  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1518 
1519  /*
1520  * Unique case requires a second spool, and so we may have to account for
1521  * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
1522  */
1523  if (!btspool->isunique)
1524  shm_toc_estimate_keys(&pcxt->estimator, 2);
1525  else
1526  {
1527  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1528  shm_toc_estimate_keys(&pcxt->estimator, 3);
1529  }
1530 
1531  /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1532  querylen = strlen(debug_query_string);
1533  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
1534  shm_toc_estimate_keys(&pcxt->estimator, 1);
1535 
1536  /* Everyone's had a chance to ask for space, so now create the DSM */
1537  InitializeParallelDSM(pcxt);
1538 
1539  /* If no DSM segment was available, back out (do serial build) */
1540  if (pcxt->seg == NULL)
1541  {
1542  if (IsMVCCSnapshot(snapshot))
1543  UnregisterSnapshot(snapshot);
1544  DestroyParallelContext(pcxt);
1545  ExitParallelMode();
1546  return;
1547  }
1548 
1549  /* Store shared build state, for which we reserved space */
1550  btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1551  /* Initialize immutable state */
1552  btshared->heaprelid = RelationGetRelid(btspool->heap);
1553  btshared->indexrelid = RelationGetRelid(btspool->index);
1554  btshared->isunique = btspool->isunique;
1555  btshared->isconcurrent = isconcurrent;
1556  btshared->scantuplesortstates = scantuplesortstates;
1557  ConditionVariableInit(&btshared->workersdonecv);
1558  SpinLockInit(&btshared->mutex);
1559  /* Initialize mutable state */
1560  btshared->nparticipantsdone = 0;
1561  btshared->reltuples = 0.0;
1562  btshared->havedead = false;
1563  btshared->indtuples = 0.0;
1564  btshared->brokenhotchain = false;
1565  table_parallelscan_initialize(btspool->heap,
1566  ParallelTableScanFromBTShared(btshared),
1567  snapshot);
1568 
1569  /*
1570  * Store shared tuplesort-private state, for which we reserved space.
1571  * Then, initialize opaque state using tuplesort routine.
1572  */
1573  sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1574  tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1575  pcxt->seg);
1576 
1577  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared);
1578  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1579 
1580  /* Unique case requires a second spool, and associated shared state */
1581  if (!btspool->isunique)
1582  sharedsort2 = NULL;
1583  else
1584  {
1585  /*
1586  * Store additional shared tuplesort-private state, for which we
1587  * reserved space. Then, initialize opaque state using tuplesort
1588  * routine.
1589  */
1590  sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1591  tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1592  pcxt->seg);
1593 
1594  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
1595  }
1596 
1597  /* Store query string for workers */
1598  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1599  memcpy(sharedquery, debug_query_string, querylen + 1);
1600  shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1601 
1602  /* Launch workers, saving status for leader/caller */
1603  LaunchParallelWorkers(pcxt);
1604  btleader->pcxt = pcxt;
1605  btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1606  if (leaderparticipates)
1607  btleader->nparticipanttuplesorts++;
1608  btleader->btshared = btshared;
1609  btleader->sharedsort = sharedsort;
1610  btleader->sharedsort2 = sharedsort2;
1611  btleader->snapshot = snapshot;
1612 
1613  /* If no workers were successfully launched, back out (do serial build) */
1614  if (pcxt->nworkers_launched == 0)
1615  {
1616  _bt_end_parallel(btleader);
1617  return;
1618  }
1619 
1620  /* Save leader state now that it's clear build will be parallel */
1621  buildstate->btleader = btleader;
1622 
1623  /* Join heap scan ourselves */
1624  if (leaderparticipates)
1625  _bt_leader_participate_as_worker(buildstate);
1626 
1627  /*
1628  * Caller needs to wait for all launched workers when we return. Make
1629  * sure that the failure-to-start case will not hang forever.
1630  */
1631  WaitForParallelWorkersToAttach(pcxt);
1632 }
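
The listing has two back-out paths: no DSM segment could be created (pcxt->seg == NULL) and no workers were launched (nworkers_launched == 0). Either way buildstate->btleader stays NULL and the caller, _bt_spools_heapscan(), falls back to a serial build. A standalone model of that decision, with illustrative names:

#include <stdbool.h>
#include <stdio.h>

typedef struct DemoLeader
{
    int         nparticipants;
} DemoLeader;

static DemoLeader *
demo_begin_parallel(bool have_dsm_segment, int nworkers_launched,
                    bool leaderparticipates)
{
    static DemoLeader leader;

    if (!have_dsm_segment)
        return NULL;            /* back out: caller builds serially */

    leader.nparticipants = nworkers_launched + (leaderparticipates ? 1 : 0);
    if (nworkers_launched == 0)
        return NULL;            /* back out: caller builds serially */

    return &leader;
}

int
main(void)
{
    DemoLeader *l = demo_begin_parallel(true, 2, true);

    if (l != NULL)
        printf("parallel build, %d participant tuplesorts\n", l->nparticipants);
    else
        printf("serial build\n");
    return 0;
}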

◆ _bt_blnewpage()

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 622 of file nbtsort.c.

References _bt_pageinit(), BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageOpaqueData::level, P_NONE, PageGetSpecialPointer, and palloc().

Referenced by _bt_buildadd(), and _bt_pagestate().

623 {
624  Page page;
625  BTPageOpaque opaque;
626 
627  page = (Page) palloc(BLCKSZ);
628 
629  /* Zero the page and set up standard page header info */
630  _bt_pageinit(page, BLCKSZ);
631 
632  /* Initialize BT opaque state */
633  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
634  opaque->btpo_prev = opaque->btpo_next = P_NONE;
635  opaque->btpo.level = level;
636  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
637  opaque->btpo_cycleid = 0;
638 
639  /* Make the P_HIKEY line pointer appear allocated */
640  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
641 
642  return page;
643 }

◆ _bt_blwritepage()

static void _bt_blwritepage ( BTWriteState *  wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 649 of file nbtsort.c.

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, PageSetChecksumInplace(), palloc0(), pfree(), RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

650 {
651  /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
652  RelationOpenSmgr(wstate->index);
653 
654  /* XLOG stuff */
655  if (wstate->btws_use_wal)
656  {
657  /* We use the XLOG_FPI record type for this */
658  log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
659  }
660 
661  /*
662  * If we have to write pages nonsequentially, fill in the space with
663  * zeroes until we come back and overwrite. This is not logically
664  * necessary on standard Unix filesystems (unwritten space will read as
665  * zeroes anyway), but it should help to avoid fragmentation. The dummy
666  * pages aren't WAL-logged though.
667  */
668  while (blkno > wstate->btws_pages_written)
669  {
670  if (!wstate->btws_zeropage)
671  wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
672  /* don't set checksum for all-zero page */
673  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
674  wstate->btws_pages_written++,
675  (char *) wstate->btws_zeropage,
676  true);
677  }
678 
679  PageSetChecksumInplace(page, blkno);
680 
681  /*
682  * Now write the page. There's no need for smgr to schedule an fsync for
683  * this write; we'll do it ourselves before ending the build.
684  */
685  if (blkno == wstate->btws_pages_written)
686  {
687  /* extending the file... */
688  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
689  (char *) page, true);
690  wstate->btws_pages_written++;
691  }
692  else
693  {
694  /* overwriting a block we zero-filled before */
695  smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
696  (char *) page, true);
697  }
698 
699  pfree(page);
700 }
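
The zero-backfill loop above keeps the underlying file growing strictly sequentially: when a block beyond the current end of file must be written (page numbers are assigned before pages are necessarily finished), every intervening block is first extended with zeroes and only later overwritten. A standalone model of the same rule, with a tiny made-up block size:

#include <stdio.h>
#include <string.h>

#define DEMO_NBLOCKS 8
#define DEMO_BLCKSZ  16         /* made-up block size for the demo */

static char demo_file[DEMO_NBLOCKS][DEMO_BLCKSZ];
static int  demo_pages_written = 0; /* mirrors btws_pages_written */

static void
demo_write_block(int blkno, const char *page)
{
    /* zero-fill any gap so the file is always extended sequentially */
    while (blkno > demo_pages_written)
        memset(demo_file[demo_pages_written++], 0, DEMO_BLCKSZ);

    memcpy(demo_file[blkno], page, DEMO_BLCKSZ);
    if (blkno == demo_pages_written)
        demo_pages_written++;   /* extended the file by one block */
}

int
main(void)
{
    char        page[DEMO_BLCKSZ] = "payload";

    demo_write_block(3, page);  /* blocks 0-2 get zero-filled first */
    printf("blocks written so far: %d\n", demo_pages_written);
    return 0;
}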

◆ _bt_build_callback()

static void _bt_build_callback ( Relation  index,
ItemPointer  tid,
Datum *  values,
bool *  isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 593 of file nbtsort.c.

References _bt_spool(), BTBuildState::havedead, BTBuildState::indtuples, BTBuildState::spool, and BTBuildState::spool2.

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().

599 {
600  BTBuildState *buildstate = (BTBuildState *) state;
601 
602  /*
603  * insert the index tuple into the appropriate spool file for subsequent
604  * processing
605  */
606  if (tupleIsAlive || buildstate->spool2 == NULL)
607  _bt_spool(buildstate->spool, tid, values, isnull);
608  else
609  {
610  /* dead tuples are put into spool2 */
611  buildstate->havedead = true;
612  _bt_spool(buildstate->spool2, tid, values, isnull);
613  }
614 
615  buildstate->indtuples += 1;
616 }
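
A standalone model of the routing rule (names are illustrative): live tuples, or all tuples when no second spool exists, go to the main spool, while recently-dead tuples go to spool2 so that _bt_load() can merge them back in later.

#include <stdbool.h>
#include <stdio.h>

static void
demo_route_tuple(bool tuple_is_alive, bool have_spool2)
{
    if (tuple_is_alive || !have_spool2)
        printf("-> spool\n");
    else
        printf("-> spool2 (recently dead)\n");
}

int
main(void)
{
    demo_route_tuple(true, true);   /* live tuple: main spool */
    demo_route_tuple(false, true);  /* dead tuple, unique build: spool2 */
    demo_route_tuple(false, false); /* dead tuple, no spool2: main spool */
    return 0;
}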

◆ _bt_buildadd()

static void _bt_buildadd ( BTWriteState *  wstate,
BTPageState *  state,
IndexTuple  itup,
Size  truncextra 
)
static

Definition at line 846 of file nbtsort.c.

References _bt_blnewpage(), _bt_blwritepage(), _bt_check_third_page(), _bt_pagestate(), _bt_sortaddtup(), _bt_truncate(), Assert, BTMaxItemSize, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageState::btps_blkno, BTPageState::btps_lastextra, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_lowkey, BTPageState::btps_next, BTPageState::btps_page, BTreeTupleGetNAtts, BTreeTupleSetDownLink(), BTreeTupleSetNAtts(), BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), elog, ERROR, BTWriteState::heap, BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, IndexTupleSize, BTWriteState::inskey, ItemIdGetLength, ItemIdSetUnused, MAXALIGN, OffsetNumberNext, OffsetNumberPrev, P_FIRSTKEY, P_HIKEY, P_LEFTMOST, P_NONE, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetSpecialPointer, PageIndexTupleOverwrite(), palloc0(), pfree(), IndexTupleData::t_info, and unlikely.

Referenced by _bt_load(), _bt_sort_dedup_finish_pending(), and _bt_uppershutdown().

848 {
849  Page npage;
850  BlockNumber nblkno;
851  OffsetNumber last_off;
852  Size last_truncextra;
853  Size pgspc;
854  Size itupsz;
855  bool isleaf;
856 
857  /*
858  * This is a handy place to check for cancel interrupts during the btree
859  * load phase of index creation.
860  */
862 
863  npage = state->btps_page;
864  nblkno = state->btps_blkno;
865  last_off = state->btps_lastoff;
866  last_truncextra = state->btps_lastextra;
867  state->btps_lastextra = truncextra;
868 
869  pgspc = PageGetFreeSpace(npage);
870  itupsz = IndexTupleSize(itup);
871  itupsz = MAXALIGN(itupsz);
872  /* Leaf case has slightly different rules due to suffix truncation */
873  isleaf = (state->btps_level == 0);
874 
875  /*
876  * Check whether the new item can fit on a btree page on current level at
877  * all.
878  *
879  * Every newly built index will treat heap TID as part of the keyspace,
880  * which imposes the requirement that new high keys must occasionally have
881  * a heap TID appended within _bt_truncate(). That may leave a new pivot
882  * tuple one or two MAXALIGN() quantums larger than the original first
883  * right tuple it's derived from. v4 deals with the problem by decreasing
884  * the limit on the size of tuples inserted on the leaf level by the same
885  * small amount. Enforce the new v4+ limit on the leaf level, and the old
886  * limit on internal levels, since pivot tuples may need to make use of
887  * the reserved space. This should never fail on internal pages.
888  */
889  if (unlikely(itupsz > BTMaxItemSize(npage)))
890  _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage,
891  itup);
892 
893  /*
894  * Check to see if current page will fit new item, with space left over to
895  * append a heap TID during suffix truncation when page is a leaf page.
896  *
897  * It is guaranteed that we can fit at least 2 non-pivot tuples plus a
898  * high key with heap TID when finishing off a leaf page, since we rely on
899  * _bt_check_third_page() rejecting oversized non-pivot tuples. On
900  * internal pages we can always fit 3 pivot tuples with larger internal
901  * page tuple limit (includes page high key).
902  *
903  * Most of the time, a page is only "full" in the sense that the soft
904  * fillfactor-wise limit has been exceeded. However, we must always leave
905  * at least two items plus a high key on each page before starting a new
906  * page. Disregard fillfactor and insert on "full" current page if we
907  * don't have the minimum number of items yet. (Note that we deliberately
908  * assume that suffix truncation neither enlarges nor shrinks new high key
909  * when applying soft limit, except when last tuple has a posting list.)
910  */
911  Assert(last_truncextra == 0 || isleaf);
912  if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) ||
913  (pgspc + last_truncextra < state->btps_full && last_off > P_FIRSTKEY))
914  {
915  /*
916  * Finish off the page and write it out.
917  */
918  Page opage = npage;
919  BlockNumber oblkno = nblkno;
920  ItemId ii;
921  ItemId hii;
922  IndexTuple oitup;
923 
924  /* Create new page of same level */
925  npage = _bt_blnewpage(state->btps_level);
926 
927  /* and assign it a page position */
928  nblkno = wstate->btws_pages_alloced++;
929 
930  /*
931  * We copy the last item on the page into the new page, and then
932  * rearrange the old page so that the 'last item' becomes its high key
933  * rather than a true data item. There had better be at least two
934  * items on the page already, else the page would be empty of useful
935  * data.
936  */
937  Assert(last_off > P_FIRSTKEY);
938  ii = PageGetItemId(opage, last_off);
939  oitup = (IndexTuple) PageGetItem(opage, ii);
940  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
941 
942  /*
943  * Move 'last' into the high key position on opage. _bt_blnewpage()
944  * allocated empty space for a line pointer when opage was first
945  * created, so this is a matter of rearranging already-allocated space
946  * on page, and initializing high key line pointer. (Actually, leaf
947  * pages must also swap oitup with a truncated version of oitup, which
948  * is sometimes larger than oitup, though never by more than the space
949  * needed to append a heap TID.)
950  */
951  hii = PageGetItemId(opage, P_HIKEY);
952  *hii = *ii;
953  ItemIdSetUnused(ii); /* redundant */
954  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
955 
956  if (isleaf)
957  {
958  IndexTuple lastleft;
959  IndexTuple truncated;
960 
961  /*
962  * Truncate away any unneeded attributes from high key on leaf
963  * level. This is only done at the leaf level because downlinks
964  * in internal pages are either negative infinity items, or get
965  * their contents from copying from one level down. See also:
966  * _bt_split().
967  *
968  * We don't try to bias our choice of split point to make it more
969  * likely that _bt_truncate() can truncate away more attributes,
970  * whereas the split point used within _bt_split() is chosen much
971  * more delicately. Even still, the lastleft and firstright
972  * tuples passed to _bt_truncate() here are at least not fully
973  * equal to each other when deduplication is used, unless there is
974  * a large group of duplicates (also, unique index builds usually
975  * have few or no spool2 duplicates). When the split point is
976  * between two unequal tuples, _bt_truncate() will avoid including
977  * a heap TID in the new high key, which is the most important
978  * benefit of suffix truncation.
979  *
980  * Overwrite the old item with new truncated high key directly.
981  * oitup is already located at the physical beginning of tuple
982  * space, so this should directly reuse the existing tuple space.
983  */
984  ii = PageGetItemId(opage, OffsetNumberPrev(last_off));
985  lastleft = (IndexTuple) PageGetItem(opage, ii);
986 
987  Assert(IndexTupleSize(oitup) > last_truncextra);
988  truncated = _bt_truncate(wstate->index, lastleft, oitup,
989  wstate->inskey);
990  if (!PageIndexTupleOverwrite(opage, P_HIKEY, (Item) truncated,
991  IndexTupleSize(truncated)))
992  elog(ERROR, "failed to add high key to the index page");
993  pfree(truncated);
994 
995  /* oitup should continue to point to the page's high key */
996  hii = PageGetItemId(opage, P_HIKEY);
997  oitup = (IndexTuple) PageGetItem(opage, hii);
998  }
999 
1000  /*
1001  * Link the old page into its parent, using its low key. If we don't
1002  * have a parent, we have to create one; this adds a new btree level.
1003  */
1004  if (state->btps_next == NULL)
1005  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
1006 
1007  Assert((BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) <=
1008  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
1009  BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) > 0) ||
1010  P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage)));
1011  Assert(BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) == 0 ||
1012  !P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage)));
1013  BTreeTupleSetDownLink(state->btps_lowkey, oblkno);
1014  _bt_buildadd(wstate, state->btps_next, state->btps_lowkey, 0);
1015  pfree(state->btps_lowkey);
1016 
1017  /*
1018  * Save a copy of the high key from the old page. It is also the low
1019  * key for the new page.
1020  */
1021  state->btps_lowkey = CopyIndexTuple(oitup);
1022 
1023  /*
1024  * Set the sibling links for both pages.
1025  */
1026  {
1027  BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
1028  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
1029 
1030  oopaque->btpo_next = nblkno;
1031  nopaque->btpo_prev = oblkno;
1032  nopaque->btpo_next = P_NONE; /* redundant */
1033  }
1034 
1035  /*
1036  * Write out the old page. We never need to touch it again, so we can
1037  * free the opage workspace too.
1038  */
1039  _bt_blwritepage(wstate, opage, oblkno);
1040 
1041  /*
1042  * Reset last_off to point to new page
1043  */
1044  last_off = P_FIRSTKEY;
1045  }
1046 
1047  /*
1048  * By here, either original page is still the current page, or a new page
1049  * was created that became the current page. Either way, the current page
1050  * definitely has space for new item.
1051  *
1052  * If the new item is the first for its page, it must also be the first
1053  * item on its entire level. On later same-level pages, a low key for a
1054  * page will be copied from the prior page in the code above. Generate a
1055  * minus infinity low key here instead.
1056  */
1057  if (last_off == P_HIKEY)
1058  {
1059  Assert(state->btps_lowkey == NULL);
1060  state->btps_lowkey = palloc0(sizeof(IndexTupleData));
1061  state->btps_lowkey->t_info = sizeof(IndexTupleData);
1062  BTreeTupleSetNAtts(state->btps_lowkey, 0);
1063  }
1064 
1065  /*
1066  * Add the new item into the current page.
1067  */
1068  last_off = OffsetNumberNext(last_off);
1069  _bt_sortaddtup(npage, itupsz, itup, last_off);
1070 
1071  state->btps_page = npage;
1072  state->btps_blkno = nblkno;
1073  state->btps_lastoff = last_off;
1074 }
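
Worked example of the leaf-page space test above, under assumed values: DEMO_MAXALIGN stands in for MAXALIGN() with 8-byte alignment, and sizeof(ItemPointerData) is 6 bytes on common platforms, so the reserved heap-TID space rounds up to 8.

#include <stddef.h>
#include <stdio.h>

#define DEMO_MAXALIGN(len)  (((size_t) (len) + 7) & ~((size_t) 7))

int
main(void)
{
    size_t      itupsz = DEMO_MAXALIGN(20);    /* 20-byte tuple -> 24 */
    size_t      tidspace = DEMO_MAXALIGN(6);   /* heap TID reserve -> 8 */
    size_t      pgspc = 28;                    /* free space on leaf page */

    if (pgspc < itupsz + tidspace)
        printf("page is full: finish it and start a new one\n");
    else
        printf("item fits on the current page\n");
    return 0;
}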

◆ _bt_end_parallel()

static void _bt_end_parallel ( BTLeader *  btleader)
static

Definition at line 1638 of file nbtsort.c.

References DestroyParallelContext(), ExitParallelMode(), IsMVCCSnapshot, BTLeader::pcxt, BTLeader::snapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _bt_begin_parallel(), and btbuild().

1639 {
1640  /* Shutdown worker processes */
1642  /* Free last reference to MVCC snapshot, if one was used */
1643  if (IsMVCCSnapshot(btleader->snapshot))
1644  UnregisterSnapshot(btleader->snapshot);
1645  DestroyParallelContext(btleader->pcxt);
1646  ExitParallelMode();
1647 }

◆ _bt_leader_participate_as_worker()

static void _bt_leader_participate_as_worker ( BTBuildState *  buildstate)
static

Definition at line 1708 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), BTBuildState::btleader, BTLeader::btshared, BTSpool::heap, BTSpool::index, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, BTLeader::nparticipanttuplesorts, palloc0(), ResetUsage(), BTLeader::sharedsort, BTLeader::sharedsort2, ShowUsage(), and BTBuildState::spool.

Referenced by _bt_begin_parallel().

1709 {
1710  BTLeader *btleader = buildstate->btleader;
1711  BTSpool *leaderworker;
1712  BTSpool *leaderworker2;
1713  int sortmem;
1714 
1715  /* Allocate memory and initialize private spool */
1716  leaderworker = (BTSpool *) palloc0(sizeof(BTSpool));
1717  leaderworker->heap = buildstate->spool->heap;
1718  leaderworker->index = buildstate->spool->index;
1719  leaderworker->isunique = buildstate->spool->isunique;
1720 
1721  /* Initialize second spool, if required */
1722  if (!btleader->btshared->isunique)
1723  leaderworker2 = NULL;
1724  else
1725  {
1726  /* Allocate memory for worker's own private secondary spool */
1727  leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool));
1728 
1729  /* Initialize worker's own secondary spool */
1730  leaderworker2->heap = leaderworker->heap;
1731  leaderworker2->index = leaderworker->index;
1732  leaderworker2->isunique = false;
1733  }
1734 
1735  /*
1736  * Might as well use reliable figure when doling out maintenance_work_mem
1737  * (when requested number of workers were not launched, this will be
1738  * somewhat higher than it is for other workers).
1739  */
1740  sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;
1741 
1742  /* Perform work common to all participants */
1743  _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
1744  btleader->sharedsort, btleader->sharedsort2,
1745  sortmem, true);
1746 
1747 #ifdef BTREE_BUILD_STATS
1749  {
1750  ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1751  ResetUsage();
1752  }
1753 #endif /* BTREE_BUILD_STATS */
1754 }
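
The sortmem computation above divides maintenance_work_mem evenly among all participant tuplesorts, counting the leader as one participant when it joins in. A worked example with made-up numbers:

#include <stdio.h>

int
main(void)
{
    int         maintenance_work_mem = 65536;   /* KB; illustrative value */
    int         nworkers = 2;                   /* launched workers */
    int         leaderparticipates = 1;

    /* mirrors nparticipanttuplesorts in _bt_begin_parallel() */
    int         nparticipanttuplesorts = nworkers + (leaderparticipates ? 1 : 0);
    int         sortmem = maintenance_work_mem / nparticipanttuplesorts;

    printf("%d participants, %d KB of sort memory each\n",
           nparticipanttuplesorts, sortmem);
    return 0;
}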

◆ _bt_leafbuild()

static void _bt_leafbuild ( BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 545 of file nbtsort.c.

References _bt_allequalimage(), _bt_load(), _bt_mkscankey(), BTScanInsertData::allequalimage, BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, BTWriteState::inskey, log_btree_build_stats, pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_LEAF_LOAD, PROGRESS_BTREE_PHASE_PERFORMSORT_1, PROGRESS_BTREE_PHASE_PERFORMSORT_2, PROGRESS_CREATEIDX_SUBPHASE, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, tuplesort_performsort(), and XLogIsNeeded.

Referenced by btbuild().

546 {
547  BTWriteState wstate;
548 
549 #ifdef BTREE_BUILD_STATS
550  if (log_btree_build_stats)
551  {
552  ShowUsage("BTREE BUILD (Spool) STATISTICS");
553  ResetUsage();
554  }
555 #endif /* BTREE_BUILD_STATS */
556 
557  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
558  PROGRESS_BTREE_PHASE_PERFORMSORT_1);
559  tuplesort_performsort(btspool->sortstate);
560  if (btspool2)
561  {
562  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
563  PROGRESS_BTREE_PHASE_PERFORMSORT_2);
564  tuplesort_performsort(btspool2->sortstate);
565  }
566 
567  wstate.heap = btspool->heap;
568  wstate.index = btspool->index;
569  wstate.inskey = _bt_mkscankey(wstate.index, NULL);
570  /* _bt_mkscankey() won't set allequalimage without metapage */
571  wstate.inskey->allequalimage = _bt_allequalimage(wstate.index, true);
572 
573  /*
574  * We need to log index creation in WAL iff WAL archiving/streaming is
575  * enabled UNLESS the index isn't WAL-logged anyway.
576  */
577  wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
578 
579  /* reserve the metapage */
580  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
581  wstate.btws_pages_written = 0;
582  wstate.btws_zeropage = NULL; /* until needed */
583 
584  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
585  PROGRESS_BTREE_PHASE_LEAF_LOAD);
586  _bt_load(&wstate, btspool, btspool2);
587 }

◆ _bt_load()

static void _bt_load ( BTWriteState *  wstate,
BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 1189 of file nbtsort.c.

References _bt_buildadd(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_keep_natts_fast(), _bt_pagestate(), _bt_sort_dedup_finish_pending(), _bt_uppershutdown(), SortSupportData::abbreviate, BTScanInsertData::allequalimage, ApplySortComparator(), Assert, AssertState, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTGetDeduplicateItems, BTGreaterStrategyNumber, BTLessStrategyNumber, BTMaxItemSize, BTPageState::btps_page, compare(), CopyIndexTuple(), CurrentMemoryContext, BTDedupStateData::deduplicate, BTDedupStateData::htids, i, BTWriteState::index, index_getattr, INDEX_SIZE_MASK, IndexRelationGetNumberOfKeyAttributes, BTWriteState::inskey, InvalidOffsetNumber, ItemPointerCompare(), MAIN_FORKNUM, MAXALIGN_DOWN, BTDedupStateData::maxpostingsize, merge(), BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, palloc(), palloc0(), pfree(), pgstat_progress_update_param(), BTDedupStateData::phystupsize, PrepareSortSupportFromIndexRel(), PROGRESS_CREATEIDX_TUPLES_DONE, RelationData::rd_smgr, RelationGetDescr, RelationNeedsWAL, RelationOpenSmgr, BTScanInsertData::scankeys, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, IndexTupleData::t_tid, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

1190 {
1191  BTPageState *state = NULL;
1192  bool merge = (btspool2 != NULL);
1193  IndexTuple itup,
1194  itup2 = NULL;
1195  bool load1;
1196  TupleDesc tupdes = RelationGetDescr(wstate->index);
1197  int i,
1198  keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index);
1199  SortSupport sortKeys;
1200  int64 tuples_done = 0;
1201  bool deduplicate;
1202 
1203  deduplicate = wstate->inskey->allequalimage &&
1204  BTGetDeduplicateItems(wstate->index);
1205 
1206  if (merge)
1207  {
1208  /*
1209  * Another BTSpool for dead tuples exists. Now we have to merge
1210  * btspool and btspool2.
1211  */
1212 
1213  /* the preparation of merge */
1214  itup = tuplesort_getindextuple(btspool->sortstate, true);
1215  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1216 
1217  /* Prepare SortSupport data for each column */
1218  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
1219 
1220  for (i = 0; i < keysz; i++)
1221  {
1222  SortSupport sortKey = sortKeys + i;
1223  ScanKey scanKey = wstate->inskey->scankeys + i;
1224  int16 strategy;
1225 
1226  sortKey->ssup_cxt = CurrentMemoryContext;
1227  sortKey->ssup_collation = scanKey->sk_collation;
1228  sortKey->ssup_nulls_first =
1229  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1230  sortKey->ssup_attno = scanKey->sk_attno;
1231  /* Abbreviation is not supported here */
1232  sortKey->abbreviate = false;
1233 
1234  AssertState(sortKey->ssup_attno != 0);
1235 
1236  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1237  BTGreaterStrategyNumber : BTLessStrategyNumber;
1238 
1239  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
1240  }
1241 
1242  for (;;)
1243  {
1244  load1 = true; /* load BTSpool next ? */
1245  if (itup2 == NULL)
1246  {
1247  if (itup == NULL)
1248  break;
1249  }
1250  else if (itup != NULL)
1251  {
1252  int32 compare = 0;
1253 
1254  for (i = 1; i <= keysz; i++)
1255  {
1256  SortSupport entry;
1257  Datum attrDatum1,
1258  attrDatum2;
1259  bool isNull1,
1260  isNull2;
1261 
1262  entry = sortKeys + i - 1;
1263  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
1264  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
1265 
1266  compare = ApplySortComparator(attrDatum1, isNull1,
1267  attrDatum2, isNull2,
1268  entry);
1269  if (compare > 0)
1270  {
1271  load1 = false;
1272  break;
1273  }
1274  else if (compare < 0)
1275  break;
1276  }
1277 
1278  /*
1279  * If key values are equal, we sort on ItemPointer. This is
1280  * required for btree indexes, since heap TID is treated as an
1281  * implicit last key attribute in order to ensure that all
1282  * keys in the index are physically unique.
1283  */
1284  if (compare == 0)
1285  {
1286  compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid);
1287  Assert(compare != 0);
1288  if (compare > 0)
1289  load1 = false;
1290  }
1291  }
1292  else
1293  load1 = false;
1294 
1295  /* When we see first tuple, create first index page */
1296  if (state == NULL)
1297  state = _bt_pagestate(wstate, 0);
1298 
1299  if (load1)
1300  {
1301  _bt_buildadd(wstate, state, itup, 0);
1302  itup = tuplesort_getindextuple(btspool->sortstate, true);
1303  }
1304  else
1305  {
1306  _bt_buildadd(wstate, state, itup2, 0);
1307  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1308  }
1309 
1310  /* Report progress */
1311  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1312  ++tuples_done);
1313  }
1314  pfree(sortKeys);
1315  }
1316  else if (deduplicate)
1317  {
1318  /* merge is unnecessary, deduplicate into posting lists */
1319  BTDedupState dstate;
1320 
1321  dstate = (BTDedupState) palloc(sizeof(BTDedupStateData));
1322  dstate->deduplicate = true; /* unused */
1323  dstate->maxpostingsize = 0; /* set later */
1324  /* Metadata about base tuple of current pending posting list */
1325  dstate->base = NULL;
1326  dstate->baseoff = InvalidOffsetNumber; /* unused */
1327  dstate->basetupsize = 0;
1328  /* Metadata about current pending posting list TIDs */
1329  dstate->htids = NULL;
1330  dstate->nhtids = 0;
1331  dstate->nitems = 0;
1332  dstate->phystupsize = 0; /* unused */
1333  dstate->nintervals = 0; /* unused */
1334 
1335  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1336  true)) != NULL)
1337  {
1338  /* When we see first tuple, create first index page */
1339  if (state == NULL)
1340  {
1341  state = _bt_pagestate(wstate, 0);
1342 
1343  /*
1344  * Limit size of posting list tuples to 1/10 space we want to
1345  * leave behind on the page, plus space for final item's line
1346  * pointer. This is equal to the space that we'd like to
1347  * leave behind on each leaf page when fillfactor is 90,
1348  * allowing us to get close to fillfactor% space utilization
1349  * when there happen to be a great many duplicates. (This
1350  * makes higher leaf fillfactor settings ineffective when
1351  * building indexes that have many duplicates, but packing
1352  * leaf pages full with few very large tuples doesn't seem
1353  * like a useful goal.)
1354  */
1355  dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) -
1356  sizeof(ItemIdData);
1357  Assert(dstate->maxpostingsize <= BTMaxItemSize(state->btps_page) &&
1358  dstate->maxpostingsize <= INDEX_SIZE_MASK);
1359  dstate->htids = palloc(dstate->maxpostingsize);
1360 
1361  /* start new pending posting list with itup copy */
1362  _bt_dedup_start_pending(dstate, CopyIndexTuple(itup),
1363  InvalidOffsetNumber);
1364  }
1365  else if (_bt_keep_natts_fast(wstate->index, dstate->base,
1366  itup) > keysz &&
1367  _bt_dedup_save_htid(dstate, itup))
1368  {
1369  /*
1370  * Tuple is equal to base tuple of pending posting list. Heap
1371  * TID from itup has been saved in state.
1372  */
1373  }
1374  else
1375  {
1376  /*
1377  * Tuple is not equal to pending posting list tuple, or
1378  * _bt_dedup_save_htid() opted to not merge current item into
1379  * pending posting list.
1380  */
1381  _bt_sort_dedup_finish_pending(wstate, state, dstate);
1382  pfree(dstate->base);
1383 
1384  /* start new pending posting list with itup copy */
1385  _bt_dedup_start_pending(dstate, CopyIndexTuple(itup),
1386  InvalidOffsetNumber);
1387  }
1388 
1389  /* Report progress */
1390  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1391  ++tuples_done);
1392  }
1393 
1394  if (state)
1395  {
1396  /*
1397  * Handle the last item (there must be a last item when the
1398  * tuplesort returned one or more tuples)
1399  */
1400  _bt_sort_dedup_finish_pending(wstate, state, dstate);
1401  pfree(dstate->base);
1402  pfree(dstate->htids);
1403  }
1404 
1405  pfree(dstate);
1406  }
1407  else
1408  {
1409  /* merging and deduplication are both unnecessary */
1410  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1411  true)) != NULL)
1412  {
1413  /* When we see first tuple, create first index page */
1414  if (state == NULL)
1415  state = _bt_pagestate(wstate, 0);
1416 
1417  _bt_buildadd(wstate, state, itup, 0);
1418 
1419  /* Report progress */
1420  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1421  ++tuples_done);
1422  }
1423  }
1424 
1425  /* Close down final pages and write the metapage */
1426  _bt_uppershutdown(wstate, state);
1427 
1428  /*
1429  * If the index is WAL-logged, we must fsync it down to disk before it's
1430  * safe to commit the transaction. (For a non-WAL-logged index we don't
1431  * care since the index will be uninteresting after a crash anyway.)
1432  *
1433  * It's obvious that we must do this when not WAL-logging the build. It's
1434  * less obvious that we have to do it even if we did WAL-log the index
1435  * pages. The reason is that since we're building outside shared buffers,
1436  * a CHECKPOINT occurring during the build has no way to flush the
1437  * previously written data to disk (indeed it won't know the index even
1438  * exists). A crash later on would replay WAL from the checkpoint,
1439  * therefore it wouldn't replay our earlier WAL entries. If we do not
1440  * fsync those pages here, they might still not be on disk when the crash
1441  * occurs.
1442  */
1443  if (RelationNeedsWAL(wstate->index))
1444  {
1445  RelationOpenSmgr(wstate->index);
1446  smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
1447  }
1448 }
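
The two-spool merge above pulls from both sorted streams, comparing key attributes first and breaking ties on heap TID so the merged output has a total order (the listing's Assert(compare != 0) holds because heap TIDs are unique). A standalone model of the same merge, with integer stand-ins for keys and TIDs:

#include <stdio.h>

typedef struct DemoTuple
{
    int         key;
    int         tid;            /* stands in for the heap TID tiebreak */
} DemoTuple;

static int
demo_compare(const DemoTuple *a, const DemoTuple *b)
{
    if (a->key != b->key)
        return (a->key > b->key) - (a->key < b->key);
    return (a->tid > b->tid) - (a->tid < b->tid);
}

int
main(void)
{
    DemoTuple   spool[] = {{1, 10}, {3, 12}};   /* live tuples, sorted */
    DemoTuple   spool2[] = {{1, 11}, {2, 13}};  /* dead tuples, sorted */
    size_t      i = 0,
                j = 0;

    while (i < 2 || j < 2)
    {
        int         load1 = (j >= 2) ||
            (i < 2 && demo_compare(&spool[i], &spool2[j]) <= 0);
        DemoTuple  *t = load1 ? &spool[i++] : &spool2[j++];

        printf("(%d,%d) ", t->key, t->tid);
    }
    printf("\n");               /* prints (1,10) (1,11) (2,13) (3,12) */
    return 0;
}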

◆ _bt_pagestate()

static BTPageState * _bt_pagestate ( BTWriteState *  wstate,
uint32  level 
)
static

Definition at line 707 of file nbtsort.c.

References _bt_blnewpage(), BTGetTargetPageFreeSpace, BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastextra, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_lowkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, P_HIKEY, and palloc0().

Referenced by _bt_buildadd(), and _bt_load().

708 {
709  BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
710 
711  /* create initial page for level */
712  state->btps_page = _bt_blnewpage(level);
713 
714  /* and assign it a page position */
715  state->btps_blkno = wstate->btws_pages_alloced++;
716 
717  state->btps_lowkey = NULL;
718  /* initialize lastoff so first item goes into P_FIRSTKEY */
719  state->btps_lastoff = P_HIKEY;
720  state->btps_lastextra = 0;
721  state->btps_level = level;
722  /* set "full" threshold based on level. See notes at head of file. */
723  if (level > 0)
724  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
725  else
726  state->btps_full = BTGetTargetPageFreeSpace(wstate->index);
727 
728  /* no parent level, yet */
729  state->btps_next = NULL;
730 
731  return state;
732 }
#define BTGetTargetPageFreeSpace(relation)
Definition: nbtree.h:984
BlockNumber btws_pages_alloced
Definition: nbtsort.c:261
BlockNumber btps_blkno
Definition: nbtsort.c:243
Page btps_page
Definition: nbtsort.c:242
#define BTREE_NONLEAF_FILLFACTOR
Definition: nbtree.h:195
Size btps_full
Definition: nbtsort.c:248
OffsetNumber btps_lastoff
Definition: nbtsort.c:245
Size btps_lastextra
Definition: nbtsort.c:246
Relation index
Definition: nbtsort.c:258
void * palloc0(Size size)
Definition: mcxt.c:980
uint32 btps_level
Definition: nbtsort.c:247
struct BTPageState * btps_next
Definition: nbtsort.c:249
IndexTuple btps_lowkey
Definition: nbtsort.c:244
Definition: regguts.h:298
#define P_HIKEY
Definition: nbtree.h:242
static Page _bt_blnewpage(uint32 level)
Definition: nbtsort.c:622
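
The two branches above are plain fillfactor arithmetic: btps_full is the amount of free space a finished page of that level should keep back. Below is a minimal standalone sketch of the computation, assuming the stock BLCKSZ of 8192 and the default btree fillfactors instead of reading a real relation's reloptions:

#include <stdio.h>

#define BLCKSZ 8192
#define BTREE_NONLEAF_FILLFACTOR 70     /* nbtree.h default for internal pages */
#define BTREE_DEFAULT_FILLFACTOR 90     /* default leaf fillfactor */

/* Free-space target per level, mirroring the branch in _bt_pagestate(). */
static unsigned int
full_threshold(unsigned int level)
{
    if (level > 0)
        return BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100;
    /* leaf level: BTGetTargetPageFreeSpace() boils down to the same formula */
    return BLCKSZ * (100 - BTREE_DEFAULT_FILLFACTOR) / 100;
}

int
main(void)
{
    printf("leaf: keep %u bytes free\n", full_threshold(0));
    printf("internal: keep %u bytes free\n", full_threshold(1));
    return 0;
}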

◆ _bt_parallel_build_main()

void _bt_parallel_build_main ( dsm_segment *  seg,
shm_toc *  toc 
)

Definition at line 1760 of file nbtsort.c.

References _bt_parallel_scan_and_sort(), AccessExclusiveLock, debug_query_string, BTSpool::heap, BTShared::heaprelid, BTSpool::index, index_close(), index_open(), BTShared::indexrelid, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, pgstat_report_activity(), ResetUsage(), RowExclusiveLock, BTShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), ShowUsage(), STATE_RUNNING, table_close(), table_open(), and tuplesort_attach_shared().

1761 {
1762  char *sharedquery;
1763  BTSpool *btspool;
1764  BTSpool *btspool2;
1765  BTShared *btshared;
1766  Sharedsort *sharedsort;
1767  Sharedsort *sharedsort2;
1768  Relation heapRel;
1769  Relation indexRel;
1770  LOCKMODE heapLockmode;
1771  LOCKMODE indexLockmode;
1772  int sortmem;
1773 
1774 #ifdef BTREE_BUILD_STATS
1775  if (log_btree_build_stats)
1776  ResetUsage();
1777 #endif /* BTREE_BUILD_STATS */
1778 
1779  /* Set debug_query_string for individual workers first */
1780  sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
1781  debug_query_string = sharedquery;
1782 
1783  /* Report the query string from leader */
1784  pgstat_report_activity(STATE_RUNNING, debug_query_string);
1785 
1786  /* Look up nbtree shared state */
1787  btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1788 
1789  /* Open relations using lock modes known to be obtained by index.c */
1790  if (!btshared->isconcurrent)
1791  {
1792  heapLockmode = ShareLock;
1793  indexLockmode = AccessExclusiveLock;
1794  }
1795  else
1796  {
1797  heapLockmode = ShareUpdateExclusiveLock;
1798  indexLockmode = RowExclusiveLock;
1799  }
1800 
1801  /* Open relations within worker */
1802  heapRel = table_open(btshared->heaprelid, heapLockmode);
1803  indexRel = index_open(btshared->indexrelid, indexLockmode);
1804 
1805  /* Initialize worker's own spool */
1806  btspool = (BTSpool *) palloc0(sizeof(BTSpool));
1807  btspool->heap = heapRel;
1808  btspool->index = indexRel;
1809  btspool->isunique = btshared->isunique;
1810 
1811  /* Look up shared state private to tuplesort.c */
1812  sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
1813  tuplesort_attach_shared(sharedsort, seg);
1814  if (!btshared->isunique)
1815  {
1816  btspool2 = NULL;
1817  sharedsort2 = NULL;
1818  }
1819  else
1820  {
1821  /* Allocate memory for worker's own private secondary spool */
1822  btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
1823 
1824  /* Initialize worker's own secondary spool */
1825  btspool2->heap = btspool->heap;
1826  btspool2->index = btspool->index;
1827  btspool2->isunique = false;
1828  /* Look up shared state private to tuplesort.c */
1829  sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1830  tuplesort_attach_shared(sharedsort2, seg);
1831  }
1832 
1833  /* Perform sorting of spool, and possibly a spool2 */
1834  sortmem = maintenance_work_mem / btshared->scantuplesortstates;
1835  _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1836  sharedsort2, sortmem, false);
1837 
1838 #ifdef BTREE_BUILD_STATS
1839  if (log_btree_build_stats)
1840  {
1841  ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1842  ResetUsage();
1843  }
1844 #endif /* BTREE_BUILD_STATS */
1845 
1846  index_close(indexRel, indexLockmode);
1847  table_close(heapRel, heapLockmode);
1848 }
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
int scantuplesortstates
Definition: nbtsort.c:120
#define PARALLEL_KEY_TUPLESORT
Definition: nbtsort.c:81
int LOCKMODE
Definition: lockdefs.h:26
Oid indexrelid
Definition: nbtsort.c:117
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3062
void ShowUsage(const char *title)
Definition: postgres.c:4601
bool isunique
Definition: nbtsort.c:118
bool isconcurrent
Definition: nbtsort.c:119
Relation heap
Definition: nbtsort.c:99
void ResetUsage(void)
Definition: postgres.c:4594
bool isunique
Definition: nbtsort.c:101
Oid heaprelid
Definition: nbtsort.c:116
#define RowExclusiveLock
Definition: lockdefs.h:38
#define PARALLEL_KEY_QUERY_TEXT
Definition: nbtsort.c:83
#define PARALLEL_KEY_TUPLESORT_SPOOL2
Definition: nbtsort.c:82
Relation index
Definition: nbtsort.c:100
const char * debug_query_string
Definition: postgres.c:88
void * palloc0(Size size)
Definition: mcxt.c:980
bool log_btree_build_stats
Definition: guc.c:529
static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
Definition: nbtsort.c:1863
int maintenance_work_mem
Definition: globals.c:122
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition: tuplesort.c:4355
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
#define AccessExclusiveLock
Definition: lockdefs.h:45
#define ShareLock
Definition: lockdefs.h:41
#define PARALLEL_KEY_BTREE_SHARED
Definition: nbtsort.c:80
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
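
The only memory arithmetic a worker does here is the even split of maintenance_work_mem across the scantuplesortstates participants. A standalone sketch with illustrative values (both numbers are assumptions, standing in for the GUC and the launched participant count):

#include <stdio.h>

int
main(void)
{
    int maintenance_work_mem = 65536;   /* KB; the stock default */
    int scantuplesortstates = 3;        /* leader + 2 workers, an assumption */
    int sortmem = maintenance_work_mem / scantuplesortstates;

    printf("each participant sorts in %d KB\n", sortmem);
    return 0;
}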

◆ _bt_parallel_estimate_shared()

static Size _bt_parallel_estimate_shared ( Relation  heap,
Snapshot  snapshot 
)
static

Definition at line 1654 of file nbtsort.c.

References add_size(), BUFFERALIGN, and table_parallelscan_estimate().

Referenced by _bt_begin_parallel().

1655 {
1656  /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1657  return add_size(BUFFERALIGN(sizeof(BTShared)),
1658  table_parallelscan_estimate(heap, snapshot));
1659 }
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:126
#define BUFFERALIGN(LEN)
Definition: c.h:693
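
The estimate pairs a buffer-aligned fixed struct with the table AM's variable-size scan descriptor. A standalone restatement of the arithmetic, with ALIGNOF_BUFFER assumed to be 32 as in stock builds and stand-in sizes for both parts:

#include <stdio.h>
#include <stddef.h>

#define ALIGNOF_BUFFER 32
#define TYPEALIGN(ALIGNVAL, LEN) \
    (((size_t) (LEN) + ((ALIGNVAL) - 1)) & ~((size_t) ((ALIGNVAL) - 1)))
#define BUFFERALIGN(LEN) TYPEALIGN(ALIGNOF_BUFFER, (LEN))

int
main(void)
{
    size_t btshared_size = 200;     /* stand-in for sizeof(BTShared) */
    size_t scan_size = 64;          /* stand-in for table_parallelscan_estimate() */

    /* pad the fixed struct to a buffer boundary, then append the scan area */
    printf("estimate: %zu bytes\n", BUFFERALIGN(btshared_size) + scan_size);
    return 0;
}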

◆ _bt_parallel_heapscan()

static double _bt_parallel_heapscan ( BTBuildState *  buildstate,
bool *  brokenhotchain 
)
static

Definition at line 1674 of file nbtsort.c.

References BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTShared::havedead, BTBuildState::havedead, BTShared::indtuples, BTBuildState::indtuples, BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, BTShared::reltuples, SpinLockAcquire, SpinLockRelease, WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().

1675 {
1676  BTShared *btshared = buildstate->btleader->btshared;
1677  int nparticipanttuplesorts;
1678  double reltuples;
1679 
1680  nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
1681  for (;;)
1682  {
1683  SpinLockAcquire(&btshared->mutex);
1684  if (btshared->nparticipantsdone == nparticipanttuplesorts)
1685  {
1686  buildstate->havedead = btshared->havedead;
1687  buildstate->indtuples = btshared->indtuples;
1688  *brokenhotchain = btshared->brokenhotchain;
1689  reltuples = btshared->reltuples;
1690  SpinLockRelease(&btshared->mutex);
1691  break;
1692  }
1693  SpinLockRelease(&btshared->mutex);
1694 
1694 
1695  ConditionVariableSleep(&btshared->workersdonecv,
1696  WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1697  }
1698 
1699  ConditionVariableCancelSleep();
1700 
1701  return reltuples;
1702 }
BTShared * btshared
Definition: nbtsort.c:202
int nparticipantsdone
Definition: nbtsort.c:154
bool havedead
Definition: nbtsort.c:217
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableCancelSleep(void)
double indtuples
Definition: nbtsort.c:226
bool brokenhotchain
Definition: nbtsort.c:158
#define SpinLockRelease(lock)
Definition: spin.h:64
BTLeader * btleader
Definition: nbtsort.c:233
double reltuples
Definition: nbtsort.c:155
double indtuples
Definition: nbtsort.c:157
slock_t mutex
Definition: nbtsort.c:136
bool havedead
Definition: nbtsort.c:156
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int nparticipanttuplesorts
Definition: nbtsort.c:190
ConditionVariable workersdonecv
Definition: nbtsort.c:128
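
The loop is a standard monitor pattern: re-check a shared counter under the lock, and otherwise sleep on a condition variable that workers signal when they finish. A minimal pthread model of the same pattern (an illustration only; PostgreSQL uses its own spinlocks and ConditionVariable primitives, not pthreads):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t workersdonecv = PTHREAD_COND_INITIALIZER;
static int nparticipantsdone = 0;
static double reltuples = 0;

#define NPARTICIPANTS 2

static void *
worker(void *arg)
{
    (void) arg;
    pthread_mutex_lock(&mutex);
    nparticipantsdone++;            /* publish this worker's results */
    reltuples += 1000.0;            /* stand-in for its tuple count */
    pthread_cond_signal(&workersdonecv);
    pthread_mutex_unlock(&mutex);
    return NULL;
}

int
main(void)
{
    pthread_t tids[NPARTICIPANTS];

    for (int i = 0; i < NPARTICIPANTS; i++)
        pthread_create(&tids[i], NULL, worker, NULL);

    /* leader: wait until every participant has reported in */
    pthread_mutex_lock(&mutex);
    while (nparticipantsdone < NPARTICIPANTS)
        pthread_cond_wait(&workersdonecv, &mutex);
    pthread_mutex_unlock(&mutex);

    printf("all workers done, reltuples=%.0f\n", reltuples);

    for (int i = 0; i < NPARTICIPANTS; i++)
        pthread_join(tids[i], NULL);
    return 0;
}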

◆ _bt_parallel_scan_and_sort()

static void _bt_parallel_scan_and_sort ( BTSpool *  btspool,
BTSpool *  btspool2,
BTShared *  btshared,
Sharedsort *  sharedsort,
Sharedsort *  sharedsort2,
int  sortmem,
bool  progress 
)
static

Definition at line 1863 of file nbtsort.c.

References _bt_build_callback(), BTShared::brokenhotchain, BTBuildState::btleader, BuildIndexInfo(), ConditionVariableSignal(), BTShared::havedead, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, BTSpool::index, BTShared::indtuples, BTBuildState::indtuples, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, Min, BTShared::mutex, SortCoordinateData::nParticipants, BTShared::nparticipantsdone, palloc0(), ParallelTableScanFromBTShared, BTShared::reltuples, SortCoordinateData::sharedsort, BTSpool::sortstate, SpinLockAcquire, SpinLockRelease, BTBuildState::spool, BTBuildState::spool2, table_beginscan_parallel(), table_index_build_scan(), tuplesort_begin_index_btree(), tuplesort_end(), tuplesort_performsort(), work_mem, and BTShared::workersdonecv.

Referenced by _bt_leader_participate_as_worker(), and _bt_parallel_build_main().

1866 {
1867  SortCoordinate coordinate;
1868  BTBuildState buildstate;
1869  TableScanDesc scan;
1870  double reltuples;
1871  IndexInfo *indexInfo;
1872 
1873  /* Initialize local tuplesort coordination state */
1874  coordinate = palloc0(sizeof(SortCoordinateData));
1875  coordinate->isWorker = true;
1876  coordinate->nParticipants = -1;
1877  coordinate->sharedsort = sharedsort;
1878 
1879  /* Begin "partial" tuplesort */
1880  btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1881  btspool->index,
1882  btspool->isunique,
1883  sortmem, coordinate,
1884  false);
1885 
1886  /*
1887  * Just as with serial case, there may be a second spool. If so, a
1888  * second, dedicated spool2 partial tuplesort is required.
1889  */
1890  if (btspool2)
1891  {
1892  SortCoordinate coordinate2;
1893 
1894  /*
1895  * We expect that the second one (for dead tuples) won't get very
1896  * full, so we give it only work_mem (unless sortmem is less for
1897  * worker). Worker processes are generally permitted to allocate
1898  * work_mem independently.
1899  */
1900  coordinate2 = palloc0(sizeof(SortCoordinateData));
1901  coordinate2->isWorker = true;
1902  coordinate2->nParticipants = -1;
1903  coordinate2->sharedsort = sharedsort2;
1904  btspool2->sortstate =
1905  tuplesort_begin_index_btree(btspool->heap, btspool->index, false,
1906  Min(sortmem, work_mem), coordinate2,
1907  false);
1908  }
1909 
1910  /* Fill in buildstate for _bt_build_callback() */
1911  buildstate.isunique = btshared->isunique;
1912  buildstate.havedead = false;
1913  buildstate.heap = btspool->heap;
1914  buildstate.spool = btspool;
1915  buildstate.spool2 = btspool2;
1916  buildstate.indtuples = 0;
1917  buildstate.btleader = NULL;
1918 
1919  /* Join parallel scan */
1920  indexInfo = BuildIndexInfo(btspool->index);
1921  indexInfo->ii_Concurrent = btshared->isconcurrent;
1922  scan = table_beginscan_parallel(btspool->heap,
1923  ParallelTableScanFromBTShared(btshared));
1924  reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
1925  true, progress, _bt_build_callback,
1926  (void *) &buildstate, scan);
1927 
1928  /*
1929  * Execute this worker's part of the sort.
1930  *
1931  * Unlike leader and serial cases, we cannot avoid calling
1932  * tuplesort_performsort() for spool2 if it ends up containing no dead
1933  * tuples (this is disallowed for workers by tuplesort).
1934  */
1935  tuplesort_performsort(btspool->sortstate);
1936  if (btspool2)
1937  tuplesort_performsort(btspool2->sortstate);
1938 
1939  /*
1940  * Done. Record ambuild statistics, and whether we encountered a broken
1941  * HOT chain.
1942  */
1943  SpinLockAcquire(&btshared->mutex);
1944  btshared->nparticipantsdone++;
1945  btshared->reltuples += reltuples;
1946  if (buildstate.havedead)
1947  btshared->havedead = true;
1948  btshared->indtuples += buildstate.indtuples;
1949  if (indexInfo->ii_BrokenHotChain)
1950  btshared->brokenhotchain = true;
1951  SpinLockRelease(&btshared->mutex);
1952 
1953  /* Notify leader */
1954  ConditionVariableSignal(&btshared->workersdonecv);
1955 
1956  /* We can end tuplesorts immediately */
1957  tuplesort_end(btspool->sortstate);
1958  if (btspool2)
1959  tuplesort_end(btspool2->sortstate);
1960 }
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1791
Relation heap
Definition: nbtsort.c:218
int nparticipantsdone
Definition: nbtsort.c:154
#define Min(x, y)
Definition: c.h:920
bool havedead
Definition: nbtsort.c:217
Sharedsort * sharedsort
Definition: tuplesort.h:55
bool isunique
Definition: nbtsort.c:118
#define ParallelTableScanFromBTShared(shared)
Definition: nbtsort.c:173
bool isconcurrent
Definition: nbtsort.c:119
Tuplesortstate * sortstate
Definition: nbtsort.c:98
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2312
BTSpool * spool
Definition: nbtsort.c:219
Relation heap
Definition: nbtsort.c:99
#define SpinLockAcquire(lock)
Definition: spin.h:62
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1531
bool isunique
Definition: nbtsort.c:101
void ConditionVariableSignal(ConditionVariable *cv)
BTSpool * spool2
Definition: nbtsort.c:225
double indtuples
Definition: nbtsort.c:226
bool isunique
Definition: nbtsort.c:216
bool ii_BrokenHotChain
Definition: execnodes.h:175
bool brokenhotchain
Definition: nbtsort.c:158
Relation index
Definition: nbtsort.c:100
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
Definition: tableam.c:161
int progress
Definition: pgbench.c:234
#define SpinLockRelease(lock)
Definition: spin.h:64
void * palloc0(Size size)
Definition: mcxt.c:980
static void _bt_build_callback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: nbtsort.c:593
BTLeader * btleader
Definition: nbtsort.c:233
int work_mem
Definition: globals.c:121
double reltuples
Definition: nbtsort.c:155
double indtuples
Definition: nbtsort.c:157
slock_t mutex
Definition: nbtsort.c:136
bool havedead
Definition: nbtsort.c:156
bool ii_Concurrent
Definition: execnodes.h:174
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:975
ConditionVariable workersdonecv
Definition: nbtsort.c:128
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1236
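
Note the spool2 budget above: a worker's dead-tuple sort is capped at work_mem, and further capped by the worker's overall slice when that slice is smaller. A standalone sketch of the cap, using the stock GUC defaults as assumed inputs:

#include <stdio.h>

#define Min(x, y) ((x) < (y) ? (x) : (y))

int
main(void)
{
    int work_mem = 4096;                /* KB; stock default */
    int maintenance_work_mem = 65536;   /* KB; stock default */
    int nparticipants = 3;              /* an assumption */
    int sortmem = maintenance_work_mem / nparticipants;

    /* spool2 never gets more than work_mem, nor more than the slice */
    printf("spool2 budget: %d KB\n", Min(sortmem, work_mem));
    return 0;
}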

◆ _bt_slideleft()

static void _bt_slideleft ( Page  page)
static

Definition at line 741 of file nbtsort.c.

References OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId, PageGetMaxOffsetNumber, and PageIsEmpty.

Referenced by _bt_uppershutdown().

742 {
743  OffsetNumber off;
744  OffsetNumber maxoff;
745  ItemId previi;
746  ItemId thisii;
747 
748  if (!PageIsEmpty(page))
749  {
750  maxoff = PageGetMaxOffsetNumber(page);
751  previi = PageGetItemId(page, P_HIKEY);
752  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
753  {
754  thisii = PageGetItemId(page, off);
755  *previi = *thisii;
756  previi = thisii;
757  }
758  ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
759  }
760 }
#define PageIsEmpty(page)
Definition: bufpage.h:222
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
#define P_FIRSTKEY
Definition: nbtree.h:243
struct ItemIdData ItemIdData
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define P_HIKEY
Definition: nbtree.h:242
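
The function compacts the line-pointer array in place: every ItemId moves one slot toward the start, and pd_lower shrinks by one ItemId. A standalone model of the same slide on a plain array (an illustration, not bufpage code):

#include <stdio.h>

int
main(void)
{
    int items[] = {101, 202, 303, 404}; /* slot 0 plays the P_HIKEY role */
    int nitems = 4;

    /* slide every entry one slot left, dropping the high-key slot */
    for (int off = 1; off < nitems; off++)
        items[off - 1] = items[off];
    nitems--;                           /* pd_lower shrinks by one ItemId */

    for (int i = 0; i < nitems; i++)
        printf("%d ", items[i]);
    printf("\n");                       /* prints: 202 303 404 */
    return 0;
}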

◆ _bt_sort_dedup_finish_pending()

static void _bt_sort_dedup_finish_pending ( BTWriteState *  wstate,
BTPageState *  state,
BTDedupState  dstate 
)
static

Definition at line 1084 of file nbtsort.c.

References _bt_buildadd(), _bt_form_posting(), Assert, BTDedupStateData::base, BTreeTupleGetPostingOffset(), BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::nhtids, BTDedupStateData::nitems, pfree(), and BTDedupStateData::phystupsize.

Referenced by _bt_load().

1086 {
1087  Assert(dstate->nitems > 0);
1088 
1089  if (dstate->nitems == 1)
1090  _bt_buildadd(wstate, state, dstate->base, 0);
1091  else
1092  {
1093  IndexTuple postingtuple;
1094  Size truncextra;
1095 
1096  /* form a tuple with a posting list */
1097  postingtuple = _bt_form_posting(dstate->base,
1098  dstate->htids,
1099  dstate->nhtids);
1100  /* Calculate posting list overhead */
1101  truncextra = IndexTupleSize(postingtuple) -
1102  BTreeTupleGetPostingOffset(postingtuple);
1103 
1104  _bt_buildadd(wstate, state, postingtuple, truncextra);
1105  pfree(postingtuple);
1106  }
1107 
1108  dstate->nhtids = 0;
1109  dstate->nitems = 0;
1110  dstate->phystupsize = 0;
1111 }
IndexTuple base
Definition: nbtree.h:756
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:601
ItemPointer htids
Definition: nbtree.h:761
void pfree(void *pointer)
Definition: mcxt.c:1056
Size phystupsize
Definition: nbtree.h:764
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
Definition: nbtsort.c:846
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:404
#define Assert(condition)
Definition: c.h:738
size_t Size
Definition: c.h:466
#define IndexTupleSize(itup)
Definition: itup.h:71
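
truncextra here is simply the byte count of the posting list, i.e. everything past the posting offset; suffix truncation can always shave that much off a pivot tuple built from this item. A standalone sketch of the arithmetic (the offset and TID size are illustrative stand-ins):

#include <stdio.h>

int
main(void)
{
    int nhtids = 5;             /* heap TIDs merged into the posting list */
    int itemptr_size = 6;       /* size of one TID, an assumption */
    int posting_offset = 16;    /* header + key bytes, an assumption */
    int tuple_size = posting_offset + nhtids * itemptr_size;

    /* bytes past the posting offset are pure posting-list overhead */
    int truncextra = tuple_size - posting_offset;

    printf("posting-list overhead: %d bytes\n", truncextra);    /* 30 */
    return 0;
}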

◆ _bt_sortaddtup()

static void _bt_sortaddtup ( Page  page,
Size  itemsize,
IndexTuple  itup,
OffsetNumber  itup_off 
)
static

Definition at line 776 of file nbtsort.c.

References BTreeTupleSetNAtts(), elog, ERROR, InvalidOffsetNumber, P_FIRSTKEY, P_ISLEAF, PageAddItem, PageGetSpecialPointer, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

780 {
781  BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
782  IndexTupleData trunctuple;
783 
784  if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
785  {
786  trunctuple = *itup;
787  trunctuple.t_info = sizeof(IndexTupleData);
788  BTreeTupleSetNAtts(&trunctuple, 0);
789  itup = &trunctuple;
790  itemsize = sizeof(IndexTupleData);
791  }
792 
793  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
794  false, false) == InvalidOffsetNumber)
795  elog(ERROR, "failed to add item to the index page");
796 }
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
static void BTreeTupleSetNAtts(IndexTuple itup, int natts)
Definition: nbtree.h:466
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define ERROR
Definition: elog.h:43
#define P_FIRSTKEY
Definition: nbtree.h:243
struct IndexTupleData IndexTupleData
#define InvalidOffsetNumber
Definition: off.h:26
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define elog(elevel,...)
Definition: elog.h:214
unsigned short t_info
Definition: itup.h:49
#define P_ISLEAF(opaque)
Definition: nbtree.h:214
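
The truncation above stores a header-only tuple whose size declares zero key attributes: on a non-leaf page the first real item acts as a minus-infinity sentinel. A loose standalone model with stand-in types (not the itup.h layout):

#include <stdio.h>

typedef struct { unsigned short t_info; } SketchHeader;
typedef struct { SketchHeader hdr; char key[16]; } SketchTuple;

int
main(void)
{
    SketchTuple itup = { { sizeof(SketchTuple) }, "abc" };
    SketchTuple trunctuple = itup;

    /* keep only the header: the item carries no key and so compares
     * lower than every real key on the page */
    trunctuple.hdr.t_info = sizeof(SketchHeader);

    printf("full item: %u bytes, truncated item: %u bytes\n",
           (unsigned) itup.hdr.t_info, (unsigned) trunctuple.hdr.t_info);
    return 0;
}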

◆ _bt_spool()

static void _bt_spool ( BTSpool *  btspool,
ItemPointer  self,
Datum *  values,
bool *  isnull 
)
static

Definition at line 534 of file nbtsort.c.

References BTSpool::index, BTSpool::sortstate, and tuplesort_putindextuplevalues().

Referenced by _bt_build_callback().

535 {
536  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
537  self, values, isnull);
538 }
Tuplesortstate * sortstate
Definition: nbtsort.c:98
Relation index
Definition: nbtsort.c:100
void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, ItemPointer self, Datum *values, bool *isnull)
Definition: tuplesort.c:1478
static Datum values[MAXATTR]
Definition: bootstrap.c:167

◆ _bt_spooldestroy()

static void _bt_spooldestroy ( BTSpool *  btspool)
static

Definition at line 524 of file nbtsort.c.

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by _bt_spools_heapscan(), and btbuild().

525 {
526  tuplesort_end(btspool->sortstate);
527  pfree(btspool);
528 }
Tuplesortstate * sortstate
Definition: nbtsort.c:98
void pfree(void *pointer)
Definition: mcxt.c:1056
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1236

◆ _bt_spools_heapscan()

static double _bt_spools_heapscan ( Relation  heap,
Relation  index,
BTBuildState *  buildstate,
IndexInfo *  indexInfo 
)
static

Definition at line 374 of file nbtsort.c.

References _bt_begin_parallel(), _bt_build_callback(), _bt_parallel_heapscan(), _bt_spooldestroy(), BTBuildState::btleader, BTSpool::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexInfo::ii_Unique, BTSpool::index, BTSpool::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, maintenance_work_mem, SortCoordinateData::nParticipants, BTLeader::nparticipanttuplesorts, palloc0(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN, PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, SortCoordinateData::sharedsort, BTLeader::sharedsort, BTLeader::sharedsort2, BTSpool::sortstate, BTBuildState::spool, BTBuildState::spool2, table_index_build_scan(), tuplesort_begin_index_btree(), val, and work_mem.

Referenced by btbuild().

376 {
377  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
378  SortCoordinate coordinate = NULL;
379  double reltuples = 0;
380 
381  /*
382  * We size the sort area as maintenance_work_mem rather than work_mem to
383  * speed index creation. This should be OK since a single backend can't
384  * run multiple index creations in parallel (see also: notes on
385  * parallelism and maintenance_work_mem below).
386  */
387  btspool->heap = heap;
388  btspool->index = index;
389  btspool->isunique = indexInfo->ii_Unique;
390 
391  /* Save as primary spool */
392  buildstate->spool = btspool;
393 
394  /* Report table scan phase started */
395  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
396  PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN);
397 
398  /* Attempt to launch parallel worker scan when required */
399  if (indexInfo->ii_ParallelWorkers > 0)
400  _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
401  indexInfo->ii_ParallelWorkers);
402 
403  /*
404  * If parallel build requested and at least one worker process was
405  * successfully launched, set up coordination state
406  */
407  if (buildstate->btleader)
408  {
409  coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
410  coordinate->isWorker = false;
411  coordinate->nParticipants =
412  buildstate->btleader->nparticipanttuplesorts;
413  coordinate->sharedsort = buildstate->btleader->sharedsort;
414  }
415 
416  /*
417  * Begin serial/leader tuplesort.
418  *
419  * In cases where parallelism is involved, the leader receives the same
420  * share of maintenance_work_mem as a serial sort (it is generally treated
421  * in the same way as a serial sort once we return). Parallel worker
422  * Tuplesortstates will have received only a fraction of
423  * maintenance_work_mem, though.
424  *
425  * We rely on the lifetime of the Leader Tuplesortstate almost not
426  * overlapping with any worker Tuplesortstate's lifetime. There may be
427  * some small overlap, but that's okay because we rely on leader
428  * Tuplesortstate only allocating a small, fixed amount of memory here.
429  * When its tuplesort_performsort() is called (by our caller), and
430  * significant amounts of memory are likely to be used, all workers must
431  * have already freed almost all memory held by their Tuplesortstates
432  * (they are about to go away completely, too). The overall effect is
433  * that maintenance_work_mem always represents an absolute high watermark
434  * on the amount of memory used by a CREATE INDEX operation, regardless of
435  * the use of parallelism or any other factor.
436  */
437  buildstate->spool->sortstate =
438  tuplesort_begin_index_btree(heap, index, buildstate->isunique,
439  maintenance_work_mem, coordinate,
440  false);
441 
442  /*
443  * If building a unique index, put dead tuples in a second spool to keep
444  * them out of the uniqueness check. We expect that the second spool (for
445  * dead tuples) won't get very full, so we give it only work_mem.
446  */
447  if (indexInfo->ii_Unique)
448  {
449  BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
450  SortCoordinate coordinate2 = NULL;
451 
452  /* Initialize secondary spool */
453  btspool2->heap = heap;
454  btspool2->index = index;
455  btspool2->isunique = false;
456  /* Save as secondary spool */
457  buildstate->spool2 = btspool2;
458 
459  if (buildstate->btleader)
460  {
461  /*
462  * Set up non-private state that is passed to
463  * tuplesort_begin_index_btree() about the basic high level
464  * coordination of a parallel sort.
465  */
466  coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
467  coordinate2->isWorker = false;
468  coordinate2->nParticipants =
469  buildstate->btleader->nparticipanttuplesorts;
470  coordinate2->sharedsort = buildstate->btleader->sharedsort2;
471  }
472 
473  /*
474  * We expect that the second one (for dead tuples) won't get very
475  * full, so we give it only work_mem
476  */
477  buildstate->spool2->sortstate =
478  tuplesort_begin_index_btree(heap, index, false, work_mem,
479  coordinate2, false);
480  }
481 
482  /* Fill spool using either serial or parallel heap scan */
483  if (!buildstate->btleader)
484  reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
485  _bt_build_callback, (void *) buildstate,
486  NULL);
487  else
488  reltuples = _bt_parallel_heapscan(buildstate,
489  &indexInfo->ii_BrokenHotChain);
490 
491  /*
492  * Set the progress target for the next phase. Reset the block number
493  * values set by table_index_build_scan
494  */
495  {
496  const int index[] = {
497  PROGRESS_CREATEIDX_TUPLES_TOTAL,
498  PROGRESS_SCAN_BLOCKS_DONE,
499  PROGRESS_SCAN_BLOCKS_TOTAL
500  };
501  const int64 val[] = {
502  buildstate->indtuples,
503  0, 0
504  };
505 
506  pgstat_progress_update_multi_param(3, index, val);
507  }
508 
509  /* okay, all heap tuples are spooled */
510  if (buildstate->spool2 && !buildstate->havedead)
511  {
512  /* spool2 turns out to be unnecessary */
513  _bt_spooldestroy(buildstate->spool2);
514  buildstate->spool2 = NULL;
515  }
516 
517  return reltuples;
518 }
static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Definition: nbtsort.c:1467
Sharedsort * sharedsort2
Definition: nbtsort.c:204
#define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN
Definition: nbtree.h:997
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3161
Sharedsort * sharedsort
Definition: nbtsort.c:203
bool havedead
Definition: nbtsort.c:217
Sharedsort * sharedsort
Definition: tuplesort.h:55
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:84
Tuplesortstate * sortstate
Definition: nbtsort.c:98
BTSpool * spool
Definition: nbtsort.c:219
Relation heap
Definition: nbtsort.c:99
static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain)
Definition: nbtsort.c:1674
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1531
bool isunique
Definition: nbtsort.c:101
BTSpool * spool2
Definition: nbtsort.c:225
double indtuples
Definition: nbtsort.c:226
bool isunique
Definition: nbtsort.c:216
bool ii_BrokenHotChain
Definition: execnodes.h:175
Relation index
Definition: nbtsort.c:100
void * palloc0(Size size)
Definition: mcxt.c:980
struct SortCoordinateData * SortCoordinate
Definition: tuplesort.h:58
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
static void _bt_build_callback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: nbtsort.c:593
BTLeader * btleader
Definition: nbtsort.c:233
int work_mem
Definition: globals.c:121
int maintenance_work_mem
Definition: globals.c:122
bool ii_Unique
Definition: execnodes.h:172
int nparticipanttuplesorts
Definition: nbtsort.c:190
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3183
int ii_ParallelWorkers
Definition: execnodes.h:176
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
bool ii_Concurrent
Definition: execnodes.h:174
static void _bt_spooldestroy(BTSpool *btspool)
Definition: nbtsort.c:524
#define PROGRESS_CREATEIDX_SUBPHASE
Definition: progress.h:83
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:975
long val
Definition: informix.c:664

◆ _bt_uppershutdown()

static void _bt_uppershutdown ( BTWriteState *  wstate,
BTPageState *  state 
)
static

Definition at line 1117 of file nbtsort.c.

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), BTScanInsertData::allequalimage, Assert, BTP_ROOT, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_lowkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, BTreeTupleGetNAtts, BTreeTupleSetDownLink(), BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, BTWriteState::inskey, P_LEFTMOST, P_NONE, PageGetSpecialPointer, palloc(), and pfree().

Referenced by _bt_load().

1118 {
1119  BTPageState *s;
1120  BlockNumber rootblkno = P_NONE;
1121  uint32 rootlevel = 0;
1122  Page metapage;
1123 
1124  /*
1125  * Each iteration of this loop completes one more level of the tree.
1126  */
1127  for (s = state; s != NULL; s = s->btps_next)
1128  {
1129  BlockNumber blkno;
1130  BTPageOpaque opaque;
1131 
1132  blkno = s->btps_blkno;
1133  opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
1134 
1135  /*
1136  * We have to link the last page on this level to somewhere.
1137  *
1138  * If we're at the top, it's the root, so attach it to the metapage.
1139  * Otherwise, add an entry for it to its parent using its low key.
1140  * This may cause the last page of the parent level to split, but
1141  * that's not a problem -- we haven't gotten to it yet.
1142  */
1143  if (s->btps_next == NULL)
1144  {
1145  opaque->btpo_flags |= BTP_ROOT;
1146  rootblkno = blkno;
1147  rootlevel = s->btps_level;
1148  }
1149  else
1150  {
1151  Assert((BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) <=
1152  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
1153  BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) > 0) ||
1154  P_LEFTMOST(opaque));
1155  Assert(BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) == 0 ||
1156  !P_LEFTMOST(opaque));
1157  BTreeTupleSetDownLink(s->btps_lowkey, blkno);
1158  _bt_buildadd(wstate, s->btps_next, s->btps_lowkey, 0);
1159  pfree(s->btps_lowkey);
1160  s->btps_lowkey = NULL;
1161  }
1162 
1163  /*
1164  * This is the rightmost page, so the ItemId array needs to be slid
1165  * back one slot. Then we can dump out the page.
1166  */
1167  _bt_slideleft(s->btps_page);
1168  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1169  s->btps_page = NULL; /* writepage freed the workspace */
1170  }
1171 
1172  /*
1173  * As the last step in the process, construct the metapage and make it
1174  * point to the new root (unless we had no data at all, in which case it's
1175  * set to point to "P_NONE"). This changes the index to the "valid" state
1176  * by filling in a valid magic number in the metapage.
1177  */
1178  metapage = (Page) palloc(BLCKSZ);
1179  _bt_initmetapage(metapage, rootblkno, rootlevel,
1180  wstate->inskey->allequalimage);
1181  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
1182 }
#define BTP_ROOT
Definition: nbtree.h:73
#define P_NONE
Definition: nbtree.h:206
bool allequalimage
Definition: nbtree.h:671
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:53
uint32 BlockNumber
Definition: block.h:31
BlockNumber btps_blkno
Definition: nbtsort.c:243
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:452
static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
Definition: nbtsort.c:649
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
Page btps_page
Definition: nbtsort.c:242
void pfree(void *pointer)
Definition: mcxt.c:1056
#define P_LEFTMOST(opaque)
Definition: nbtree.h:212
unsigned int uint32
Definition: c.h:367
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition: nbtree.h:437
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:455
#define BTREE_METAPAGE
Definition: nbtree.h:141
Relation index
Definition: nbtsort.c:258
uint32 btps_level
Definition: nbtsort.c:247
struct BTPageState * btps_next
Definition: nbtsort.c:249
BTScanInsert inskey
Definition: nbtsort.c:259
IndexTuple btps_lowkey
Definition: nbtsort.c:244
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
Definition: nbtsort.c:846
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
static void _bt_slideleft(Page page)
Definition: nbtsort.c:741
void * palloc(Size size)
Definition: mcxt.c:949
uint16 btpo_flags
Definition: nbtree.h:65
Pointer Page
Definition: bufpage.h:78
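
The shutdown walk visits one level per iteration, climbing btps_next links until the level with no parent, which becomes the root recorded in the metapage. A standalone model of that walk with stand-in structures:

#include <stdio.h>

typedef struct Level
{
    unsigned blkno;
    unsigned level;
    struct Level *parent;       /* plays the btps_next role */
} Level;

int
main(void)
{
    Level leaf = {1, 0, NULL};
    Level internal = {5, 1, NULL};
    Level root = {9, 2, NULL};
    unsigned rootblkno = 0;
    unsigned rootlevel = 0;

    leaf.parent = &internal;
    internal.parent = &root;

    for (Level *s = &leaf; s != NULL; s = s->parent)
    {
        if (s->parent == NULL)
        {
            /* topmost level: this page becomes the root */
            rootblkno = s->blkno;
            rootlevel = s->level;
        }
        /* else: add s's low key to s->parent, then write s's page out */
    }
    printf("metapage points at block %u, level %u\n", rootblkno, rootlevel);
    return 0;
}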

◆ btbuild()

IndexBuildResult* btbuild ( Relation  heap,
Relation  index,
IndexInfo *  indexInfo 
)

Definition at line 305 of file nbtsort.c.

References _bt_end_parallel(), _bt_leafbuild(), _bt_spooldestroy(), _bt_spools_heapscan(), BTBuildState::btleader, elog, ERROR, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexBuildResult::heap_tuples, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, BTBuildState::indtuples, BTBuildState::isunique, log_btree_build_stats, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

306 {
307  IndexBuildResult *result;
308  BTBuildState buildstate;
309  double reltuples;
310 
311 #ifdef BTREE_BUILD_STATS
312  if (log_btree_build_stats)
313  ResetUsage();
314 #endif /* BTREE_BUILD_STATS */
315 
316  buildstate.isunique = indexInfo->ii_Unique;
317  buildstate.havedead = false;
318  buildstate.heap = heap;
319  buildstate.spool = NULL;
320  buildstate.spool2 = NULL;
321  buildstate.indtuples = 0;
322  buildstate.btleader = NULL;
323 
324  /*
325  * We expect to be called exactly once for any index relation. If that's
326  * not the case, big trouble's what we have.
327  */
328  if (RelationGetNumberOfBlocks(index) != 0)
329  elog(ERROR, "index \"%s\" already contains data",
330  RelationGetRelationName(index));
331 
332  reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
333 
334  /*
335  * Finish the build by (1) completing the sort of the spool file, (2)
336  * inserting the sorted tuples into btree pages and (3) building the upper
337  * levels. Finally, it may also be necessary to end use of parallelism.
338  */
339  _bt_leafbuild(buildstate.spool, buildstate.spool2);
340  _bt_spooldestroy(buildstate.spool);
341  if (buildstate.spool2)
342  _bt_spooldestroy(buildstate.spool2);
343  if (buildstate.btleader)
344  _bt_end_parallel(buildstate.btleader);
345 
346  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
347 
348  result->heap_tuples = reltuples;
349  result->index_tuples = buildstate.indtuples;
350 
351 #ifdef BTREE_BUILD_STATS
352  if (log_btree_build_stats)
353  {
354  ShowUsage("BTREE BUILD STATS");
355  ResetUsage();
356  }
357 #endif /* BTREE_BUILD_STATS */
358 
359  return result;
360 }
Relation heap
Definition: nbtsort.c:218
void ShowUsage(const char *title)
Definition: postgres.c:4601
bool havedead
Definition: nbtsort.c:217
static void _bt_end_parallel(BTLeader *btleader)
Definition: nbtsort.c:1638
static void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
Definition: nbtsort.c:545
BTSpool * spool
Definition: nbtsort.c:219
void ResetUsage(void)
Definition: postgres.c:4594
#define ERROR
Definition: elog.h:43
BTSpool * spool2
Definition: nbtsort.c:225
double indtuples
Definition: nbtsort.c:226
bool isunique
Definition: nbtsort.c:216
#define RelationGetRelationName(relation)
Definition: rel.h:470
static double _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
Definition: nbtsort.c:374
bool log_btree_build_stats
Definition: guc.c:529
BTLeader * btleader
Definition: nbtsort.c:233
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:195
bool ii_Unique
Definition: execnodes.h:172
static void _bt_spooldestroy(BTSpool *btspool)
Definition: nbtsort.c:524
void * palloc(Size size)
Definition: mcxt.c:949
#define elog(elevel,...)
Definition: elog.h:214
double index_tuples
Definition: genam.h:33
double heap_tuples
Definition: genam.h:32
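
btbuild() reaches CREATE INDEX through the index access method routine: bthandler() (see "Referenced by" above) installs it as the ambuild callback. A hedged sketch of that wiring, with a hypothetical function name and most callbacks omitted; consult nbtree.c for the real handler:

#include "postgres.h"
#include "access/amapi.h"
#include "access/nbtree.h"
#include "fmgr.h"
#include "nodes/nodes.h"

/* illustrative stand-in for bthandler(), not the real one */
Datum
bthandler_sketch(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* btbuild is the ambuild callback invoked by CREATE INDEX */
    amroutine->ambuild = btbuild;
    /* ... the real bthandler() fills in many more callbacks ... */

    PG_RETURN_POINTER(amroutine);
}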