PostgreSQL Source Code  git master
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "commands/progress.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
Include dependency graph for nbtsort.c:

Go to the source code of this file.

Data Structures

struct  BTSpool
 
struct  BTShared
 
struct  BTLeader
 
struct  BTBuildState
 
struct  BTPageState
 
struct  BTWriteState
 

Macros

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)
 
#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)
 
#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xA000000000000005)
 
#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xA000000000000006)
 
#define ParallelTableScanFromBTShared(shared)    (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))
 

Typedefs

typedef struct BTSpool BTSpool
 
typedef struct BTShared BTShared
 
typedef struct BTLeader BTLeader
 
typedef struct BTBuildState BTBuildState
 
typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static double _bt_spools_heapscan (Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
 
static void _bt_spooldestroy (BTSpool *btspool)
 
static void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
static void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_build_callback (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static Page _bt_blnewpage (uint32 level)
 
static BTPageState * _bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page rightmostpage)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off, bool newfirstdataitem)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
 
static void _bt_sort_dedup_finish_pending (BTWriteState *wstate, BTPageState *state, BTDedupState dstate)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_begin_parallel (BTBuildState *buildstate, bool isconcurrent, int request)
 
static void _bt_end_parallel (BTLeader *btleader)
 
static Size _bt_parallel_estimate_shared (Relation heap, Snapshot snapshot)
 
static double _bt_parallel_heapscan (BTBuildState *buildstate, bool *brokenhotchain)
 
static void _bt_leader_participate_as_worker (BTBuildState *buildstate)
 
static void _bt_parallel_scan_and_sort (BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ PARALLEL_KEY_BTREE_SHARED

#define PARALLEL_KEY_BTREE_SHARED   UINT64CONST(0xA000000000000001)

Definition at line 68 of file nbtsort.c.

◆ PARALLEL_KEY_BUFFER_USAGE

#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xA000000000000006)

Definition at line 73 of file nbtsort.c.

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xA000000000000004)

Definition at line 71 of file nbtsort.c.

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xA000000000000002)

Definition at line 69 of file nbtsort.c.

◆ PARALLEL_KEY_TUPLESORT_SPOOL2

#define PARALLEL_KEY_TUPLESORT_SPOOL2   UINT64CONST(0xA000000000000003)

Definition at line 70 of file nbtsort.c.

◆ PARALLEL_KEY_WAL_USAGE

#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xA000000000000005)

Definition at line 72 of file nbtsort.c.

◆ ParallelTableScanFromBTShared

#define ParallelTableScanFromBTShared (   shared)     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared)))

Definition at line 165 of file nbtsort.c.
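
The macro relies on the layout that _bt_parallel_estimate_shared() and _bt_begin_parallel() set up: the ParallelTableScanDesc is stored in the same shm_toc chunk as BTShared, immediately after it at a BUFFERALIGN'd offset. A minimal illustrative sketch of that layout follows (not code from this file; heap, snapshot, and pcxt stand for the relation, snapshot, and ParallelContext already in scope in _bt_begin_parallel()):

/* Chunk sized as in _bt_parallel_estimate_shared(): BTShared plus the
 * AM-specific parallel scan descriptor, with BUFFERALIGN padding between. */
Size        estbtshared = BUFFERALIGN(sizeof(BTShared)) +
                          table_parallelscan_estimate(heap, snapshot);
BTShared   *btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
ParallelTableScanDesc pscan = ParallelTableScanFromBTShared(btshared);
table_parallelscan_initialize(heap, pscan, snapshot);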

Typedef Documentation

◆ BTBuildState

typedef struct BTBuildState BTBuildState

◆ BTLeader

typedef struct BTLeader BTLeader

◆ BTPageState

typedef struct BTPageState BTPageState

◆ BTShared

typedef struct BTShared BTShared

◆ BTSpool

typedef struct BTSpool BTSpool

◆ BTWriteState

typedef struct BTWriteState BTWriteState

Function Documentation

◆ _bt_begin_parallel()

static void _bt_begin_parallel ( BTBuildState *  buildstate,
bool  isconcurrent,
int  request 
)
static

Definition at line 1456 of file nbtsort.c.

1457 {
1458  ParallelContext *pcxt;
1459  int scantuplesortstates;
1460  Snapshot snapshot;
1461  Size estbtshared;
1462  Size estsort;
1463  BTShared *btshared;
1464  Sharedsort *sharedsort;
1465  Sharedsort *sharedsort2;
1466  BTSpool *btspool = buildstate->spool;
1467  BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1468  WalUsage *walusage;
1469  BufferUsage *bufferusage;
1470  bool leaderparticipates = true;
1471  int querylen;
1472 
1473 #ifdef DISABLE_LEADER_PARTICIPATION
1474  leaderparticipates = false;
1475 #endif
1476 
1477  /*
1478  * Enter parallel mode, and create context for parallel build of btree
1479  * index
1480  */
1481  EnterParallelMode();
1482  Assert(request > 0);
1483  pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
1484  request);
1485 
1486  scantuplesortstates = leaderparticipates ? request + 1 : request;
1487 
1488  /*
1489  * Prepare for scan of the base relation. In a normal index build, we use
1490  * SnapshotAny because we must retrieve all tuples and do our own time
1491  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1492  * concurrent build, we take a regular MVCC snapshot and index whatever's
1493  * live according to that.
1494  */
1495  if (!isconcurrent)
1496  snapshot = SnapshotAny;
1497  else
1498  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1499 
1500  /*
1501  * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and
1502  * PARALLEL_KEY_TUPLESORT tuplesort workspace
1503  */
1504  estbtshared = _bt_parallel_estimate_shared(btspool->heap, snapshot);
1505  shm_toc_estimate_chunk(&pcxt->estimator, estbtshared);
1506  estsort = tuplesort_estimate_shared(scantuplesortstates);
1507  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1508 
1509  /*
1510  * Unique case requires a second spool, and so we may have to account for
1511  * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
1512  */
1513  if (!btspool->isunique)
1514  shm_toc_estimate_keys(&pcxt->estimator, 2);
1515  else
1516  {
1517  shm_toc_estimate_chunk(&pcxt->estimator, estsort);
1518  shm_toc_estimate_keys(&pcxt->estimator, 3);
1519  }
1520 
1521  /*
1522  * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
1523  * and PARALLEL_KEY_BUFFER_USAGE.
1524  *
1525  * If there are no extensions loaded that care, we could skip this. We
1526  * have no way of knowing whether anyone's looking at pgWalUsage or
1527  * pgBufferUsage, so do it unconditionally.
1528  */
1529  shm_toc_estimate_chunk(&pcxt->estimator,
1530  mul_size(sizeof(WalUsage), pcxt->nworkers));
1531  shm_toc_estimate_keys(&pcxt->estimator, 1);
1532  shm_toc_estimate_chunk(&pcxt->estimator,
1533  mul_size(sizeof(BufferUsage), pcxt->nworkers));
1534  shm_toc_estimate_keys(&pcxt->estimator, 1);
1535 
1536  /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
1537  if (debug_query_string)
1538  {
1539  querylen = strlen(debug_query_string);
1540  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
1541  shm_toc_estimate_keys(&pcxt->estimator, 1);
1542  }
1543  else
1544  querylen = 0; /* keep compiler quiet */
1545 
1546  /* Everyone's had a chance to ask for space, so now create the DSM */
1547  InitializeParallelDSM(pcxt);
1548 
1549  /* If no DSM segment was available, back out (do serial build) */
1550  if (pcxt->seg == NULL)
1551  {
1552  if (IsMVCCSnapshot(snapshot))
1553  UnregisterSnapshot(snapshot);
1554  DestroyParallelContext(pcxt);
1555  ExitParallelMode();
1556  return;
1557  }
1558 
1559  /* Store shared build state, for which we reserved space */
1560  btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
1561  /* Initialize immutable state */
1562  btshared->heaprelid = RelationGetRelid(btspool->heap);
1563  btshared->indexrelid = RelationGetRelid(btspool->index);
1564  btshared->isunique = btspool->isunique;
1565  btshared->nulls_not_distinct = btspool->nulls_not_distinct;
1566  btshared->isconcurrent = isconcurrent;
1567  btshared->scantuplesortstates = scantuplesortstates;
1568  ConditionVariableInit(&btshared->workersdonecv);
1569  SpinLockInit(&btshared->mutex);
1570  /* Initialize mutable state */
1571  btshared->nparticipantsdone = 0;
1572  btshared->reltuples = 0.0;
1573  btshared->havedead = false;
1574  btshared->indtuples = 0.0;
1575  btshared->brokenhotchain = false;
1576  table_parallelscan_initialize(btspool->heap,
1577  ParallelTableScanFromBTShared(btshared),
1578  snapshot);
1579 
1580  /*
1581  * Store shared tuplesort-private state, for which we reserved space.
1582  * Then, initialize opaque state using tuplesort routine.
1583  */
1584  sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1585  tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1586  pcxt->seg);
1587 
1588  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared);
1589  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1590 
1591  /* Unique case requires a second spool, and associated shared state */
1592  if (!btspool->isunique)
1593  sharedsort2 = NULL;
1594  else
1595  {
1596  /*
1597  * Store additional shared tuplesort-private state, for which we
1598  * reserved space. Then, initialize opaque state using tuplesort
1599  * routine.
1600  */
1601  sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1602  tuplesort_initialize_shared(sharedsort2, scantuplesortstates,
1603  pcxt->seg);
1604 
1605  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
1606  }
1607 
1608  /* Store query string for workers */
1609  if (debug_query_string)
1610  {
1611  char *sharedquery;
1612 
1613  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1614  memcpy(sharedquery, debug_query_string, querylen + 1);
1615  shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1616  }
1617 
1618  /*
1619  * Allocate space for each worker's WalUsage and BufferUsage; no need to
1620  * initialize.
1621  */
1622  walusage = shm_toc_allocate(pcxt->toc,
1623  mul_size(sizeof(WalUsage), pcxt->nworkers));
1624  shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1625  bufferusage = shm_toc_allocate(pcxt->toc,
1626  mul_size(sizeof(BufferUsage), pcxt->nworkers));
1627  shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
1628 
1629  /* Launch workers, saving status for leader/caller */
1630  LaunchParallelWorkers(pcxt);
1631  btleader->pcxt = pcxt;
1632  btleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1633  if (leaderparticipates)
1634  btleader->nparticipanttuplesorts++;
1635  btleader->btshared = btshared;
1636  btleader->sharedsort = sharedsort;
1637  btleader->sharedsort2 = sharedsort2;
1638  btleader->snapshot = snapshot;
1639  btleader->walusage = walusage;
1640  btleader->bufferusage = bufferusage;
1641 
1642  /* If no workers were successfully launched, back out (do serial build) */
1643  if (pcxt->nworkers_launched == 0)
1644  {
1645  _bt_end_parallel(btleader);
1646  return;
1647  }
1648 
1649  /* Save leader state now that it's clear build will be parallel */
1650  buildstate->btleader = btleader;
1651 
1652  /* Join heap scan ourselves */
1653  if (leaderparticipates)
1654  _bt_leader_participate_as_worker(buildstate);
1655 
1656  /*
1657  * Caller needs to wait for all launched workers when we return. Make
1658  * sure that the failure-to-start case will not hang forever.
1659  */
1660  WaitForParallelWorkersToAttach(pcxt);
1661 }
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:204
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:551
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:928
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:166
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:671
size_t Size
Definition: c.h:594
void ConditionVariableInit(ConditionVariable *cv)
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc0(Size size)
Definition: mcxt.c:1257
#define PARALLEL_KEY_BUFFER_USAGE
Definition: nbtsort.c:73
#define ParallelTableScanFromBTShared(shared)
Definition: nbtsort.c:165
static void _bt_end_parallel(BTLeader *btleader)
Definition: nbtsort.c:1667
#define PARALLEL_KEY_TUPLESORT_SPOOL2
Definition: nbtsort.c:70
static Size _bt_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition: nbtsort.c:1693
#define PARALLEL_KEY_BTREE_SHARED
Definition: nbtsort.c:68
#define PARALLEL_KEY_TUPLESORT
Definition: nbtsort.c:69
#define PARALLEL_KEY_QUERY_TEXT
Definition: nbtsort.c:71
#define PARALLEL_KEY_WAL_USAGE
Definition: nbtsort.c:72
static void _bt_leader_participate_as_worker(BTBuildState *buildstate)
Definition: nbtsort.c:1747
const char * debug_query_string
Definition: postgres.c:85
#define RelationGetRelid(relation)
Definition: rel.h:504
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:197
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:817
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:775
#define SnapshotAny
Definition: snapmgr.h:33
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
#define SpinLockInit(lock)
Definition: spin.h:60
BTSpool * spool
Definition: nbtsort.c:214
BTLeader * btleader
Definition: nbtsort.c:228
ParallelContext * pcxt
Definition: nbtsort.c:174
BTShared * btshared
Definition: nbtsort.c:194
Sharedsort * sharedsort
Definition: nbtsort.c:195
Sharedsort * sharedsort2
Definition: nbtsort.c:196
int nparticipanttuplesorts
Definition: nbtsort.c:182
BufferUsage * bufferusage
Definition: nbtsort.c:199
Snapshot snapshot
Definition: nbtsort.c:197
WalUsage * walusage
Definition: nbtsort.c:198
slock_t mutex
Definition: nbtsort.c:128
bool isconcurrent
Definition: nbtsort.c:111
double indtuples
Definition: nbtsort.c:149
double reltuples
Definition: nbtsort.c:147
Oid heaprelid
Definition: nbtsort.c:107
bool brokenhotchain
Definition: nbtsort.c:150
bool isunique
Definition: nbtsort.c:109
int nparticipantsdone
Definition: nbtsort.c:146
ConditionVariable workersdonecv
Definition: nbtsort.c:120
int scantuplesortstates
Definition: nbtsort.c:112
Oid indexrelid
Definition: nbtsort.c:108
bool havedead
Definition: nbtsort.c:148
bool nulls_not_distinct
Definition: nbtsort.c:110
bool isunique
Definition: nbtsort.c:91
bool nulls_not_distinct
Definition: nbtsort.c:92
Relation heap
Definition: nbtsort.c:89
Relation index
Definition: nbtsort.c:90
dsm_segment * seg
Definition: parallel.h:43
shm_toc_estimator estimator
Definition: parallel.h:42
shm_toc * toc
Definition: parallel.h:45
int nworkers_launched
Definition: parallel.h:38
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition: tableam.c:155
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition: tuplesort.c:2973
Size tuplesort_estimate_shared(int nWorkers)
Definition: tuplesort.c:2952
void ExitParallelMode(void)
Definition: xact.c:1049
void EnterParallelMode(void)
Definition: xact.c:1036

References _bt_end_parallel(), _bt_leader_participate_as_worker(), _bt_parallel_estimate_shared(), Assert(), BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, BTLeader::bufferusage, ConditionVariableInit(), CreateParallelContext(), debug_query_string, DestroyParallelContext(), EnterParallelMode(), ParallelContext::estimator, ExitParallelMode(), GetTransactionSnapshot(), BTShared::havedead, BTSpool::heap, BTShared::heaprelid, BTSpool::index, BTShared::indexrelid, BTShared::indtuples, InitializeParallelDSM(), BTShared::isconcurrent, IsMVCCSnapshot, BTSpool::isunique, BTShared::isunique, LaunchParallelWorkers(), mul_size(), BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, BTSpool::nulls_not_distinct, BTShared::nulls_not_distinct, ParallelContext::nworkers, ParallelContext::nworkers_launched, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, PARALLEL_KEY_WAL_USAGE, ParallelTableScanFromBTShared, BTLeader::pcxt, RegisterSnapshot(), RelationGetRelid, BTShared::reltuples, BTShared::scantuplesortstates, ParallelContext::seg, BTLeader::sharedsort, BTLeader::sharedsort2, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), BTLeader::snapshot, SnapshotAny, SpinLockInit, BTBuildState::spool, table_parallelscan_initialize(), ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), UnregisterSnapshot(), WaitForParallelWorkersToAttach(), BTLeader::walusage, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().
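
For orientation, here is a condensed, illustrative sketch of the worker-side counterpart: each worker looks the leader's chunks back up under the same TOC keys and attaches to the shared tuplesort state. The full version is _bt_parallel_build_main(), documented further down this page; seg and toc are the arguments passed to that worker entry point.

BTShared   *btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
Sharedsort *sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
Sharedsort *sharedsort2 = btshared->isunique ?
    shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false) : NULL;
char       *sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);

tuplesort_attach_shared(sharedsort, seg);
if (sharedsort2)
    tuplesort_attach_shared(sharedsort2, seg);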

◆ _bt_blnewpage()

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 617 of file nbtsort.c.

618 {
619  Page page;
620  BTPageOpaque opaque;
621 
622  page = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
623 
624  /* Zero the page and set up standard page header info */
625  _bt_pageinit(page, BLCKSZ);
626 
627  /* Initialize BT opaque state */
628  opaque = BTPageGetOpaque(page);
629  opaque->btpo_prev = opaque->btpo_next = P_NONE;
630  opaque->btpo_level = level;
631  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
632  opaque->btpo_cycleid = 0;
633 
634  /* Make the P_HIKEY line pointer appear allocated */
635  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
636 
637  return page;
638 }
PageHeaderData * PageHeader
Definition: bufpage.h:170
Pointer Page
Definition: bufpage.h:78
struct ItemIdData ItemIdData
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1446
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:1129
#define BTP_LEAF
Definition: nbtree.h:76
#define BTPageGetOpaque(page)
Definition: nbtree.h:73
#define P_NONE
Definition: nbtree.h:212
#define PG_IO_ALIGN_SIZE
BlockNumber btpo_next
Definition: nbtree.h:65
BlockNumber btpo_prev
Definition: nbtree.h:64
uint16 btpo_flags
Definition: nbtree.h:67
uint32 btpo_level
Definition: nbtree.h:66
BTCycleId btpo_cycleid
Definition: nbtree.h:68

References _bt_pageinit(), BTP_LEAF, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, P_NONE, palloc_aligned(), and PG_IO_ALIGN_SIZE.

Referenced by _bt_buildadd(), and _bt_pagestate().

◆ _bt_blwritepage()

static void _bt_blwritepage ( BTWriteState *  wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 644 of file nbtsort.c.

645 {
646  /* XLOG stuff */
647  if (wstate->btws_use_wal)
648  {
649  /* We use the XLOG_FPI record type for this */
650  log_newpage(&wstate->index->rd_locator, MAIN_FORKNUM, blkno, page, true);
651  }
652 
653  /*
654  * If we have to write pages nonsequentially, fill in the space with
655  * zeroes until we come back and overwrite. This is not logically
656  * necessary on standard Unix filesystems (unwritten space will read as
657  * zeroes anyway), but it should help to avoid fragmentation. The dummy
658  * pages aren't WAL-logged though.
659  */
660  while (blkno > wstate->btws_pages_written)
661  {
662  if (!wstate->btws_zeropage)
663  wstate->btws_zeropage = (Page) palloc_aligned(BLCKSZ,
664  PG_IO_ALIGN_SIZE,
665  MCXT_ALLOC_ZERO);
666  /* don't set checksum for all-zero page */
667  smgrextend(RelationGetSmgr(wstate->index), MAIN_FORKNUM,
668  wstate->btws_pages_written++,
669  wstate->btws_zeropage,
670  true);
671  }
672 
673  PageSetChecksumInplace(page, blkno);
674 
675  /*
676  * Now write the page. There's no need for smgr to schedule an fsync for
677  * this write; we'll do it ourselves before ending the build.
678  */
679  if (blkno == wstate->btws_pages_written)
680  {
681  /* extending the file... */
682  smgrextend(RelationGetSmgr(wstate->index), MAIN_FORKNUM, blkno,
683  page, true);
684  wstate->btws_pages_written++;
685  }
686  else
687  {
688  /* overwriting a block we zero-filled before */
689  smgrwrite(RelationGetSmgr(wstate->index), MAIN_FORKNUM, blkno,
690  page, true);
691  }
692 
693  pfree(page);
694 }
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:18
void pfree(void *pointer)
Definition: mcxt.c:1456
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:572
@ MAIN_FORKNUM
Definition: relpath.h:50
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:584
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:498
Page btws_zeropage
Definition: nbtsort.c:258
Relation index
Definition: nbtsort.c:253
BlockNumber btws_pages_written
Definition: nbtsort.c:257
bool btws_use_wal
Definition: nbtsort.c:255
RelFileLocator rd_locator
Definition: rel.h:57
XLogRecPtr log_newpage(RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:1131

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, MCXT_ALLOC_ZERO, PageSetChecksumInplace(), palloc_aligned(), pfree(), PG_IO_ALIGN_SIZE, RelationData::rd_locator, RelationGetSmgr(), smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

◆ _bt_build_callback()

static void _bt_build_callback ( Relation  index,
ItemPointer  tid,
Datum *  values,
bool *  isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 588 of file nbtsort.c.

594 {
595  BTBuildState *buildstate = (BTBuildState *) state;
596 
597  /*
598  * insert the index tuple into the appropriate spool file for subsequent
599  * processing
600  */
601  if (tupleIsAlive || buildstate->spool2 == NULL)
602  _bt_spool(buildstate->spool, tid, values, isnull);
603  else
604  {
605  /* dead tuples are put into spool2 */
606  buildstate->havedead = true;
607  _bt_spool(buildstate->spool2, tid, values, isnull);
608  }
609 
610  buildstate->indtuples += 1;
611 }
static Datum values[MAXATTR]
Definition: bootstrap.c:156
static void _bt_spool(BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
Definition: nbtsort.c:533
bool havedead
Definition: nbtsort.c:212
BTSpool * spool2
Definition: nbtsort.c:220
double indtuples
Definition: nbtsort.c:221
Definition: regguts.h:323

References _bt_spool(), BTBuildState::havedead, BTBuildState::indtuples, BTBuildState::spool, BTBuildState::spool2, and values.

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().
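
The callback is not invoked directly by nbtsort.c; the table AM drives it once per tuple to be indexed during the heap scan. A rough sketch of how the serial path in _bt_spools_heapscan() (not reproduced on this page) hands it to the table AM; the exact argument values are an assumption based on the table AM build-scan API:

/* Illustrative only: table_index_build_scan() calls _bt_build_callback()
 * for every tuple to be indexed, passing the spool state as callback_state. */
reltuples = table_index_build_scan(heap, index, indexInfo,
                                   true,   /* allow_sync */
                                   true,   /* progress */
                                   _bt_build_callback,
                                   (void *) buildstate,
                                   NULL);  /* let the AM start its own scan */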

◆ _bt_buildadd()

static void _bt_buildadd ( BTWriteState *  wstate,
BTPageState *  state,
IndexTuple  itup,
Size  truncextra 
)
static

Definition at line 839 of file nbtsort.c.

841 {
842  Page npage;
843  BlockNumber nblkno;
844  OffsetNumber last_off;
845  Size last_truncextra;
846  Size pgspc;
847  Size itupsz;
848  bool isleaf;
849 
850  /*
851  * This is a handy place to check for cancel interrupts during the btree
852  * load phase of index creation.
853  */
854  CHECK_FOR_INTERRUPTS();
855 
856  npage = state->btps_page;
857  nblkno = state->btps_blkno;
858  last_off = state->btps_lastoff;
859  last_truncextra = state->btps_lastextra;
860  state->btps_lastextra = truncextra;
861 
862  pgspc = PageGetFreeSpace(npage);
863  itupsz = IndexTupleSize(itup);
864  itupsz = MAXALIGN(itupsz);
865  /* Leaf case has slightly different rules due to suffix truncation */
866  isleaf = (state->btps_level == 0);
867 
868  /*
869  * Check whether the new item can fit on a btree page on current level at
870  * all.
871  *
872  * Every newly built index will treat heap TID as part of the keyspace,
873  * which imposes the requirement that new high keys must occasionally have
874  * a heap TID appended within _bt_truncate(). That may leave a new pivot
875  * tuple one or two MAXALIGN() quantums larger than the original
876  * firstright tuple it's derived from. v4 deals with the problem by
877  * decreasing the limit on the size of tuples inserted on the leaf level
878  * by the same small amount. Enforce the new v4+ limit on the leaf level,
879  * and the old limit on internal levels, since pivot tuples may need to
880  * make use of the reserved space. This should never fail on internal
881  * pages.
882  */
883  if (unlikely(itupsz > BTMaxItemSize(npage)))
884  _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage,
885  itup);
886 
887  /*
888  * Check to see if current page will fit new item, with space left over to
889  * append a heap TID during suffix truncation when page is a leaf page.
890  *
891  * It is guaranteed that we can fit at least 2 non-pivot tuples plus a
892  * high key with heap TID when finishing off a leaf page, since we rely on
893  * _bt_check_third_page() rejecting oversized non-pivot tuples. On
894  * internal pages we can always fit 3 pivot tuples with larger internal
895  * page tuple limit (includes page high key).
896  *
897  * Most of the time, a page is only "full" in the sense that the soft
898  * fillfactor-wise limit has been exceeded. However, we must always leave
899  * at least two items plus a high key on each page before starting a new
900  * page. Disregard fillfactor and insert on "full" current page if we
901  * don't have the minimum number of items yet. (Note that we deliberately
902  * assume that suffix truncation neither enlarges nor shrinks new high key
903  * when applying soft limit, except when last tuple has a posting list.)
904  */
905  Assert(last_truncextra == 0 || isleaf);
906  if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) ||
907  (pgspc + last_truncextra < state->btps_full && last_off > P_FIRSTKEY))
908  {
909  /*
910  * Finish off the page and write it out.
911  */
912  Page opage = npage;
913  BlockNumber oblkno = nblkno;
914  ItemId ii;
915  ItemId hii;
916  IndexTuple oitup;
917 
918  /* Create new page of same level */
919  npage = _bt_blnewpage(state->btps_level);
920 
921  /* and assign it a page position */
922  nblkno = wstate->btws_pages_alloced++;
923 
924  /*
925  * We copy the last item on the page into the new page, and then
926  * rearrange the old page so that the 'last item' becomes its high key
927  * rather than a true data item. There had better be at least two
928  * items on the page already, else the page would be empty of useful
929  * data.
930  */
931  Assert(last_off > P_FIRSTKEY);
932  ii = PageGetItemId(opage, last_off);
933  oitup = (IndexTuple) PageGetItem(opage, ii);
934  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY,
935  !isleaf);
936 
937  /*
938  * Move 'last' into the high key position on opage. _bt_blnewpage()
939  * allocated empty space for a line pointer when opage was first
940  * created, so this is a matter of rearranging already-allocated space
941  * on page, and initializing high key line pointer. (Actually, leaf
942  * pages must also swap oitup with a truncated version of oitup, which
943  * is sometimes larger than oitup, though never by more than the space
944  * needed to append a heap TID.)
945  */
946  hii = PageGetItemId(opage, P_HIKEY);
947  *hii = *ii;
948  ItemIdSetUnused(ii); /* redundant */
949  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
950 
951  if (isleaf)
952  {
953  IndexTuple lastleft;
954  IndexTuple truncated;
955 
956  /*
957  * Truncate away any unneeded attributes from high key on leaf
958  * level. This is only done at the leaf level because downlinks
959  * in internal pages are either negative infinity items, or get
960  * their contents from copying from one level down. See also:
961  * _bt_split().
962  *
963  * We don't try to bias our choice of split point to make it more
964  * likely that _bt_truncate() can truncate away more attributes,
965  * whereas the split point used within _bt_split() is chosen much
966  * more delicately. Even still, the lastleft and firstright
967  * tuples passed to _bt_truncate() here are at least not fully
968  * equal to each other when deduplication is used, unless there is
969  * a large group of duplicates (also, unique index builds usually
970  * have few or no spool2 duplicates). When the split point is
971  * between two unequal tuples, _bt_truncate() will avoid including
972  * a heap TID in the new high key, which is the most important
973  * benefit of suffix truncation.
974  *
975  * Overwrite the old item with new truncated high key directly.
976  * oitup is already located at the physical beginning of tuple
977  * space, so this should directly reuse the existing tuple space.
978  */
979  ii = PageGetItemId(opage, OffsetNumberPrev(last_off));
980  lastleft = (IndexTuple) PageGetItem(opage, ii);
981 
982  Assert(IndexTupleSize(oitup) > last_truncextra);
983  truncated = _bt_truncate(wstate->index, lastleft, oitup,
984  wstate->inskey);
985  if (!PageIndexTupleOverwrite(opage, P_HIKEY, (Item) truncated,
986  IndexTupleSize(truncated)))
987  elog(ERROR, "failed to add high key to the index page");
988  pfree(truncated);
989 
990  /* oitup should continue to point to the page's high key */
991  hii = PageGetItemId(opage, P_HIKEY);
992  oitup = (IndexTuple) PageGetItem(opage, hii);
993  }
994 
995  /*
996  * Link the old page into its parent, using its low key. If we don't
997  * have a parent, we have to create one; this adds a new btree level.
998  */
999  if (state->btps_next == NULL)
1000  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
1001 
1002  Assert((BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) <=
1003  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
1004  BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) > 0) ||
1005  P_LEFTMOST(BTPageGetOpaque(opage)));
1006  Assert(BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) == 0 ||
1007  !P_LEFTMOST(BTPageGetOpaque(opage)));
1008  BTreeTupleSetDownLink(state->btps_lowkey, oblkno);
1009  _bt_buildadd(wstate, state->btps_next, state->btps_lowkey, 0);
1010  pfree(state->btps_lowkey);
1011 
1012  /*
1013  * Save a copy of the high key from the old page. It is also the low
1014  * key for the new page.
1015  */
1016  state->btps_lowkey = CopyIndexTuple(oitup);
1017 
1018  /*
1019  * Set the sibling links for both pages.
1020  */
1021  {
1022  BTPageOpaque oopaque = BTPageGetOpaque(opage);
1023  BTPageOpaque nopaque = BTPageGetOpaque(npage);
1024 
1025  oopaque->btpo_next = nblkno;
1026  nopaque->btpo_prev = oblkno;
1027  nopaque->btpo_next = P_NONE; /* redundant */
1028  }
1029 
1030  /*
1031  * Write out the old page. We never need to touch it again, so we can
1032  * free the opage workspace too.
1033  */
1034  _bt_blwritepage(wstate, opage, oblkno);
1035 
1036  /*
1037  * Reset last_off to point to new page
1038  */
1039  last_off = P_FIRSTKEY;
1040  }
1041 
1042  /*
1043  * By here, either original page is still the current page, or a new page
1044  * was created that became the current page. Either way, the current page
1045  * definitely has space for new item.
1046  *
1047  * If the new item is the first for its page, it must also be the first
1048  * item on its entire level. On later same-level pages, a low key for a
1049  * page will be copied from the prior page in the code above. Generate a
1050  * minus infinity low key here instead.
1051  */
1052  if (last_off == P_HIKEY)
1053  {
1054  Assert(state->btps_lowkey == NULL);
1055  state->btps_lowkey = palloc0(sizeof(IndexTupleData));
1056  state->btps_lowkey->t_info = sizeof(IndexTupleData);
1057  BTreeTupleSetNAtts(state->btps_lowkey, 0, false);
1058  }
1059 
1060  /*
1061  * Add the new item into the current page.
1062  */
1063  last_off = OffsetNumberNext(last_off);
1064  _bt_sortaddtup(npage, itupsz, itup, last_off,
1065  !isleaf && last_off == P_FIRSTKEY);
1066 
1067  state->btps_page = npage;
1068  state->btps_blkno = nblkno;
1069  state->btps_lastoff = last_off;
1070 }
uint32 BlockNumber
Definition: block.h:31
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
#define MAXALIGN(LEN)
Definition: c.h:800
#define unlikely(x)
Definition: c.h:300
#define ERROR
Definition: elog.h:39
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:547
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
struct IndexTupleData IndexTupleData
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define P_HIKEY
Definition: nbtree.h:367
#define P_LEFTMOST(opaque)
Definition: nbtree.h:218
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition: nbtree.h:562
#define BTMaxItemSize(page)
Definition: nbtree.h:164
#define P_FIRSTKEY
Definition: nbtree.h:368
static void BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid)
Definition: nbtree.h:595
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:577
static void _bt_sortaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off, bool newfirstdataitem)
Definition: nbtsort.c:769
static BTPageState * _bt_pagestate(BTWriteState *wstate, uint32 level)
Definition: nbtsort.c:701
static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
Definition: nbtsort.c:644
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, Size truncextra)
Definition: nbtsort.c:839
static Page _bt_blnewpage(uint32 level)
Definition: nbtsort.c:617
void _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, Page page, IndexTuple newtup)
Definition: nbtutils.c:2631
IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, BTScanInsert itup_key)
Definition: nbtutils.c:2205
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:523
Relation heap
Definition: nbtsort.c:252
BlockNumber btws_pages_alloced
Definition: nbtsort.c:256
BTScanInsert inskey
Definition: nbtsort.c:254

References _bt_blnewpage(), _bt_blwritepage(), _bt_check_third_page(), _bt_pagestate(), _bt_sortaddtup(), _bt_truncate(), Assert(), BTMaxItemSize, BTPageGetOpaque, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetNAtts, BTreeTupleSetDownLink(), BTreeTupleSetNAtts(), BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), elog(), ERROR, BTWriteState::heap, BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, IndexTupleSize, BTWriteState::inskey, ItemIdGetLength, ItemIdSetUnused, MAXALIGN, OffsetNumberNext, OffsetNumberPrev, P_FIRSTKEY, P_HIKEY, P_LEFTMOST, P_NONE, PageGetFreeSpace(), PageGetItem(), PageGetItemId(), PageIndexTupleOverwrite(), palloc0(), pfree(), and unlikely.

Referenced by _bt_load(), _bt_sort_dedup_finish_pending(), and _bt_uppershutdown().

◆ _bt_end_parallel()

static void _bt_end_parallel ( BTLeader *  btleader)
static

Definition at line 1667 of file nbtsort.c.

1668 {
1669  int i;
1670 
1671  /* Shutdown worker processes */
1672  WaitForParallelWorkersToFinish(btleader->pcxt);
1673 
1674  /*
1675  * Next, accumulate WAL usage. (This must wait for the workers to finish,
1676  * or we might get incomplete data.)
1677  */
1678  for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
1679  InstrAccumParallelQuery(&btleader->bufferusage[i], &btleader->walusage[i]);
1680 
1681  /* Free last reference to MVCC snapshot, if one was used */
1682  if (IsMVCCSnapshot(btleader->snapshot))
1683  UnregisterSnapshot(btleader->snapshot);
1684  DestroyParallelContext(btleader->pcxt);
1685  ExitParallelMode();
1686 }
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:774
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:218
int i
Definition: isn.c:73

References BTLeader::bufferusage, DestroyParallelContext(), ExitParallelMode(), i, InstrAccumParallelQuery(), IsMVCCSnapshot, ParallelContext::nworkers_launched, BTLeader::pcxt, BTLeader::snapshot, UnregisterSnapshot(), WaitForParallelWorkersToFinish(), and BTLeader::walusage.

Referenced by _bt_begin_parallel(), and btbuild().

◆ _bt_leader_participate_as_worker()

static void _bt_leader_participate_as_worker ( BTBuildState *  buildstate)
static

Definition at line 1747 of file nbtsort.c.

1748 {
1749  BTLeader *btleader = buildstate->btleader;
1750  BTSpool *leaderworker;
1751  BTSpool *leaderworker2;
1752  int sortmem;
1753 
1754  /* Allocate memory and initialize private spool */
1755  leaderworker = (BTSpool *) palloc0(sizeof(BTSpool));
1756  leaderworker->heap = buildstate->spool->heap;
1757  leaderworker->index = buildstate->spool->index;
1758  leaderworker->isunique = buildstate->spool->isunique;
1759  leaderworker->nulls_not_distinct = buildstate->spool->nulls_not_distinct;
1760 
1761  /* Initialize second spool, if required */
1762  if (!btleader->btshared->isunique)
1763  leaderworker2 = NULL;
1764  else
1765  {
1766  /* Allocate memory for worker's own private secondary spool */
1767  leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool));
1768 
1769  /* Initialize worker's own secondary spool */
1770  leaderworker2->heap = leaderworker->heap;
1771  leaderworker2->index = leaderworker->index;
1772  leaderworker2->isunique = false;
1773  }
1774 
1775  /*
1776  * Might as well use reliable figure when doling out maintenance_work_mem
1777  * (when requested number of workers were not launched, this will be
1778  * somewhat higher than it is for other workers).
1779  */
1780  sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts;
1781 
1782  /* Perform work common to all participants */
1783  _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
1784  btleader->sharedsort, btleader->sharedsort2,
1785  sortmem, true);
1786 
1787 #ifdef BTREE_BUILD_STATS
1788  if (log_btree_build_stats)
1789  {
1790  ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS");
1791  ResetUsage();
1792  }
1793 #endif /* BTREE_BUILD_STATS */
1794 }
int maintenance_work_mem
Definition: globals.c:127
bool log_btree_build_stats
Definition: guc_tables.c:504
static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress)
Definition: nbtsort.c:1922
void ShowUsage(const char *title)
Definition: postgres.c:4972
void ResetUsage(void)
Definition: postgres.c:4965

References _bt_parallel_scan_and_sort(), BTBuildState::btleader, BTLeader::btshared, BTSpool::heap, BTSpool::index, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, BTLeader::nparticipanttuplesorts, BTSpool::nulls_not_distinct, palloc0(), ResetUsage(), BTLeader::sharedsort, BTLeader::sharedsort2, ShowUsage(), and BTBuildState::spool.

Referenced by _bt_begin_parallel().

◆ _bt_leafbuild()

static void _bt_leafbuild ( BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 544 of file nbtsort.c.

545 {
546  BTWriteState wstate;
547 
548 #ifdef BTREE_BUILD_STATS
549  if (log_btree_build_stats)
550  {
551  ShowUsage("BTREE BUILD (Spool) STATISTICS");
552  ResetUsage();
553  }
554 #endif /* BTREE_BUILD_STATS */
555 
556  /* Execute the sort */
557  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
558  PROGRESS_BTREE_PHASE_PERFORMSORT_1);
559  tuplesort_performsort(btspool->sortstate);
560  if (btspool2)
561  {
562  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
563  PROGRESS_BTREE_PHASE_PERFORMSORT_2);
564  tuplesort_performsort(btspool2->sortstate);
565  }
566 
567  wstate.heap = btspool->heap;
568  wstate.index = btspool->index;
569  wstate.inskey = _bt_mkscankey(wstate.index, NULL);
570  /* _bt_mkscankey() won't set allequalimage without metapage */
571  wstate.inskey->allequalimage = _bt_allequalimage(wstate.index, true);
572  wstate.btws_use_wal = RelationNeedsWAL(wstate.index);
573 
574  /* reserve the metapage */
575  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
576  wstate.btws_pages_written = 0;
577  wstate.btws_zeropage = NULL; /* until needed */
578 
579  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
580  PROGRESS_BTREE_PHASE_LEAF_LOAD);
581  _bt_load(&wstate, btspool, btspool2);
582 }
void pgstat_progress_update_param(int index, int64 val)
#define PROGRESS_BTREE_PHASE_PERFORMSORT_2
Definition: nbtree.h:1121
#define PROGRESS_BTREE_PHASE_LEAF_LOAD
Definition: nbtree.h:1122
#define PROGRESS_BTREE_PHASE_PERFORMSORT_1
Definition: nbtree.h:1120
#define BTREE_METAPAGE
Definition: nbtree.h:148
static void _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
Definition: nbtsort.c:1186
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:90
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:2689
#define PROGRESS_CREATEIDX_SUBPHASE
Definition: progress.h:85
#define RelationNeedsWAL(relation)
Definition: rel.h:629
bool allequalimage
Definition: nbtree.h:792
Tuplesortstate * sortstate
Definition: nbtsort.c:88
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1382

References _bt_allequalimage(), _bt_load(), _bt_mkscankey(), BTScanInsertData::allequalimage, BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, BTWriteState::inskey, log_btree_build_stats, pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_LEAF_LOAD, PROGRESS_BTREE_PHASE_PERFORMSORT_1, PROGRESS_BTREE_PHASE_PERFORMSORT_2, PROGRESS_CREATEIDX_SUBPHASE, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, and tuplesort_performsort().

Referenced by btbuild().

◆ _bt_load()

static void _bt_load ( BTWriteState *  wstate,
BTSpool *  btspool,
BTSpool *  btspool2 
)
static

Definition at line 1186 of file nbtsort.c.

1187 {
1188  BTPageState *state = NULL;
1189  bool merge = (btspool2 != NULL);
1190  IndexTuple itup,
1191  itup2 = NULL;
1192  bool load1;
1193  TupleDesc tupdes = RelationGetDescr(wstate->index);
1194  int i,
1195  keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index);
1196  SortSupport sortKeys;
1197  int64 tuples_done = 0;
1198  bool deduplicate;
1199 
1200  deduplicate = wstate->inskey->allequalimage && !btspool->isunique &&
1201  BTGetDeduplicateItems(wstate->index);
1202 
1203  if (merge)
1204  {
1205  /*
1206  * Another BTSpool for dead tuples exists. Now we have to merge
1207  * btspool and btspool2.
1208  */
1209 
1210  /* the preparation of merge */
1211  itup = tuplesort_getindextuple(btspool->sortstate, true);
1212  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1213 
1214  /* Prepare SortSupport data for each column */
1215  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
1216 
1217  for (i = 0; i < keysz; i++)
1218  {
1219  SortSupport sortKey = sortKeys + i;
1220  ScanKey scanKey = wstate->inskey->scankeys + i;
1221  int16 strategy;
1222 
1223  sortKey->ssup_cxt = CurrentMemoryContext;
1224  sortKey->ssup_collation = scanKey->sk_collation;
1225  sortKey->ssup_nulls_first =
1226  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1227  sortKey->ssup_attno = scanKey->sk_attno;
1228  /* Abbreviation is not supported here */
1229  sortKey->abbreviate = false;
1230 
1231  Assert(sortKey->ssup_attno != 0);
1232 
1233  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1234  BTGreaterStrategyNumber : BTLessStrategyNumber;
1235 
1236  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
1237  }
1238 
1239  for (;;)
1240  {
1241  load1 = true; /* load BTSpool next ? */
1242  if (itup2 == NULL)
1243  {
1244  if (itup == NULL)
1245  break;
1246  }
1247  else if (itup != NULL)
1248  {
1249  int32 compare = 0;
1250 
1251  for (i = 1; i <= keysz; i++)
1252  {
1253  SortSupport entry;
1254  Datum attrDatum1,
1255  attrDatum2;
1256  bool isNull1,
1257  isNull2;
1258 
1259  entry = sortKeys + i - 1;
1260  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
1261  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
1262 
1263  compare = ApplySortComparator(attrDatum1, isNull1,
1264  attrDatum2, isNull2,
1265  entry);
1266  if (compare > 0)
1267  {
1268  load1 = false;
1269  break;
1270  }
1271  else if (compare < 0)
1272  break;
1273  }
1274 
1275  /*
1276  * If key values are equal, we sort on ItemPointer. This is
1277  * required for btree indexes, since heap TID is treated as an
1278  * implicit last key attribute in order to ensure that all
1279  * keys in the index are physically unique.
1280  */
1281  if (compare == 0)
1282  {
1283  compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid);
1284  Assert(compare != 0);
1285  if (compare > 0)
1286  load1 = false;
1287  }
1288  }
1289  else
1290  load1 = false;
1291 
1292  /* When we see first tuple, create first index page */
1293  if (state == NULL)
1294  state = _bt_pagestate(wstate, 0);
1295 
1296  if (load1)
1297  {
1298  _bt_buildadd(wstate, state, itup, 0);
1299  itup = tuplesort_getindextuple(btspool->sortstate, true);
1300  }
1301  else
1302  {
1303  _bt_buildadd(wstate, state, itup2, 0);
1304  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
1305  }
1306 
1307  /* Report progress */
1308  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1309  ++tuples_done);
1310  }
1311  pfree(sortKeys);
1312  }
1313  else if (deduplicate)
1314  {
1315  /* merge is unnecessary, deduplicate into posting lists */
1316  BTDedupState dstate;
1317 
1318  dstate = (BTDedupState) palloc(sizeof(BTDedupStateData));
1319  dstate->deduplicate = true; /* unused */
1320  dstate->nmaxitems = 0; /* unused */
1321  dstate->maxpostingsize = 0; /* set later */
1322  /* Metadata about base tuple of current pending posting list */
1323  dstate->base = NULL;
1324  dstate->baseoff = InvalidOffsetNumber; /* unused */
1325  dstate->basetupsize = 0;
1326  /* Metadata about current pending posting list TIDs */
1327  dstate->htids = NULL;
1328  dstate->nhtids = 0;
1329  dstate->nitems = 0;
1330  dstate->phystupsize = 0; /* unused */
1331  dstate->nintervals = 0; /* unused */
1332 
1333  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1334  true)) != NULL)
1335  {
1336  /* When we see first tuple, create first index page */
1337  if (state == NULL)
1338  {
1339  state = _bt_pagestate(wstate, 0);
1340 
1341  /*
1342  * Limit size of posting list tuples to 1/10 space we want to
1343  * leave behind on the page, plus space for final item's line
1344  * pointer. This is equal to the space that we'd like to
1345  * leave behind on each leaf page when fillfactor is 90,
1346  * allowing us to get close to fillfactor% space utilization
1347  * when there happen to be a great many duplicates. (This
1348  * makes higher leaf fillfactor settings ineffective when
1349  * building indexes that have many duplicates, but packing
1350  * leaf pages full with few very large tuples doesn't seem
1351  * like a useful goal.)
1352  */
1353  dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) -
1354  sizeof(ItemIdData);
1355  Assert(dstate->maxpostingsize <= BTMaxItemSize(state->btps_page) &&
1356  dstate->maxpostingsize <= INDEX_SIZE_MASK);
1357  dstate->htids = palloc(dstate->maxpostingsize);
1358 
1359  /* start new pending posting list with itup copy */
1360  _bt_dedup_start_pending(dstate, CopyIndexTuple(itup),
1361  InvalidOffsetNumber);
1362  }
1363  else if (_bt_keep_natts_fast(wstate->index, dstate->base,
1364  itup) > keysz &&
1365  _bt_dedup_save_htid(dstate, itup))
1366  {
1367  /*
1368  * Tuple is equal to base tuple of pending posting list. Heap
1369  * TID from itup has been saved in state.
1370  */
1371  }
1372  else
1373  {
1374  /*
1375  * Tuple is not equal to pending posting list tuple, or
1376  * _bt_dedup_save_htid() opted to not merge current item into
1377  * pending posting list.
1378  */
1379  _bt_sort_dedup_finish_pending(wstate, state, dstate);
1380  pfree(dstate->base);
1381 
1382  /* start new pending posting list with itup copy */
1383  _bt_dedup_start_pending(dstate, CopyIndexTuple(itup),
1384  InvalidOffsetNumber);
1385  }
1386 
1387  /* Report progress */
1388  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1389  ++tuples_done);
1390  }
1391 
1392  if (state)
1393  {
1394  /*
1395  * Handle the last item (there must be a last item when the
1396  * tuplesort returned one or more tuples)
1397  */
1398  _bt_sort_dedup_finish_pending(wstate, state, dstate);
1399  pfree(dstate->base);
1400  pfree(dstate->htids);
1401  }
1402 
1403  pfree(dstate);
1404  }
1405  else
1406  {
1407  /* merging and deduplication are both unnecessary */
1408  while ((itup = tuplesort_getindextuple(btspool->sortstate,
1409  true)) != NULL)
1410  {
1411  /* When we see first tuple, create first index page */
1412  if (state == NULL)
1413  state = _bt_pagestate(wstate, 0);
1414 
1415  _bt_buildadd(wstate, state, itup, 0);
1416 
1417  /* Report progress */
1418  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1419  ++tuples_done);
1420  }
1421  }
1422 
1423  /* Close down final pages and write the metapage */
1424  _bt_uppershutdown(wstate, state);
1425 
1426  /*
1427  * When we WAL-logged index pages, we must nonetheless fsync index files.
1428  * Since we're building outside shared buffers, a CHECKPOINT occurring
1429  * during the build has no way to flush the previously written data to
1430  * disk (indeed it won't know the index even exists). A crash later on
1431  * would replay WAL from the checkpoint, therefore it wouldn't replay our
1432  * earlier WAL entries. If we do not fsync those pages here, they might
1433  * still not be on disk when the crash occurs.
1434  */
1435  if (wstate->btws_use_wal)
1436  smgrimmedsync(RelationGetSmgr(wstate->index), MAIN_FORKNUM);
1437 }
#define MAXALIGN_DOWN(LEN)
Definition: c.h:812
signed short int16
Definition: c.h:482
signed int int32
Definition: c.h:483
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
static Datum index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
Definition: itup.h:117
#define INDEX_SIZE_MASK
Definition: itup.h:65
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void * palloc(Size size)
Definition: mcxt.c:1226
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:484
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:433
#define BTGetDeduplicateItems(relation)
Definition: nbtree.h:1108
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:1090
#define SK_BT_DESC
Definition: nbtree.h:1089
BTDedupStateData * BTDedupState
Definition: nbtree.h:898
static void _bt_sort_dedup_finish_pending(BTWriteState *wstate, BTPageState *state, BTDedupState dstate)
Definition: nbtsort.c:1080
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
Definition: nbtsort.c:1114
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2424
#define InvalidOffsetNumber
Definition: off.h:26
static pairingheap_node * merge(pairingheap *heap, pairingheap_node *a, pairingheap_node *b)
Definition: pairingheap.c:79
uintptr_t Datum
Definition: postgres.h:64
#define PROGRESS_CREATEIDX_TUPLES_DONE
Definition: progress.h:87
#define RelationGetDescr(relation)
Definition: rel.h:530
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:721
void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy, SortSupport ssup)
Definition: sortsupport.c:162
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
Size maxpostingsize
Definition: nbtree.h:875
ItemPointer htids
Definition: nbtree.h:883
bool deduplicate
Definition: nbtree.h:873
OffsetNumber baseoff
Definition: nbtree.h:879
Size basetupsize
Definition: nbtree.h:880
IndexTuple base
Definition: nbtree.h:878
Size phystupsize
Definition: nbtree.h:886
ScanKeyData scankeys[INDEX_MAX_KEYS]
Definition: nbtree.h:798
ItemPointerData t_tid
Definition: itup.h:37
int sk_flags
Definition: skey.h:66
Oid sk_collation
Definition: skey.h:70
AttrNumber sk_attno
Definition: skey.h:67
AttrNumber ssup_attno
Definition: sortsupport.h:81
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66
IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward)

References _bt_buildadd(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_keep_natts_fast(), _bt_pagestate(), _bt_sort_dedup_finish_pending(), _bt_uppershutdown(), SortSupportData::abbreviate, BTScanInsertData::allequalimage, ApplySortComparator(), Assert(), BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTGetDeduplicateItems, BTGreaterStrategyNumber, BTLessStrategyNumber, BTMaxItemSize, BTWriteState::btws_use_wal, compare(), CopyIndexTuple(), CurrentMemoryContext, BTDedupStateData::deduplicate, BTDedupStateData::htids, i, BTWriteState::index, index_getattr(), INDEX_SIZE_MASK, IndexRelationGetNumberOfKeyAttributes, BTWriteState::inskey, InvalidOffsetNumber, BTSpool::isunique, ItemPointerCompare(), MAIN_FORKNUM, MAXALIGN_DOWN, BTDedupStateData::maxpostingsize, merge(), BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, palloc(), palloc0(), pfree(), pgstat_progress_update_param(), BTDedupStateData::phystupsize, PrepareSortSupportFromIndexRel(), PROGRESS_CREATEIDX_TUPLES_DONE, RelationGetDescr, RelationGetSmgr(), BTScanInsertData::scankeys, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, IndexTupleData::t_tid, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

◆ _bt_pagestate()

static BTPageState * _bt_pagestate ( BTWriteState *  wstate,
uint32  level 
)
static

Definition at line 701 of file nbtsort.c.

702 {
703  BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
704 
705  /* create initial page for level */
706  state->btps_page = _bt_blnewpage(level);
707 
708  /* and assign it a page position */
709  state->btps_blkno = wstate->btws_pages_alloced++;
710 
711  state->btps_lowkey = NULL;
712  /* initialize lastoff so first item goes into P_FIRSTKEY */
713  state->btps_lastoff = P_HIKEY;
714  state->btps_lastextra = 0;
715  state->btps_level = level;
716  /* set "full" threshold based on level. See notes at head of file. */
717  if (level > 0)
718  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
719  else
720  state->btps_full = BTGetTargetPageFreeSpace(wstate->index);
721 
722  /* no parent level, yet */
723  state->btps_next = NULL;
724 
725  return state;
726 }
#define BTGetTargetPageFreeSpace(relation)
Definition: nbtree.h:1106
#define BTREE_NONLEAF_FILLFACTOR
Definition: nbtree.h:201

References _bt_blnewpage(), BTGetTargetPageFreeSpace, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, P_HIKEY, and palloc0().

Referenced by _bt_buildadd(), and _bt_load().

◆ _bt_parallel_build_main()

void _bt_parallel_build_main ( dsm_segment *  seg,
shm_toc *  toc 
)

Definition at line 1800 of file nbtsort.c.

1801 {
1802  char *sharedquery;
1803  BTSpool *btspool;
1804  BTSpool *btspool2;
1805  BTShared *btshared;
1806  Sharedsort *sharedsort;
1807  Sharedsort *sharedsort2;
1808  Relation heapRel;
1809  Relation indexRel;
1810  LOCKMODE heapLockmode;
1811  LOCKMODE indexLockmode;
1812  WalUsage *walusage;
1813  BufferUsage *bufferusage;
1814  int sortmem;
1815 
1816 #ifdef BTREE_BUILD_STATS
1817  if (log_btree_build_stats)
1818  ResetUsage();
1819 #endif /* BTREE_BUILD_STATS */
1820 
1821  /*
1822  * The only possible status flag that can be set to the parallel worker is
1823  * PROC_IN_SAFE_IC.
1824  */
1825  Assert((MyProc->statusFlags == 0) ||
1826  (MyProc->statusFlags == PROC_IN_SAFE_IC));
1827 
1828  /* Set debug_query_string for individual workers first */
1829  sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
1830  debug_query_string = sharedquery;
1831 
1832  /* Report the query string from leader */
1833  pgstat_report_activity(STATE_RUNNING, debug_query_string);
1834 
1835  /* Look up nbtree shared state */
1836  btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false);
1837 
1838  /* Open relations using lock modes known to be obtained by index.c */
1839  if (!btshared->isconcurrent)
1840  {
1841  heapLockmode = ShareLock;
1842  indexLockmode = AccessExclusiveLock;
1843  }
1844  else
1845  {
1846  heapLockmode = ShareUpdateExclusiveLock;
1847  indexLockmode = RowExclusiveLock;
1848  }
1849 
1850  /* Open relations within worker */
1851  heapRel = table_open(btshared->heaprelid, heapLockmode);
1852  indexRel = index_open(btshared->indexrelid, indexLockmode);
1853 
1854  /* Initialize worker's own spool */
1855  btspool = (BTSpool *) palloc0(sizeof(BTSpool));
1856  btspool->heap = heapRel;
1857  btspool->index = indexRel;
1858  btspool->isunique = btshared->isunique;
1859  btspool->nulls_not_distinct = btshared->nulls_not_distinct;
1860 
1861  /* Look up shared state private to tuplesort.c */
1862  sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
1863  tuplesort_attach_shared(sharedsort, seg);
1864  if (!btshared->isunique)
1865  {
1866  btspool2 = NULL;
1867  sharedsort2 = NULL;
1868  }
1869  else
1870  {
1871  /* Allocate memory for worker's own private secondary spool */
1872  btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
1873 
1874  /* Initialize worker's own secondary spool */
1875  btspool2->heap = btspool->heap;
1876  btspool2->index = btspool->index;
1877  btspool2->isunique = false;
1878  /* Look up shared state private to tuplesort.c */
1879  sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false);
1880  tuplesort_attach_shared(sharedsort2, seg);
1881  }
1882 
1883  /* Prepare to track buffer usage during parallel execution */
1884  InstrStartParallelQuery();
1885 
1886  /* Perform sorting of spool, and possibly a spool2 */
1887  sortmem = maintenance_work_mem / btshared->scantuplesortstates;
1888  _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
1889  sharedsort2, sortmem, false);
1890 
1891  /* Report WAL/buffer usage during parallel execution */
1892  bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
1893  walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
1894  InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
1895  &walusage[ParallelWorkerNumber]);
1896 
1897 #ifdef BTREE_BUILD_STATS
1898  if (log_btree_build_stats)
1899  {
1900  ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS");
1901  ResetUsage();
1902  }
1903 #endif /* BTREE_BUILD_STATS */
1904 
1905  index_close(indexRel, indexLockmode);
1906  table_close(heapRel, heapLockmode);
1907 }
int ParallelWorkerNumber
Definition: parallel.c:114
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:208
void InstrStartParallelQuery(void)
Definition: instrument.c:200
int LOCKMODE
Definition: lockdefs.h:26
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define ShareLock
Definition: lockdefs.h:40
#define RowExclusiveLock
Definition: lockdefs.h:38
#define PROC_IN_SAFE_IC
Definition: proc.h:58
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
PGPROC * MyProc
Definition: proc.c:66
uint8 statusFlags
Definition: proc.h:233
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition: tuplesort.c:2996

References _bt_parallel_scan_and_sort(), AccessExclusiveLock, Assert(), debug_query_string, BTSpool::heap, BTShared::heaprelid, BTSpool::index, index_close(), index_open(), BTShared::indexrelid, InstrEndParallelQuery(), InstrStartParallelQuery(), BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, log_btree_build_stats, maintenance_work_mem, MyProc, BTSpool::nulls_not_distinct, BTShared::nulls_not_distinct, palloc0(), PARALLEL_KEY_BTREE_SHARED, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_TUPLESORT_SPOOL2, PARALLEL_KEY_WAL_USAGE, ParallelWorkerNumber, pgstat_report_activity(), PROC_IN_SAFE_IC, ResetUsage(), RowExclusiveLock, BTShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), ShowUsage(), STATE_RUNNING, PGPROC::statusFlags, table_close(), table_open(), and tuplesort_attach_shared().
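
Each worker sorts with an even share of the build's memory budget rather than a full maintenance_work_mem of its own, per the "sortmem = maintenance_work_mem / ..." line above. A standalone sketch of that arithmetic (the kilobyte figures are assumed example values, not taken from this page):

#include <stdio.h>

int
main(void)
{
    int maintenance_work_mem = 65536;   /* assumed: 64MB expressed in kB */
    int scantuplesortstates = 3;        /* assumed: leader plus two workers */

    /* same division as in the listing above */
    int sortmem = maintenance_work_mem / scantuplesortstates;

    printf("each participant tuplesort gets %d kB\n", sortmem);    /* 21845 kB */
    return 0;
}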

◆ _bt_parallel_estimate_shared()

static Size _bt_parallel_estimate_shared ( Relation  heap,
Snapshot  snapshot 
)
static

Definition at line 1693 of file nbtsort.c.

1694 {
1695  /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1696  return add_size(BUFFERALIGN(sizeof(BTShared)),
1697  table_parallelscan_estimate(heap, snapshot));
1698 }
#define BUFFERALIGN(LEN)
Definition: c.h:802
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:140

References add_size(), BUFFERALIGN, and table_parallelscan_estimate().

Referenced by _bt_begin_parallel().
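
BUFFERALIGN() rounds the BTShared size up to a buffer-aligned boundary before the parallel table-scan estimate is added (see the "c.f. shm_toc_allocate" comment in the listing). A standalone sketch of that rounding follows; the 32-byte alignment and the struct size are assumed example values, and PostgreSQL takes the real alignment from ALIGNOF_BUFFER in c.h:

#include <stdint.h>
#include <stdio.h>

/* same shape as PostgreSQL's TYPEALIGN() macro in c.h */
#define TYPEALIGN(ALIGNVAL, LEN) \
    (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1)))

int
main(void)
{
    size_t shared_size = 441;   /* assumed sizeof(BTShared), for illustration */

    /* round up to the next multiple of the assumed buffer alignment */
    printf("aligned size = %zu\n", (size_t) TYPEALIGN(32, shared_size));    /* 448 */
    return 0;
}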

◆ _bt_parallel_heapscan()

static double _bt_parallel_heapscan ( BTBuildState * buildstate,
bool * brokenhotchain 
)
static

Definition at line 1713 of file nbtsort.c.

1714 {
1715  BTShared *btshared = buildstate->btleader->btshared;
1716  int nparticipanttuplesorts;
1717  double reltuples;
1718 
1719  nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts;
1720  for (;;)
1721  {
1722  SpinLockAcquire(&btshared->mutex);
1723  if (btshared->nparticipantsdone == nparticipanttuplesorts)
1724  {
1725  buildstate->havedead = btshared->havedead;
1726  buildstate->indtuples = btshared->indtuples;
1727  *brokenhotchain = btshared->brokenhotchain;
1728  reltuples = btshared->reltuples;
1729  SpinLockRelease(&btshared->mutex);
1730  break;
1731  }
1732  SpinLockRelease(&btshared->mutex);
1733 
1734  ConditionVariableSleep(&btshared->workersdonecv,
1735  WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1736  }
1737 
1738  ConditionVariableCancelSleep();
1739 
1740  return reltuples;
1741 }
bool ConditionVariableCancelSleep(void)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62

References BTShared::brokenhotchain, BTBuildState::btleader, BTLeader::btshared, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTShared::havedead, BTBuildState::havedead, BTShared::indtuples, BTBuildState::indtuples, BTShared::mutex, BTShared::nparticipantsdone, BTLeader::nparticipanttuplesorts, BTShared::reltuples, SpinLockAcquire, SpinLockRelease, and BTShared::workersdonecv.

Referenced by _bt_spools_heapscan().
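
The loop above is the usual "re-check a shared counter under a lock, then sleep on a condition variable" pattern. A standalone POSIX-threads analogue (not PostgreSQL's spinlock/ConditionVariable primitives; the thread count and sleep are arbitrary illustration values):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NPARTICIPANTS 3

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t workersdonecv = PTHREAD_COND_INITIALIZER;
static int nparticipantsdone = 0;

static void *
worker(void *arg)
{
    (void) arg;
    sleep(1);                   /* stand-in for the worker's scan-and-sort */
    pthread_mutex_lock(&mutex);
    nparticipantsdone++;        /* like btshared->nparticipantsdone++ */
    pthread_mutex_unlock(&mutex);
    pthread_cond_signal(&workersdonecv);    /* like ConditionVariableSignal() */
    return NULL;
}

int
main(void)
{
    pthread_t threads[NPARTICIPANTS];

    for (int i = 0; i < NPARTICIPANTS; i++)
        pthread_create(&threads[i], NULL, worker, NULL);

    /* leader side: wait until every participant has reported in */
    pthread_mutex_lock(&mutex);
    while (nparticipantsdone < NPARTICIPANTS)
        pthread_cond_wait(&workersdonecv, &mutex);
    pthread_mutex_unlock(&mutex);
    printf("all %d participants done\n", NPARTICIPANTS);

    for (int i = 0; i < NPARTICIPANTS; i++)
        pthread_join(threads[i], NULL);
    return 0;
}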

◆ _bt_parallel_scan_and_sort()

static void _bt_parallel_scan_and_sort ( BTSpool * btspool,
BTSpool * btspool2,
BTShared * btshared,
Sharedsort * sharedsort,
Sharedsort * sharedsort2,
int  sortmem,
bool  progress 
)
static

Definition at line 1922 of file nbtsort.c.

1925 {
1926  SortCoordinate coordinate;
1927  BTBuildState buildstate;
1928  TableScanDesc scan;
1929  double reltuples;
1930  IndexInfo *indexInfo;
1931 
1932  /* Initialize local tuplesort coordination state */
1933  coordinate = palloc0(sizeof(SortCoordinateData));
1934  coordinate->isWorker = true;
1935  coordinate->nParticipants = -1;
1936  coordinate->sharedsort = sharedsort;
1937 
1938  /* Begin "partial" tuplesort */
1939  btspool->sortstate = tuplesort_begin_index_btree(btspool->heap,
1940  btspool->index,
1941  btspool->isunique,
1942  btspool->nulls_not_distinct,
1943  sortmem, coordinate,
1944  TUPLESORT_NONE);
1945 
1946  /*
1947  * Just as with serial case, there may be a second spool. If so, a
1948  * second, dedicated spool2 partial tuplesort is required.
1949  */
1950  if (btspool2)
1951  {
1952  SortCoordinate coordinate2;
1953 
1954  /*
1955  * We expect that the second one (for dead tuples) won't get very
1956  * full, so we give it only work_mem (unless sortmem is less for
1957  * worker). Worker processes are generally permitted to allocate
1958  * work_mem independently.
1959  */
1960  coordinate2 = palloc0(sizeof(SortCoordinateData));
1961  coordinate2->isWorker = true;
1962  coordinate2->nParticipants = -1;
1963  coordinate2->sharedsort = sharedsort2;
1964  btspool2->sortstate =
1965  tuplesort_begin_index_btree(btspool->heap, btspool->index, false, false,
1966  Min(sortmem, work_mem), coordinate2,
1967  false);
1968  }
1969 
1970  /* Fill in buildstate for _bt_build_callback() */
1971  buildstate.isunique = btshared->isunique;
1972  buildstate.nulls_not_distinct = btshared->nulls_not_distinct;
1973  buildstate.havedead = false;
1974  buildstate.heap = btspool->heap;
1975  buildstate.spool = btspool;
1976  buildstate.spool2 = btspool2;
1977  buildstate.indtuples = 0;
1978  buildstate.btleader = NULL;
1979 
1980  /* Join parallel scan */
1981  indexInfo = BuildIndexInfo(btspool->index);
1982  indexInfo->ii_Concurrent = btshared->isconcurrent;
1983  scan = table_beginscan_parallel(btspool->heap,
1984  ParallelTableScanFromBTShared(btshared));
1985  reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
1986  true, progress, _bt_build_callback,
1987  (void *) &buildstate, scan);
1988 
1989  /* Execute this worker's part of the sort */
1990  if (progress)
1991  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
1992  PROGRESS_BTREE_PHASE_PERFORMSORT_1);
1993  tuplesort_performsort(btspool->sortstate);
1994  if (btspool2)
1995  {
1996  if (progress)
1997  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
1998  PROGRESS_BTREE_PHASE_PERFORMSORT_2);
1999  tuplesort_performsort(btspool2->sortstate);
2000  }
2001 
2002  /*
2003  * Done. Record ambuild statistics, and whether we encountered a broken
2004  * HOT chain.
2005  */
2006  SpinLockAcquire(&btshared->mutex);
2007  btshared->nparticipantsdone++;
2008  btshared->reltuples += reltuples;
2009  if (buildstate.havedead)
2010  btshared->havedead = true;
2011  btshared->indtuples += buildstate.indtuples;
2012  if (indexInfo->ii_BrokenHotChain)
2013  btshared->brokenhotchain = true;
2014  SpinLockRelease(&btshared->mutex);
2015 
2016  /* Notify leader */
2017  ConditionVariableSignal(&btshared->workersdonecv);
2018 
2019  /* We can end tuplesorts immediately */
2020  tuplesort_end(btspool->sortstate);
2021  if (btspool2)
2022  tuplesort_end(btspool2->sortstate);
2023 }
#define Min(x, y)
Definition: c.h:993
void ConditionVariableSignal(ConditionVariable *cv)
int work_mem
Definition: globals.c:125
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2427
static void _bt_build_callback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: nbtsort.c:588
int progress
Definition: pgbench.c:261
bool isunique
Definition: nbtsort.c:210
bool nulls_not_distinct
Definition: nbtsort.c:211
Relation heap
Definition: nbtsort.c:213
bool ii_BrokenHotChain
Definition: execnodes.h:197
bool ii_Concurrent
Definition: execnodes.h:196
Sharedsort * sharedsort
Definition: tuplesort.h:57
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition: tableam.c:175
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1772
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:969
#define TUPLESORT_NONE
Definition: tuplesort.h:92
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, bool uniqueNullsNotDistinct, int workMem, SortCoordinate coordinate, int sortopt)

References _bt_build_callback(), BTShared::brokenhotchain, BTBuildState::btleader, BuildIndexInfo(), ConditionVariableSignal(), BTShared::havedead, BTBuildState::havedead, BTSpool::heap, BTBuildState::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, BTSpool::index, BTShared::indtuples, BTBuildState::indtuples, BTShared::isconcurrent, BTSpool::isunique, BTShared::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, Min, BTShared::mutex, SortCoordinateData::nParticipants, BTShared::nparticipantsdone, BTSpool::nulls_not_distinct, BTShared::nulls_not_distinct, BTBuildState::nulls_not_distinct, palloc0(), ParallelTableScanFromBTShared, pgstat_progress_update_param(), progress, PROGRESS_BTREE_PHASE_PERFORMSORT_1, PROGRESS_BTREE_PHASE_PERFORMSORT_2, PROGRESS_CREATEIDX_SUBPHASE, BTShared::reltuples, SortCoordinateData::sharedsort, BTSpool::sortstate, SpinLockAcquire, SpinLockRelease, BTBuildState::spool, BTBuildState::spool2, table_beginscan_parallel(), table_index_build_scan(), tuplesort_begin_index_btree(), tuplesort_end(), TUPLESORT_NONE, tuplesort_performsort(), work_mem, and BTShared::workersdonecv.

Referenced by _bt_leader_participate_as_worker(), and _bt_parallel_build_main().

◆ _bt_slideleft()

static void _bt_slideleft ( Page  rightmostpage)
static

Definition at line 738 of file nbtsort.c.

739 {
740  OffsetNumber off;
741  OffsetNumber maxoff;
742  ItemId previi;
743 
744  maxoff = PageGetMaxOffsetNumber(rightmostpage);
745  Assert(maxoff >= P_FIRSTKEY);
746  previi = PageGetItemId(rightmostpage, P_HIKEY);
747  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
748  {
749  ItemId thisii = PageGetItemId(rightmostpage, off);
750 
751  *previi = *thisii;
752  previi = thisii;
753  }
754  ((PageHeader) rightmostpage)->pd_lower -= sizeof(ItemIdData);
755 }
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369

References Assert(), OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId(), and PageGetMaxOffsetNumber().

Referenced by _bt_uppershutdown().
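
The loop is an in-place left shift of the line-pointer array: each data item's ItemId is copied into the slot before it (overwriting the unused high-key slot on the rightmost page), and pd_lower then shrinks by one slot so the duplicated last entry disappears. A standalone analogue in plain C, using an int array in place of ItemIds (illustration values only):

#include <stdio.h>

int
main(void)
{
    int items[] = {0, 11, 22, 33};  /* slot 0 plays the P_HIKEY role */
    int nitems = 4;

    /* copy each data slot onto its predecessor, like "*previi = *thisii" */
    for (int off = 1; off < nitems; off++)
        items[off - 1] = items[off];
    nitems--;                       /* like shrinking pd_lower by sizeof(ItemIdData) */

    for (int off = 0; off < nitems; off++)
        printf("%d ", items[off]);  /* prints: 11 22 33 */
    printf("\n");
    return 0;
}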

◆ _bt_sort_dedup_finish_pending()

static void _bt_sort_dedup_finish_pending ( BTWriteState * wstate,
BTPageState * state,
BTDedupState  dstate 
)
static

Definition at line 1080 of file nbtsort.c.

1082 {
1083  Assert(dstate->nitems > 0);
1084 
1085  if (dstate->nitems == 1)
1086  _bt_buildadd(wstate, state, dstate->base, 0);
1087  else
1088  {
1089  IndexTuple postingtuple;
1090  Size truncextra;
1091 
1092  /* form a tuple with a posting list */
1093  postingtuple = _bt_form_posting(dstate->base,
1094  dstate->htids,
1095  dstate->nhtids);
1096  /* Calculate posting list overhead */
1097  truncextra = IndexTupleSize(postingtuple) -
1098  BTreeTupleGetPostingOffset(postingtuple);
1099 
1100  _bt_buildadd(wstate, state, postingtuple, truncextra);
1101  pfree(postingtuple);
1102  }
1103 
1104  dstate->nmaxitems = 0;
1105  dstate->nhtids = 0;
1106  dstate->nitems = 0;
1107  dstate->phystupsize = 0;
1108 }
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:864
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:529

References _bt_buildadd(), _bt_form_posting(), Assert(), BTDedupStateData::base, BTreeTupleGetPostingOffset(), BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::nhtids, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, pfree(), and BTDedupStateData::phystupsize.

Referenced by _bt_load().

◆ _bt_sortaddtup()

static void _bt_sortaddtup ( Page  page,
Size  itemsize,
IndexTuple  itup,
OffsetNumber  itup_off,
bool  newfirstdataitem 
)
static

Definition at line 769 of file nbtsort.c.

774 {
775  IndexTupleData trunctuple;
776 
777  if (newfirstdataitem)
778  {
779  trunctuple = *itup;
780  trunctuple.t_info = sizeof(IndexTupleData);
781  BTreeTupleSetNAtts(&trunctuple, 0, false);
782  itup = &trunctuple;
783  itemsize = sizeof(IndexTupleData);
784  }
785 
786  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
787  false, false) == InvalidOffsetNumber)
788  elog(ERROR, "failed to add item to the index page");
789 }
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
unsigned short t_info
Definition: itup.h:49

References BTreeTupleSetNAtts(), elog(), ERROR, InvalidOffsetNumber, PageAddItem, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

◆ _bt_spool()

static void _bt_spool ( BTSpool * btspool,
ItemPointer  self,
Datum * values,
bool * isnull 
)
static

Definition at line 533 of file nbtsort.c.

534 {
535  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
536  self, values, isnull);
537 }
void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, ItemPointer self, Datum *values, bool *isnull)

References BTSpool::index, BTSpool::sortstate, tuplesort_putindextuplevalues(), and values.

Referenced by _bt_build_callback().
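
This is the spooling half of the index-build callback contract: table_index_build_scan() passes each heap tuple's values/isnull arrays and TID to the callback, which forwards them here. The following is a condensed, illustrative sketch of how such a callback feeds _bt_spool(); it is abridged and not the verbatim _bt_build_callback(), whose full body is at line 588 of nbtsort.c:

/* sketch of an IndexBuildCallback that spools into one of two BTSpools */
static void
build_callback_sketch(Relation index, ItemPointer tid, Datum *values,
                      bool *isnull, bool tupleIsAlive, void *state)
{
    BTBuildState *buildstate = (BTBuildState *) state;

    /* live tuples (or all tuples, when there is no spool2) go to the main spool */
    if (tupleIsAlive || buildstate->spool2 == NULL)
        _bt_spool(buildstate->spool, tid, values, isnull);
    else
        _bt_spool(buildstate->spool2, tid, values, isnull); /* dead tuples */

    buildstate->indtuples += 1;
}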

◆ _bt_spooldestroy()

static void _bt_spooldestroy ( BTSpool * btspool)
static

Definition at line 523 of file nbtsort.c.

524 {
525  tuplesort_end(btspool->sortstate);
526  pfree(btspool);
527 }

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by _bt_spools_heapscan(), and btbuild().

◆ _bt_spools_heapscan()

static double _bt_spools_heapscan ( Relation  heap,
Relation  index,
BTBuildState * buildstate,
IndexInfo * indexInfo 
)
static

Definition at line 371 of file nbtsort.c.

373 {
374  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
375  SortCoordinate coordinate = NULL;
376  double reltuples = 0;
377 
378  /*
379  * We size the sort area as maintenance_work_mem rather than work_mem to
380  * speed index creation. This should be OK since a single backend can't
381  * run multiple index creations in parallel (see also: notes on
382  * parallelism and maintenance_work_mem below).
383  */
384  btspool->heap = heap;
385  btspool->index = index;
386  btspool->isunique = indexInfo->ii_Unique;
387  btspool->nulls_not_distinct = indexInfo->ii_NullsNotDistinct;
388 
389  /* Save as primary spool */
390  buildstate->spool = btspool;
391 
392  /* Report table scan phase started */
393  pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
394  PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN);
395 
396  /* Attempt to launch parallel worker scan when required */
397  if (indexInfo->ii_ParallelWorkers > 0)
398  _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
399  indexInfo->ii_ParallelWorkers);
400 
401  /*
402  * If parallel build requested and at least one worker process was
403  * successfully launched, set up coordination state
404  */
405  if (buildstate->btleader)
406  {
407  coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
408  coordinate->isWorker = false;
409  coordinate->nParticipants =
410  buildstate->btleader->nparticipanttuplesorts;
411  coordinate->sharedsort = buildstate->btleader->sharedsort;
412  }
413 
414  /*
415  * Begin serial/leader tuplesort.
416  *
417  * In cases where parallelism is involved, the leader receives the same
418  * share of maintenance_work_mem as a serial sort (it is generally treated
419  * in the same way as a serial sort once we return). Parallel worker
420  * Tuplesortstates will have received only a fraction of
421  * maintenance_work_mem, though.
422  *
423  * We rely on the lifetime of the Leader Tuplesortstate almost not
424  * overlapping with any worker Tuplesortstate's lifetime. There may be
425  * some small overlap, but that's okay because we rely on leader
426  * Tuplesortstate only allocating a small, fixed amount of memory here.
427  * When its tuplesort_performsort() is called (by our caller), and
428  * significant amounts of memory are likely to be used, all workers must
429  * have already freed almost all memory held by their Tuplesortstates
430  * (they are about to go away completely, too). The overall effect is
431  * that maintenance_work_mem always represents an absolute high watermark
432  * on the amount of memory used by a CREATE INDEX operation, regardless of
433  * the use of parallelism or any other factor.
434  */
435  buildstate->spool->sortstate =
436  tuplesort_begin_index_btree(heap, index, buildstate->isunique,
437  buildstate->nulls_not_distinct,
438  maintenance_work_mem, coordinate,
439  TUPLESORT_NONE);
440 
441  /*
442  * If building a unique index, put dead tuples in a second spool to keep
443  * them out of the uniqueness check. We expect that the second spool (for
444  * dead tuples) won't get very full, so we give it only work_mem.
445  */
446  if (indexInfo->ii_Unique)
447  {
448  BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool));
449  SortCoordinate coordinate2 = NULL;
450 
451  /* Initialize secondary spool */
452  btspool2->heap = heap;
453  btspool2->index = index;
454  btspool2->isunique = false;
455  /* Save as secondary spool */
456  buildstate->spool2 = btspool2;
457 
458  if (buildstate->btleader)
459  {
460  /*
461  * Set up non-private state that is passed to
462  * tuplesort_begin_index_btree() about the basic high level
463  * coordination of a parallel sort.
464  */
465  coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
466  coordinate2->isWorker = false;
467  coordinate2->nParticipants =
468  buildstate->btleader->nparticipanttuplesorts;
469  coordinate2->sharedsort = buildstate->btleader->sharedsort2;
470  }
471 
472  /*
473  * We expect that the second one (for dead tuples) won't get very
474  * full, so we give it only work_mem
475  */
476  buildstate->spool2->sortstate =
477  tuplesort_begin_index_btree(heap, index, false, false, work_mem,
478  coordinate2, TUPLESORT_NONE);
479  }
480 
481  /* Fill spool using either serial or parallel heap scan */
482  if (!buildstate->btleader)
483  reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
484  _bt_build_callback, (void *) buildstate,
485  NULL);
486  else
487  reltuples = _bt_parallel_heapscan(buildstate,
488  &indexInfo->ii_BrokenHotChain);
489 
490  /*
491  * Set the progress target for the next phase. Reset the block number
492  * values set by table_index_build_scan
493  */
494  {
495  const int progress_index[] = {
496  PROGRESS_CREATEIDX_TUPLES_TOTAL,
497  PROGRESS_SCAN_BLOCKS_DONE,
498  PROGRESS_SCAN_BLOCKS_TOTAL
499  };
500  const int64 progress_vals[] = {
501  buildstate->indtuples,
502  0, 0
503  };
504 
505  pgstat_progress_update_multi_param(3, progress_index, progress_vals);
506  }
507 
508  /* okay, all heap tuples are spooled */
509  if (buildstate->spool2 && !buildstate->havedead)
510  {
511  /* spool2 turns out to be unnecessary */
512  _bt_spooldestroy(buildstate->spool2);
513  buildstate->spool2 = NULL;
514  }
515 
516  return reltuples;
517 }
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
#define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN
Definition: nbtree.h:1119
static double _bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain)
Definition: nbtsort.c:1713
static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Definition: nbtsort.c:1456
static void _bt_spooldestroy(BTSpool *btspool)
Definition: nbtsort.c:523
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:86
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:122
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:121
bool ii_Unique
Definition: execnodes.h:191
bool ii_NullsNotDistinct
Definition: execnodes.h:192
int ii_ParallelWorkers
Definition: execnodes.h:199
struct SortCoordinateData * SortCoordinate
Definition: tuplesort.h:60

References _bt_begin_parallel(), _bt_build_callback(), _bt_parallel_heapscan(), _bt_spooldestroy(), BTBuildState::btleader, BTBuildState::havedead, BTSpool::heap, IndexInfo::ii_BrokenHotChain, IndexInfo::ii_Concurrent, IndexInfo::ii_NullsNotDistinct, IndexInfo::ii_ParallelWorkers, IndexInfo::ii_Unique, BTSpool::index, BTBuildState::indtuples, BTSpool::isunique, BTBuildState::isunique, SortCoordinateData::isWorker, maintenance_work_mem, SortCoordinateData::nParticipants, BTLeader::nparticipanttuplesorts, BTSpool::nulls_not_distinct, BTBuildState::nulls_not_distinct, palloc0(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN, PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, BTLeader::sharedsort, SortCoordinateData::sharedsort, BTLeader::sharedsort2, BTSpool::sortstate, BTBuildState::spool, BTBuildState::spool2, table_index_build_scan(), tuplesort_begin_index_btree(), TUPLESORT_NONE, and work_mem.

Referenced by btbuild().

◆ _bt_uppershutdown()

static void _bt_uppershutdown ( BTWriteState * wstate,
BTPageState * state 
)
static

Definition at line 1114 of file nbtsort.c.

1115 {
1116  BTPageState *s;
1117  BlockNumber rootblkno = P_NONE;
1118  uint32 rootlevel = 0;
1119  Page metapage;
1120 
1121  /*
1122  * Each iteration of this loop completes one more level of the tree.
1123  */
1124  for (s = state; s != NULL; s = s->btps_next)
1125  {
1126  BlockNumber blkno;
1127  BTPageOpaque opaque;
1128 
1129  blkno = s->btps_blkno;
1130  opaque = BTPageGetOpaque(s->btps_page);
1131 
1132  /*
1133  * We have to link the last page on this level to somewhere.
1134  *
1135  * If we're at the top, it's the root, so attach it to the metapage.
1136  * Otherwise, add an entry for it to its parent using its low key.
1137  * This may cause the last page of the parent level to split, but
1138  * that's not a problem -- we haven't gotten to it yet.
1139  */
1140  if (s->btps_next == NULL)
1141  {
1142  opaque->btpo_flags |= BTP_ROOT;
1143  rootblkno = blkno;
1144  rootlevel = s->btps_level;
1145  }
1146  else
1147  {
1148  Assert((BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) <=
1149  IndexRelationGetNumberOfKeyAttributes(wstate->index) &&
1150  BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) > 0) ||
1151  P_LEFTMOST(opaque));
1152  Assert(BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) == 0 ||
1153  !P_LEFTMOST(opaque));
1154  BTreeTupleSetDownLink(s->btps_lowkey, blkno);
1155  _bt_buildadd(wstate, s->btps_next, s->btps_lowkey, 0);
1156  pfree(s->btps_lowkey);
1157  s->btps_lowkey = NULL;
1158  }
1159 
1160  /*
1161  * This is the rightmost page, so the ItemId array needs to be slid
1162  * back one slot. Then we can dump out the page.
1163  */
1164  _bt_slideleft(s->btps_page);
1165  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
1166  s->btps_page = NULL; /* writepage freed the workspace */
1167  }
1168 
1169  /*
1170  * As the last step in the process, construct the metapage and make it
1171  * point to the new root (unless we had no data at all, in which case it's
1172  * set to point to "P_NONE"). This changes the index to the "valid" state
1173  * by filling in a valid magic number in the metapage.
1174  */
1175  metapage = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
1176  _bt_initmetapage(metapage, rootblkno, rootlevel,
1177  wstate->inskey->allequalimage);
1178  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
1179 }
unsigned int uint32
Definition: c.h:495
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:67
#define BTP_ROOT
Definition: nbtree.h:77
static void _bt_slideleft(Page rightmostpage)
Definition: nbtsort.c:738
Page btps_page
Definition: nbtsort.c:237
IndexTuple btps_lowkey
Definition: nbtsort.c:239
BlockNumber btps_blkno
Definition: nbtsort.c:238
struct BTPageState * btps_next
Definition: nbtsort.c:244
uint32 btps_level
Definition: nbtsort.c:242

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), BTScanInsertData::allequalimage, Assert(), BTP_ROOT, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_lowkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, BTreeTupleGetNAtts, BTreeTupleSetDownLink(), BTWriteState::index, IndexRelationGetNumberOfKeyAttributes, BTWriteState::inskey, P_LEFTMOST, P_NONE, palloc_aligned(), pfree(), and PG_IO_ALIGN_SIZE.

Referenced by _bt_load().


◆ btbuild()

IndexBuildResult* btbuild ( Relation  heap,
Relation  index,
IndexInfo * indexInfo 
)

Definition at line 301 of file nbtsort.c.

302 {
303  IndexBuildResult *result;
304  BTBuildState buildstate;
305  double reltuples;
306 
307 #ifdef BTREE_BUILD_STATS
308  if (log_btree_build_stats)
309  ResetUsage();
310 #endif /* BTREE_BUILD_STATS */
311 
312  buildstate.isunique = indexInfo->ii_Unique;
313  buildstate.nulls_not_distinct = indexInfo->ii_NullsNotDistinct;
314  buildstate.havedead = false;
315  buildstate.heap = heap;
316  buildstate.spool = NULL;
317  buildstate.spool2 = NULL;
318  buildstate.indtuples = 0;
319  buildstate.btleader = NULL;
320 
321  /*
322  * We expect to be called exactly once for any index relation. If that's
323  * not the case, big trouble's what we have.
324  */
325  if (RelationGetNumberOfBlocks(index) != 0)
326  elog(ERROR, "index \"%s\" already contains data",
327  RelationGetRelationName(index));
328 
329  reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
330 
331  /*
332  * Finish the build by (1) completing the sort of the spool file, (2)
333  * inserting the sorted tuples into btree pages and (3) building the upper
334  * levels. Finally, it may also be necessary to end use of parallelism.
335  */
336  _bt_leafbuild(buildstate.spool, buildstate.spool2);
337  _bt_spooldestroy(buildstate.spool);
338  if (buildstate.spool2)
339  _bt_spooldestroy(buildstate.spool2);
340  if (buildstate.btleader)
341  _bt_end_parallel(buildstate.btleader);
342 
343  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
344 
345  result->heap_tuples = reltuples;
346  result->index_tuples = buildstate.indtuples;
347 
348 #ifdef BTREE_BUILD_STATS
349  if (log_btree_build_stats)
350  {
351  ShowUsage("BTREE BUILD STATS");
352  ResetUsage();
353  }
354 #endif /* BTREE_BUILD_STATS */
355 
356  return result;
357 }
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:227
static void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
Definition: nbtsort.c:544
static double _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, IndexInfo *indexInfo)
Definition: nbtsort.c:371
#define RelationGetRelationName(relation)
Definition: rel.h:538
double heap_tuples
Definition: genam.h:32
double index_tuples
Definition: genam.h:33

References _bt_end_parallel(), _bt_leafbuild(), _bt_spooldestroy(), _bt_spools_heapscan(), BTBuildState::btleader, elog(), ERROR, BTBuildState::havedead, BTBuildState::heap, IndexBuildResult::heap_tuples, IndexInfo::ii_NullsNotDistinct, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, BTBuildState::indtuples, BTBuildState::isunique, log_btree_build_stats, BTBuildState::nulls_not_distinct, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().
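
btbuild() reaches CREATE INDEX through the index access method interface: bthandler() in nbtree.c returns an IndexAmRoutine whose ambuild callback points at this function. The following is a trimmed sketch of that wiring, with nearly all fields omitted; it is an illustration, not the full bthandler() body:

#include "postgres.h"

#include "access/amapi.h"
#include "access/nbtree.h"
#include "fmgr.h"
#include "nodes/nodes.h"

Datum
bthandler_sketch(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* ... the real bthandler() fills in every am* field and callback ... */
    amroutine->ambuild = btbuild;   /* CREATE INDEX on a btree lands here */

    PG_RETURN_POINTER(amroutine);
}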