PostgreSQL Source Code  git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "pgstat.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTBuildState
 
struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData

typedef struct BTParallelScanDescData *BTParallelScanDesc


Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btbuildCallback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

typedef struct BTParallelScanDescData *BTParallelScanDesc

Definition at line 104 of file nbtree.c.

Enumeration Type Documentation

enum BTPS_State

State of a parallel B-tree scan, held in the shared btps_pageStatus field.

Enumerator
BTPARALLEL_NOT_INITIALIZED 	the scan has not yet been initialized
BTPARALLEL_ADVANCING 	some process is advancing the scan to a new page; others must wait
BTPARALLEL_IDLE 	no process is advancing the scan; any worker may seize it
BTPARALLEL_DONE 	the scan is complete for the current set of array keys (including error exit)

Definition at line 80 of file nbtree.c.
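
These states live in the btps_pageStatus field of the shared BTParallelScanDescData structure listed above. The sketch below reconstructs that structure's layout from the member references on this page (nbtree.c lines 94-104); the comments are paraphrased, not the original source comments.

/* Shared state for a parallel B-tree scan, placed in dynamic shared memory. */
typedef struct BTParallelScanDescData
{
    BlockNumber btps_scanPage;      /* latest or next page to be scanned */
    BTPS_State  btps_pageStatus;    /* state of the scan, per BTPS_State */
    int         btps_arrayKeyCount; /* number of array scan-key sets already
                                     * processed by the parallel scan */
    slock_t     btps_mutex;         /* protects the fields above */
    ConditionVariable btps_cv;      /* workers sleep here while another
                                     * backend advances the scan */
} BTParallelScanDescData;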

Function Documentation

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 888 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
                                                  parallel_scan->ps_offset);

    so->arrayKeyCount++;
    SpinLockAcquire(&btscan->btps_mutex);
    if (btscan->btps_pageStatus == BTPARALLEL_DONE)
    {
        btscan->btps_scanPage = InvalidBlockNumber;
        btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
        btscan->btps_arrayKeyCount++;
    }
    SpinLockRelease(&btscan->btps_mutex);
}
void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 847 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), NULL, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;
    bool        status_changed = false;

    /* Do nothing, for non-parallel scans */
    if (parallel_scan == NULL)
        return;

    btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
                                                  parallel_scan->ps_offset);

    /*
     * Mark the parallel scan as done for this combination of scan keys,
     * unless some other process already did so.  See also
     * _bt_advance_array_keys.
     */
    SpinLockAcquire(&btscan->btps_mutex);
    if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
        btscan->btps_pageStatus != BTPARALLEL_DONE)
    {
        btscan->btps_pageStatus = BTPARALLEL_DONE;
        status_changed = true;
    }
    SpinLockRelease(&btscan->btps_mutex);

    /* wake up all the workers associated with this parallel scan */
    if (status_changed)
        ConditionVariableBroadcast(&btscan->btps_cv);
}
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)

Definition at line 824 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readpage().

{
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
                                                  parallel_scan->ps_offset);

    SpinLockAcquire(&btscan->btps_mutex);
    btscan->btps_scanPage = scan_page;
    btscan->btps_pageStatus = BTPARALLEL_IDLE;
    SpinLockRelease(&btscan->btps_mutex);
    ConditionVariableSignal(&btscan->btps_cv);
}
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)

Definition at line 766 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    BTPS_State  pageStatus;
    bool        exit_loop = false;
    bool        status = true;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    *pageno = P_NONE;

    btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
                                                  parallel_scan->ps_offset);

    while (1)
    {
        SpinLockAcquire(&btscan->btps_mutex);
        pageStatus = btscan->btps_pageStatus;

        if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
        {
            /* Parallel scan has already advanced to a new set of scankeys. */
            status = false;
        }
        else if (pageStatus == BTPARALLEL_DONE)
        {
            /*
             * We're done with this set of scankeys.  This may be the end, or
             * there could be more sets to try.
             */
            status = false;
        }
        else if (pageStatus != BTPARALLEL_ADVANCING)
        {
            /*
             * We have successfully seized control of the scan for the purpose
             * of advancing it to a new page!
             */
            btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
            *pageno = btscan->btps_scanPage;
            exit_loop = true;
        }
        SpinLockRelease(&btscan->btps_mutex);
        if (exit_loop || !status)
            break;
        ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
    }
    ConditionVariableCancelSleep();

    return status;
}
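
Taken together with _bt_parallel_release() and _bt_parallel_done(), this function gives _bt_first(), _bt_steppage(), and _bt_readnextpage() a simple seize/read/hand-off protocol. The sketch below shows only the shape of that protocol; it is not the actual caller code, and read_current_page() is a hypothetical stand-in for the real page-reading logic.

#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"

/* hypothetical helper: reads the seized page, returns the next page or P_NONE */
static BlockNumber read_current_page(IndexScanDesc scan, BlockNumber pageno);

static void
advance_parallel_scan(IndexScanDesc scan)
{
    BlockNumber pageno;
    BlockNumber next_pageno;

    if (!_bt_parallel_seize(scan, &pageno))
        return;                 /* scan is done for this set of scan keys */

    /* on the very first seizure pageno is P_NONE: descend the tree instead */

    next_pageno = read_current_page(scan, pageno);
    if (next_pageno != P_NONE)
        _bt_parallel_release(scan, next_pageno);    /* let others advance */
    else
        _bt_parallel_done(scan);    /* no more pages: wake all waiters */
}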
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)

Definition at line 473 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, NULL, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

{
    IndexScanDesc scan;
    BTScanOpaque so;

    /* no order by operators allowed */
    Assert(norderbys == 0);

    /* get the scan */
    scan = RelationGetIndexScan(rel, nkeys, norderbys);

    /* allocate private workspace */
    so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
    BTScanPosInvalidate(so->currPos);
    BTScanPosInvalidate(so->markPos);
    if (scan->numberOfKeys > 0)
        so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
    else
        so->keyData = NULL;

    so->arrayKeyData = NULL;    /* assume no array keys for now */
    so->numArrayKeys = 0;
    so->arrayKeys = NULL;
    so->arrayContext = NULL;

    so->killedItems = NULL;     /* until needed */
    so->numKilled = 0;

    /*
     * We don't know yet whether the scan will be index-only, so we do not
     * allocate the tuple workspace arrays until btrescan.  However, we set up
     * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
     */
    so->currTuples = so->markTuples = NULL;

    scan->xs_itupdesc = RelationGetDescr(rel);

    scan->opaque = so;

    return scan;
}
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)

Definition at line 173 of file nbtree.c.

References _bt_leafbuild(), _bt_spooldestroy(), _bt_spoolinit(), btbuildCallback(), elog, ERROR, BTBuildState::haveDead, IndexBuildResult::heap_tuples, BTBuildState::heapRel, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, IndexBuildHeapScan(), BTBuildState::indtuples, BTBuildState::isUnique, log_btree_build_stats, NULL, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, ResetUsage(), result, ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

{
    IndexBuildResult *result;
    double      reltuples;
    BTBuildState buildstate;

    buildstate.isUnique = indexInfo->ii_Unique;
    buildstate.haveDead = false;
    buildstate.heapRel = heap;
    buildstate.spool = NULL;
    buildstate.spool2 = NULL;
    buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
    if (log_btree_build_stats)
        ResetUsage();
#endif                          /* BTREE_BUILD_STATS */

    /*
     * We expect to be called exactly once for any index relation. If that's
     * not the case, big trouble's what we have.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    buildstate.spool = _bt_spoolinit(heap, index, indexInfo->ii_Unique, false);

    /*
     * If building a unique index, put dead tuples in a second spool to keep
     * them out of the uniqueness check.
     */
    if (indexInfo->ii_Unique)
        buildstate.spool2 = _bt_spoolinit(heap, index, false, true);

    /* do the heap scan */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
                                   btbuildCallback, (void *) &buildstate);

    /* okay, all heap tuples are indexed */
    if (buildstate.spool2 && !buildstate.haveDead)
    {
        /* spool2 turns out to be unnecessary */
        _bt_spooldestroy(buildstate.spool2);
        buildstate.spool2 = NULL;
    }

    /*
     * Finish the build by (1) completing the sort of the spool file, (2)
     * inserting the sorted tuples into btree pages and (3) building the upper
     * levels.
     */
    _bt_leafbuild(buildstate.spool, buildstate.spool2);
    _bt_spooldestroy(buildstate.spool);
    if (buildstate.spool2)
        _bt_spooldestroy(buildstate.spool2);

#ifdef BTREE_BUILD_STATS
    if (log_btree_build_stats)
    {
        ShowUsage("BTREE BUILD STATS");
        ResetUsage();
    }
#endif                          /* BTREE_BUILD_STATS */

    /*
     * Return statistics
     */
    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

    result->heap_tuples = reltuples;
    result->index_tuples = buildstate.indtuples;

    return result;
}
static void btbuildCallback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)

Definition at line 253 of file nbtree.c.

References _bt_spool(), BTBuildState::haveDead, BTBuildState::indtuples, NULL, BTBuildState::spool, BTBuildState::spool2, and HeapTupleData::t_self.

Referenced by btbuild().

{
    BTBuildState *buildstate = (BTBuildState *) state;

    /*
     * insert the index tuple into the appropriate spool file for subsequent
     * processing
     */
    if (tupleIsAlive || buildstate->spool2 == NULL)
        _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
    else
    {
        /* dead tuples are put into spool2 */
        buildstate->haveDead = true;
        _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
    }

    buildstate->indtuples += 1;
}
void btbuildempty ( Relation  index)

Definition at line 282 of file nbtree.c.

References _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

{
    Page        metapage;

    /* Construct metapage. */
    metapage = (Page) palloc(BLCKSZ);
    _bt_initmetapage(metapage, P_NONE, 0);

    /*
     * Write the page and log it.  It might seem that an immediate sync would
     * be sufficient to guarantee that the file exists on disk, but recovery
     * itself might remove it while replaying, for example, an
     * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we need
     * this even when wal_level=minimal.
     */
    PageSetChecksumInplace(metapage, BTREE_METAPAGE);
    smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
              (char *) metapage, true);
    log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
                BTREE_METAPAGE, metapage, false);

    /*
     * An immediate sync is required even if we xlog'd the page, because the
     * write did not go through shared_buffers and therefore a concurrent
     * checkpoint may have moved the redo pointer past our xlog record.
     */
    smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
}
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)

Definition at line 916 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, NULL, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

{
    Relation    rel = info->index;
    BTCycleId   cycleid;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /* Establish the vacuum cycle ID to use for this scan */
    /* The ENSURE stuff ensures we clean up shared memory on failure */
    PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    {
        cycleid = _bt_start_vacuum(rel);

        btvacuumscan(info, stats, callback, callback_state, cycleid);
    }
    PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    _bt_end_vacuum(rel);

    return stats;
}
bool btcanreturn (Relation index, int attno)

Definition at line 1389 of file nbtree.c.

Referenced by bthandler().

{
    return true;
}
void btendscan ( IndexScanDesc  scan)

Definition at line 579 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), NULL, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
    }

    so->markItemIndex = -1;
    BTScanPosUnpinIfPinned(so->markPos);

    /* No need to invalidate positions, the RAM is about to be freed. */

    /* Release storage */
    if (so->keyData != NULL)
        pfree(so->keyData);
    /* so->arrayKeyData and so->arrayKeys are in arrayContext */
    if (so->arrayContext != NULL)
        MemoryContextDelete(so->arrayContext);
    if (so->killedItems != NULL)
        pfree(so->killedItems);
    if (so->currTuples != NULL)
        pfree(so->currTuples);
    /* so->markTuples should not be pfree'd, see btrescan */
    pfree(so);
}
Size btestimateparallelscan ( void  )

Definition at line 701 of file nbtree.c.

Referenced by bthandler().

{
    return sizeof(BTParallelScanDescData);
}
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)

Definition at line 415 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, HeapTupleData::t_self, tbm_add_tuples(), and IndexScanDescData::xs_ctup.

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    int64       ntids = 0;
    ItemPointer heapTid;

    /*
     * If we have any array keys, initialize them.
     */
    if (so->numArrayKeys)
    {
        /* punt if we have any unsatisfiable array keys */
        if (so->numArrayKeys < 0)
            return ntids;

        _bt_start_array_keys(scan, ForwardScanDirection);
    }

    /* This loop handles advancing to the next array elements, if any */
    do
    {
        /* Fetch the first page & tuple */
        if (_bt_first(scan, ForwardScanDirection))
        {
            /* Save tuple ID, and continue scanning */
            heapTid = &scan->xs_ctup.t_self;
            tbm_add_tuples(tbm, heapTid, 1, false);
            ntids++;

            for (;;)
            {
                /*
                 * Advance to next tuple within page.  This is the same as the
                 * easy case in _bt_next().
                 */
                if (++so->currPos.itemIndex > so->currPos.lastItem)
                {
                    /* let _bt_next do the heavy lifting */
                    if (!_bt_next(scan, ForwardScanDirection))
                        break;
                }

                /* Save tuple ID, and continue scanning */
                heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
                tbm_add_tuples(tbm, heapTid, 1, false);
                ntids++;
            }
        }
        /* Now see if we have more array keys to deal with */
    } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));

    return ntids;
}
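
For reference, a bitmap scan reaches btgetbitmap through the generic AM entry point index_getbitmap. The sketch below follows the pattern of nodeBitmapIndexscan.c, assuming indexRel, snapshot, keys, and nkeys have already been set up by the caller; it is illustrative, not executor code.

#include "postgres.h"
#include "access/genam.h"
#include "miscadmin.h"          /* work_mem */
#include "nodes/tidbitmap.h"

static int64
collect_btree_tids(Relation indexRel, Snapshot snapshot,
                   ScanKey keys, int nkeys, TIDBitmap **tbm_out)
{
    IndexScanDesc scan = index_beginscan_bitmap(indexRel, snapshot, nkeys);
    TIDBitmap  *tbm = tbm_create(work_mem * 1024L, NULL);
    int64       ntids;

    index_rescan(scan, keys, nkeys, NULL, 0);
    ntids = index_getbitmap(scan, tbm); /* dispatches to btgetbitmap */
    index_endscan(scan);

    *tbm_out = tbm;
    return ntids;
}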
bool btgettuple (IndexScanDesc scan, ScanDirection dir)

Definition at line 341 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxIndexTuplesPerPage, NULL, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool        res;

    /* btree indexes are never lossy */
    scan->xs_recheck = false;

    /*
     * If we have any array keys, initialize them during first call for a
     * scan.  We can't do this in btrescan because we don't know the scan
     * direction at that time.
     */
    if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
    {
        /* punt if we have any unsatisfiable array keys */
        if (so->numArrayKeys < 0)
            return false;

        _bt_start_array_keys(scan, dir);
    }

    /* This loop handles advancing to the next array elements, if any */
    do
    {
        /*
         * If we've already initialized this scan, we can just advance it in
         * the appropriate direction.  If we haven't done so yet, we call
         * _bt_first() to get the first item in the scan.
         */
        if (!BTScanPosIsValid(so->currPos))
            res = _bt_first(scan, dir);
        else
        {
            /*
             * Check to see if we should kill the previously-fetched tuple.
             */
            if (scan->kill_prior_tuple)
            {
                /*
                 * Yes, remember it for later. (We'll deal with all such
                 * tuples at once right before leaving the index page.)  The
                 * test for numKilled overrun is not just paranoia: if the
                 * caller reverses direction in the indexscan then the same
                 * item might get entered multiple times. It's not worth
                 * trying to optimize that, so we don't detect it, but instead
                 * just forget any excess entries.
                 */
                if (so->killedItems == NULL)
                    so->killedItems = (int *)
                        palloc(MaxIndexTuplesPerPage * sizeof(int));
                if (so->numKilled < MaxIndexTuplesPerPage)
                    so->killedItems[so->numKilled++] = so->currPos.itemIndex;
            }

            /*
             * Now continue the scan.
             */
            res = _bt_next(scan, dir);
        }

        /* If we have a tuple, return it ... */
        if (res)
            break;
        /* ... otherwise see if we have more array keys to deal with */
    } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));

    return res;
}
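
An ordered scan drives btgettuple through the generic index-AM entry points. A minimal sketch, assuming a btree index on a single int4 column and that heapRel, indexRel, and snapshot are already opened by the caller:

#include "postgres.h"
#include "access/genam.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "utils/fmgroids.h"     /* F_INT4EQ */

static void
scan_for_42(Relation heapRel, Relation indexRel, Snapshot snapshot)
{
    ScanKeyData key;
    IndexScanDesc scan;
    HeapTuple   tup;

    /* WHERE indexed_col = 42 */
    ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(42));

    scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0);
    index_rescan(scan, &key, 1, NULL, 0);   /* dispatches to btrescan */

    while ((tup = index_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* each iteration reaches btgettuple; process tup here */
    }

    index_endscan(scan);        /* dispatches to btendscan */
}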
Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 125 of file nbtree.c.

References IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amparallelrescan, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btbeginscan(), btbuild(), btbuildempty(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, and PG_RETURN_POINTER.

{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    amroutine->amstrategies = BTMaxStrategyNumber;
    amroutine->amsupport = BTNProcs;
    amroutine->amcanorder = true;
    amroutine->amcanorderbyop = false;
    amroutine->amcanbackward = true;
    amroutine->amcanunique = true;
    amroutine->amcanmulticol = true;
    amroutine->amoptionalkey = true;
    amroutine->amsearcharray = true;
    amroutine->amsearchnulls = true;
    amroutine->amstorage = false;
    amroutine->amclusterable = true;
    amroutine->ampredlocks = true;
    amroutine->amcanparallel = true;
    amroutine->amkeytype = InvalidOid;

    amroutine->ambuild = btbuild;
    amroutine->ambuildempty = btbuildempty;
    amroutine->aminsert = btinsert;
    amroutine->ambulkdelete = btbulkdelete;
    amroutine->amvacuumcleanup = btvacuumcleanup;
    amroutine->amcanreturn = btcanreturn;
    amroutine->amcostestimate = btcostestimate;
    amroutine->amoptions = btoptions;
    amroutine->amproperty = btproperty;
    amroutine->amvalidate = btvalidate;
    amroutine->ambeginscan = btbeginscan;
    amroutine->amrescan = btrescan;
    amroutine->amgettuple = btgettuple;
    amroutine->amgetbitmap = btgetbitmap;
    amroutine->amendscan = btendscan;
    amroutine->ammarkpos = btmarkpos;
    amroutine->amrestrpos = btrestrpos;
    amroutine->amestimateparallelscan = btestimateparallelscan;
    amroutine->aminitparallelscan = btinitparallelscan;
    amroutine->amparallelrescan = btparallelrescan;

    PG_RETURN_POINTER(amroutine);
}
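
Core code never calls bthandler() directly; it looks the handler function up via pg_am and invokes it through GetIndexAmRoutine(). A minimal sketch, assuming the F_BTHANDLER function OID from fmgroids.h:

#include "postgres.h"
#include "access/amapi.h"
#include "utils/fmgroids.h"     /* F_BTHANDLER */

static bool
btree_supports_backward_scan(void)
{
    /* invokes bthandler() and validates the returned node */
    IndexAmRoutine *amroutine = GetIndexAmRoutine(F_BTHANDLER);

    return amroutine->amcanbackward;    /* true, per the code above */
}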
void btinitparallelscan ( void *  target)

Definition at line 710 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

{
    BTParallelScanDesc bt_target = (BTParallelScanDesc) target;

    SpinLockInit(&bt_target->btps_mutex);
    bt_target->btps_scanPage = InvalidBlockNumber;
    bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
    bt_target->btps_arrayKeyCount = 0;
    ConditionVariableInit(&bt_target->btps_cv);
}
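
The leader-side contract is that amestimateparallelscan bytes of shared memory are carved out and handed to aminitparallelscan before any worker joins the scan. A minimal sketch of that contract; here palloc stands in for the DSM placement that index_parallelscan_initialize actually performs:

#include "postgres.h"
#include "access/nbtree.h"

static void *
setup_bt_parallel_target(void)
{
    Size        size = btestimateparallelscan();
    void       *target = palloc(size);  /* stand-in for shared memory */

    btinitparallelscan(target);     /* scan starts BTPARALLEL_NOT_INITIALIZED */
    return target;
}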
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)

Definition at line 318 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, result, and IndexTupleData::t_tid.

Referenced by bthandler().

{
    bool        result;
    IndexTuple  itup;

    /* generate an index tuple */
    itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
    itup->t_tid = *ht_ctid;

    result = _bt_doinsert(rel, itup, checkUnique, heapRel);

    pfree(itup);

    return result;
}
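
Callers reach btinsert through the generic index_insert entry point. A minimal sketch, assuming values/isnull already match the index's column list and ht_ctid points at the new heap tuple:

#include "postgres.h"
#include "access/genam.h"
#include "catalog/index.h"      /* BuildIndexInfo */

static bool
insert_one_entry(Relation heapRel, Relation indexRel,
                 Datum *values, bool *isnull, ItemPointer ht_ctid)
{
    IndexInfo  *indexInfo = BuildIndexInfo(indexRel);

    /* UNIQUE_CHECK_NO: skip uniqueness checking in this sketch */
    return index_insert(indexRel, values, isnull, ht_ctid,
                        heapRel, UNIQUE_CHECK_NO, indexInfo);
}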
void btmarkpos ( IndexScanDesc  scan)

Definition at line 615 of file nbtree.c.

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* There may be an old mark with a pin (but no lock). */
    BTScanPosUnpinIfPinned(so->markPos);

    /*
     * Just record the current itemIndex.  If we later step to next page
     * before releasing the marked position, _bt_steppage makes a full copy of
     * the currPos struct in markPos.  If (as often happens) the mark is moved
     * before we leave the page, we don't have to do that work.
     */
    if (BTScanPosIsValid(so->currPos))
        so->markItemIndex = so->currPos.itemIndex;
    else
    {
        BTScanPosInvalidate(so->markPos);
        so->markItemIndex = -1;
    }

    /* Also record the current positions of any array keys */
    if (so->numArrayKeys)
        _bt_mark_array_keys(scan);
}
void btparallelrescan ( IndexScanDesc  scan)

Definition at line 725 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

{
    BTParallelScanDesc btscan;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;

    Assert(parallel_scan);

    btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
                                                  parallel_scan->ps_offset);

    /*
     * In theory, we don't need to acquire the spinlock here, because there
     * shouldn't be any other workers running at this point, but we do so for
     * consistency.
     */
    SpinLockAcquire(&btscan->btps_mutex);
    btscan->btps_scanPage = InvalidBlockNumber;
    btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
    btscan->btps_arrayKeyCount = 0;
    SpinLockRelease(&btscan->btps_mutex);
}
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)

Definition at line 519 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, memmove, NULL, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
        BTScanPosInvalidate(so->currPos);
    }

    so->markItemIndex = -1;
    so->arrayKeyCount = 0;
    BTScanPosUnpinIfPinned(so->markPos);
    BTScanPosInvalidate(so->markPos);

    /*
     * Allocate tuple workspace arrays, if needed for an index-only scan and
     * not already done in a previous rescan call.  To save on palloc
     * overhead, both workspaces are allocated as one palloc block; only this
     * function and btendscan know that.
     *
     * NOTE: this data structure also makes it safe to return data from a
     * "name" column, even though btree name_ops uses an underlying storage
     * datatype of cstring.  The risk there is that "name" is supposed to be
     * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
     * However, since we only return data out of tuples sitting in the
     * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
     * data out of the markTuples array --- running off the end of memory for
     * a SIGSEGV is not possible.  Yeah, this is ugly as sin, but it beats
     * adding special-case treatment for name_ops elsewhere.
     */
    if (scan->xs_want_itup && so->currTuples == NULL)
    {
        so->currTuples = (char *) palloc(BLCKSZ * 2);
        so->markTuples = so->currTuples + BLCKSZ;
    }

    /*
     * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
     * - vadim 05/05/97
     */
    if (scankey && scan->numberOfKeys > 0)
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
    so->numberOfKeys = 0;       /* until _bt_preprocess_keys sets it */

    /* If any keys are SK_SEARCHARRAY type, set up array-key info */
    _bt_preprocess_array_keys(scan);
}
void btrestrpos ( IndexScanDesc  scan)

Definition at line 645 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* Restore the marked positions of any array keys */
    if (so->numArrayKeys)
        _bt_restore_array_keys(scan);

    if (so->markItemIndex >= 0)
    {
        /*
         * The scan has never moved to a new page since the last mark.  Just
         * restore the itemIndex.
         *
         * NB: In this case we can't count on anything in so->markPos to be
         * accurate.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else
    {
        /*
         * The scan moved to a new page after last mark or restore, and we are
         * now restoring to the marked page.  We aren't holding any read
         * locks, but if we're still holding the pin for the current position,
         * we must drop it.
         */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0)
                _bt_killitems(scan);
            BTScanPosUnpinIfPinned(so->currPos);
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            if (BTScanPosIsPinned(so->markPos))
                IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
            if (so->currTuples)
                memcpy(so->currTuples, so->markTuples,
                       so->markPos.nextTupleOffset);
        }
        else
            BTScanPosInvalidate(so->currPos);
    }
}
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)

Definition at line 946 of file nbtree.c.

References IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), NULL, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

{
    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * If btbulkdelete was called, we need not do anything, just return the
     * stats from the latest btbulkdelete call.  If it wasn't called, we must
     * still do a pass over the index, to recycle any newly-recyclable pages
     * and to obtain index statistics.
     *
     * Since we aren't going to actually delete any leaf items, there's no
     * need to go through all the vacuum-cycle-ID pushups.
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        btvacuumscan(info, stats, NULL, NULL, 0);
    }

    /* Finally, vacuum the FSM */
    IndexFreeSpaceMapVacuum(info->index);

    /*
     * It's quite possible for us to be fooled by concurrent page splits into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately.  Otherwise
     * this might just make matters worse.
     */
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}
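
VACUUM drives the two entry points in sequence: zero or more btbulkdelete passes, then one btvacuumcleanup pass that receives the accumulated stats. A minimal sketch of that contract via the generic wrappers (cf. lazy_vacuum_index and lazy_cleanup_index in vacuumlazy.c); vacuum_one_index is an illustrative name:

#include "postgres.h"
#include "access/genam.h"

static IndexBulkDeleteResult *
vacuum_one_index(IndexVacuumInfo *info,
                 IndexBulkDeleteCallback callback, void *callback_state)
{
    IndexBulkDeleteResult *stats = NULL;

    /* delete pass: btbulkdelete allocates stats on first call */
    stats = index_bulk_delete(info, stats, callback, callback_state);

    /* cleanup pass: with stats == NULL, btvacuumcleanup scans the index */
    return index_vacuum_cleanup(info, stats);
}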
static void btvacuumpage (BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)

Definition at line 1128 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetPage, callback(), BTVacState::callback, BTVacState::callback_state, BTVacState::cycleid, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirtyHint(), MaxOffsetNumber, MemoryContextReset(), MemoryContextSwitchTo(), NULL, IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_IGNORE, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().

1129 {
1130  IndexVacuumInfo *info = vstate->info;
1131  IndexBulkDeleteResult *stats = vstate->stats;
1133  void *callback_state = vstate->callback_state;
1134  Relation rel = info->index;
1135  bool delete_now;
1136  BlockNumber recurse_to;
1137  Buffer buf;
1138  Page page;
1139  BTPageOpaque opaque = NULL;
1140 
1141 restart:
1142  delete_now = false;
1143  recurse_to = P_NONE;
1144 
1145  /* call vacuum_delay_point while not holding any buffer lock */
1147 
1148  /*
1149  * We can't use _bt_getbuf() here because it always applies
1150  * _bt_checkpage(), which will barf on an all-zero page. We want to
1151  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1152  * buffer access strategy.
1153  */
1154  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1155  info->strategy);
1156  LockBuffer(buf, BT_READ);
1157  page = BufferGetPage(buf);
1158  if (!PageIsNew(page))
1159  {
1160  _bt_checkpage(rel, buf);
1161  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1162  }
1163 
1164  /*
1165  * If we are recursing, the only case we want to do anything with is a
1166  * live leaf page having the current vacuum cycle ID. Any other state
1167  * implies we already saw the page (eg, deleted it as being empty).
1168  */
1169  if (blkno != orig_blkno)
1170  {
1171  if (_bt_page_recyclable(page) ||
1172  P_IGNORE(opaque) ||
1173  !P_ISLEAF(opaque) ||
1174  opaque->btpo_cycleid != vstate->cycleid)
1175  {
1176  _bt_relbuf(rel, buf);
1177  return;
1178  }
1179  }
1180 
1181  /* Page is valid, see what to do with it */
1182  if (_bt_page_recyclable(page))
1183  {
1184  /* Okay to recycle this page */
1185  RecordFreeIndexPage(rel, blkno);
1186  vstate->totFreePages++;
1187  stats->pages_deleted++;
1188  }
1189  else if (P_ISDELETED(opaque))
1190  {
1191  /* Already deleted, but can't recycle yet */
1192  stats->pages_deleted++;
1193  }
1194  else if (P_ISHALFDEAD(opaque))
1195  {
1196  /* Half-dead, try to delete */
1197  delete_now = true;
1198  }
1199  else if (P_ISLEAF(opaque))
1200  {
1201  OffsetNumber deletable[MaxOffsetNumber];
1202  int ndeletable;
1203  OffsetNumber offnum,
1204  minoff,
1205  maxoff;
1206 
1207  /*
1208  * Trade in the initial read lock for a super-exclusive write lock on
1209  * this page. We must get such a lock on every leaf page over the
1210  * course of the vacuum scan, whether or not it actually contains any
1211  * deletable tuples --- see nbtree/README.
1212  */
1214  LockBufferForCleanup(buf);
1215 
1216  /*
1217  * Remember highest leaf page number we've taken cleanup lock on; see
1218  * notes in btvacuumscan
1219  */
1220  if (blkno > vstate->lastBlockLocked)
1221  vstate->lastBlockLocked = blkno;
1222 
1223  /*
1224  * Check whether we need to recurse back to earlier pages. What we
1225  * are concerned about is a page split that happened since we started
1226  * the vacuum scan. If the split moved some tuples to a lower page
1227  * then we might have missed 'em. If so, set up for tail recursion.
1228  * (Must do this before possibly clearing btpo_cycleid below!)
1229  */
1230  if (vstate->cycleid != 0 &&
1231  opaque->btpo_cycleid == vstate->cycleid &&
1232  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1233  !P_RIGHTMOST(opaque) &&
1234  opaque->btpo_next < orig_blkno)
1235  recurse_to = opaque->btpo_next;
1236 
1237  /*
1238  * Scan over all items to see which ones need deleted according to the
1239  * callback function.
1240  */
1241  ndeletable = 0;
1242  minoff = P_FIRSTDATAKEY(opaque);
1243  maxoff = PageGetMaxOffsetNumber(page);
1244  if (callback)
1245  {
1246  for (offnum = minoff;
1247  offnum <= maxoff;
1248  offnum = OffsetNumberNext(offnum))
1249  {
1250  IndexTuple itup;
1251  ItemPointer htup;
1252 
1253  itup = (IndexTuple) PageGetItem(page,
1254  PageGetItemId(page, offnum));
1255  htup = &(itup->t_tid);
1256 
1257  /*
1258  * During Hot Standby we currently assume that
1259  * XLOG_BTREE_VACUUM records do not produce conflicts. That is
1260  * only true as long as the callback function depends only
1261  * upon whether the index tuple refers to heap tuples removed
1262  * in the initial heap scan. When vacuum starts it derives a
1263  * value of OldestXmin. Backends taking later snapshots could
1264  * have a RecentGlobalXmin with a later xid than the vacuum's
1265  * OldestXmin, so it is possible that row versions deleted
1266  * after OldestXmin could be marked as killed by other
1267  * backends. The callback function *could* look at the index
1268  * tuple state in isolation and decide to delete the index
1269  * tuple, though currently it does not. If it ever did, we
1270  * would need to reconsider whether XLOG_BTREE_VACUUM records
1271  * should cause conflicts. If they did cause conflicts they
1272  * would be fairly harsh conflicts, since we haven't yet
1273  * worked out a way to pass a useful value for
1274  * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
1275  * applies to *any* type of index that marks index tuples as
1276  * killed.
1277  */
1278  if (callback(htup, callback_state))
1279  deletable[ndeletable++] = offnum;
1280  }
1281  }
1282 
1283  /*
1284  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1285  * call per page, so as to minimize WAL traffic.
1286  */
1287  if (ndeletable > 0)
1288  {
1289  /*
1290  * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
1291  * all information to the replay code to allow it to get a cleanup
1292  * lock on all pages between the previous lastBlockVacuumed and
1293  * this page. This ensures that WAL replay locks all leaf pages at
1294  * some point, which is important should non-MVCC scans be
1295  * requested. This is currently unused on standby, but we record
1296  * it anyway, so that the WAL contains the required information.
1297  *
1298  * Since we can visit leaf pages out-of-order when recursing,
1299  * replay might end up locking such pages an extra time, but it
1300  * doesn't seem worth the amount of bookkeeping it'd take to avoid
1301  * that.
1302  */
1303  _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
1304  vstate->lastBlockVacuumed);
1305 
1306  /*
1307  * Remember highest leaf page number we've issued a
1308  * XLOG_BTREE_VACUUM WAL record for.
1309  */
1310  if (blkno > vstate->lastBlockVacuumed)
1311  vstate->lastBlockVacuumed = blkno;
1312 
1313  stats->tuples_removed += ndeletable;
1314  /* must recompute maxoff */
1315  maxoff = PageGetMaxOffsetNumber(page);
1316  }
1317  else
1318  {
1319  /*
1320  * If the page has been split during this vacuum cycle, it seems
1321  * worth expending a write to clear btpo_cycleid even if we don't
1322  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1323  * care of this.) This ensures we won't process the page again.
1324  *
1325  * We treat this like a hint-bit update because there's no need to
1326  * WAL-log it.
1327  */
1328  if (vstate->cycleid != 0 &&
1329  opaque->btpo_cycleid == vstate->cycleid)
1330  {
1331  opaque->btpo_cycleid = 0;
1332  MarkBufferDirtyHint(buf, true);
1333  }
1334  }
1335 
1336  /*
1337  * If it's now empty, try to delete; else count the live tuples. We
1338  * don't delete when recursing, though, to avoid putting entries into
1339  * freePages out-of-order (doesn't seem worth any extra code to handle
1340  * the case).
1341  */
1342  if (minoff > maxoff)
1343  delete_now = (blkno == orig_blkno);
1344  else
1345  stats->num_index_tuples += maxoff - minoff + 1;
1346  }
1347 
1348  if (delete_now)
1349  {
1350  MemoryContext oldcontext;
1351  int ndel;
1352 
1353  /* Run pagedel in a temp context to avoid memory leakage */
1354  MemoryContextReset(vstate->pagedelcontext);
1355  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1356 
1357  ndel = _bt_pagedel(rel, buf);
1358 
1359  /* count only this page, else may double-count parent */
1360  if (ndel)
1361  stats->pages_deleted++;
1362 
1363  MemoryContextSwitchTo(oldcontext);
1364  /* pagedel released buffer, so we shouldn't */
1365  }
1366  else
1367  _bt_relbuf(rel, buf);
1368 
1369  /*
1370  * This is really tail recursion, but if the compiler is too stupid to
1371  * optimize it as such, we'd eat an uncomfortably large amount of stack
1372  * space per recursion level (due to the deletable[] array). A failure is
1373  * improbable since the number of levels isn't likely to be large ... but
1374  * just in case, let's hand-optimize into a loop.
1375  */
1376  if (recurse_to != P_NONE)
1377  {
1378  blkno = recurse_to;
1379  goto restart;
1380  }
1381 }
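
The goto above is the hand-optimization the comment at lines 1369-1375 describes: the "recursive call" becomes an assignment plus a jump, so a single stack frame (and a single deletable[] array) serves any recursion depth. As a minimal, self-contained sketch of that pattern (process_one_page(), MAX_ITEMS, and INVALID_PAGE are hypothetical stand-ins, not PostgreSQL code), consider:

#include <stdio.h>

#define INVALID_PAGE (-1)     /* plays the role of P_NONE */
#define MAX_ITEMS    1024     /* plays the role of MaxOffsetNumber */

/* Hypothetical per-page work; returns a page to revisit, or INVALID_PAGE. */
static int
process_one_page(int pageno)
{
    int     deletable[MAX_ITEMS];   /* large local array, as in btvacuumpage() */

    deletable[0] = 0;
    printf("visiting page %d (scratch array at %p)\n",
           pageno, (void *) deletable);

    /* Pretend pages 6 and 7 each ask us to revisit the preceding page. */
    return (pageno == 6 || pageno == 7) ? pageno - 1 : INVALID_PAGE;
}

/* Tail recursion rewritten as a loop: one stack frame, any recursion depth. */
static void
visit_page(int pageno)
{
restart:
    {
        int     recurse_to = process_one_page(pageno);

        if (recurse_to != INVALID_PAGE)
        {
            pageno = recurse_to;
            goto restart;       /* equivalent to: visit_page(recurse_to); */
        }
    }
}

int
main(void)
{
    visit_page(7);              /* visits 7, 6, 5 in one stack frame */
    return 0;
}

Run it and the scratch-array address stays constant across the three visits; with genuine recursion each level would add a MAX_ITEMS-sized frame, which is exactly the stack cost the comment is worried about.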
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)

Definition at line 998 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_relbuf(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate(), BTREE_METAPAGE, btvacuumpage(), buf, callback(), BTVacState::callback, BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBufferForCleanup(), LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), NULL, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, RBM_NORMAL, ReadBufferExtended(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, BTVacState::stats, IndexVacuumInfo::strategy, BTVacState::totFreePages, UnlockRelationForExtension(), and XLogStandbyInfoActive.

Referenced by btbulkdelete(), and btvacuumcleanup().

 998 btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 999              IndexBulkDeleteCallback callback, void *callback_state,
1000              BTCycleId cycleid)
1001 {
1002  Relation rel = info->index;
1003  BTVacState vstate;
1004  BlockNumber num_pages;
1005  BlockNumber blkno;
1006  bool needLock;
1007 
1008  /*
1009  * Reset counts that will be incremented during the scan; needed in case
1010  * of multiple scans during a single VACUUM command
1011  */
1012  stats->estimated_count = false;
1013  stats->num_index_tuples = 0;
1014  stats->pages_deleted = 0;
1015 
1016  /* Set up info to pass down to btvacuumpage */
1017  vstate.info = info;
1018  vstate.stats = stats;
1019  vstate.callback = callback;
1020  vstate.callback_state = callback_state;
1021  vstate.cycleid = cycleid;
1022  vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
1023  vstate.lastBlockLocked = BTREE_METAPAGE;
1024  vstate.totFreePages = 0;
1025 
1026  /* Create a temporary memory context to run _bt_pagedel in */
1027  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
1028                                                "_bt_pagedel",
1029                                                ALLOCSET_DEFAULT_SIZES);
1030 
1031  /*
1032  * The outer loop iterates over all index pages except the metapage, in
1033  * physical order (we hope the kernel will cooperate in providing
1034  * read-ahead for speed). It is critical that we visit all leaf pages,
1035  * including ones added after we start the scan, else we might fail to
1036  * delete some deletable tuples. Hence, we must repeatedly check the
1037  * relation length. We must acquire the relation-extension lock while
1038  * doing so to avoid a race condition: if someone else is extending the
1039  * relation, there is a window where bufmgr/smgr have created a new
1040  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1041  * we manage to scan such a page here, we'll improperly assume it can be
1042  * recycled. Taking the lock synchronizes things enough to prevent a
1043  * problem: either num_pages won't include the new page, or _bt_getbuf
1044  * already has write lock on the buffer and it will be fully initialized
1045  * before we can examine it. (See also vacuumlazy.c, which has the same
1046  * issue.) Also, we need not worry if a page is added immediately after
1047  * we look; the page splitting code already has write-lock on the left
1048  * page before it adds a right page, so we must already have processed any
1049  * tuples due to be moved into such a page.
1050  *
1051  * We can skip locking for new or temp relations, however, since no one
1052  * else could be accessing them.
1053  */
1054  needLock = !RELATION_IS_LOCAL(rel);
1055 
1056  blkno = BTREE_METAPAGE + 1;
1057  for (;;)
1058  {
1059  /* Get the current relation length */
1060  if (needLock)
1061   LockRelationForExtension(rel, ExclusiveLock);
1062  num_pages = RelationGetNumberOfBlocks(rel);
1063  if (needLock)
1064   UnlockRelationForExtension(rel, ExclusiveLock);
1065 
1066  /* Quit if we've scanned the whole relation */
1067  if (blkno >= num_pages)
1068  break;
1069  /* Iterate over pages, then loop back to recheck length */
1070  for (; blkno < num_pages; blkno++)
1071  {
1072  btvacuumpage(&vstate, blkno, blkno);
1073  }
1074  }
1075 
1076  /*
1077  * Check to see if we need to issue one final WAL record for this index,
1078  * which may be needed for correctness on a hot standby node when non-MVCC
1079  * index scans could take place.
1080  *
1081  * If the WAL is replayed in hot standby, the replay process needs to get
1082  * cleanup locks on all index leaf pages, just as we've been doing here.
1083  * However, we won't issue any WAL records about pages that have no items
1084  * to be deleted. For pages between pages we've vacuumed, the replay code
1085  * will take locks under the direction of the lastBlockVacuumed fields in
1086  * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
1087  * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
1088  * against the last leaf page in the index, if that one wasn't vacuumed.
1089  */
1090  if (XLogStandbyInfoActive() &&
1091  vstate.lastBlockVacuumed < vstate.lastBlockLocked)
1092  {
1093  Buffer buf;
1094 
1095  /*
1096  * The page should be valid, but we can't use _bt_getbuf() because we
1097  * want to use a nondefault buffer access strategy. Since we aren't
1098  * going to delete any items, getting cleanup lock again is probably
1099  * overkill, but for consistency do that anyway.
1100  */
1101  buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
1102  RBM_NORMAL, info->strategy);
1103  LockBufferForCleanup(buf);
1104  _bt_checkpage(rel, buf);
1105  _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
1106  _bt_relbuf(rel, buf);
1107  }
1108 
1109  MemoryContextDelete(vstate.pagedelcontext);
1110 
1111  /* update statistics */
1112  stats->num_pages = num_pages;
1113  stats->pages_free = vstate.totFreePages;
1114 }
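
The double loop at lines 1057-1074 is easier to see in isolation: the scan terminates only when a fresh length check finds no pages beyond those already visited, so pages added mid-scan are still covered. Below is a minimal sketch of that recheck-the-length pattern; relation_length() and scan_page() are hypothetical stand-ins, and the LockRelationForExtension()/UnlockRelationForExtension() bracketing is elided:

#include <stdio.h>

#define FIRST_PAGE 1            /* plays the role of BTREE_METAPAGE + 1 */

/*
 * Hypothetical stand-in for RelationGetNumberOfBlocks(): reports 8 blocks
 * on the first call and 12 afterwards, mimicking a concurrent relation
 * extension that happens while we are busy scanning.
 */
static int
relation_length(void)
{
    static int  calls = 0;

    return (++calls >= 2) ? 12 : 8;
}

static void
scan_page(int blkno)
{
    printf("scanning block %d\n", blkno);
}

int
main(void)
{
    int     blkno = FIRST_PAGE;

    /* Outer loop: keep rechecking the length until no new pages appear. */
    for (;;)
    {
        int     num_pages = relation_length();

        if (blkno >= num_pages)
            break;              /* a fresh check found nothing unvisited */
        for (; blkno < num_pages; blkno++)
            scan_page(blkno);
    }
    return 0;
}

This prints blocks 1 through 7, then 8 through 11 after the simulated extension, and only then stops; that is the same guarantee btvacuumscan needs so that leaf pages created after the scan starts are not skipped.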