PostgreSQL Source Code  git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "pgstat.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c (graph omitted).

Data Structures

struct  BTBuildState
 
struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btbuildCallback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
 
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
IndexBuildResult * btbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

typedef struct BTParallelScanDescData * BTParallelScanDesc

Definition at line 105 of file nbtree.c.

Enumeration Type Documentation

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 80 of file nbtree.c.
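
The four enumerators form a small hand-off protocol around btps_pageStatus: a worker that seizes the scan moves it from BTPARALLEL_NOT_INITIALIZED or BTPARALLEL_IDLE to BTPARALLEL_ADVANCING, releases it back to BTPARALLEL_IDLE once the next page is published, and marks it BTPARALLEL_DONE when the current set of scan keys is exhausted. Below is a minimal standalone model of those transitions, not backend code; the enum mirrors nbtree.c, but the driver is hypothetical.

#include <assert.h>
#include <stdio.h>

/* Mirrors the BTPS_State enum in nbtree.c */
typedef enum
{
    BTPARALLEL_NOT_INITIALIZED,
    BTPARALLEL_ADVANCING,
    BTPARALLEL_IDLE,
    BTPARALLEL_DONE
} BTPS_State;

/* Model of _bt_parallel_seize's transition; the real code holds btps_mutex. */
static BTPS_State
seize(BTPS_State s)
{
    assert(s != BTPARALLEL_ADVANCING);  /* only one advancing worker at a time */
    return BTPARALLEL_ADVANCING;
}

int
main(void)
{
    BTPS_State s = BTPARALLEL_NOT_INITIALIZED;

    s = seize(s);           /* first worker grabs the scan */
    s = BTPARALLEL_IDLE;    /* _bt_parallel_release: next page published */
    s = seize(s);           /* another worker advances */
    s = BTPARALLEL_DONE;    /* _bt_parallel_done: no pages left */
    printf("final state: %d\n", (int) s);
    return 0;
}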

Function Documentation

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 889 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

890 {
891  BTScanOpaque so = (BTScanOpaque) scan->opaque;
892  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
893  BTParallelScanDesc btscan;
894 
895  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
896  parallel_scan->ps_offset);
897 
898  so->arrayKeyCount++;
899  SpinLockAcquire(&btscan->btps_mutex);
900  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
901  {
902  btscan->btps_scanPage = InvalidBlockNumber;
903  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
904  btscan->btps_arrayKeyCount++;
905  }
906  SpinLockRelease(&btscan->btps_mutex);
907 }
void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 848 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), NULL, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

849 {
850  BTScanOpaque so = (BTScanOpaque) scan->opaque;
851  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
852  BTParallelScanDesc btscan;
853  bool status_changed = false;
854 
855  /* Do nothing, for non-parallel scans */
856  if (parallel_scan == NULL)
857  return;
858 
859  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
860  parallel_scan->ps_offset);
861 
862  /*
863  * Mark the parallel scan as done for this combination of scan keys,
864  * unless some other process already did so. See also
865  * _bt_advance_array_keys.
866  */
867  SpinLockAcquire(&btscan->btps_mutex);
868  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
869  btscan->btps_pageStatus != BTPARALLEL_DONE)
870  {
871  btscan->btps_pageStatus = BTPARALLEL_DONE;
872  status_changed = true;
873  }
874  SpinLockRelease(&btscan->btps_mutex);
875 
876  /* wake up all the workers associated with this parallel scan */
877  if (status_changed)
878  ConditionVariableBroadcast(&btscan->btps_cv);
879 }
void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 825 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readpage().

826 {
827  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
828  BTParallelScanDesc btscan;
829 
830  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
831  parallel_scan->ps_offset);
832 
833  SpinLockAcquire(&btscan->btps_mutex);
834  btscan->btps_scanPage = scan_page;
835  btscan->btps_pageStatus = BTPARALLEL_IDLE;
836  SpinLockRelease(&btscan->btps_mutex);
837  ConditionVariableSignal(&btscan->btps_cv);
838 }
bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 767 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

768 {
769  BTScanOpaque so = (BTScanOpaque) scan->opaque;
770  BTPS_State pageStatus;
771  bool exit_loop = false;
772  bool status = true;
773  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
774  BTParallelScanDesc btscan;
775 
776  *pageno = P_NONE;
777 
778  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
779  parallel_scan->ps_offset);
780 
781  while (1)
782  {
783  SpinLockAcquire(&btscan->btps_mutex);
784  pageStatus = btscan->btps_pageStatus;
785 
786  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
787  {
788  /* Parallel scan has already advanced to a new set of scankeys. */
789  status = false;
790  }
791  else if (pageStatus == BTPARALLEL_DONE)
792  {
793  /*
794  * We're done with this set of scankeys. This may be the end, or
795  * there could be more sets to try.
796  */
797  status = false;
798  }
799  else if (pageStatus != BTPARALLEL_ADVANCING)
800  {
801  /*
802  * We have successfully seized control of the scan for the purpose
803  * of advancing it to a new page!
804  */
805  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
806  *pageno = btscan->btps_scanPage;
807  exit_loop = true;
808  }
809  SpinLockRelease(&btscan->btps_mutex);
810  if (exit_loop || !status)
811  break;
812  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
813  }
814  ConditionVariableCancelSleep();
815 
816  return status;
817 }
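A worker's view of the protocol stitches the three _bt_parallel_* functions above together: seize the scan, learn which page to read, publish the successor, and finish with _bt_parallel_done. A hedged schematic of such a caller follows; the real drivers are _bt_first and _bt_readnextpage, and process_page here is a hypothetical placeholder that would read the page and return its right sibling.

/* Schematic only, assuming the declarations in access/nbtree.h. */
extern BlockNumber process_page(IndexScanDesc scan, BlockNumber pageno);    /* hypothetical */

static void
scan_worker(IndexScanDesc scan)
{
    BlockNumber pageno;

    while (_bt_parallel_seize(scan, &pageno))
    {
        BlockNumber next = process_page(scan, pageno);

        if (next == InvalidBlockNumber)
        {
            _bt_parallel_done(scan);        /* keyspace exhausted */
            break;
        }
        _bt_parallel_release(scan, next);   /* publish next page, state -> IDLE */
    }
}

In the actual code the release happens as soon as the right sibling is known, before the current page's items are processed, so other workers are blocked for as short a time as possible.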
IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 474 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, NULL, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

475 {
476  IndexScanDesc scan;
477  BTScanOpaque so;
478 
479  /* no order by operators allowed */
480  Assert(norderbys == 0);
481 
482  /* get the scan */
483  scan = RelationGetIndexScan(rel, nkeys, norderbys);
484 
485  /* allocate private workspace */
486  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
487  BTScanPosInvalidate(so->currPos);
488  BTScanPosInvalidate(so->markPos);
489  if (scan->numberOfKeys > 0)
490  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
491  else
492  so->keyData = NULL;
493 
494  so->arrayKeyData = NULL; /* assume no array keys for now */
495  so->numArrayKeys = 0;
496  so->arrayKeys = NULL;
497  so->arrayContext = NULL;
498 
499  so->killedItems = NULL; /* until needed */
500  so->numKilled = 0;
501 
502  /*
503  * We don't know yet whether the scan will be index-only, so we do not
504  * allocate the tuple workspace arrays until btrescan. However, we set up
505  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
506  */
507  so->currTuples = so->markTuples = NULL;
508 
509  scan->xs_itupdesc = RelationGetDescr(rel);
510 
511  scan->opaque = so;
512 
513  return scan;
514 }
IndexBuildResult* btbuild ( Relation  heap,
Relation  index,
IndexInfo *  indexInfo 
)

Definition at line 174 of file nbtree.c.

References _bt_leafbuild(), _bt_spooldestroy(), _bt_spoolinit(), btbuildCallback(), elog, ERROR, BTBuildState::haveDead, IndexBuildResult::heap_tuples, BTBuildState::heapRel, IndexInfo::ii_Unique, IndexBuildResult::index_tuples, IndexBuildHeapScan(), BTBuildState::indtuples, BTBuildState::isUnique, log_btree_build_stats, NULL, palloc(), RelationGetNumberOfBlocks, RelationGetRelationName, ResetUsage(), ShowUsage(), BTBuildState::spool, and BTBuildState::spool2.

Referenced by bthandler().

175 {
176  IndexBuildResult *result;
177  double reltuples;
178  BTBuildState buildstate;
179 
180  buildstate.isUnique = indexInfo->ii_Unique;
181  buildstate.haveDead = false;
182  buildstate.heapRel = heap;
183  buildstate.spool = NULL;
184  buildstate.spool2 = NULL;
185  buildstate.indtuples = 0;
186 
187 #ifdef BTREE_BUILD_STATS
188  if (log_btree_build_stats)
189  ResetUsage();
190 #endif /* BTREE_BUILD_STATS */
191 
192  /*
193  * We expect to be called exactly once for any index relation. If that's
194  * not the case, big trouble's what we have.
195  */
196  if (RelationGetNumberOfBlocks(index) != 0)
197  elog(ERROR, "index \"%s\" already contains data",
198  RelationGetRelationName(index));
199 
200  buildstate.spool = _bt_spoolinit(heap, index, indexInfo->ii_Unique, false);
201 
202  /*
203  * If building a unique index, put dead tuples in a second spool to keep
204  * them out of the uniqueness check.
205  */
206  if (indexInfo->ii_Unique)
207  buildstate.spool2 = _bt_spoolinit(heap, index, false, true);
208 
209  /* do the heap scan */
210  reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
211  btbuildCallback, (void *) &buildstate);
212 
213  /* okay, all heap tuples are indexed */
214  if (buildstate.spool2 && !buildstate.haveDead)
215  {
216  /* spool2 turns out to be unnecessary */
217  _bt_spooldestroy(buildstate.spool2);
218  buildstate.spool2 = NULL;
219  }
220 
221  /*
222  * Finish the build by (1) completing the sort of the spool file, (2)
223  * inserting the sorted tuples into btree pages and (3) building the upper
224  * levels.
225  */
226  _bt_leafbuild(buildstate.spool, buildstate.spool2);
227  _bt_spooldestroy(buildstate.spool);
228  if (buildstate.spool2)
229  _bt_spooldestroy(buildstate.spool2);
230 
231 #ifdef BTREE_BUILD_STATS
232  if (log_btree_build_stats)
233  {
234  ShowUsage("BTREE BUILD STATS");
235  ResetUsage();
236  }
237 #endif /* BTREE_BUILD_STATS */
238 
239  /*
240  * Return statistics
241  */
242  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
243 
244  result->heap_tuples = reltuples;
245  result->index_tuples = buildstate.indtuples;
246 
247  return result;
248 }
static void btbuildCallback ( Relation  index,
HeapTuple  htup,
Datum *  values,
bool *  isnull,
bool  tupleIsAlive,
void *  state 
)
static

Definition at line 254 of file nbtree.c.

References _bt_spool(), BTBuildState::haveDead, BTBuildState::indtuples, NULL, BTBuildState::spool, BTBuildState::spool2, and HeapTupleData::t_self.

Referenced by btbuild().

260 {
261  BTBuildState *buildstate = (BTBuildState *) state;
262 
263  /*
264  * insert the index tuple into the appropriate spool file for subsequent
265  * processing
266  */
267  if (tupleIsAlive || buildstate->spool2 == NULL)
268  _bt_spool(buildstate->spool, &htup->t_self, values, isnull);
269  else
270  {
271  /* dead tuples are put into spool2 */
272  buildstate->haveDead = true;
273  _bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
274  }
275 
276  buildstate->indtuples += 1;
277 }
void btbuildempty ( Relation  index)

Definition at line 283 of file nbtree.c.

References _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

284 {
285  Page metapage;
286 
287  /* Construct metapage. */
288  metapage = (Page) palloc(BLCKSZ);
289  _bt_initmetapage(metapage, P_NONE, 0);
290 
291  /*
292  * Write the page and log it. It might seem that an immediate sync
293  * would be sufficient to guarantee that the file exists on disk, but
294  * recovery itself might remove it while replaying, for example, an
295  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
296  * need this even when wal_level=minimal.
297  */
298  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
299  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
300  (char *) metapage, true);
301  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
302  BTREE_METAPAGE, metapage, false);
303 
304  /*
305  * An immediate sync is required even if we xlog'd the page, because the
306  * write did not go through shared_buffers and therefore a concurrent
307  * checkpoint may have moved the redo pointer past our xlog record.
308  */
309  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
310 }
IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 917 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, NULL, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

919 {
920  Relation rel = info->index;
921  BTCycleId cycleid;
922 
923  /* allocate stats if first time through, else re-use existing struct */
924  if (stats == NULL)
925  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
926 
927  /* Establish the vacuum cycle ID to use for this scan */
928  /* The ENSURE stuff ensures we clean up shared memory on failure */
929  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
930  {
931  cycleid = _bt_start_vacuum(rel);
932 
933  btvacuumscan(info, stats, callback, callback_state, cycleid);
934  }
935  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
936  _bt_end_vacuum(rel);
937 
938  return stats;
939 }
bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1390 of file nbtree.c.

Referenced by bthandler().

1391 {
1392  return true;
1393 }
void btendscan ( IndexScanDesc  scan)

Definition at line 580 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), NULL, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

581 {
582  BTScanOpaque so = (BTScanOpaque) scan->opaque;
583 
584  /* we aren't holding any read locks, but gotta drop the pins */
585  if (BTScanPosIsValid(so->currPos))
586  {
587  /* Before leaving current page, deal with any killed items */
588  if (so->numKilled > 0)
589  _bt_killitems(scan);
590  BTScanPosUnpinIfPinned(so->currPos);
591  }
592 
593  so->markItemIndex = -1;
594  BTScanPosUnpinIfPinned(so->markPos);
595 
596  /* No need to invalidate positions, the RAM is about to be freed. */
597 
598  /* Release storage */
599  if (so->keyData != NULL)
600  pfree(so->keyData);
601  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
602  if (so->arrayContext != NULL)
603  MemoryContextDelete(so->arrayContext);
604  if (so->killedItems != NULL)
605  pfree(so->killedItems);
606  if (so->currTuples != NULL)
607  pfree(so->currTuples);
608  /* so->markTuples should not be pfree'd, see btrescan */
609  pfree(so);
610 }
Size btestimateparallelscan ( void  )

Definition at line 702 of file nbtree.c.

Referenced by bthandler().

703 {
704  return sizeof(BTParallelScanDescData);
705 }
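The Size returned here is how much of the shared ParallelIndexScanDescData block is reserved for the btree-specific state; btinitparallelscan (below) fills it in, and every worker later locates it with OffsetToPointer, exactly as the functions above do with parallel_scan->ps_offset. A standalone illustration of that addressing idiom, with simplified stand-in structs rather than the real descriptors (the backend's real block also carries a snapshot and alignment padding):

#include <stdio.h>
#include <stdlib.h>

/* Same definition as OffsetToPointer in c.h */
#define OffsetToPointer(base, offset) ((void *) ((char *) (base) + (offset)))

typedef struct
{
    size_t      ps_offset;      /* where the AM-specific state starts */
} FakeParallelIndexScanDesc;

typedef struct
{
    unsigned    btps_scanPage;  /* next page to be scanned */
} FakeBTParallelScanDesc;

int
main(void)
{
    size_t      off = sizeof(FakeParallelIndexScanDesc);
    char       *shm = malloc(off + sizeof(FakeBTParallelScanDesc));
    FakeParallelIndexScanDesc *generic = (FakeParallelIndexScanDesc *) shm;
    FakeBTParallelScanDesc *btscan;

    generic->ps_offset = off;
    btscan = (FakeBTParallelScanDesc *) OffsetToPointer(generic, generic->ps_offset);
    btscan->btps_scanPage = 42;
    printf("btree state lives %zu bytes into the block, page %u\n",
           generic->ps_offset, btscan->btps_scanPage);
    free(shm);
    return 0;
}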
int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 416 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, HeapTupleData::t_self, tbm_add_tuples(), and IndexScanDescData::xs_ctup.

Referenced by bthandler().

417 {
418  BTScanOpaque so = (BTScanOpaque) scan->opaque;
419  int64 ntids = 0;
420  ItemPointer heapTid;
421 
422  /*
423  * If we have any array keys, initialize them.
424  */
425  if (so->numArrayKeys)
426  {
427  /* punt if we have any unsatisfiable array keys */
428  if (so->numArrayKeys < 0)
429  return ntids;
430 
431  _bt_start_array_keys(scan, ForwardScanDirection);
432  }
433 
434  /* This loop handles advancing to the next array elements, if any */
435  do
436  {
437  /* Fetch the first page & tuple */
438  if (_bt_first(scan, ForwardScanDirection))
439  {
440  /* Save tuple ID, and continue scanning */
441  heapTid = &scan->xs_ctup.t_self;
442  tbm_add_tuples(tbm, heapTid, 1, false);
443  ntids++;
444 
445  for (;;)
446  {
447  /*
448  * Advance to next tuple within page. This is the same as the
449  * easy case in _bt_next().
450  */
451  if (++so->currPos.itemIndex > so->currPos.lastItem)
452  {
453  /* let _bt_next do the heavy lifting */
454  if (!_bt_next(scan, ForwardScanDirection))
455  break;
456  }
457 
458  /* Save tuple ID, and continue scanning */
459  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
460  tbm_add_tuples(tbm, heapTid, 1, false);
461  ntids++;
462  }
463  }
464  /* Now see if we have more array keys to deal with */
465  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
466 
467  return ntids;
468 }
bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 342 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxIndexTuplesPerPage, NULL, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

343 {
344  BTScanOpaque so = (BTScanOpaque) scan->opaque;
345  bool res;
346 
347  /* btree indexes are never lossy */
348  scan->xs_recheck = false;
349 
350  /*
351  * If we have any array keys, initialize them during first call for a
352  * scan. We can't do this in btrescan because we don't know the scan
353  * direction at that time.
354  */
355  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
356  {
357  /* punt if we have any unsatisfiable array keys */
358  if (so->numArrayKeys < 0)
359  return false;
360 
361  _bt_start_array_keys(scan, dir);
362  }
363 
364  /* This loop handles advancing to the next array elements, if any */
365  do
366  {
367  /*
368  * If we've already initialized this scan, we can just advance it in
369  * the appropriate direction. If we haven't done so yet, we call
370  * _bt_first() to get the first item in the scan.
371  */
372  if (!BTScanPosIsValid(so->currPos))
373  res = _bt_first(scan, dir);
374  else
375  {
376  /*
377  * Check to see if we should kill the previously-fetched tuple.
378  */
379  if (scan->kill_prior_tuple)
380  {
381  /*
382  * Yes, remember it for later. (We'll deal with all such
383  * tuples at once right before leaving the index page.) The
384  * test for numKilled overrun is not just paranoia: if the
385  * caller reverses direction in the indexscan then the same
386  * item might get entered multiple times. It's not worth
387  * trying to optimize that, so we don't detect it, but instead
388  * just forget any excess entries.
389  */
390  if (so->killedItems == NULL)
391  so->killedItems = (int *)
392  palloc(MaxIndexTuplesPerPage * sizeof(int));
393  if (so->numKilled < MaxIndexTuplesPerPage)
394  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
395  }
396 
397  /*
398  * Now continue the scan.
399  */
400  res = _bt_next(scan, dir);
401  }
402 
403  /* If we have a tuple, return it ... */
404  if (res)
405  break;
406  /* ... otherwise see if we have more array keys to deal with */
407  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
408 
409  return res;
410 }
Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 126 of file nbtree.c.

References IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amparallelrescan, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btbeginscan(), btbuild(), btbuildempty(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, and PG_RETURN_POINTER.

127 {
128  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
129 
130  amroutine->amstrategies = BTMaxStrategyNumber;
131  amroutine->amsupport = BTNProcs;
132  amroutine->amcanorder = true;
133  amroutine->amcanorderbyop = false;
134  amroutine->amcanbackward = true;
135  amroutine->amcanunique = true;
136  amroutine->amcanmulticol = true;
137  amroutine->amoptionalkey = true;
138  amroutine->amsearcharray = true;
139  amroutine->amsearchnulls = true;
140  amroutine->amstorage = false;
141  amroutine->amclusterable = true;
142  amroutine->ampredlocks = true;
143  amroutine->amcanparallel = true;
144  amroutine->amkeytype = InvalidOid;
145 
146  amroutine->ambuild = btbuild;
147  amroutine->ambuildempty = btbuildempty;
148  amroutine->aminsert = btinsert;
149  amroutine->ambulkdelete = btbulkdelete;
150  amroutine->amvacuumcleanup = btvacuumcleanup;
151  amroutine->amcanreturn = btcanreturn;
152  amroutine->amcostestimate = btcostestimate;
153  amroutine->amoptions = btoptions;
154  amroutine->amproperty = btproperty;
155  amroutine->amvalidate = btvalidate;
156  amroutine->ambeginscan = btbeginscan;
157  amroutine->amrescan = btrescan;
158  amroutine->amgettuple = btgettuple;
159  amroutine->amgetbitmap = btgetbitmap;
160  amroutine->amendscan = btendscan;
161  amroutine->ammarkpos = btmarkpos;
162  amroutine->amrestrpos = btrestrpos;
163  amroutine->amestimateparallelscan = btestimateparallelscan;
164  amroutine->aminitparallelscan = btinitparallelscan;
165  amroutine->amparallelrescan = btparallelrescan;
166 
167  PG_RETURN_POINTER(amroutine);
168 }
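bthandler is the AM's single SQL-visible entry point: pg_am records it, and the server calls it to obtain the filled-in IndexAmRoutine whenever the btree AM is needed. An extension-defined access method follows the same pattern. A minimal hedged sketch, in which myam_handler and all of its settings are hypothetical:

#include "postgres.h"
#include "access/amapi.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(myam_handler);

Datum
myam_handler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* capability flags and callbacks, filled in as bthandler does above */
    amroutine->amstrategies = 0;
    amroutine->amsupport = 1;
    /* ... ambuild, aminsert, ambeginscan, and the rest must all be set ... */

    PG_RETURN_POINTER(amroutine);
}

Such a handler would then be wired up with CREATE ACCESS METHOD myam TYPE INDEX HANDLER myam_handler.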
void btinitparallelscan ( void *  target)

Definition at line 711 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

712 {
713  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
714 
715  SpinLockInit(&bt_target->btps_mutex);
716  bt_target->btps_scanPage = InvalidBlockNumber;
717  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
718  bt_target->btps_arrayKeyCount = 0;
719  ConditionVariableInit(&bt_target->btps_cv);
720 }
bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo *  indexInfo 
)

Definition at line 319 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

323 {
324  bool result;
325  IndexTuple itup;
326 
327  /* generate an index tuple */
328  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
329  itup->t_tid = *ht_ctid;
330 
331  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
332 
333  pfree(itup);
334 
335  return result;
336 }
void btmarkpos ( IndexScanDesc  scan)

Definition at line 616 of file nbtree.c.

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

617 {
618  BTScanOpaque so = (BTScanOpaque) scan->opaque;
619 
620  /* There may be an old mark with a pin (but no lock). */
621  BTScanPosUnpinIfPinned(so->markPos);
622 
623  /*
624  * Just record the current itemIndex. If we later step to next page
625  * before releasing the marked position, _bt_steppage makes a full copy of
626  * the currPos struct in markPos. If (as often happens) the mark is moved
627  * before we leave the page, we don't have to do that work.
628  */
629  if (BTScanPosIsValid(so->currPos))
630  so->markItemIndex = so->currPos.itemIndex;
631  else
632  {
633  BTScanPosInvalidate(so->markPos);
634  so->markItemIndex = -1;
635  }
636 
637  /* Also record the current positions of any array keys */
638  if (so->numArrayKeys)
639  _bt_mark_array_keys(scan);
640 }
void btparallelrescan ( IndexScanDesc  scan)

Definition at line 726 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

727 {
728  BTParallelScanDesc btscan;
729  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
730 
731  Assert(parallel_scan);
732 
733  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
734  parallel_scan->ps_offset);
735 
736  /*
737  * In theory, we don't need to acquire the spinlock here, because there
738  * shouldn't be any other workers running at this point, but we do so for
739  * consistency.
740  */
741  SpinLockAcquire(&btscan->btps_mutex);
742  btscan->btps_scanPage = InvalidBlockNumber;
743  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
744  btscan->btps_arrayKeyCount = 0;
745  SpinLockRelease(&btscan->btps_mutex);
746 }
void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 520 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, memmove, NULL, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

522 {
523  BTScanOpaque so = (BTScanOpaque) scan->opaque;
524 
525  /* we aren't holding any read locks, but gotta drop the pins */
526  if (BTScanPosIsValid(so->currPos))
527  {
528  /* Before leaving current page, deal with any killed items */
529  if (so->numKilled > 0)
530  _bt_killitems(scan);
531  BTScanPosUnpinIfPinned(so->currPos);
532  BTScanPosInvalidate(so->currPos);
533  }
534 
535  so->markItemIndex = -1;
536  so->arrayKeyCount = 0;
537  BTScanPosUnpinIfPinned(so->markPos);
538  BTScanPosInvalidate(so->markPos);
539 
540  /*
541  * Allocate tuple workspace arrays, if needed for an index-only scan and
542  * not already done in a previous rescan call. To save on palloc
543  * overhead, both workspaces are allocated as one palloc block; only this
544  * function and btendscan know that.
545  *
546  * NOTE: this data structure also makes it safe to return data from a
547  * "name" column, even though btree name_ops uses an underlying storage
548  * datatype of cstring. The risk there is that "name" is supposed to be
549  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
550  * However, since we only return data out of tuples sitting in the
551  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
552  * data out of the markTuples array --- running off the end of memory for
553  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
554  * adding special-case treatment for name_ops elsewhere.
555  */
556  if (scan->xs_want_itup && so->currTuples == NULL)
557  {
558  so->currTuples = (char *) palloc(BLCKSZ * 2);
559  so->markTuples = so->currTuples + BLCKSZ;
560  }
561 
562  /*
563  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
564  * - vadim 05/05/97
565  */
566  if (scankey && scan->numberOfKeys > 0)
567  memmove(scan->keyData,
568  scankey,
569  scan->numberOfKeys * sizeof(ScanKeyData));
570  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
571 
572  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
573  _bt_preprocess_array_keys(scan);
574 }
void btrestrpos ( IndexScanDesc  scan)

Definition at line 646 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

647 {
648  BTScanOpaque so = (BTScanOpaque) scan->opaque;
649 
650  /* Restore the marked positions of any array keys */
651  if (so->numArrayKeys)
652  _bt_restore_array_keys(scan);
653 
654  if (so->markItemIndex >= 0)
655  {
656  /*
657  * The scan has never moved to a new page since the last mark. Just
658  * restore the itemIndex.
659  *
660  * NB: In this case we can't count on anything in so->markPos to be
661  * accurate.
662  */
663  so->currPos.itemIndex = so->markItemIndex;
664  }
665  else
666  {
667  /*
668  * The scan moved to a new page after last mark or restore, and we are
669  * now restoring to the marked page. We aren't holding any read
670  * locks, but if we're still holding the pin for the current position,
671  * we must drop it.
672  */
673  if (BTScanPosIsValid(so->currPos))
674  {
675  /* Before leaving current page, deal with any killed items */
676  if (so->numKilled > 0)
677  _bt_killitems(scan);
678  BTScanPosUnpinIfPinned(so->currPos);
679  }
680 
681  if (BTScanPosIsValid(so->markPos))
682  {
683  /* bump pin on mark buffer for assignment to current buffer */
684  if (BTScanPosIsPinned(so->markPos))
685  IncrBufferRefCount(so->markPos.buf);
686  memcpy(&so->currPos, &so->markPos,
687  offsetof(BTScanPosData, items[1]) +
688  so->markPos.lastItem * sizeof(BTScanPosItem));
689  if (so->currTuples)
690  memcpy(so->currTuples, so->markTuples,
691  so->markPos.nextTupleOffset);
692  }
693  else
694  BTScanPosInvalidate(so->currPos);
695  }
696 }
IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 947 of file nbtree.c.

References IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), NULL, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

948 {
949  /* No-op in ANALYZE ONLY mode */
950  if (info->analyze_only)
951  return stats;
952 
953  /*
954  * If btbulkdelete was called, we need not do anything, just return the
955  * stats from the latest btbulkdelete call. If it wasn't called, we must
956  * still do a pass over the index, to recycle any newly-recyclable pages
957  * and to obtain index statistics.
958  *
959  * Since we aren't going to actually delete any leaf items, there's no
960  * need to go through all the vacuum-cycle-ID pushups.
961  */
962  if (stats == NULL)
963  {
964  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
965  btvacuumscan(info, stats, NULL, NULL, 0);
966  }
967 
968  /* Finally, vacuum the FSM */
969  IndexFreeSpaceMapVacuum(info->index);
970 
971  /*
972  * It's quite possible for us to be fooled by concurrent page splits into
973  * double-counting some index tuples, so disbelieve any total that exceeds
974  * the underlying heap's count ... if we know that accurately. Otherwise
975  * this might just make matters worse.
976  */
977  if (!info->estimated_count)
978  {
979  if (stats->num_index_tuples > info->num_heap_tuples)
980  stats->num_index_tuples = info->num_heap_tuples;
981  }
982 
983  return stats;
984 }
static void btvacuumpage ( BTVacState vstate,
BlockNumber  blkno,
BlockNumber  orig_blkno 
)
static

Definition at line 1129 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetPage, callback(), BTVacState::callback, BTVacState::callback_state, BTVacState::cycleid, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirtyHint(), MaxOffsetNumber, MemoryContextReset(), MemoryContextSwitchTo(), NULL, IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_IGNORE, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().

1130 {
1131  IndexVacuumInfo *info = vstate->info;
1132  IndexBulkDeleteResult *stats = vstate->stats;
1133  IndexBulkDeleteCallback callback = vstate->callback;
1134  void *callback_state = vstate->callback_state;
1135  Relation rel = info->index;
1136  bool delete_now;
1137  BlockNumber recurse_to;
1138  Buffer buf;
1139  Page page;
1140  BTPageOpaque opaque = NULL;
1141 
1142 restart:
1143  delete_now = false;
1144  recurse_to = P_NONE;
1145 
1146  /* call vacuum_delay_point while not holding any buffer lock */
1147  vacuum_delay_point();
1148 
1149  /*
1150  * We can't use _bt_getbuf() here because it always applies
1151  * _bt_checkpage(), which will barf on an all-zero page. We want to
1152  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1153  * buffer access strategy.
1154  */
1155  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1156  info->strategy);
1157  LockBuffer(buf, BT_READ);
1158  page = BufferGetPage(buf);
1159  if (!PageIsNew(page))
1160  {
1161  _bt_checkpage(rel, buf);
1162  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1163  }
1164 
1165  /*
1166  * If we are recursing, the only case we want to do anything with is a
1167  * live leaf page having the current vacuum cycle ID. Any other state
1168  * implies we already saw the page (eg, deleted it as being empty).
1169  */
1170  if (blkno != orig_blkno)
1171  {
1172  if (_bt_page_recyclable(page) ||
1173  P_IGNORE(opaque) ||
1174  !P_ISLEAF(opaque) ||
1175  opaque->btpo_cycleid != vstate->cycleid)
1176  {
1177  _bt_relbuf(rel, buf);
1178  return;
1179  }
1180  }
1181 
1182  /* Page is valid, see what to do with it */
1183  if (_bt_page_recyclable(page))
1184  {
1185  /* Okay to recycle this page */
1186  RecordFreeIndexPage(rel, blkno);
1187  vstate->totFreePages++;
1188  stats->pages_deleted++;
1189  }
1190  else if (P_ISDELETED(opaque))
1191  {
1192  /* Already deleted, but can't recycle yet */
1193  stats->pages_deleted++;
1194  }
1195  else if (P_ISHALFDEAD(opaque))
1196  {
1197  /* Half-dead, try to delete */
1198  delete_now = true;
1199  }
1200  else if (P_ISLEAF(opaque))
1201  {
1202  OffsetNumber deletable[MaxOffsetNumber];
1203  int ndeletable;
1204  OffsetNumber offnum,
1205  minoff,
1206  maxoff;
1207 
1208  /*
1209  * Trade in the initial read lock for a super-exclusive write lock on
1210  * this page. We must get such a lock on every leaf page over the
1211  * course of the vacuum scan, whether or not it actually contains any
1212  * deletable tuples --- see nbtree/README.
1213  */
1214  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1215  LockBufferForCleanup(buf);
1216 
1217  /*
1218  * Remember highest leaf page number we've taken cleanup lock on; see
1219  * notes in btvacuumscan
1220  */
1221  if (blkno > vstate->lastBlockLocked)
1222  vstate->lastBlockLocked = blkno;
1223 
1224  /*
1225  * Check whether we need to recurse back to earlier pages. What we
1226  * are concerned about is a page split that happened since we started
1227  * the vacuum scan. If the split moved some tuples to a lower page
1228  * then we might have missed 'em. If so, set up for tail recursion.
1229  * (Must do this before possibly clearing btpo_cycleid below!)
1230  */
1231  if (vstate->cycleid != 0 &&
1232  opaque->btpo_cycleid == vstate->cycleid &&
1233  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1234  !P_RIGHTMOST(opaque) &&
1235  opaque->btpo_next < orig_blkno)
1236  recurse_to = opaque->btpo_next;
1237 
1238  /*
1239  * Scan over all items to see which ones need to be deleted according
1240  * to the callback function.
1241  */
1242  ndeletable = 0;
1243  minoff = P_FIRSTDATAKEY(opaque);
1244  maxoff = PageGetMaxOffsetNumber(page);
1245  if (callback)
1246  {
1247  for (offnum = minoff;
1248  offnum <= maxoff;
1249  offnum = OffsetNumberNext(offnum))
1250  {
1251  IndexTuple itup;
1252  ItemPointer htup;
1253 
1254  itup = (IndexTuple) PageGetItem(page,
1255  PageGetItemId(page, offnum));
1256  htup = &(itup->t_tid);
1257 
1258  /*
1259  * During Hot Standby we currently assume that
1260  * XLOG_BTREE_VACUUM records do not produce conflicts. That is
1261  * only true as long as the callback function depends only
1262  * upon whether the index tuple refers to heap tuples removed
1263  * in the initial heap scan. When vacuum starts it derives a
1264  * value of OldestXmin. Backends taking later snapshots could
1265  * have a RecentGlobalXmin with a later xid than the vacuum's
1266  * OldestXmin, so it is possible that row versions deleted
1267  * after OldestXmin could be marked as killed by other
1268  * backends. The callback function *could* look at the index
1269  * tuple state in isolation and decide to delete the index
1270  * tuple, though currently it does not. If it ever did, we
1271  * would need to reconsider whether XLOG_BTREE_VACUUM records
1272  * should cause conflicts. If they did cause conflicts they
1273  * would be fairly harsh conflicts, since we haven't yet
1274  * worked out a way to pass a useful value for
1275  * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
1276  * applies to *any* type of index that marks index tuples as
1277  * killed.
1278  */
1279  if (callback(htup, callback_state))
1280  deletable[ndeletable++] = offnum;
1281  }
1282  }
1283 
1284  /*
1285  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1286  * call per page, so as to minimize WAL traffic.
1287  */
1288  if (ndeletable > 0)
1289  {
1290  /*
1291  * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
1292  * all the information the replay code needs to get a cleanup
1293  * lock on all pages between the previous lastBlockVacuumed and
1294  * this page. This ensures that WAL replay locks all leaf pages at
1295  * some point, which is important should non-MVCC scans be
1296  * requested. This is currently unused on standby, but we record
1297  * it anyway, so that the WAL contains the required information.
1298  *
1299  * Since we can visit leaf pages out-of-order when recursing,
1300  * replay might end up locking such pages an extra time, but it
1301  * doesn't seem worth the amount of bookkeeping it'd take to avoid
1302  * that.
1303  */
1304  _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
1305  vstate->lastBlockVacuumed);
1306 
1307  /*
1308  * Remember the highest leaf page number we've issued an
1309  * XLOG_BTREE_VACUUM WAL record for.
1310  */
1311  if (blkno > vstate->lastBlockVacuumed)
1312  vstate->lastBlockVacuumed = blkno;
1313 
1314  stats->tuples_removed += ndeletable;
1315  /* must recompute maxoff */
1316  maxoff = PageGetMaxOffsetNumber(page);
1317  }
1318  else
1319  {
1320  /*
1321  * If the page has been split during this vacuum cycle, it seems
1322  * worth expending a write to clear btpo_cycleid even if we don't
1323  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1324  * care of this.) This ensures we won't process the page again.
1325  *
1326  * We treat this like a hint-bit update because there's no need to
1327  * WAL-log it.
1328  */
1329  if (vstate->cycleid != 0 &&
1330  opaque->btpo_cycleid == vstate->cycleid)
1331  {
1332  opaque->btpo_cycleid = 0;
1333  MarkBufferDirtyHint(buf, true);
1334  }
1335  }
1336 
1337  /*
1338  * If it's now empty, try to delete; else count the live tuples. We
1339  * don't delete when recursing, though, to avoid putting entries into
1340  * freePages out-of-order (doesn't seem worth any extra code to handle
1341  * the case).
1342  */
1343  if (minoff > maxoff)
1344  delete_now = (blkno == orig_blkno);
1345  else
1346  stats->num_index_tuples += maxoff - minoff + 1;
1347  }
1348 
1349  if (delete_now)
1350  {
1351  MemoryContext oldcontext;
1352  int ndel;
1353 
1354  /* Run pagedel in a temp context to avoid memory leakage */
1355  MemoryContextReset(vstate->pagedelcontext);
1356  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1357 
1358  ndel = _bt_pagedel(rel, buf);
1359 
1360  /* count only this page, else may double-count parent */
1361  if (ndel)
1362  stats->pages_deleted++;
1363 
1364  MemoryContextSwitchTo(oldcontext);
1365  /* pagedel released buffer, so we shouldn't */
1366  }
1367  else
1368  _bt_relbuf(rel, buf);
1369 
1370  /*
1371  * This is really tail recursion, but if the compiler is too stupid to
1372  * optimize it as such, we'd eat an uncomfortably large amount of stack
1373  * space per recursion level (due to the deletable[] array). A failure is
1374  * improbable since the number of levels isn't likely to be large ... but
1375  * just in case, let's hand-optimize into a loop.
1376  */
1377  if (recurse_to != P_NONE)
1378  {
1379  blkno = recurse_to;
1380  goto restart;
1381  }
1382 }
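The closing comment of btvacuumpage() above describes a classic space-saving rewrite: a tail call that would otherwise carry a large stack frame (here the deletable[] array of MaxOffsetNumber entries) is replaced by reassigning the argument and jumping back to the top, so every "recursion level" reuses the same frame. The following is a minimal, self-contained C sketch of that pattern; all names in it (visit_block, find_earlier_block, NO_BLOCK) are illustrative inventions, not identifiers from nbtree.c.

#include <stdio.h>

#define NO_BLOCK 0u                     /* plays the role of P_NONE here */

/*
 * Hypothetical stand-in for the split detection in btvacuumpage();
 * pretend that visiting block 5 discovers tuples moved to block 3.
 */
static unsigned
find_earlier_block(unsigned blkno)
{
    return (blkno == 5) ? 3u : NO_BLOCK;
}

static void
visit_block(unsigned blkno)
{
restart:
    {
        /*
         * Large per-visit state; in btvacuumpage() this is the
         * deletable[] array, which is why a genuine recursive call
         * would be costly in stack space.
         */
        unsigned    deletable[512];
        unsigned    recurse_to;

        (void) deletable;               /* page processing would fill this */
        printf("visiting block %u\n", blkno);

        recurse_to = find_earlier_block(blkno);

        /* tail recursion hand-optimized into a loop, as in the source */
        if (recurse_to != NO_BLOCK)
        {
            blkno = recurse_to;         /* "recurse" without a new frame */
            goto restart;
        }
    }
}

int
main(void)
{
    visit_block(5);                     /* visits block 5, then block 3 */
    return 0;
}

Because the goto re-enters the same block, the compiler allocates deletable[] once per call to visit_block() rather than once per level, which is exactly the effect the nbtree.c comment is after.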
static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 999 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_relbuf(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate(), BTREE_METAPAGE, btvacuumpage(), buf, callback(), BTVacState::callback, BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBufferForCleanup(), LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), NULL, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, RBM_NORMAL, ReadBufferExtended(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, BTVacState::stats, IndexVacuumInfo::strategy, BTVacState::totFreePages, UnlockRelationForExtension(), and XLogStandbyInfoActive.

Referenced by btbulkdelete(), and btvacuumcleanup().

999 btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
1000              IndexBulkDeleteCallback callback, void *callback_state,
1001              BTCycleId cycleid)
1002 {
1003  Relation rel = info->index;
1004  BTVacState vstate;
1005  BlockNumber num_pages;
1006  BlockNumber blkno;
1007  bool needLock;
1008 
1009  /*
1010  * Reset counts that will be incremented during the scan; needed in case
1011  * of multiple scans during a single VACUUM command
1012  */
1013  stats->estimated_count = false;
1014  stats->num_index_tuples = 0;
1015  stats->pages_deleted = 0;
1016 
1017  /* Set up info to pass down to btvacuumpage */
1018  vstate.info = info;
1019  vstate.stats = stats;
1020  vstate.callback = callback;
1021  vstate.callback_state = callback_state;
1022  vstate.cycleid = cycleid;
1023  vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
1024  vstate.lastBlockLocked = BTREE_METAPAGE;
1025  vstate.totFreePages = 0;
1026 
1027  /* Create a temporary memory context to run _bt_pagedel in */
1028  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
1029  "_bt_pagedel",
1030  ALLOCSET_DEFAULT_SIZES);
1031 
1032  /*
1033  * The outer loop iterates over all index pages except the metapage, in
1034  * physical order (we hope the kernel will cooperate in providing
1035  * read-ahead for speed). It is critical that we visit all leaf pages,
1036  * including ones added after we start the scan, else we might fail to
1037  * delete some deletable tuples. Hence, we must repeatedly check the
1038  * relation length. We must acquire the relation-extension lock while
1039  * doing so to avoid a race condition: if someone else is extending the
1040  * relation, there is a window where bufmgr/smgr have created a new
1041  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1042  * we manage to scan such a page here, we'll improperly assume it can be
1043  * recycled. Taking the lock synchronizes things enough to prevent a
1044  * problem: either num_pages won't include the new page, or _bt_getbuf
1045  * already has write lock on the buffer and it will be fully initialized
1046  * before we can examine it. (See also vacuumlazy.c, which has the same
1047  * issue.) Also, we need not worry if a page is added immediately after
1048  * we look; the page splitting code already has write-lock on the left
1049  * page before it adds a right page, so we must already have processed any
1050  * tuples due to be moved into such a page.
1051  *
1052  * We can skip locking for new or temp relations, however, since no one
1053  * else could be accessing them.
1054  */
1055  needLock = !RELATION_IS_LOCAL(rel);
1056 
1057  blkno = BTREE_METAPAGE + 1;
1058  for (;;)
1059  {
1060  /* Get the current relation length */
1061  if (needLock)
1062  LockRelationForExtension(rel, ExclusiveLock);
1063  num_pages = RelationGetNumberOfBlocks(rel);
1064  if (needLock)
1065  UnlockRelationForExtension(rel, ExclusiveLock);
1066 
1067  /* Quit if we've scanned the whole relation */
1068  if (blkno >= num_pages)
1069  break;
1070  /* Iterate over pages, then loop back to recheck length */
1071  for (; blkno < num_pages; blkno++)
1072  {
1073  btvacuumpage(&vstate, blkno, blkno);
1074  }
1075  }
1076 
1077  /*
1078  * Check to see if we need to issue one final WAL record for this index,
1079  * which may be needed for correctness on a hot standby node when non-MVCC
1080  * index scans could take place.
1081  *
1082  * If the WAL is replayed in hot standby, the replay process needs to get
1083  * cleanup locks on all index leaf pages, just as we've been doing here.
1084  * However, we won't issue any WAL records about pages that have no items
1085  * to be deleted. For pages between pages we've vacuumed, the replay code
1086  * will take locks under the direction of the lastBlockVacuumed fields in
1087  * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
1088  * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
1089  * against the last leaf page in the index, if that one wasn't vacuumed.
1090  */
1091  if (XLogStandbyInfoActive() &&
1092  vstate.lastBlockVacuumed < vstate.lastBlockLocked)
1093  {
1094  Buffer buf;
1095 
1096  /*
1097  * The page should be valid, but we can't use _bt_getbuf() because we
1098  * want to use a nondefault buffer access strategy. Since we aren't
1099  * going to delete any items, getting cleanup lock again is probably
1100  * overkill, but for consistency do that anyway.
1101  */
1102  buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
1103  RBM_NORMAL, info->strategy);
1104  LockBufferForCleanup(buf);
1105  _bt_checkpage(rel, buf);
1106  _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
1107  _bt_relbuf(rel, buf);
1108  }
1109 
1110  MemoryContextDelete(vstate.pagedelcontext);
1111 
1112  /* update statistics */
1113  stats->num_pages = num_pages;
1114  stats->pages_free = vstate.totFreePages;
1115 }
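As noted above, btvacuumscan() is reached only through btbulkdelete() and btvacuumcleanup(). The following condensed sketch shows the btbulkdelete() side of that hand-off. It is a simplification, not the verbatim source: in particular, the real caller wraps the scan in PG_ENSURE_ERROR_CLEANUP so the vacuum cycle ID is released even if the scan errors out, which is omitted here.

#include "postgres.h"
#include "access/nbtree.h"

IndexBulkDeleteResult *
btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
             IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    BTCycleId   cycleid;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /*
     * Establish the vacuum cycle ID to use for this scan, run the scan,
     * then release the cycle ID.  (Error cleanup omitted in this sketch.)
     */
    cycleid = _bt_start_vacuum(rel);
    btvacuumscan(info, stats, callback, callback_state, cycleid);
    _bt_end_vacuum(rel);

    return stats;
}

The cycle ID obtained from _bt_start_vacuum() is the value that btvacuumpage() compares against each leaf page's btpo_cycleid to detect splits that happened after the scan started.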