PostgreSQL Source Code  git master
nbtsearch.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/predicate.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/tqual.h"
Include dependency graph for nbtsearch.c:

Go to the source code of this file.

Functions

static bool _bt_readpage (IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 
static void _bt_saveitem (BTScanOpaque so, int itemIndex, OffsetNumber offnum, IndexTuple itup)
 
static bool _bt_steppage (IndexScanDesc scan, ScanDirection dir)
 
static bool _bt_readnextpage (IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
 
static bool _bt_parallel_readpage (IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
 
static Buffer _bt_walk_left (Relation rel, Buffer buf, Snapshot snapshot)
 
static bool _bt_endpoint (IndexScanDesc scan, ScanDirection dir)
 
static void _bt_drop_lock_and_maybe_pin (IndexScanDesc scan, BTScanPos sp)
 
static void _bt_initialize_more_data (BTScanOpaque so, ScanDirection dir)
 
BTStack _bt_search (Relation rel, int keysz, ScanKey scankey, bool nextkey, Buffer *bufP, int access, Snapshot snapshot)
 
Buffer _bt_moveright (Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey, bool forupdate, BTStack stack, int access, Snapshot snapshot)
 
OffsetNumber _bt_binsrch (Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
 
int32 _bt_compare (Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
 
bool _bt_first (IndexScanDesc scan, ScanDirection dir)
 
bool _bt_next (IndexScanDesc scan, ScanDirection dir)
 
Buffer _bt_get_endpoint (Relation rel, uint32 level, bool rightmost, Snapshot snapshot)
 

Function Documentation

◆ _bt_binsrch()

OffsetNumber _bt_binsrch ( Relation  rel,
Buffer  buf,
int  keysz,
ScanKey  scankey,
bool  nextkey 
)

Definition at line 353 of file nbtsearch.c.

References _bt_compare(), Assert, BufferGetPage, OffsetNumberPrev, P_FIRSTDATAKEY, P_ISLEAF, PageGetMaxOffsetNumber, and PageGetSpecialPointer.

Referenced by _bt_doinsert(), _bt_findinsertloc(), _bt_first(), and _bt_search().

358 {
359  Page page;
360  BTPageOpaque opaque;
361  OffsetNumber low,
362  high;
363  int32 result,
364  cmpval;
365 
366  page = BufferGetPage(buf);
367  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
368 
369  low = P_FIRSTDATAKEY(opaque);
370  high = PageGetMaxOffsetNumber(page);
371 
372  /*
373  * If there are no keys on the page, return the first available slot. Note
374  * this covers two cases: the page is really empty (no keys), or it
375  * contains only a high key. The latter case is possible after vacuuming.
376  * This can never happen on an internal page, however, since they are
377  * never empty (an internal page must have children).
378  */
379  if (high < low)
380  return low;
381 
382  /*
383  * Binary search to find the first key on the page >= scan key, or first
384  * key > scankey when nextkey is true.
385  *
386  * For nextkey=false (cmpval=1), the loop invariant is: all slots before
387  * 'low' are < scan key, all slots at or after 'high' are >= scan key.
388  *
389  * For nextkey=true (cmpval=0), the loop invariant is: all slots before
390  * 'low' are <= scan key, all slots at or after 'high' are > scan key.
391  *
392  * We can fall out when high == low.
393  */
394  high++; /* establish the loop invariant for high */
395 
396  cmpval = nextkey ? 0 : 1; /* select comparison value */
397 
398  while (high > low)
399  {
400  OffsetNumber mid = low + ((high - low) / 2);
401 
402  /* We have low <= mid < high, so mid points at a real slot */
403 
404  result = _bt_compare(rel, keysz, scankey, page, mid);
405 
406  if (result >= cmpval)
407  low = mid + 1;
408  else
409  high = mid;
410  }
411 
412  /*
413  * At this point we have high == low, but be careful: they could point
414  * past the last slot on the page.
415  *
416  * On a leaf page, we always return the first key >= scan key (resp. >
417  * scan key), which could be the last slot + 1.
418  */
419  if (P_ISLEAF(opaque))
420  return low;
421 
422  /*
423  * On a non-leaf page, return the last key < scan key (resp. <= scan key).
424  * There must be one if _bt_compare() is playing by the rules.
425  */
426  Assert(low > P_FIRSTDATAKEY(opaque));
427 
428  return OffsetNumberPrev(low);
429 }
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:346
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define Assert(condition)
Definition: c.h:732
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
Definition: nbtsearch.c:458
Pointer Page
Definition: bufpage.h:74
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_compare()

int32 _bt_compare ( Relation  rel,
int  keysz,
ScanKey  scankey,
Page  page,
OffsetNumber  offnum 
)

Definition at line 458 of file nbtsearch.c.

References _bt_check_natts(), Assert, DatumGetInt32, FunctionCall2Coll(), i, index_getattr, INVERT_COMPARE_RESULT, P_FIRSTDATAKEY, P_ISLEAF, PageGetItem, PageGetItemId, PageGetSpecialPointer, RelationGetDescr, ScanKeyData::sk_argument, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, ScanKeyData::sk_func, and SK_ISNULL.

Referenced by _bt_binsrch(), _bt_doinsert(), _bt_findinsertloc(), _bt_moveright(), invariant_geq_offset(), invariant_leq_nontarget_offset(), and invariant_leq_offset().

463 {
464  TupleDesc itupdesc = RelationGetDescr(rel);
466  IndexTuple itup;
467  int i;
468 
469  Assert(_bt_check_natts(rel, page, offnum));
470 
471  /*
472  * Force result ">" if target item is first data item on an internal page
473  * --- see NOTE above.
474  */
475  if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
476  return 1;
477 
478  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
479 
480  /*
481  * The scan key is set up with the attribute number associated with each
482  * term in the key. It is important that, if the index is multi-key, the
483  * scan contain the first k key attributes, and that they be in order. If
484  * you think about how multi-key ordering works, you'll understand why
485  * this is.
486  *
487  * We don't test for violation of this condition here, however. The
488  * initial setup for the index scan had better have gotten it right (see
489  * _bt_first).
490  */
491 
492  for (i = 1; i <= keysz; i++)
493  {
494  Datum datum;
495  bool isNull;
496  int32 result;
497 
498  datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull);
499 
500  /* see comments about NULLs handling in btbuild */
501  if (scankey->sk_flags & SK_ISNULL) /* key is NULL */
502  {
503  if (isNull)
504  result = 0; /* NULL "=" NULL */
505  else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
506  result = -1; /* NULL "<" NOT_NULL */
507  else
508  result = 1; /* NULL ">" NOT_NULL */
509  }
510  else if (isNull) /* key is NOT_NULL and item is NULL */
511  {
512  if (scankey->sk_flags & SK_BT_NULLS_FIRST)
513  result = 1; /* NOT_NULL ">" NULL */
514  else
515  result = -1; /* NOT_NULL "<" NULL */
516  }
517  else
518  {
519  /*
520  * The sk_func needs to be passed the index value as left arg and
521  * the sk_argument as right arg (they might be of different
522  * types). Since it is convenient for callers to think of
523  * _bt_compare as comparing the scankey to the index item, we have
524  * to flip the sign of the comparison result. (Unless it's a DESC
525  * column, in which case we *don't* flip the sign.)
526  */
527  result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
528  scankey->sk_collation,
529  datum,
530  scankey->sk_argument));
531 
532  if (!(scankey->sk_flags & SK_BT_DESC))
533  INVERT_COMPARE_RESULT(result);
534  }
535 
536  /* if the keys are unequal, return the difference */
537  if (result != 0)
538  return result;
539 
540  scankey++;
541  }
542 
543  /* if we get here, the keys are equal */
544  return 0;
545 }
#define DatumGetInt32(X)
Definition: postgres.h:457
#define RelationGetDescr(relation)
Definition: rel.h:433
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum)
Definition: nbtutils.c:2134
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1133
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:346
IndexTupleData * IndexTuple
Definition: itup.h:53
FmgrInfo sk_func
Definition: skey.h:71
#define SK_ISNULL
Definition: skey.h:115
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:489
uintptr_t Datum
Definition: postgres.h:367
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:732
#define SK_BT_DESC
Definition: nbtree.h:488
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
Oid sk_collation
Definition: skey.h:70
int i
Datum sk_argument
Definition: skey.h:72
#define INVERT_COMPARE_RESULT(var)
Definition: c.h:1030
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
AttrNumber sk_attno
Definition: skey.h:67
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_drop_lock_and_maybe_pin()

static void _bt_drop_lock_and_maybe_pin ( IndexScanDesc  scan,
BTScanPos  sp 
)
static

Definition at line 57 of file nbtsearch.c.

References BTScanPosData::buf, BUFFER_LOCK_UNLOCK, IndexScanDescData::indexRelation, InvalidBuffer, IsMVCCSnapshot, LockBuffer(), RelationNeedsWAL, ReleaseBuffer(), IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_readpage(), and _bt_steppage().

58 {
60 
61  if (IsMVCCSnapshot(scan->xs_snapshot) &&
63  !scan->xs_want_itup)
64  {
65  ReleaseBuffer(sp->buf);
66  sp->buf = InvalidBuffer;
67  }
68 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
Snapshot xs_snapshot
Definition: relscan.h:92
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3315
Relation indexRelation
Definition: relscan.h:91
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
bool xs_want_itup
Definition: relscan.h:97
#define IsMVCCSnapshot(snapshot)
Definition: tqual.h:31
#define RelationNeedsWAL(relation)
Definition: rel.h:510
Buffer buf
Definition: nbtree.h:357

◆ _bt_endpoint()

static bool _bt_endpoint ( IndexScanDesc  scan,
ScanDirection  dir 
)
static

Definition at line 1890 of file nbtsearch.c.

References _bt_drop_lock_and_maybe_pin(), _bt_get_endpoint(), _bt_initialize_more_data(), _bt_readpage(), _bt_steppage(), Assert, BTScanPosInvalidate, buf, BTScanPosData::buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, elog, ERROR, IndexScanDescData::indexRelation, BTScanPosData::itemIndex, BTScanPosData::items, LockBuffer(), IndexScanDescData::opaque, P_FIRSTDATAKEY, P_ISLEAF, P_RIGHTMOST, PageGetMaxOffsetNumber, PageGetSpecialPointer, PredicateLockPage(), PredicateLockRelation(), ScanDirectionIsBackward, ScanDirectionIsForward, HeapTupleData::t_self, IndexScanDescData::xs_ctup, IndexScanDescData::xs_itup, IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by _bt_first().

1891 {
1892  Relation rel = scan->indexRelation;
1893  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1894  Buffer buf;
1895  Page page;
1896  BTPageOpaque opaque;
1897  OffsetNumber start;
1898  BTScanPosItem *currItem;
1899 
1900  /*
1901  * Scan down to the leftmost or rightmost leaf page. This is a simplified
1902  * version of _bt_search(). We don't maintain a stack since we know we
1903  * won't need it.
1904  */
1906 
1907  if (!BufferIsValid(buf))
1908  {
1909  /*
1910  * Empty index. Lock the whole relation, as nothing finer to lock
1911  * exists.
1912  */
1913  PredicateLockRelation(rel, scan->xs_snapshot);
1915  return false;
1916  }
1917 
1919  page = BufferGetPage(buf);
1920  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1921  Assert(P_ISLEAF(opaque));
1922 
1923  if (ScanDirectionIsForward(dir))
1924  {
1925  /* There could be dead pages to the left, so not this: */
1926  /* Assert(P_LEFTMOST(opaque)); */
1927 
1928  start = P_FIRSTDATAKEY(opaque);
1929  }
1930  else if (ScanDirectionIsBackward(dir))
1931  {
1932  Assert(P_RIGHTMOST(opaque));
1933 
1934  start = PageGetMaxOffsetNumber(page);
1935  }
1936  else
1937  {
1938  elog(ERROR, "invalid scan direction: %d", (int) dir);
1939  start = 0; /* keep compiler quiet */
1940  }
1941 
1942  /* remember which buffer we have pinned */
1943  so->currPos.buf = buf;
1944 
1945  _bt_initialize_more_data(so, dir);
1946 
1947  /*
1948  * Now load data from the first page of the scan.
1949  */
1950  if (!_bt_readpage(scan, dir, start))
1951  {
1952  /*
1953  * There's no actually-matching data on this page. Try to advance to
1954  * the next page. Return false if there's no matching data at all.
1955  */
1957  if (!_bt_steppage(scan, dir))
1958  return false;
1959  }
1960  else
1961  {
1962  /* Drop the lock, and maybe the pin, on the current page */
1964  }
1965 
1966  /* OK, itemIndex says what to return */
1967  currItem = &so->currPos.items[so->currPos.itemIndex];
1968  scan->xs_ctup.t_self = currItem->heapTid;
1969  if (scan->xs_want_itup)
1970  scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
1971 
1972  return true;
1973 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2475
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple xs_itup
Definition: relscan.h:115
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:389
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
Definition: nbtsearch.c:57
int itemIndex
Definition: nbtree.h:387
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2452
char * currTuples
Definition: nbtree.h:461
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
Snapshot xs_snapshot
Definition: relscan.h:92
Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, Snapshot snapshot)
Definition: nbtsearch.c:1808
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
Relation indexRelation
Definition: relscan.h:91
uint16 OffsetNumber
Definition: off.h:24
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
#define ERROR
Definition: elog.h:43
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
ItemPointerData t_self
Definition: htup.h:65
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
static void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
Definition: nbtsearch.c:1980
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
bool xs_want_itup
Definition: relscan.h:97
#define Assert(condition)
Definition: c.h:732
HeapTupleData xs_ctup
Definition: relscan.h:121
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:417
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
BTScanPosData currPos
Definition: nbtree.h:474
#define elog(elevel,...)
Definition: elog.h:226
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1374
Buffer buf
Definition: nbtree.h:357
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
Definition: nbtsearch.c:1216
Pointer Page
Definition: bufpage.h:74
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_first()

bool _bt_first ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 568 of file nbtsearch.c.

References _bt_binsrch(), _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_freestack(), _bt_initialize_more_data(), _bt_parallel_done(), _bt_parallel_readpage(), _bt_parallel_seize(), _bt_preprocess_keys(), _bt_readpage(), _bt_search(), _bt_steppage(), Assert, BT_READ, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, BTORDER_PROC, BTScanPosInvalidate, BTScanPosIsValid, buf, BTScanPosData::buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferIsValid, cur, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, DatumGetPointer, elog, ERROR, get_opfamily_proc(), i, index_getprocinfo(), INDEX_MAX_KEYS, IndexScanDescData::indexRelation, InvalidBlockNumber, InvalidOid, InvalidStrategy, BTScanPosData::itemIndex, BTScanPosData::items, BTScanOpaqueData::keyData, LockBuffer(), BTScanOpaqueData::numberOfKeys, OffsetNumberPrev, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, pgstat_count_index_scan, PredicateLockPage(), PredicateLockRelation(), BTScanOpaqueData::qual_ok, RelationData::rd_opcintype, RelationData::rd_opfamily, RegProcedureIsValid, RelationGetRelationName, ScanDirectionIsBackward, ScanDirectionIsForward, ScanKeyEntryInitialize(), ScanKeyEntryInitializeWithInfo(), ScanKeyData::sk_argument, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, SK_ISNULL, SK_ROW_END, SK_ROW_HEADER, SK_ROW_MEMBER, SK_SEARCHNOTNULL, ScanKeyData::sk_strategy, ScanKeyData::sk_subtype, status(), HeapTupleData::t_self, IndexScanDescData::xs_ctup, IndexScanDescData::xs_itup, IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by btgetbitmap(), and btgettuple().

569 {
570  Relation rel = scan->indexRelation;
571  BTScanOpaque so = (BTScanOpaque) scan->opaque;
572  Buffer buf;
573  BTStack stack;
574  OffsetNumber offnum;
575  StrategyNumber strat;
576  bool nextkey;
577  bool goback;
578  ScanKey startKeys[INDEX_MAX_KEYS];
579  ScanKeyData scankeys[INDEX_MAX_KEYS];
580  ScanKeyData notnullkeys[INDEX_MAX_KEYS];
581  int keysCount = 0;
582  int i;
583  bool status = true;
584  StrategyNumber strat_total;
585  BTScanPosItem *currItem;
586  BlockNumber blkno;
587 
589 
591 
592  /*
593  * Examine the scan keys and eliminate any redundant keys; also mark the
594  * keys that must be matched to continue the scan.
595  */
596  _bt_preprocess_keys(scan);
597 
598  /*
599  * Quit now if _bt_preprocess_keys() discovered that the scan keys can
600  * never be satisfied (eg, x == 1 AND x > 2).
601  */
602  if (!so->qual_ok)
603  return false;
604 
605  /*
606  * For parallel scans, get the starting page from shared state. If the
607  * scan has not started, proceed to find out first leaf page in the usual
608  * way while keeping other participating processes waiting. If the scan
609  * has already begun, use the page number from the shared structure.
610  */
611  if (scan->parallel_scan != NULL)
612  {
613  status = _bt_parallel_seize(scan, &blkno);
614  if (!status)
615  return false;
616  else if (blkno == P_NONE)
617  {
618  _bt_parallel_done(scan);
619  return false;
620  }
621  else if (blkno != InvalidBlockNumber)
622  {
623  if (!_bt_parallel_readpage(scan, blkno, dir))
624  return false;
625  goto readcomplete;
626  }
627  }
628 
629  /*----------
630  * Examine the scan keys to discover where we need to start the scan.
631  *
632  * We want to identify the keys that can be used as starting boundaries;
633  * these are =, >, or >= keys for a forward scan or =, <, <= keys for
634  * a backwards scan. We can use keys for multiple attributes so long as
635  * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
636  * a > or < boundary or find an attribute with no boundary (which can be
637  * thought of as the same as "> -infinity"), we can't use keys for any
638  * attributes to its right, because it would break our simplistic notion
639  * of what initial positioning strategy to use.
640  *
641  * When the scan keys include cross-type operators, _bt_preprocess_keys
642  * may not be able to eliminate redundant keys; in such cases we will
643  * arbitrarily pick a usable one for each attribute. This is correct
644  * but possibly not optimal behavior. (For example, with keys like
645  * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
646  * x=5 would be more efficient.) Since the situation only arises given
647  * a poorly-worded query plus an incomplete opfamily, live with it.
648  *
649  * When both equality and inequality keys appear for a single attribute
650  * (again, only possible when cross-type operators appear), we *must*
651  * select one of the equality keys for the starting point, because
652  * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
653  * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
654  * start at x=4, we will fail and stop before reaching x=10. If multiple
655  * equality quals survive preprocessing, however, it doesn't matter which
656  * one we use --- by definition, they are either redundant or
657  * contradictory.
658  *
659  * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier.
660  * If the index stores nulls at the end of the index we'll be starting
661  * from, and we have no boundary key for the column (which means the key
662  * we deduced NOT NULL from is an inequality key that constrains the other
663  * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
664  * use as a boundary key. If we didn't do this, we might find ourselves
665  * traversing a lot of null entries at the start of the scan.
666  *
667  * In this loop, row-comparison keys are treated the same as keys on their
668  * first (leftmost) columns. We'll add on lower-order columns of the row
669  * comparison below, if possible.
670  *
671  * The selected scan keys (at most one per index column) are remembered by
672  * storing their addresses into the local startKeys[] array.
673  *----------
674  */
675  strat_total = BTEqualStrategyNumber;
676  if (so->numberOfKeys > 0)
677  {
678  AttrNumber curattr;
679  ScanKey chosen;
680  ScanKey impliesNN;
681  ScanKey cur;
682 
683  /*
684  * chosen is the so-far-chosen key for the current attribute, if any.
685  * We don't cast the decision in stone until we reach keys for the
686  * next attribute.
687  */
688  curattr = 1;
689  chosen = NULL;
690  /* Also remember any scankey that implies a NOT NULL constraint */
691  impliesNN = NULL;
692 
693  /*
694  * Loop iterates from 0 to numberOfKeys inclusive; we use the last
695  * pass to handle after-last-key processing. Actual exit from the
696  * loop is at one of the "break" statements below.
697  */
698  for (cur = so->keyData, i = 0;; cur++, i++)
699  {
700  if (i >= so->numberOfKeys || cur->sk_attno != curattr)
701  {
702  /*
703  * Done looking at keys for curattr. If we didn't find a
704  * usable boundary key, see if we can deduce a NOT NULL key.
705  */
706  if (chosen == NULL && impliesNN != NULL &&
707  ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
710  {
711  /* Yes, so build the key in notnullkeys[keysCount] */
712  chosen = &notnullkeys[keysCount];
713  ScanKeyEntryInitialize(chosen,
715  (impliesNN->sk_flags &
717  curattr,
718  ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
721  InvalidOid,
722  InvalidOid,
723  InvalidOid,
724  (Datum) 0);
725  }
726 
727  /*
728  * If we still didn't find a usable boundary key, quit; else
729  * save the boundary key pointer in startKeys.
730  */
731  if (chosen == NULL)
732  break;
733  startKeys[keysCount++] = chosen;
734 
735  /*
736  * Adjust strat_total, and quit if we have stored a > or <
737  * key.
738  */
739  strat = chosen->sk_strategy;
740  if (strat != BTEqualStrategyNumber)
741  {
742  strat_total = strat;
743  if (strat == BTGreaterStrategyNumber ||
744  strat == BTLessStrategyNumber)
745  break;
746  }
747 
748  /*
749  * Done if that was the last attribute, or if next key is not
750  * in sequence (implying no boundary key is available for the
751  * next attribute).
752  */
753  if (i >= so->numberOfKeys ||
754  cur->sk_attno != curattr + 1)
755  break;
756 
757  /*
758  * Reset for next attr.
759  */
760  curattr = cur->sk_attno;
761  chosen = NULL;
762  impliesNN = NULL;
763  }
764 
765  /*
766  * Can we use this key as a starting boundary for this attr?
767  *
768  * If not, does it imply a NOT NULL constraint? (Because
769  * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
770  * *any* inequality key works for that; we need not test.)
771  */
772  switch (cur->sk_strategy)
773  {
776  if (chosen == NULL)
777  {
778  if (ScanDirectionIsBackward(dir))
779  chosen = cur;
780  else
781  impliesNN = cur;
782  }
783  break;
785  /* override any non-equality choice */
786  chosen = cur;
787  break;
790  if (chosen == NULL)
791  {
792  if (ScanDirectionIsForward(dir))
793  chosen = cur;
794  else
795  impliesNN = cur;
796  }
797  break;
798  }
799  }
800  }
801 
802  /*
803  * If we found no usable boundary keys, we have to start from one end of
804  * the tree. Walk down that edge to the first or last key, and scan from
805  * there.
806  */
807  if (keysCount == 0)
808  {
809  bool match;
810 
811  match = _bt_endpoint(scan, dir);
812 
813  if (!match)
814  {
815  /* No match, so mark (parallel) scan finished */
816  _bt_parallel_done(scan);
817  }
818 
819  return match;
820  }
821 
822  /*
823  * We want to start the scan somewhere within the index. Set up an
824  * insertion scankey we can use to search for the boundary point we
825  * identified above. The insertion scankey is built in the local
826  * scankeys[] array, using the keys identified by startKeys[].
827  */
828  Assert(keysCount <= INDEX_MAX_KEYS);
829  for (i = 0; i < keysCount; i++)
830  {
831  ScanKey cur = startKeys[i];
832 
833  Assert(cur->sk_attno == i + 1);
834 
835  if (cur->sk_flags & SK_ROW_HEADER)
836  {
837  /*
838  * Row comparison header: look to the first row member instead.
839  *
840  * The member scankeys are already in insertion format (ie, they
841  * have sk_func = 3-way-comparison function), but we have to watch
842  * out for nulls, which _bt_preprocess_keys didn't check. A null
843  * in the first row member makes the condition unmatchable, just
844  * like qual_ok = false.
845  */
846  ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
847 
848  Assert(subkey->sk_flags & SK_ROW_MEMBER);
849  if (subkey->sk_flags & SK_ISNULL)
850  {
851  _bt_parallel_done(scan);
852  return false;
853  }
854  memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
855 
856  /*
857  * If the row comparison is the last positioning key we accepted,
858  * try to add additional keys from the lower-order row members.
859  * (If we accepted independent conditions on additional index
860  * columns, we use those instead --- doesn't seem worth trying to
861  * determine which is more restrictive.) Note that this is OK
862  * even if the row comparison is of ">" or "<" type, because the
863  * condition applied to all but the last row member is effectively
864  * ">=" or "<=", and so the extra keys don't break the positioning
865  * scheme. But, by the same token, if we aren't able to use all
866  * the row members, then the part of the row comparison that we
867  * did use has to be treated as just a ">=" or "<=" condition, and
868  * so we'd better adjust strat_total accordingly.
869  */
870  if (i == keysCount - 1)
871  {
872  bool used_all_subkeys = false;
873 
874  Assert(!(subkey->sk_flags & SK_ROW_END));
875  for (;;)
876  {
877  subkey++;
878  Assert(subkey->sk_flags & SK_ROW_MEMBER);
879  if (subkey->sk_attno != keysCount + 1)
880  break; /* out-of-sequence, can't use it */
881  if (subkey->sk_strategy != cur->sk_strategy)
882  break; /* wrong direction, can't use it */
883  if (subkey->sk_flags & SK_ISNULL)
884  break; /* can't use null keys */
885  Assert(keysCount < INDEX_MAX_KEYS);
886  memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
887  keysCount++;
888  if (subkey->sk_flags & SK_ROW_END)
889  {
890  used_all_subkeys = true;
891  break;
892  }
893  }
894  if (!used_all_subkeys)
895  {
896  switch (strat_total)
897  {
899  strat_total = BTLessEqualStrategyNumber;
900  break;
902  strat_total = BTGreaterEqualStrategyNumber;
903  break;
904  }
905  }
906  break; /* done with outer loop */
907  }
908  }
909  else
910  {
911  /*
912  * Ordinary comparison key. Transform the search-style scan key
913  * to an insertion scan key by replacing the sk_func with the
914  * appropriate btree comparison function.
915  *
916  * If scankey operator is not a cross-type comparison, we can use
917  * the cached comparison function; otherwise gotta look it up in
918  * the catalogs. (That can't lead to infinite recursion, since no
919  * indexscan initiated by syscache lookup will use cross-data-type
920  * operators.)
921  *
922  * We support the convention that sk_subtype == InvalidOid means
923  * the opclass input type; this is a hack to simplify life for
924  * ScanKeyInit().
925  */
926  if (cur->sk_subtype == rel->rd_opcintype[i] ||
927  cur->sk_subtype == InvalidOid)
928  {
929  FmgrInfo *procinfo;
930 
931  procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
933  cur->sk_flags,
934  cur->sk_attno,
936  cur->sk_subtype,
937  cur->sk_collation,
938  procinfo,
939  cur->sk_argument);
940  }
941  else
942  {
943  RegProcedure cmp_proc;
944 
945  cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
946  rel->rd_opcintype[i],
947  cur->sk_subtype,
948  BTORDER_PROC);
949  if (!RegProcedureIsValid(cmp_proc))
950  elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
951  BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
952  cur->sk_attno, RelationGetRelationName(rel));
953  ScanKeyEntryInitialize(scankeys + i,
954  cur->sk_flags,
955  cur->sk_attno,
957  cur->sk_subtype,
958  cur->sk_collation,
959  cmp_proc,
960  cur->sk_argument);
961  }
962  }
963  }
964 
965  /*----------
966  * Examine the selected initial-positioning strategy to determine exactly
967  * where we need to start the scan, and set flag variables to control the
968  * code below.
969  *
970  * If nextkey = false, _bt_search and _bt_binsrch will locate the first
971  * item >= scan key. If nextkey = true, they will locate the first
972  * item > scan key.
973  *
974  * If goback = true, we will then step back one item, while if
975  * goback = false, we will start the scan on the located item.
976  *----------
977  */
978  switch (strat_total)
979  {
981 
982  /*
983  * Find first item >= scankey, then back up one to arrive at last
984  * item < scankey. (Note: this positioning strategy is only used
985  * for a backward scan, so that is always the correct starting
986  * position.)
987  */
988  nextkey = false;
989  goback = true;
990  break;
991 
993 
994  /*
995  * Find first item > scankey, then back up one to arrive at last
996  * item <= scankey. (Note: this positioning strategy is only used
997  * for a backward scan, so that is always the correct starting
998  * position.)
999  */
1000  nextkey = true;
1001  goback = true;
1002  break;
1003 
1004  case BTEqualStrategyNumber:
1005 
1006  /*
1007  * If a backward scan was specified, need to start with last equal
1008  * item not first one.
1009  */
1010  if (ScanDirectionIsBackward(dir))
1011  {
1012  /*
1013  * This is the same as the <= strategy. We will check at the
1014  * end whether the found item is actually =.
1015  */
1016  nextkey = true;
1017  goback = true;
1018  }
1019  else
1020  {
1021  /*
1022  * This is the same as the >= strategy. We will check at the
1023  * end whether the found item is actually =.
1024  */
1025  nextkey = false;
1026  goback = false;
1027  }
1028  break;
1029 
1031 
1032  /*
1033  * Find first item >= scankey. (This is only used for forward
1034  * scans.)
1035  */
1036  nextkey = false;
1037  goback = false;
1038  break;
1039 
1041 
1042  /*
1043  * Find first item > scankey. (This is only used for forward
1044  * scans.)
1045  */
1046  nextkey = true;
1047  goback = false;
1048  break;
1049 
1050  default:
1051  /* can't get here, but keep compiler quiet */
1052  elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
1053  return false;
1054  }
1055 
1056  /*
1057  * Use the manufactured insertion scan key to descend the tree and
1058  * position ourselves on the target leaf page.
1059  */
1060  stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ,
1061  scan->xs_snapshot);
1062 
1063  /* don't need to keep the stack around... */
1064  _bt_freestack(stack);
1065 
1066  if (!BufferIsValid(buf))
1067  {
1068  /*
1069  * We only get here if the index is completely empty. Lock relation
1070  * because nothing finer to lock exists.
1071  */
1072  PredicateLockRelation(rel, scan->xs_snapshot);
1073 
1074  /*
1075  * mark parallel scan as done, so that all the workers can finish
1076  * their scan
1077  */
1078  _bt_parallel_done(scan);
1080 
1081  return false;
1082  }
1083  else
1085  scan->xs_snapshot);
1086 
1087  _bt_initialize_more_data(so, dir);
1088 
1089  /* position to the precise item on the page */
1090  offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
1091 
1092  /*
1093  * If nextkey = false, we are positioned at the first item >= scan key, or
1094  * possibly at the end of a page on which all the existing items are less
1095  * than the scan key and we know that everything on later pages is greater
1096  * than or equal to scan key.
1097  *
1098  * If nextkey = true, we are positioned at the first item > scan key, or
1099  * possibly at the end of a page on which all the existing items are less
1100  * than or equal to the scan key and we know that everything on later
1101  * pages is greater than scan key.
1102  *
1103  * The actually desired starting point is either this item or the prior
1104  * one, or in the end-of-page case it's the first item on the next page or
1105  * the last item on this page. Adjust the starting offset if needed. (If
1106  * this results in an offset before the first item or after the last one,
1107  * _bt_readpage will report no items found, and then we'll step to the
1108  * next page as needed.)
1109  */
1110  if (goback)
1111  offnum = OffsetNumberPrev(offnum);
1112 
1113  /* remember which buffer we have pinned, if any */
1115  so->currPos.buf = buf;
1116 
1117  /*
1118  * Now load data from the first page of the scan.
1119  */
1120  if (!_bt_readpage(scan, dir, offnum))
1121  {
1122  /*
1123  * There's no actually-matching data on this page. Try to advance to
1124  * the next page. Return false if there's no matching data at all.
1125  */
1127  if (!_bt_steppage(scan, dir))
1128  return false;
1129  }
1130  else
1131  {
1132  /* Drop the lock, and maybe the pin, on the current page */
1134  }
1135 
1136 readcomplete:
1137  /* OK, itemIndex says what to return */
1138  currItem = &so->currPos.items[so->currPos.itemIndex];
1139  scan->xs_ctup.t_self = currItem->heapTid;
1140  if (scan->xs_want_itup)
1141  scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
1142 
1143  return true;
1144 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
#define InvalidStrategy
Definition: stratnum.h:24
Oid sk_subtype
Definition: skey.h:69
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
Definition: fmgr.h:56
#define SK_ROW_MEMBER
Definition: skey.h:118
void _bt_freestack(BTStack stack)
Definition: nbtutils.c:175
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2475
#define BTORDER_PROC
Definition: nbtree.h:289
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:856
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple xs_itup
Definition: relscan.h:115
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:389
void ScanKeyEntryInitializeWithInfo(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, FmgrInfo *finfo, Datum argument)
Definition: scankey.c:101
void _bt_parallel_done(IndexScanDesc scan)
Definition: nbtree.c:720
static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
Definition: nbtsearch.c:1659
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
Definition: nbtsearch.c:57
int itemIndex
Definition: nbtree.h:387
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2452
char * currTuples
Definition: nbtree.h:461
regproc RegProcedure
Definition: c.h:505
#define P_NONE
Definition: nbtree.h:150
Snapshot xs_snapshot
Definition: relscan.h:92
struct cursor * cur
Definition: ecpg.c:28
uint16 StrategyNumber
Definition: stratnum.h:22
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:411
uint32 BlockNumber
Definition: block.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
Relation indexRelation
Definition: relscan.h:91
uint16 OffsetNumber
Definition: off.h:24
#define SK_ROW_END
Definition: skey.h:119
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
#define BT_READ
Definition: nbtree.h:299
#define ERROR
Definition: elog.h:43
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
StrategyNumber sk_strategy
Definition: skey.h:68
ItemPointerData t_self
Definition: htup.h:65
static char * buf
Definition: pg_test_fsync.c:67
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
ScanKeyData * ScanKey
Definition: skey.h:75
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1890
#define RegProcedureIsValid(p)
Definition: c.h:640
IndexTupleData * IndexTuple
Definition: itup.h:53
OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
Definition: nbtsearch.c:353
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:1293
#define SK_SEARCHNOTNULL
Definition: skey.h:122
bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
Definition: nbtree.c:639
Oid * rd_opfamily
Definition: rel.h:153
#define SK_ISNULL
Definition: skey.h:115
#define SK_ROW_HEADER
Definition: skey.h:117
void _bt_preprocess_keys(IndexScanDesc scan)
Definition: nbtutils.c:756
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:489
static void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
Definition: nbtsearch.c:1980
uintptr_t Datum
Definition: postgres.h:367
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
BTStack _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:97
#define InvalidOid
Definition: postgres_ext.h:36
bool xs_want_itup
Definition: relscan.h:97
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:732
#define SK_BT_DESC
Definition: nbtree.h:488
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:744
HeapTupleData xs_ctup
Definition: relscan.h:121
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:417
#define INDEX_MAX_KEYS
#define InvalidBlockNumber
Definition: block.h:33
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
int numberOfKeys
Definition: nbtree.h:440
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define DatumGetPointer(X)
Definition: postgres.h:534
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
BTScanPosData currPos
Definition: nbtree.h:474
ScanKey keyData
Definition: nbtree.h:441
Oid sk_collation
Definition: skey.h:70
#define elog(elevel,...)
Definition: elog.h:226
int i
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1374
Buffer buf
Definition: nbtree.h:357
Oid * rd_opcintype
Definition: rel.h:154
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
#define BTLessStrategyNumber
Definition: stratnum.h:29
int Buffer
Definition: buf.h:23
Datum sk_argument
Definition: skey.h:72
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
Definition: nbtsearch.c:1216
int16 AttrNumber
Definition: attnum.h:21
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
AttrNumber sk_attno
Definition: skey.h:67

◆ _bt_get_endpoint()

Buffer _bt_get_endpoint ( Relation  rel,
uint32  level,
bool  rightmost,
Snapshot  snapshot 
)

Definition at line 1808 of file nbtsearch.c.

References _bt_getroot(), _bt_gettrueroot(), _bt_relandgetbuf(), BT_READ, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTreeInnerTupleGetDownLink, buf, BufferGetPage, BufferIsValid, elog, ERROR, InvalidBuffer, BTPageOpaqueData::level, P_FIRSTDATAKEY, P_IGNORE, P_NONE, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, RelationGetRelationName, and TestForOldSnapshot().

Referenced by _bt_endpoint(), and _bt_insert_parent().

1810 {
1811  Buffer buf;
1812  Page page;
1813  BTPageOpaque opaque;
1814  OffsetNumber offnum;
1815  BlockNumber blkno;
1816  IndexTuple itup;
1817 
1818  /*
1819  * If we are looking for a leaf page, okay to descend from fast root;
1820  * otherwise better descend from true root. (There is no point in being
1821  * smarter about intermediate levels.)
1822  */
1823  if (level == 0)
1824  buf = _bt_getroot(rel, BT_READ);
1825  else
1826  buf = _bt_gettrueroot(rel);
1827 
1828  if (!BufferIsValid(buf))
1829  return InvalidBuffer;
1830 
1831  page = BufferGetPage(buf);
1832  TestForOldSnapshot(snapshot, rel, page);
1833  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1834 
1835  for (;;)
1836  {
1837  /*
1838  * If we landed on a deleted page, step right to find a live page
1839  * (there must be one). Also, if we want the rightmost page, step
1840  * right if needed to get to it (this could happen if the page split
1841  * since we obtained a pointer to it).
1842  */
1843  while (P_IGNORE(opaque) ||
1844  (rightmost && !P_RIGHTMOST(opaque)))
1845  {
1846  blkno = opaque->btpo_next;
1847  if (blkno == P_NONE)
1848  elog(ERROR, "fell off the end of index \"%s\"",
1850  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1851  page = BufferGetPage(buf);
1852  TestForOldSnapshot(snapshot, rel, page);
1853  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1854  }
1855 
1856  /* Done? */
1857  if (opaque->btpo.level == level)
1858  break;
1859  if (opaque->btpo.level < level)
1860  elog(ERROR, "btree level %u not found in index \"%s\"",
1861  level, RelationGetRelationName(rel));
1862 
1863  /* Descend to leftmost or rightmost child page */
1864  if (rightmost)
1865  offnum = PageGetMaxOffsetNumber(page);
1866  else
1867  offnum = P_FIRSTDATAKEY(opaque);
1868 
1869  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
1870  blkno = BTreeInnerTupleGetDownLink(itup);
1871 
1872  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1873  page = BufferGetPage(buf);
1874  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1875  }
1876 
1877  return buf;
1878 }
BlockNumber btpo_next
Definition: nbtree.h:58
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
#define P_IGNORE(opaque)
Definition: nbtree.h:163
#define BTreeInnerTupleGetDownLink(itup)
Definition: nbtree.h:224
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:865
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:150
#define InvalidBuffer
Definition: buf.h:25
uint32 BlockNumber
Definition: block.h:31
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:299
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define RelationGetRelationName(relation)
Definition: rel.h:441
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:252
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
uint32 level
Definition: nbtree.h:61
Buffer _bt_gettrueroot(Relation rel)
Definition: nbtpage.c:498
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define elog(elevel,...)
Definition: elog.h:226
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74

◆ _bt_initialize_more_data()

static void _bt_initialize_more_data ( BTScanOpaque  so,
ScanDirection  dir 
)
inlinestatic

Definition at line 1980 of file nbtsearch.c.

References BTScanOpaqueData::currPos, BTScanOpaqueData::markItemIndex, BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanOpaqueData::numKilled, and ScanDirectionIsForward.

Referenced by _bt_endpoint(), _bt_first(), and _bt_parallel_readpage().

1981 {
1982  /* initialize moreLeft/moreRight appropriately for scan direction */
1983  if (ScanDirectionIsForward(dir))
1984  {
1985  so->currPos.moreLeft = false;
1986  so->currPos.moreRight = true;
1987  }
1988  else
1989  {
1990  so->currPos.moreLeft = true;
1991  so->currPos.moreRight = false;
1992  }
1993  so->numKilled = 0; /* just paranoia */
1994  so->markItemIndex = -1; /* ditto */
1995 }
bool moreRight
Definition: nbtree.h:370
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
bool moreLeft
Definition: nbtree.h:369
int markItemIndex
Definition: nbtree.h:471
BTScanPosData currPos
Definition: nbtree.h:474

◆ _bt_moveright()

Buffer _bt_moveright ( Relation  rel,
Buffer  buf,
int  keysz,
ScanKey  scankey,
bool  nextkey,
bool  forupdate,
BTStack  stack,
int  access,
Snapshot  snapshot 
)

Definition at line 244 of file nbtsearch.c.

References _bt_compare(), _bt_finish_split(), _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BT_WRITE, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, elog, ERROR, LockBuffer(), P_HIKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetSpecialPointer, RelationGetRelationName, and TestForOldSnapshot().

Referenced by _bt_search().

253 {
254  Page page;
255  BTPageOpaque opaque;
256  int32 cmpval;
257 
258  /*
259  * When nextkey = false (normal case): if the scan key that brought us to
260  * this page is > the high key stored on the page, then the page has split
261  * and we need to move right. (If the scan key is equal to the high key,
262  * we might or might not need to move right; have to scan the page first
263  * anyway.)
264  *
265  * When nextkey = true: move right if the scan key is >= page's high key.
266  *
267  * The page could even have split more than once, so scan as far as
268  * needed.
269  *
270  * We also have to move right if we followed a link that brought us to a
271  * dead page.
272  */
273  cmpval = nextkey ? 0 : 1;
274 
275  for (;;)
276  {
277  page = BufferGetPage(buf);
278  TestForOldSnapshot(snapshot, rel, page);
279  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
280 
281  if (P_RIGHTMOST(opaque))
282  break;
283 
284  /*
285  * Finish any incomplete splits we encounter along the way.
286  */
287  if (forupdate && P_INCOMPLETE_SPLIT(opaque))
288  {
290 
291  /* upgrade our lock if necessary */
292  if (access == BT_READ)
293  {
296  }
297 
298  if (P_INCOMPLETE_SPLIT(opaque))
299  _bt_finish_split(rel, buf, stack);
300  else
301  _bt_relbuf(rel, buf);
302 
303  /* re-acquire the lock in the right mode, and re-check */
304  buf = _bt_getbuf(rel, blkno, access);
305  continue;
306  }
307 
308  if (P_IGNORE(opaque) || _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval)
309  {
310  /* step right one page */
311  buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
312  continue;
313  }
314  else
315  break;
316  }
317 
318  if (P_IGNORE(opaque))
319  elog(ERROR, "fell off the end of index \"%s\"",
321 
322  return buf;
323 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
BlockNumber btpo_next
Definition: nbtree.h:58
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
#define P_IGNORE(opaque)
Definition: nbtree.h:163
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:865
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:346
#define BT_READ
Definition: nbtree.h:299
void _bt_finish_split(Relation rel, Buffer lbuf, BTStack stack)
Definition: nbtinsert.c:1927
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:884
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define P_HIKEY
Definition: nbtree.h:185
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
#define elog(elevel,...)
Definition: elog.h:226
int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
Definition: nbtsearch.c:458
#define BT_WRITE
Definition: nbtree.h:300
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74

◆ _bt_next()

bool _bt_next ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 1161 of file nbtsearch.c.

References _bt_steppage(), BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::firstItem, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, IndexScanDescData::opaque, ScanDirectionIsForward, HeapTupleData::t_self, IndexScanDescData::xs_ctup, IndexScanDescData::xs_itup, and IndexScanDescData::xs_want_itup.

Referenced by btgetbitmap(), and btgettuple().

1162 {
1163  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1164  BTScanPosItem *currItem;
1165 
1166  /*
1167  * Advance to next tuple on current page; or if there's no more, try to
1168  * step to the next page with data.
1169  */
1170  if (ScanDirectionIsForward(dir))
1171  {
1172  if (++so->currPos.itemIndex > so->currPos.lastItem)
1173  {
1174  if (!_bt_steppage(scan, dir))
1175  return false;
1176  }
1177  }
1178  else
1179  {
1180  if (--so->currPos.itemIndex < so->currPos.firstItem)
1181  {
1182  if (!_bt_steppage(scan, dir))
1183  return false;
1184  }
1185  }
1186 
1187  /* OK, itemIndex says what to return */
1188  currItem = &so->currPos.items[so->currPos.itemIndex];
1189  scan->xs_ctup.t_self = currItem->heapTid;
1190  if (scan->xs_want_itup)
1191  scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
1192 
1193  return true;
1194 }
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple xs_itup
Definition: relscan.h:115
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:389
int itemIndex
Definition: nbtree.h:387
char * currTuples
Definition: nbtree.h:461
int lastItem
Definition: nbtree.h:386
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
ItemPointerData t_self
Definition: htup.h:65
IndexTupleData * IndexTuple
Definition: itup.h:53
int firstItem
Definition: nbtree.h:385
bool xs_want_itup
Definition: relscan.h:97
HeapTupleData xs_ctup
Definition: relscan.h:121
BTScanPosData currPos
Definition: nbtree.h:474
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1374

◆ _bt_parallel_readpage()

static bool _bt_parallel_readpage ( IndexScanDesc  scan,
BlockNumber  blkno,
ScanDirection  dir 
)
static

Definition at line 1659 of file nbtsearch.c.

References _bt_drop_lock_and_maybe_pin(), _bt_initialize_more_data(), _bt_readnextpage(), BTScanOpaqueData::currPos, and IndexScanDescData::opaque.

Referenced by _bt_first().

1660 {
1661  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1662 
1663  _bt_initialize_more_data(so, dir);
1664 
1665  if (!_bt_readnextpage(scan, blkno, dir))
1666  return false;
1667 
1668  /* Drop the lock, and maybe the pin, on the current page */
1670 
1671  return true;
1672 }
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
Definition: nbtsearch.c:57
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
static void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
Definition: nbtsearch.c:1980
static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
Definition: nbtsearch.c:1481
BTScanPosData currPos
Definition: nbtree.h:474

◆ _bt_readnextpage()

static bool _bt_readnextpage ( IndexScanDesc  scan,
BlockNumber  blkno,
ScanDirection  dir 
)
static

Definition at line 1481 of file nbtsearch.c.

References _bt_getbuf(), _bt_parallel_done(), _bt_parallel_release(), _bt_parallel_seize(), _bt_readpage(), _bt_relbuf(), _bt_walk_left(), BT_READ, BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosUnpinIfPinned, BTScanPosData::buf, BufferGetBlockNumber(), BufferGetPage, CHECK_FOR_INTERRUPTS, BTScanPosData::currPage, BTScanOpaqueData::currPos, IndexScanDescData::indexRelation, InvalidBuffer, LockBuffer(), BTScanPosData::moreLeft, BTScanPosData::moreRight, IndexScanDescData::opaque, P_FIRSTDATAKEY, P_IGNORE, P_NONE, PageGetMaxOffsetNumber, PageGetSpecialPointer, IndexScanDescData::parallel_scan, PredicateLockPage(), ScanDirectionIsForward, status(), TestForOldSnapshot(), and IndexScanDescData::xs_snapshot.

Referenced by _bt_parallel_readpage(), and _bt_steppage().

1482 {
1483  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1484  Relation rel;
1485  Page page;
1486  BTPageOpaque opaque;
1487  bool status = true;
1488 
1489  rel = scan->indexRelation;
1490 
1491  if (ScanDirectionIsForward(dir))
1492  {
1493  for (;;)
1494  {
1495  /*
1496  * if we're at end of scan, give up and mark parallel scan as
1497  * done, so that all the workers can finish their scan
1498  */
1499  if (blkno == P_NONE || !so->currPos.moreRight)
1500  {
1501  _bt_parallel_done(scan);
1503  return false;
1504  }
1505  /* check for interrupts while we're not holding any buffer lock */
1507  /* step right one page */
1508  so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
1509  page = BufferGetPage(so->currPos.buf);
1510  TestForOldSnapshot(scan->xs_snapshot, rel, page);
1511  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1512  /* check for deleted page */
1513  if (!P_IGNORE(opaque))
1514  {
1515  PredicateLockPage(rel, blkno, scan->xs_snapshot);
1516  /* see if there are any matches on this page */
1517  /* note that this will clear moreRight if we can stop */
1518  if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
1519  break;
1520  }
1521  else if (scan->parallel_scan != NULL)
1522  {
1523  /* allow next page be processed by parallel worker */
1524  _bt_parallel_release(scan, opaque->btpo_next);
1525  }
1526 
1527  /* nope, keep going */
1528  if (scan->parallel_scan != NULL)
1529  {
1530  _bt_relbuf(rel, so->currPos.buf);
1531  status = _bt_parallel_seize(scan, &blkno);
1532  if (!status)
1533  {
1535  return false;
1536  }
1537  }
1538  else
1539  {
1540  blkno = opaque->btpo_next;
1541  _bt_relbuf(rel, so->currPos.buf);
1542  }
1543  }
1544  }
1545  else
1546  {
1547  /*
1548  * Should only happen in parallel cases, when some other backend
1549  * advanced the scan.
1550  */
1551  if (so->currPos.currPage != blkno)
1552  {
1554  so->currPos.currPage = blkno;
1555  }
1556 
1557  /*
1558  * Walk left to the next page with data. This is much more complex
1559  * than the walk-right case because of the possibility that the page
1560  * to our left splits while we are in flight to it, plus the
1561  * possibility that the page we were on gets deleted after we leave
1562  * it. See nbtree/README for details.
1563  *
1564  * It might be possible to rearrange this code to have less overhead
1565  * in pinning and locking, but that would require capturing the left
1566  * pointer when the page is initially read, and using it here, along
1567  * with big changes to _bt_walk_left() and the code below. It is not
1568  * clear whether this would be a win, since if the page immediately to
1569  * the left splits after we read this page and before we step left, we
1570  * would need to visit more pages than with the current code.
1571  *
1572  * Note that if we change the code so that we drop the pin for a scan
1573  * which uses a non-MVCC snapshot, we will need to modify the code for
1574  * walking left, to allow for the possibility that a referenced page
1575  * has been deleted. As long as the buffer is pinned or the snapshot
1576  * is MVCC the page cannot move past the half-dead state to fully
1577  * deleted.
1578  */
1579  if (BTScanPosIsPinned(so->currPos))
1580  LockBuffer(so->currPos.buf, BT_READ);
1581  else
1582  so->currPos.buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
1583 
1584  for (;;)
1585  {
1586  /* Done if we know there are no matching keys to the left */
1587  if (!so->currPos.moreLeft)
1588  {
1589  _bt_relbuf(rel, so->currPos.buf);
1590  _bt_parallel_done(scan);
1592  return false;
1593  }
1594 
1595  /* Step to next physical page */
1596  so->currPos.buf = _bt_walk_left(rel, so->currPos.buf,
1597  scan->xs_snapshot);
1598 
1599  /* if we're physically at end of index, return failure */
1600  if (so->currPos.buf == InvalidBuffer)
1601  {
1602  _bt_parallel_done(scan);
1604  return false;
1605  }
1606 
1607  /*
1608  * Okay, we managed to move left to a non-deleted page. Done if
1609  * it's not half-dead and contains matching tuples. Else loop back
1610  * and do it all again.
1611  */
1612  page = BufferGetPage(so->currPos.buf);
1613  TestForOldSnapshot(scan->xs_snapshot, rel, page);
1614  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1615  if (!P_IGNORE(opaque))
1616  {
1618  /* see if there are any matches on this page */
1619  /* note that this will clear moreLeft if we can stop */
1620  if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
1621  break;
1622  }
1623  else if (scan->parallel_scan != NULL)
1624  {
1625  /* allow next page be processed by parallel worker */
1627  }
1628 
1629  /*
1630  * For parallel scans, get the last page scanned as it is quite
1631  * possible that by the time we try to seize the scan, some other
1632  * worker has already advanced the scan to a different page. We
1633  * must continue based on the latest page scanned by any worker.
1634  */
1635  if (scan->parallel_scan != NULL)
1636  {
1637  _bt_relbuf(rel, so->currPos.buf);
1638  status = _bt_parallel_seize(scan, &blkno);
1639  if (!status)
1640  {
1642  return false;
1643  }
1644  so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
1645  }
1646  }
1647  }
1648 
1649  return true;
1650 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2475
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
#define P_IGNORE(opaque)
Definition: nbtree.h:163
bool moreRight
Definition: nbtree.h:370
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
void _bt_parallel_done(IndexScanDesc scan)
Definition: nbtree.c:720
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
#define P_NONE
Definition: nbtree.h:150
Snapshot xs_snapshot
Definition: relscan.h:92
#define InvalidBuffer
Definition: buf.h:25
BlockNumber currPage
Definition: nbtree.h:360
bool moreLeft
Definition: nbtree.h:369
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
Relation indexRelation
Definition: relscan.h:91
#define BT_READ
Definition: nbtree.h:299
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:394
bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
Definition: nbtree.c:639
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:884
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:417
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
Definition: nbtree.c:697
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
BTScanPosData currPos
Definition: nbtree.h:474
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
Buffer buf
Definition: nbtree.h:357
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
Definition: nbtsearch.c:1216
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:405
Pointer Page
Definition: bufpage.h:74
static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
Definition: nbtsearch.c:1689

◆ _bt_readpage()

static bool _bt_readpage ( IndexScanDesc  scan,
ScanDirection  dir,
OffsetNumber  offnum 
)
static

Definition at line 1216 of file nbtsearch.c.

References _bt_checkkeys(), _bt_parallel_release(), _bt_saveitem(), Assert, BTScanPosIsPinned, BTScanPosData::buf, BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetPage, BufferIsValid, BTScanPosData::currPage, BTScanOpaqueData::currPos, BTScanPosData::firstItem, BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanPosData::lsn, Max, MaxIndexTuplesPerPage, Min, BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanPosData::nextPage, BTScanPosData::nextTupleOffset, OffsetNumberNext, OffsetNumberPrev, IndexScanDescData::opaque, P_FIRSTDATAKEY, PageGetMaxOffsetNumber, PageGetSpecialPointer, IndexScanDescData::parallel_scan, and ScanDirectionIsForward.

Referenced by _bt_endpoint(), _bt_first(), and _bt_readnextpage().

1217 {
1218  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1219  Page page;
1220  BTPageOpaque opaque;
1221  OffsetNumber minoff;
1222  OffsetNumber maxoff;
1223  int itemIndex;
1224  IndexTuple itup;
1225  bool continuescan;
1226 
1227  /*
1228  * We must have the buffer pinned and locked, but the usual macro can't be
1229  * used here; this function is what makes it good for currPos.
1230  */
1232 
1233  page = BufferGetPage(so->currPos.buf);
1234  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1235 
1236  /* allow next page be processed by parallel worker */
1237  if (scan->parallel_scan)
1238  {
1239  if (ScanDirectionIsForward(dir))
1240  _bt_parallel_release(scan, opaque->btpo_next);
1241  else
1243  }
1244 
1245  minoff = P_FIRSTDATAKEY(opaque);
1246  maxoff = PageGetMaxOffsetNumber(page);
1247 
1248  /*
1249  * We note the buffer's block number so that we can release the pin later.
1250  * This allows us to re-read the buffer if it is needed again for hinting.
1251  */
1253 
1254  /*
1255  * We save the LSN of the page as we read it, so that we know whether it
1256  * safe to apply LP_DEAD hints to the page later. This allows us to drop
1257  * the pin for MVCC scans, which allows vacuum to avoid blocking.
1258  */
1260 
1261  /*
1262  * we must save the page's right-link while scanning it; this tells us
1263  * where to step right to after we're done with these items. There is no
1264  * corresponding need for the left-link, since splits always go right.
1265  */
1266  so->currPos.nextPage = opaque->btpo_next;
1267 
1268  /* initialize tuple workspace to empty */
1269  so->currPos.nextTupleOffset = 0;
1270 
1271  /*
1272  * Now that the current page has been made consistent, the macro should be
1273  * good.
1274  */
1276 
1277  if (ScanDirectionIsForward(dir))
1278  {
1279  /* load items[] in ascending order */
1280  itemIndex = 0;
1281 
1282  offnum = Max(offnum, minoff);
1283 
1284  while (offnum <= maxoff)
1285  {
1286  itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
1287  if (itup != NULL)
1288  {
1289  /* tuple passes all scan key conditions, so remember it */
1290  _bt_saveitem(so, itemIndex, offnum, itup);
1291  itemIndex++;
1292  }
1293  if (!continuescan)
1294  {
1295  /* there can't be any more matches, so stop */
1296  so->currPos.moreRight = false;
1297  break;
1298  }
1299 
1300  offnum = OffsetNumberNext(offnum);
1301  }
1302 
1303  Assert(itemIndex <= MaxIndexTuplesPerPage);
1304  so->currPos.firstItem = 0;
1305  so->currPos.lastItem = itemIndex - 1;
1306  so->currPos.itemIndex = 0;
1307  }
1308  else
1309  {
1310  /* load items[] in descending order */
1311  itemIndex = MaxIndexTuplesPerPage;
1312 
1313  offnum = Min(offnum, maxoff);
1314 
1315  while (offnum >= minoff)
1316  {
1317  itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
1318  if (itup != NULL)
1319  {
1320  /* tuple passes all scan key conditions, so remember it */
1321  itemIndex--;
1322  _bt_saveitem(so, itemIndex, offnum, itup);
1323  }
1324  if (!continuescan)
1325  {
1326  /* there can't be any more matches, so stop */
1327  so->currPos.moreLeft = false;
1328  break;
1329  }
1330 
1331  offnum = OffsetNumberPrev(offnum);
1332  }
1333 
1334  Assert(itemIndex >= 0);
1335  so->currPos.firstItem = itemIndex;
1338  }
1339 
1340  return (so->currPos.firstItem <= so->currPos.lastItem);
1341 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
bool moreRight
Definition: nbtree.h:370
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple _bt_checkkeys(IndexScanDesc scan, Page page, OffsetNumber offnum, ScanDirection dir, bool *continuescan)
Definition: nbtutils.c:1370
int itemIndex
Definition: nbtree.h:387
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
#define Min(x, y)
Definition: c.h:890
BlockNumber currPage
Definition: nbtree.h:360
bool moreLeft
Definition: nbtree.h:369
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
int nextTupleOffset
Definition: nbtree.h:376
int lastItem
Definition: nbtree.h:386
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
Definition: bufmgr.c:2838
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:394
static void _bt_saveitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum, IndexTuple itup)
Definition: nbtsearch.c:1345
int firstItem
Definition: nbtree.h:385
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define Max(x, y)
Definition: c.h:884
#define Assert(condition)
Definition: c.h:732
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
Definition: nbtree.c:697
BlockNumber nextPage
Definition: nbtree.h:361
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
BTScanPosData currPos
Definition: nbtree.h:474
#define MaxIndexTuplesPerPage
Definition: itup.h:145
XLogRecPtr lsn
Definition: nbtree.h:359
Buffer buf
Definition: nbtree.h:357
Pointer Page
Definition: bufpage.h:74

◆ _bt_saveitem()

static void _bt_saveitem ( BTScanOpaque  so,
int  itemIndex,
OffsetNumber  offnum,
IndexTuple  itup 
)
static

Definition at line 1345 of file nbtsearch.c.

References BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosItem::heapTid, BTScanPosItem::indexOffset, IndexTupleSize, BTScanPosData::items, MAXALIGN, BTScanPosData::nextTupleOffset, IndexTupleData::t_tid, and BTScanPosItem::tupleOffset.

Referenced by _bt_readpage().

1347 {
1348  BTScanPosItem *currItem = &so->currPos.items[itemIndex];
1349 
1350  currItem->heapTid = itup->t_tid;
1351  currItem->indexOffset = offnum;
1352  if (so->currTuples)
1353  {
1354  Size itupsz = IndexTupleSize(itup);
1355 
1356  currItem->tupleOffset = so->currPos.nextTupleOffset;
1357  memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
1358  so->currPos.nextTupleOffset += MAXALIGN(itupsz);
1359  }
1360 }
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:389
char * currTuples
Definition: nbtree.h:461
ItemPointerData t_tid
Definition: itup.h:37
OffsetNumber indexOffset
Definition: nbtree.h:351
int nextTupleOffset
Definition: nbtree.h:376
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:685
ItemPointerData heapTid
Definition: nbtree.h:350
BTScanPosData currPos
Definition: nbtree.h:474
LocationIndex tupleOffset
Definition: nbtree.h:352
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_search()

BTStack _bt_search ( Relation  rel,
int  keysz,
ScanKey  scankey,
bool  nextkey,
Buffer bufP,
int  access,
Snapshot  snapshot 
)

Definition at line 97 of file nbtsearch.c.

References _bt_binsrch(), _bt_getroot(), _bt_moveright(), _bt_relandgetbuf(), BT_READ, BT_WRITE, BTPageOpaqueData::btpo, BTreeInnerTupleGetDownLink, BTStackData::bts_blkno, BTStackData::bts_btentry, BTStackData::bts_offset, BTStackData::bts_parent, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, BTPageOpaqueData::level, LockBuffer(), P_ISLEAF, PageGetItem, PageGetItemId, PageGetSpecialPointer, and palloc().

Referenced by _bt_doinsert(), _bt_first(), and _bt_pagedel().

99 {
100  BTStack stack_in = NULL;
101  int page_access = BT_READ;
102 
103  /* Get the root page to start with */
104  *bufP = _bt_getroot(rel, access);
105 
106  /* If index is empty and access = BT_READ, no root page is created. */
107  if (!BufferIsValid(*bufP))
108  return (BTStack) NULL;
109 
110  /* Loop iterates once per level descended in the tree */
111  for (;;)
112  {
113  Page page;
114  BTPageOpaque opaque;
115  OffsetNumber offnum;
116  ItemId itemid;
117  IndexTuple itup;
118  BlockNumber blkno;
119  BlockNumber par_blkno;
120  BTStack new_stack;
121 
122  /*
123  * Race -- the page we just grabbed may have split since we read its
124  * pointer in the parent (or metapage). If it has, we may need to
125  * move right to its new sibling. Do that.
126  *
127  * In write-mode, allow _bt_moveright to finish any incomplete splits
128  * along the way. Strictly speaking, we'd only need to finish an
129  * incomplete split on the leaf page we're about to insert to, not on
130  * any of the upper levels (they are taken care of in _bt_getstackbuf,
131  * if the leaf page is split and we insert to the parent page). But
132  * this is a good opportunity to finish splits of internal pages too.
133  */
134  *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey,
135  (access == BT_WRITE), stack_in,
136  page_access, snapshot);
137 
138  /* if this is a leaf page, we're done */
139  page = BufferGetPage(*bufP);
140  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
141  if (P_ISLEAF(opaque))
142  break;
143 
144  /*
145  * Find the appropriate item on the internal page, and get the child
146  * page that it points to.
147  */
148  offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
149  itemid = PageGetItemId(page, offnum);
150  itup = (IndexTuple) PageGetItem(page, itemid);
151  blkno = BTreeInnerTupleGetDownLink(itup);
152  par_blkno = BufferGetBlockNumber(*bufP);
153 
154  /*
155  * We need to save the location of the index entry we chose in the
156  * parent page on a stack. In case we split the tree, we'll use the
157  * stack to work back up to the parent page. We also save the actual
158  * downlink (block) to uniquely identify the index entry, in case it
159  * moves right while we're working lower in the tree. See the paper
160  * by Lehman and Yao for how this is detected and handled. (We use the
161  * child link to disambiguate duplicate keys in the index -- Lehman
162  * and Yao disallow duplicate keys.)
163  */
164  new_stack = (BTStack) palloc(sizeof(BTStackData));
165  new_stack->bts_blkno = par_blkno;
166  new_stack->bts_offset = offnum;
167  new_stack->bts_btentry = blkno;
168  new_stack->bts_parent = stack_in;
169 
170  /*
171  * Page level 1 is lowest non-leaf page level prior to leaves. So,
172  * if we're on the level 1 and asked to lock leaf page in write mode,
173  * then lock next page in write mode, because it must be a leaf.
174  */
175  if (opaque->btpo.level == 1 && access == BT_WRITE)
176  page_access = BT_WRITE;
177 
178  /* drop the read lock on the parent page, acquire one on the child */
179  *bufP = _bt_relandgetbuf(rel, *bufP, blkno, page_access);
180 
181  /* okay, all set to move down a level */
182  stack_in = new_stack;
183  }
184 
185  /*
186  * If we're asked to lock leaf in write mode, but didn't manage to, then
187  * relock. That may happen when the root page appears to be leaf.
188  */
189  if (access == BT_WRITE && page_access == BT_READ)
190  {
191  /* trade in our read lock for a write lock */
193  LockBuffer(*bufP, BT_WRITE);
194 
195  /*
196  * If the page was split between the time that we surrendered our read
197  * lock and acquired our write lock, then this page may no longer be
198  * the right place for the key we want to insert. In this case, we
199  * need to move right in the tree. See Lehman and Yao for an
200  * excruciatingly precise description.
201  */
202  *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey,
203  true, stack_in, BT_WRITE, snapshot);
204  }
205 
206  return stack_in;
207 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define BTreeInnerTupleGetDownLink(itup)
Definition: nbtree.h:224
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:865
union BTPageOpaqueData::@45 btpo
BTStackData * BTStack
Definition: nbtree.h:320
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:299
Buffer _bt_moveright(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey, bool forupdate, BTStack stack, int access, Snapshot snapshot)
Definition: nbtsearch.c:244
OffsetNumber bts_offset
Definition: nbtree.h:315
IndexTupleData * IndexTuple
Definition: itup.h:53
OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
Definition: nbtsearch.c:353
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:252
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
uint32 level
Definition: nbtree.h:61
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3552
BlockNumber bts_blkno
Definition: nbtree.h:314
BlockNumber bts_btentry
Definition: nbtree.h:316
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
struct BTStackData * bts_parent
Definition: nbtree.h:317
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
void * palloc(Size size)
Definition: mcxt.c:924
#define BT_WRITE
Definition: nbtree.h:300
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_steppage()

static bool _bt_steppage ( IndexScanDesc  scan,
ScanDirection  dir 
)
static

Definition at line 1374 of file nbtsearch.c.

References _bt_drop_lock_and_maybe_pin(), _bt_killitems(), _bt_parallel_seize(), _bt_readnextpage(), Assert, BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanPosData::currPage, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), InvalidBlockNumber, BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanPosData::nextPage, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numKilled, offsetof, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, ScanDirectionIsForward, and status().

Referenced by _bt_endpoint(), _bt_first(), and _bt_next().

1375 {
1376  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1378  bool status = true;
1379 
1381 
1382  /* Before leaving current page, deal with any killed items */
1383  if (so->numKilled > 0)
1384  _bt_killitems(scan);
1385 
1386  /*
1387  * Before we modify currPos, make a copy of the page data if there was a
1388  * mark position that needs it.
1389  */
1390  if (so->markItemIndex >= 0)
1391  {
1392  /* bump pin on current buffer for assignment to mark buffer */
1393  if (BTScanPosIsPinned(so->currPos))
1395  memcpy(&so->markPos, &so->currPos,
1396  offsetof(BTScanPosData, items[1]) +
1397  so->currPos.lastItem * sizeof(BTScanPosItem));
1398  if (so->markTuples)
1399  memcpy(so->markTuples, so->currTuples,
1400  so->currPos.nextTupleOffset);
1401  so->markPos.itemIndex = so->markItemIndex;
1402  so->markItemIndex = -1;
1403  }
1404 
1405  if (ScanDirectionIsForward(dir))
1406  {
1407  /* Walk right to the next page with data */
1408  if (scan->parallel_scan != NULL)
1409  {
1410  /*
1411  * Seize the scan to get the next block number; if the scan has
1412  * ended already, bail out.
1413  */
1414  status = _bt_parallel_seize(scan, &blkno);
1415  if (!status)
1416  {
1417  /* release the previous buffer, if pinned */
1420  return false;
1421  }
1422  }
1423  else
1424  {
1425  /* Not parallel, so use the previously-saved nextPage link. */
1426  blkno = so->currPos.nextPage;
1427  }
1428 
1429  /* Remember we left a page with data */
1430  so->currPos.moreLeft = true;
1431 
1432  /* release the previous buffer, if pinned */
1434  }
1435  else
1436  {
1437  /* Remember we left a page with data */
1438  so->currPos.moreRight = true;
1439 
1440  if (scan->parallel_scan != NULL)
1441  {
1442  /*
1443  * Seize the scan to get the current block number; if the scan has
1444  * ended already, bail out.
1445  */
1446  status = _bt_parallel_seize(scan, &blkno);
1448  if (!status)
1449  {
1451  return false;
1452  }
1453  }
1454  else
1455  {
1456  /* Not parallel, so just use our own notion of the current page */
1457  blkno = so->currPos.currPage;
1458  }
1459  }
1460 
1461  if (!_bt_readnextpage(scan, blkno, dir))
1462  return false;
1463 
1464  /* Drop the lock, and maybe the pin, on the current page */
1466 
1467  return true;
1468 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
bool moreRight
Definition: nbtree.h:370
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
Definition: nbtsearch.c:57
int itemIndex
Definition: nbtree.h:387
char * currTuples
Definition: nbtree.h:461
BlockNumber currPage
Definition: nbtree.h:360
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:411
uint32 BlockNumber
Definition: block.h:31
bool moreLeft
Definition: nbtree.h:369
int nextTupleOffset
Definition: nbtree.h:376
int lastItem
Definition: nbtree.h:386
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1744
BTScanPosData markPos
Definition: nbtree.h:475
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:478
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:394
char * markTuples
Definition: nbtree.h:462
bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
Definition: nbtree.c:639
int markItemIndex
Definition: nbtree.h:471
static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
Definition: nbtsearch.c:1481
#define Assert(condition)
Definition: c.h:732
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:417
#define InvalidBlockNumber
Definition: block.h:33
BlockNumber nextPage
Definition: nbtree.h:361
BTScanPosData currPos
Definition: nbtree.h:474
Buffer buf
Definition: nbtree.h:357
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3353
#define offsetof(type, field)
Definition: c.h:655
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:405

◆ _bt_walk_left()

static Buffer _bt_walk_left ( Relation  rel,
Buffer  buf,
Snapshot  snapshot 
)
static

Definition at line 1689 of file nbtsearch.c.

References _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buf, BufferGetBlockNumber(), BufferGetPage, CHECK_FOR_INTERRUPTS, elog, ERROR, InvalidBuffer, P_ISDELETED, P_LEFTMOST, P_RIGHTMOST, PageGetSpecialPointer, RelationGetRelationName, and TestForOldSnapshot().

Referenced by _bt_readnextpage().

1690 {
1691  Page page;
1692  BTPageOpaque opaque;
1693 
1694  page = BufferGetPage(buf);
1695  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1696 
1697  for (;;)
1698  {
1699  BlockNumber obknum;
1700  BlockNumber lblkno;
1701  BlockNumber blkno;
1702  int tries;
1703 
1704  /* if we're at end of tree, release buf and return failure */
1705  if (P_LEFTMOST(opaque))
1706  {
1707  _bt_relbuf(rel, buf);
1708  break;
1709  }
1710  /* remember original page we are stepping left from */
1711  obknum = BufferGetBlockNumber(buf);
1712  /* step left */
1713  blkno = lblkno = opaque->btpo_prev;
1714  _bt_relbuf(rel, buf);
1715  /* check for interrupts while we're not holding any buffer lock */
1717  buf = _bt_getbuf(rel, blkno, BT_READ);
1718  page = BufferGetPage(buf);
1719  TestForOldSnapshot(snapshot, rel, page);
1720  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1721 
1722  /*
1723  * If this isn't the page we want, walk right till we find what we
1724  * want --- but go no more than four hops (an arbitrary limit). If we
1725  * don't find the correct page by then, the most likely bet is that
1726  * the original page got deleted and isn't in the sibling chain at all
1727  * anymore, not that its left sibling got split more than four times.
1728  *
1729  * Note that it is correct to test P_ISDELETED not P_IGNORE here,
1730  * because half-dead pages are still in the sibling chain. Caller
1731  * must reject half-dead pages if wanted.
1732  */
1733  tries = 0;
1734  for (;;)
1735  {
1736  if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum)
1737  {
1738  /* Found desired page, return it */
1739  return buf;
1740  }
1741  if (P_RIGHTMOST(opaque) || ++tries > 4)
1742  break;
1743  blkno = opaque->btpo_next;
1744  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1745  page = BufferGetPage(buf);
1746  TestForOldSnapshot(snapshot, rel, page);
1747  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1748  }
1749 
1750  /* Return to the original page to see what's up */
1751  buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
1752  page = BufferGetPage(buf);
1753  TestForOldSnapshot(snapshot, rel, page);
1754  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1755  if (P_ISDELETED(opaque))
1756  {
1757  /*
1758  * It was deleted. Move right to first nondeleted page (there
1759  * must be one); that is the page that has acquired the deleted
1760  * one's keyspace, so stepping left from it will take us where we
1761  * want to be.
1762  */
1763  for (;;)
1764  {
1765  if (P_RIGHTMOST(opaque))
1766  elog(ERROR, "fell off the end of index \"%s\"",
1768  blkno = opaque->btpo_next;
1769  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1770  page = BufferGetPage(buf);
1771  TestForOldSnapshot(snapshot, rel, page);
1772  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1773  if (!P_ISDELETED(opaque))
1774  break;
1775  }
1776 
1777  /*
1778  * Now return to top of loop, resetting obknum to point to this
1779  * nondeleted page, and try again.
1780  */
1781  }
1782  else
1783  {
1784  /*
1785  * It wasn't deleted; the explanation had better be that the page
1786  * to the left got split or deleted. Without this check, we'd go
1787  * into an infinite loop if there's anything wrong.
1788  */
1789  if (opaque->btpo_prev == lblkno)
1790  elog(ERROR, "could not find left sibling of block %u in index \"%s\"",
1791  obknum, RelationGetRelationName(rel));
1792  /* Okay to try again with new lblkno value */
1793  }
1794  }
1795 
1796  return InvalidBuffer;
1797 }
BlockNumber btpo_next
Definition: nbtree.h:58
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:865
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define InvalidBuffer
Definition: buf.h:25
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define BT_READ
Definition: nbtree.h:299
#define ERROR
Definition: elog.h:43
BlockNumber btpo_prev
Definition: nbtree.h:57
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define P_LEFTMOST(opaque)
Definition: nbtree.h:156
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define P_ISDELETED(opaque)
Definition: nbtree.h:160
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:884
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2611
#define elog(elevel,...)
Definition: elog.h:226
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74