PostgreSQL Source Code  git master
nbtdedup.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "miscadmin.h"
#include "utils/rel.h"
Include dependency graph for nbtdedup.c:

Go to the source code of this file.

Functions

static void _bt_bottomupdel_finish_pending (Page page, BTDedupState state, TM_IndexDeleteOp *delstate)
 
static bool _bt_do_singleval (Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
 
static void _bt_singleval_fillfactor (Page page, BTDedupState state, Size newitemsz)
 
void _bt_dedup_pass (Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, Size newitemsz, bool bottomupdedup)
 
bool _bt_bottomupdel_pass (Relation rel, Buffer buf, Relation heapRel, Size newitemsz)
 
void _bt_dedup_start_pending (BTDedupState state, IndexTuple base, OffsetNumber baseoff)
 
bool _bt_dedup_save_htid (BTDedupState state, IndexTuple itup)
 
Size _bt_dedup_finish_pending (Page newpage, BTDedupState state)
 
IndexTuple _bt_form_posting (IndexTuple base, ItemPointer htids, int nhtids)
 
void _bt_update_posting (BTVacuumPosting vacposting)
 
IndexTuple _bt_swap_posting (IndexTuple newitem, IndexTuple oposting, int postingoff)
 

Function Documentation

◆ _bt_bottomupdel_finish_pending()

static void _bt_bottomupdel_finish_pending ( Page  page,
BTDedupState  state,
TM_IndexDeleteOp delstate 
)
static

Definition at line 641 of file nbtdedup.c.

References Assert, BTDedupInterval::baseoff, BTDedupStateData::baseoff, BTreeTupleGetHeapTID(), BTreeTupleGetMaxHeapTID(), BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPosting(), TM_IndexDeleteOp::deltids, TM_IndexStatus::freespace, i, TM_IndexDelete::id, TM_IndexStatus::idxoffnum, BTDedupStateData::intervals, ItemIdGetLength, ItemPointerGetBlockNumber, TM_IndexStatus::knowndeletable, TM_IndexDeleteOp::ndeltids, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupInterval::nitems, BTDedupStateData::nitems, PageGetItem, PageGetItemId, BTDedupStateData::phystupsize, TM_IndexStatus::promising, TM_IndexDeleteOp::status, IndexTupleData::t_tid, and TM_IndexDelete::tid.

Referenced by _bt_bottomupdel_pass().

643 {
644  bool dupinterval = (state->nitems > 1);
645 
646  Assert(state->nitems > 0);
647  Assert(state->nitems <= state->nhtids);
648  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
649 
650  for (int i = 0; i < state->nitems; i++)
651  {
652  OffsetNumber offnum = state->baseoff + i;
653  ItemId itemid = PageGetItemId(page, offnum);
654  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
655  TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids];
656  TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids];
657 
658  if (!BTreeTupleIsPosting(itup))
659  {
660  /* Simple case: A plain non-pivot tuple */
661  ideltid->tid = itup->t_tid;
662  ideltid->id = delstate->ndeltids;
663  istatus->idxoffnum = offnum;
664  istatus->knowndeletable = false; /* for now */
665  istatus->promising = dupinterval; /* simple rule */
666  istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData);
667 
668  delstate->ndeltids++;
669  }
670  else
671  {
672  /*
673  * Complicated case: A posting list tuple.
674  *
675  * We make the conservative assumption that there can only be at
676  * most one affected logical row per posting list tuple. There
677  * will be at most one promising entry in deltids to represent
678  * this presumed lone logical row. Note that this isn't even
679  * considered unless the posting list tuple is also in an interval
680  * of duplicates -- this complicated rule is just a variant of the
681  * simple rule used to decide if plain index tuples are promising.
682  */
683  int nitem = BTreeTupleGetNPosting(itup);
684  bool firstpromising = false;
685  bool lastpromising = false;
686 
687  Assert(_bt_posting_valid(itup));
688 
689  if (dupinterval)
690  {
691  /*
692  * Complicated rule: either the first or last TID in the
693  * posting list gets marked promising (if any at all)
694  */
695  BlockNumber minblocklist,
696  midblocklist,
697  maxblocklist;
698  ItemPointer mintid,
699  midtid,
700  maxtid;
701 
702  mintid = BTreeTupleGetHeapTID(itup);
703  midtid = BTreeTupleGetPostingN(itup, nitem / 2);
704  maxtid = BTreeTupleGetMaxHeapTID(itup);
705  minblocklist = ItemPointerGetBlockNumber(mintid);
706  midblocklist = ItemPointerGetBlockNumber(midtid);
707  maxblocklist = ItemPointerGetBlockNumber(maxtid);
708 
709  /* Only entry with predominant table block can be promising */
710  firstpromising = (minblocklist == midblocklist);
711  lastpromising = (!firstpromising &&
712  midblocklist == maxblocklist);
713  }
714 
715  for (int p = 0; p < nitem; p++)
716  {
717  ItemPointer htid = BTreeTupleGetPostingN(itup, p);
718 
719  ideltid->tid = *htid;
720  ideltid->id = delstate->ndeltids;
721  istatus->idxoffnum = offnum;
722  istatus->knowndeletable = false; /* for now */
723  istatus->promising = false;
724  if ((firstpromising && p == 0) ||
725  (lastpromising && p == nitem - 1))
726  istatus->promising = true;
727  istatus->freespace = sizeof(ItemPointerData); /* at worst */
728 
729  ideltid++;
730  istatus++;
731  delstate->ndeltids++;
732  }
733  }
734  }
735 
736  if (dupinterval)
737  {
738  state->intervals[state->nintervals].nitems = state->nitems;
739  state->nintervals++;
740  }
741 
742  /* Reset state for next interval */
743  state->nhtids = 0;
744  state->nitems = 0;
745  state->phystupsize = 0;
746 }
TM_IndexDelete * deltids
Definition: tableam.h:228
OffsetNumber baseoff
Definition: nbtree.h:872
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:631
ItemPointerData t_tid
Definition: itup.h:37
bool knowndeletable
Definition: tableam.h:196
uint32 BlockNumber
Definition: block.h:31
OffsetNumber idxoffnum
Definition: tableam.h:195
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
Size phystupsize
Definition: nbtree.h:879
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:888
IndexTupleData * IndexTuple
Definition: itup.h:53
struct ItemIdData ItemIdData
bool promising
Definition: tableam.h:199
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:657
TM_IndexStatus * status
Definition: tableam.h:229
ItemPointerData tid
Definition: tableam.h:189
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:537
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:485
uint16 nitems
Definition: nbtree.h:842
#define Assert(condition)
Definition: c.h:804
struct ItemPointerData ItemPointerData
int16 freespace
Definition: tableam.h:200
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
OffsetNumber baseoff
Definition: nbtree.h:841
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_bottomupdel_pass()

bool _bt_bottomupdel_pass ( Relation  rel,
Buffer  buf,
Relation  heapRel,
Size  newitemsz 
)

Definition at line 305 of file nbtdedup.c.

References _bt_bottomupdel_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_delitems_delete_check(), _bt_keep_natts_fast(), Assert, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, TM_IndexDeleteOp::bottomup, TM_IndexDeleteOp::bottomupfreespace, BufferGetPage, BTDedupStateData::deduplicate, TM_IndexDeleteOp::deltids, BTDedupStateData::htids, IndexRelationGetNumberOfKeyAttributes, InvalidOffsetNumber, ItemIdIsDead, Max, BTDedupStateData::maxpostingsize, MaxTIDsPerBTreePage, TM_IndexDeleteOp::ndeltids, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, OffsetNumberNext, P_FIRSTDATAKEY, PageGetExactFreeSpace(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, palloc(), pfree(), BTDedupStateData::phystupsize, and TM_IndexDeleteOp::status.

Referenced by _bt_delete_or_dedup_one_page().

307 {
308  OffsetNumber offnum,
309  minoff,
310  maxoff;
311  Page page = BufferGetPage(buf);
314  TM_IndexDeleteOp delstate;
315  bool neverdedup;
316  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
317 
318  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
319  newitemsz += sizeof(ItemIdData);
320 
321  /* Initialize deduplication state */
322  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
323  state->deduplicate = true;
324  state->nmaxitems = 0;
325  state->maxpostingsize = BLCKSZ; /* We're not really deduplicating */
326  state->base = NULL;
327  state->baseoff = InvalidOffsetNumber;
328  state->basetupsize = 0;
329  state->htids = palloc(state->maxpostingsize);
330  state->nhtids = 0;
331  state->nitems = 0;
332  state->phystupsize = 0;
333  state->nintervals = 0;
334 
335  /*
336  * Initialize tableam state that describes bottom-up index deletion
337  * operation.
338  *
339  * We'll go on to ask the tableam to search for TIDs whose index tuples we
340  * can safely delete. The tableam will search until our leaf page space
341  * target is satisfied, or until the cost of continuing with the tableam
342  * operation seems too high. It focuses its efforts on TIDs associated
343  * with duplicate index tuples that we mark "promising".
344  *
345  * This space target is a little arbitrary. The tableam must be able to
346  * keep the costs and benefits in balance. We provide the tableam with
347  * exhaustive information about what might work, without directly
348  * concerning ourselves with avoiding work during the tableam call. Our
349  * role in costing the bottom-up deletion process is strictly advisory.
350  */
351  delstate.bottomup = true;
352  delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz);
353  delstate.ndeltids = 0;
354  delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete));
355  delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus));
356 
357  minoff = P_FIRSTDATAKEY(opaque);
358  maxoff = PageGetMaxOffsetNumber(page);
359  for (offnum = minoff;
360  offnum <= maxoff;
361  offnum = OffsetNumberNext(offnum))
362  {
363  ItemId itemid = PageGetItemId(page, offnum);
364  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
365 
366  Assert(!ItemIdIsDead(itemid));
367 
368  if (offnum == minoff)
369  {
370  /* itup starts first pending interval */
371  _bt_dedup_start_pending(state, itup, offnum);
372  }
373  else if (_bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
374  _bt_dedup_save_htid(state, itup))
375  {
376  /* Tuple is equal; just added its TIDs to pending interval */
377  }
378  else
379  {
380  /* Finalize interval -- move its TIDs to delete state */
381  _bt_bottomupdel_finish_pending(page, state, &delstate);
382 
383  /* itup starts new pending interval */
384  _bt_dedup_start_pending(state, itup, offnum);
385  }
386  }
387  /* Finalize final interval -- move its TIDs to delete state */
388  _bt_bottomupdel_finish_pending(page, state, &delstate);
389 
390  /*
391  * We don't give up now in the event of having few (or even zero)
392  * promising tuples for the tableam because it's not up to us as the index
393  * AM to manage costs (note that the tableam might have heuristics of its
394  * own that work out what to do). We should at least avoid having our
395  * caller do a useless deduplication pass after we return in the event of
396  * zero promising tuples, though.
397  */
398  neverdedup = false;
399  if (state->nintervals == 0)
400  neverdedup = true;
401 
402  pfree(state->htids);
403  pfree(state);
404 
405  /* Ask tableam which TIDs are deletable, then physically delete them */
406  _bt_delitems_delete_check(rel, buf, heapRel, &delstate);
407 
408  pfree(delstate.deltids);
409  pfree(delstate.status);
410 
411  /* Report "success" to caller unconditionally to avoid deduplication */
412  if (neverdedup)
413  return true;
414 
415  /* Don't dedup when we won't end up back here any time soon anyway */
416  return PageGetExactFreeSpace(page) >= Max(BLCKSZ / 24, newitemsz);
417 }
TM_IndexDelete * deltids
Definition: tableam.h:228
IndexTuple base
Definition: nbtree.h:871
void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, TM_IndexDeleteOp *delstate)
Definition: nbtpage.c:1529
OffsetNumber baseoff
Definition: nbtree.h:872
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:429
#define MaxTIDsPerBTreePage
Definition: nbtree.h:184
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:71
uint16 OffsetNumber
Definition: off.h:24
ItemPointer htids
Definition: nbtree.h:876
void pfree(void *pointer)
Definition: mcxt.c:1169
Size phystupsize
Definition: nbtree.h:879
static char * buf
Definition: pg_test_fsync.c:68
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:480
IndexTupleData * IndexTuple
Definition: itup.h:53
struct ItemIdData ItemIdData
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:496
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
TM_IndexStatus * status
Definition: tableam.h:229
static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state, TM_IndexDeleteOp *delstate)
Definition: nbtdedup.c:641
#define InvalidOffsetNumber
Definition: off.h:26
#define Max(x, y)
Definition: c.h:980
#define Assert(condition)
Definition: c.h:804
Definition: regguts.h:317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
Size basetupsize
Definition: nbtree.h:873
Size maxpostingsize
Definition: nbtree.h:868
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:951
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
void * palloc(Size size)
Definition: mcxt.c:1062
bool deduplicate
Definition: nbtree.h:866
int bottomupfreespace
Definition: tableam.h:224
BTDedupStateData * BTDedupState
Definition: nbtree.h:891
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78

◆ _bt_dedup_finish_pending()

Size _bt_dedup_finish_pending ( Page  newpage,
BTDedupState  state 
)

Definition at line 551 of file nbtdedup.c.

References _bt_form_posting(), Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, elog, ERROR, BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, InvalidOffsetNumber, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupInterval::nitems, BTDedupStateData::nitems, OffsetNumberNext, PageAddItem, PageGetMaxOffsetNumber, pfree(), and BTDedupStateData::phystupsize.

Referenced by _bt_dedup_pass(), and btree_xlog_dedup().

552 {
553  OffsetNumber tupoff;
554  Size tuplesz;
555  Size spacesaving;
556 
557  Assert(state->nitems > 0);
558  Assert(state->nitems <= state->nhtids);
559  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
560 
561  tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage));
562  if (state->nitems == 1)
563  {
564  /* Use original, unchanged base tuple */
565  tuplesz = IndexTupleSize(state->base);
566  if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff,
567  false, false) == InvalidOffsetNumber)
568  elog(ERROR, "deduplication failed to add tuple to page");
569 
570  spacesaving = 0;
571  }
572  else
573  {
574  IndexTuple final;
575 
576  /* Form a tuple with a posting list */
577  final = _bt_form_posting(state->base, state->htids, state->nhtids);
578  tuplesz = IndexTupleSize(final);
579  Assert(tuplesz <= state->maxpostingsize);
580 
581  /* Save final number of items for posting list */
582  state->intervals[state->nintervals].nitems = state->nitems;
583 
584  Assert(tuplesz == MAXALIGN(IndexTupleSize(final)));
585  if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false,
586  false) == InvalidOffsetNumber)
587  elog(ERROR, "deduplication failed to add tuple to page");
588 
589  pfree(final);
590  spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData));
591  /* Increment nintervals, since we wrote a new posting list tuple */
592  state->nintervals++;
593  Assert(spacesaving > 0 && spacesaving < BLCKSZ);
594  }
595 
596  /* Reset state for next pending posting list */
597  state->nhtids = 0;
598  state->nitems = 0;
599  state->phystupsize = 0;
600 
601  return spacesaving;
602 }
IndexTuple base
Definition: nbtree.h:871
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:857
OffsetNumber baseoff
Definition: nbtree.h:872
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
ItemPointer htids
Definition: nbtree.h:876
void pfree(void *pointer)
Definition: mcxt.c:1169
Size phystupsize
Definition: nbtree.h:879
#define ERROR
Definition: elog.h:46
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:888
struct ItemIdData ItemIdData
#define InvalidOffsetNumber
Definition: off.h:26
uint16 nitems
Definition: nbtree.h:842
#define Assert(condition)
Definition: c.h:804
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
#define elog(elevel,...)
Definition: elog.h:232
OffsetNumber baseoff
Definition: nbtree.h:841
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_pass()

void _bt_dedup_pass ( Relation  rel,
Buffer  buf,
Relation  heapRel,
IndexTuple  newitem,
Size  newitemsz,
bool  bottomupdedup 
)

Definition at line 57 of file nbtdedup.c.

References _bt_dedup_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_do_singleval(), _bt_keep_natts_fast(), _bt_singleval_fillfactor(), Assert, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTMaxItemSize, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, BTDedupStateData::deduplicate, elog, END_CRIT_SECTION, ERROR, BTDedupStateData::htids, INDEX_SIZE_MASK, IndexRelationGetNumberOfKeyAttributes, BTDedupStateData::intervals, InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, MarkBufferDirty(), BTDedupStateData::maxpostingsize, Min, BTDedupStateData::nhtids, xl_btree_dedup::nintervals, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, OffsetNumberNext, P_FIRSTDATAKEY, P_HAS_GARBAGE, P_HIKEY, P_RIGHTMOST, PageAddItem, PageGetExactFreeSpace(), PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN, palloc(), pfree(), BTDedupStateData::phystupsize, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDedup, START_CRIT_SECTION, XLOG_BTREE_DEDUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_delete_or_dedup_one_page().

59 {
60  OffsetNumber offnum,
61  minoff,
62  maxoff;
63  Page page = BufferGetPage(buf);
65  Page newpage;
67  Size pagesaving = 0;
68  bool singlevalstrat = false;
69  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
70 
71  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
72  newitemsz += sizeof(ItemIdData);
73 
74  /*
75  * Initialize deduplication state.
76  *
77  * It would be possible for maxpostingsize (limit on posting list tuple
78  * size) to be set to one third of the page. However, it seems like a
79  * good idea to limit the size of posting lists to one sixth of a page.
80  * That ought to leave us with a good split point when pages full of
81  * duplicates can be split several times.
82  */
83  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
84  state->deduplicate = true;
85  state->nmaxitems = 0;
86  state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
87  /* Metadata about base tuple of current pending posting list */
88  state->base = NULL;
90  state->basetupsize = 0;
91  /* Metadata about current pending posting list TIDs */
92  state->htids = palloc(state->maxpostingsize);
93  state->nhtids = 0;
94  state->nitems = 0;
95  /* Size of all physical tuples to be replaced by pending posting list */
96  state->phystupsize = 0;
97  /* nintervals should be initialized to zero */
98  state->nintervals = 0;
99 
100  minoff = P_FIRSTDATAKEY(opaque);
101  maxoff = PageGetMaxOffsetNumber(page);
102 
103  /*
104  * Consider applying "single value" strategy, though only if the page
105  * seems likely to be split in the near future
106  */
107  if (!bottomupdedup)
108  singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
109 
110  /*
111  * Deduplicate items from page, and write them to newpage.
112  *
113  * Copy the original page's LSN into newpage copy. This will become the
114  * updated version of the page. We need this because XLogInsert will
115  * examine the LSN and possibly dump it in a page image.
116  */
117  newpage = PageGetTempPageCopySpecial(page);
118  PageSetLSN(newpage, PageGetLSN(page));
119 
120  /* Copy high key, if any */
121  if (!P_RIGHTMOST(opaque))
122  {
123  ItemId hitemid = PageGetItemId(page, P_HIKEY);
124  Size hitemsz = ItemIdGetLength(hitemid);
125  IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid);
126 
127  if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY,
128  false, false) == InvalidOffsetNumber)
129  elog(ERROR, "deduplication failed to add highkey");
130  }
131 
132  for (offnum = minoff;
133  offnum <= maxoff;
134  offnum = OffsetNumberNext(offnum))
135  {
136  ItemId itemid = PageGetItemId(page, offnum);
137  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
138 
139  Assert(!ItemIdIsDead(itemid));
140 
141  if (offnum == minoff)
142  {
143  /*
144  * No previous/base tuple for the data item -- use the data item
145  * as base tuple of pending posting list
146  */
147  _bt_dedup_start_pending(state, itup, offnum);
148  }
149  else if (state->deduplicate &&
150  _bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
151  _bt_dedup_save_htid(state, itup))
152  {
153  /*
154  * Tuple is equal to base tuple of pending posting list. Heap
155  * TID(s) for itup have been saved in state.
156  */
157  }
158  else
159  {
160  /*
161  * Tuple is not equal to pending posting list tuple, or
162  * _bt_dedup_save_htid() opted to not merge current item into
163  * pending posting list for some other reason (e.g., adding more
164  * TIDs would have caused posting list to exceed current
165  * maxpostingsize).
166  *
167  * If state contains pending posting list with more than one item,
168  * form new posting tuple, and actually update the page. Else
169  * reset the state and move on without modifying the page.
170  */
171  pagesaving += _bt_dedup_finish_pending(newpage, state);
172 
173  if (singlevalstrat)
174  {
175  /*
176  * Single value strategy's extra steps.
177  *
178  * Lower maxpostingsize for sixth and final large posting list
179  * tuple at the point where 5 maxpostingsize-capped tuples
180  * have either been formed or observed.
181  *
182  * When a sixth maxpostingsize-capped item is formed/observed,
183  * stop merging together tuples altogether. The few tuples
184  * that remain at the end of the page won't be merged together
185  * at all (at least not until after a future page split takes
186  * place).
187  */
188  if (state->nmaxitems == 5)
189  _bt_singleval_fillfactor(page, state, newitemsz);
190  else if (state->nmaxitems == 6)
191  {
192  state->deduplicate = false;
193  singlevalstrat = false; /* won't be back here */
194  }
195  }
196 
197  /* itup starts new pending posting list */
198  _bt_dedup_start_pending(state, itup, offnum);
199  }
200  }
201 
202  /* Handle the last item */
203  pagesaving += _bt_dedup_finish_pending(newpage, state);
204 
205  /*
206  * If no items suitable for deduplication were found, newpage must be
207  * exactly the same as the original page, so just return from function.
208  *
209  * We could determine whether or not to proceed on the basis the space
210  * savings being sufficient to avoid an immediate page split instead. We
211  * don't do that because there is some small value in nbtsplitloc.c always
212  * operating against a page that is fully deduplicated (apart from
213  * newitem). Besides, most of the cost has already been paid.
214  */
215  if (state->nintervals == 0)
216  {
217  /* cannot leak memory here */
218  pfree(newpage);
219  pfree(state->htids);
220  pfree(state);
221  return;
222  }
223 
224  /*
225  * By here, it's clear that deduplication will definitely go ahead.
226  *
227  * Clear the BTP_HAS_GARBAGE page flag. The index must be a heapkeyspace
228  * index, and as such we'll never pay attention to BTP_HAS_GARBAGE anyway.
229  * But keep things tidy.
230  */
231  if (P_HAS_GARBAGE(opaque))
232  {
233  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(newpage);
234 
235  nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
236  }
237 
239 
240  PageRestoreTempPage(newpage, page);
242 
243  /* XLOG stuff */
244  if (RelationNeedsWAL(rel))
245  {
246  XLogRecPtr recptr;
247  xl_btree_dedup xlrec_dedup;
248 
249  xlrec_dedup.nintervals = state->nintervals;
250 
251  XLogBeginInsert();
253  XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup);
254 
255  /*
256  * The intervals array is not in the buffer, but pretend that it is.
257  * When XLogInsert stores the whole buffer, the array need not be
258  * stored too.
259  */
260  XLogRegisterBufData(0, (char *) state->intervals,
261  state->nintervals * sizeof(BTDedupInterval));
262 
263  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP);
264 
265  PageSetLSN(page, recptr);
266  }
267 
269 
270  /* Local space accounting should agree with page accounting */
271  Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz);
272 
273  /* cannot leak memory here */
274  pfree(state->htids);
275  pfree(state);
276 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:378
IndexTuple base
Definition: nbtree.h:871
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
uint16 nintervals
Definition: nbtxlog.h:172
OffsetNumber baseoff
Definition: nbtree.h:872
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1565
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:232
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
#define Min(x, y)
Definition: c.h:986
#define END_CRIT_SECTION()
Definition: miscadmin.h:149
Pointer Item
Definition: item.h:17
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define P_HAS_GARBAGE(opaque)
Definition: nbtree.h:225
#define START_CRIT_SECTION()
Definition: miscadmin.h:147
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:429
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
Size _bt_dedup_finish_pending(Page newpage, BTDedupState state)
Definition: nbtdedup.c:551
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:71
uint16 OffsetNumber
Definition: off.h:24
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
ItemPointer htids
Definition: nbtree.h:876
void pfree(void *pointer)
Definition: mcxt.c:1169
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
Size phystupsize
Definition: nbtree.h:879
#define ERROR
Definition: elog.h:46
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:888
static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
Definition: nbtdedup.c:775
static char * buf
Definition: pg_test_fsync.c:68
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:480
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
struct ItemIdData ItemIdData
#define SizeOfBtreeDedup
Definition: nbtxlog.h:177
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:496
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define XLOG_BTREE_DEDUP
Definition: nbtxlog.h:33
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:340
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:434
#define InvalidOffsetNumber
Definition: off.h:26
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
Definition: regguts.h:317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:540
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
Size basetupsize
Definition: nbtree.h:873
#define RelationNeedsWAL(relation)
Definition: rel.h:601
Size maxpostingsize
Definition: nbtree.h:868
#define PageGetLSN(page)
Definition: bufpage.h:366
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:951
#define BTMaxItemSize(page)
Definition: nbtree.h:162
#define P_HIKEY
Definition: nbtree.h:367
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
void * palloc(Size size)
Definition: mcxt.c:1062
bool deduplicate
Definition: nbtree.h:866
#define elog(elevel,...)
Definition: elog.h:232
BTDedupStateData * BTDedupState
Definition: nbtree.h:891
void XLogBeginInsert(void)
Definition: xloginsert.c:135
uint16 btpo_flags
Definition: nbtree.h:67
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:218
#define BTP_HAS_GARBAGE
Definition: nbtree.h:80
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static void _bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz)
Definition: nbtdedup.c:815

◆ _bt_dedup_save_htid()

bool _bt_dedup_save_htid ( BTDedupState  state,
IndexTuple  itup 
)

Definition at line 480 of file nbtdedup.c.

References Assert, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, MAXALIGN, BTDedupStateData::maxpostingsize, BTDedupStateData::nhtids, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_bottomupdel_pass(), _bt_dedup_pass(), _bt_load(), and btree_xlog_dedup().

481 {
482  int nhtids;
483  ItemPointer htids;
484  Size mergedtupsz;
485 
486  Assert(!BTreeTupleIsPivot(itup));
487 
488  if (!BTreeTupleIsPosting(itup))
489  {
490  nhtids = 1;
491  htids = &itup->t_tid;
492  }
493  else
494  {
495  nhtids = BTreeTupleGetNPosting(itup);
496  htids = BTreeTupleGetPosting(itup);
497  }
498 
499  /*
500  * Don't append (have caller finish pending posting list as-is) if
501  * appending heap TID(s) from itup would put us over maxpostingsize limit.
502  *
503  * This calculation needs to match the code used within _bt_form_posting()
504  * for new posting list tuples.
505  */
506  mergedtupsz = MAXALIGN(state->basetupsize +
507  (state->nhtids + nhtids) * sizeof(ItemPointerData));
508 
509  if (mergedtupsz > state->maxpostingsize)
510  {
511  /*
512  * Count this as an oversized item for single value strategy, though
513  * only when there are 50 TIDs in the final posting list tuple. This
514  * limit (which is fairly arbitrary) avoids confusion about how many
515  * 1/6 of a page tuples have been encountered/created by the current
516  * deduplication pass.
517  *
518  * Note: We deliberately don't consider which deduplication pass
519  * merged together tuples to create this item (could be a previous
520  * deduplication pass, or current pass). See _bt_do_singleval()
521  * comments.
522  */
523  if (state->nhtids > 50)
524  state->nmaxitems++;
525 
526  return false;
527  }
528 
529  /*
530  * Save heap TIDs to pending posting list tuple -- itup can be merged into
531  * pending posting list
532  */
533  state->nitems++;
534  memcpy(state->htids + state->nhtids, htids,
535  sizeof(ItemPointerData) * nhtids);
536  state->nhtids += nhtids;
537  state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
538 
539  return true;
540 }
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:473
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:530
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:876
Size phystupsize
Definition: nbtree.h:879
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:485
#define Assert(condition)
Definition: c.h:804
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
Size basetupsize
Definition: nbtree.h:873
Size maxpostingsize
Definition: nbtree.h:868
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_start_pending()

void _bt_dedup_start_pending ( BTDedupState  state,
IndexTuple  base,
OffsetNumber  baseoff 
)

Definition at line 429 of file nbtdedup.c.

References Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_bottomupdel_pass(), _bt_dedup_pass(), _bt_load(), and btree_xlog_dedup().

431 {
432  Assert(state->nhtids == 0);
433  Assert(state->nitems == 0);
434  Assert(!BTreeTupleIsPivot(base));
435 
436  /*
437  * Copy heap TID(s) from new base tuple for new candidate posting list
438  * into working state's array
439  */
440  if (!BTreeTupleIsPosting(base))
441  {
442  memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData));
443  state->nhtids = 1;
444  state->basetupsize = IndexTupleSize(base);
445  }
446  else
447  {
448  int nposting;
449 
450  nposting = BTreeTupleGetNPosting(base);
451  memcpy(state->htids, BTreeTupleGetPosting(base),
452  sizeof(ItemPointerData) * nposting);
453  state->nhtids = nposting;
454  /* basetupsize should not include existing posting list */
 455  state->basetupsize = BTreeTupleGetPostingOffset(base);
 456  }
457 
458  /*
459  * Save new base tuple itself -- it'll be needed if we actually create a
460  * new posting list from new pending posting list.
461  *
462  * Must maintain physical size of all existing tuples (including line
463  * pointer overhead) so that we can calculate space savings on page.
464  */
465  state->nitems = 1;
466  state->base = base;
467  state->baseoff = baseoff;
468  state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData);
469  /* Also save baseoff in pending state for interval */
470  state->intervals[state->nintervals].baseoff = state->baseoff;
471 }
IndexTuple base
Definition: nbtree.h:871
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:473
OffsetNumber baseoff
Definition: nbtree.h:872
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:530
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:876
Size phystupsize
Definition: nbtree.h:879
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:888
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:485
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:522
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
Size basetupsize
Definition: nbtree.h:873
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
OffsetNumber baseoff
Definition: nbtree.h:841
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_do_singleval()

static bool _bt_do_singleval ( Relation  rel,
Page  page,
BTDedupState  state,
OffsetNumber  minoff,
IndexTuple  newitem 
)
static

Definition at line 775 of file nbtdedup.c.

References _bt_keep_natts_fast(), IndexRelationGetNumberOfKeyAttributes, PageGetItem, PageGetItemId, and PageGetMaxOffsetNumber.

Referenced by _bt_dedup_pass().

777 {
778  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
779  ItemId itemid;
780  IndexTuple itup;
781 
782  itemid = PageGetItemId(page, minoff);
783  itup = (IndexTuple) PageGetItem(page, itemid);
784 
785  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
786  {
787  itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
788  itup = (IndexTuple) PageGetItem(page, itemid);
789 
790  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
791  return true;
792  }
793 
794  return false;
795 }
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:496
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_form_posting()

IndexTuple _bt_form_posting ( IndexTuple  base,
ItemPointer  htids,
int  nhtids 
)

Definition at line 857 of file nbtdedup.c.

References Assert, BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTreeTupleSetPosting(), INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, IndexTupleSize, ItemPointerCopy, ItemPointerIsValid, MAXALIGN, palloc0(), PG_UINT16_MAX, IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_dedup_finish_pending(), _bt_sort_dedup_finish_pending(), and bt_posting_plain_tuple().

858 {
859  uint32 keysize,
860  newsize;
861  IndexTuple itup;
862 
863  if (BTreeTupleIsPosting(base))
864  keysize = BTreeTupleGetPostingOffset(base);
865  else
866  keysize = IndexTupleSize(base);
867 
868  Assert(!BTreeTupleIsPivot(base));
869  Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX);
870  Assert(keysize == MAXALIGN(keysize));
871 
872  /* Determine final size of new tuple */
873  if (nhtids > 1)
874  newsize = MAXALIGN(keysize +
875  nhtids * sizeof(ItemPointerData));
876  else
877  newsize = keysize;
878 
879  Assert(newsize <= INDEX_SIZE_MASK);
880  Assert(newsize == MAXALIGN(newsize));
881 
882  /* Allocate memory using palloc0() (matches index_form_tuple()) */
883  itup = palloc0(newsize);
884  memcpy(itup, base, keysize);
885  itup->t_info &= ~INDEX_SIZE_MASK;
886  itup->t_info |= newsize;
887  if (nhtids > 1)
888  {
889  /* Form posting list tuple */
890  BTreeTupleSetPosting(itup, nhtids, keysize);
891  memcpy(BTreeTupleGetPosting(itup), htids,
892  sizeof(ItemPointerData) * nhtids);
893  Assert(_bt_posting_valid(itup));
894  }
895  else
896  {
897  /* Form standard non-pivot tuple */
898  itup->t_info &= ~INDEX_ALT_TID_MASK;
899  ItemPointerCopy(htids, &itup->t_tid);
 900  Assert(ItemPointerIsValid(&itup->t_tid));
 901 }
902 
903  return itup;
904 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:473
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:530
ItemPointerData t_tid
Definition: itup.h:37
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:459
#define PG_UINT16_MAX
Definition: c.h:522
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:497
unsigned int uint32
Definition: c.h:441
void * palloc0(Size size)
Definition: mcxt.c:1093
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:485
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:522
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
unsigned short t_info
Definition: itup.h:49
#define IndexTupleSize(itup)
Definition: itup.h:71
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_singleval_fillfactor()

static void _bt_singleval_fillfactor ( Page  page,
BTDedupState  state,
Size  newitemsz 
)
static

Definition at line 815 of file nbtdedup.c.

References BTREE_SINGLEVAL_FILLFACTOR, MAXALIGN, BTDedupStateData::maxpostingsize, PageGetPageSize, and SizeOfPageHeaderData.

Referenced by _bt_dedup_pass().

816 {
817  Size leftfree;
818  int reduction;
819 
820  /* This calculation needs to match nbtsplitloc.c */
821  leftfree = PageGetPageSize(page) - SizeOfPageHeaderData -
822  MAXALIGN(sizeof(BTPageOpaqueData));
823  /* Subtract size of new high key (includes pivot heap TID space) */
824  leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData));
825 
826  /*
827  * Reduce maxpostingsize by an amount equal to target free space on left
828  * half of page
829  */
830  reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0);
831  if (state->maxpostingsize > reduction)
832  state->maxpostingsize -= reduction;
833  else
834  state->maxpostingsize = 0;
835 }
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define PageGetPageSize(page)
Definition: bufpage.h:268
#define BTREE_SINGLEVAL_FILLFACTOR
Definition: nbtree.h:201
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
Size maxpostingsize
Definition: nbtree.h:868

◆ _bt_swap_posting()

IndexTuple _bt_swap_posting ( IndexTuple  newitem,
IndexTuple  oposting,
int  postingoff 
)

Definition at line 1015 of file nbtdedup.c.

References Assert, BTreeTupleGetHeapTID(), BTreeTupleGetMaxHeapTID(), BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), CopyIndexTuple(), elog, ERROR, i, ItemPointerCompare(), ItemPointerCopy, ItemPointerIsValid, and IndexTupleData::t_tid.

Referenced by _bt_insertonpg(), btree_xlog_insert(), and btree_xlog_split().

1016 {
1017  int nhtids;
1018  char *replacepos;
1019  char *replaceposright;
1020  Size nmovebytes;
1021  IndexTuple nposting;
1022 
1023  nhtids = BTreeTupleGetNPosting(oposting);
1024  Assert(_bt_posting_valid(oposting));
1025 
1026  /*
1027  * The postingoff argument originated as a _bt_binsrch_posting() return
1028  * value. It will be 0 in the event of corruption that makes a leaf page
1029  * contain a non-pivot tuple that's somehow identical to newitem (no two
1030  * non-pivot tuples should ever have the same TID). This has been known
1031  * to happen in the field from time to time.
1032  *
1033  * Perform a basic sanity check to catch this case now.
1034  */
1035  if (!(postingoff > 0 && postingoff < nhtids))
1036  elog(ERROR, "posting list tuple with %d items cannot be split at offset %d",
1037  nhtids, postingoff);
1038 
1039  /*
1040  * Move item pointers in posting list to make a gap for the new item's
1041  * heap TID. We shift TIDs one place to the right, losing original
1042  * rightmost TID. (nmovebytes must not include TIDs to the left of
1043  * postingoff, nor the existing rightmost/max TID that gets overwritten.)
1044  */
1045  nposting = CopyIndexTuple(oposting);
1046  replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff);
1047  replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1);
1048  nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData);
1049  memmove(replaceposright, replacepos, nmovebytes);
1050 
1051  /* Fill the gap at postingoff with TID of new item (original new TID) */
1052  Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem));
1053  ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos);
1054 
1055  /* Now copy oposting's rightmost/max TID into new item (final new TID) */
1056  ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid);
1057 
 1058  Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(nposting),
 1059  BTreeTupleGetHeapTID(newitem)) < 0);
1060  Assert(_bt_posting_valid(nposting));
1061 
1062  return nposting;
1063 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:473
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:631
ItemPointerData t_tid
Definition: itup.h:37
#define ERROR
Definition: elog.h:46
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:528
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:657
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:537
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:485
#define Assert(condition)
Definition: c.h:804
size_t Size
Definition: c.h:540
#define elog(elevel,...)
Definition: elog.h:232
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_update_posting()

void _bt_update_posting ( BTVacuumPosting  vacposting)

Definition at line 917 of file nbtdedup.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingN(), BTreeTupleGetPostingOffset(), BTreeTupleSetPosting(), BTVacuumPostingData::deletetids, i, INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, ItemPointerIsValid, BTVacuumPostingData::itup, MAXALIGN, BTVacuumPostingData::ndeletedtids, palloc0(), IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_delitems_update(), and btree_xlog_updates().

918 {
919  IndexTuple origtuple = vacposting->itup;
920  uint32 keysize,
921  newsize;
922  IndexTuple itup;
923  int nhtids;
924  int ui,
925  d;
926  ItemPointer htids;
927 
928  nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids;
929 
930  Assert(_bt_posting_valid(origtuple));
931  Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple));
932 
933  /*
934  * Determine final size of new tuple.
935  *
936  * This calculation needs to match the code used within _bt_form_posting()
937  * for new posting list tuples. We avoid calling _bt_form_posting() here
938  * to save ourselves a second memory allocation for a htids workspace.
939  */
940  keysize = BTreeTupleGetPostingOffset(origtuple);
941  if (nhtids > 1)
942  newsize = MAXALIGN(keysize +
943  nhtids * sizeof(ItemPointerData));
944  else
945  newsize = keysize;
946 
947  Assert(newsize <= INDEX_SIZE_MASK);
948  Assert(newsize == MAXALIGN(newsize));
949 
950  /* Allocate memory using palloc0() (matches index_form_tuple()) */
951  itup = palloc0(newsize);
952  memcpy(itup, origtuple, keysize);
953  itup->t_info &= ~INDEX_SIZE_MASK;
954  itup->t_info |= newsize;
955 
956  if (nhtids > 1)
957  {
958  /* Form posting list tuple */
959  BTreeTupleSetPosting(itup, nhtids, keysize);
960  htids = BTreeTupleGetPosting(itup);
961  }
962  else
963  {
964  /* Form standard non-pivot tuple */
965  itup->t_info &= ~INDEX_ALT_TID_MASK;
966  htids = &itup->t_tid;
967  }
968 
969  ui = 0;
970  d = 0;
971  for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++)
972  {
973  if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i)
974  {
975  d++;
976  continue;
977  }
978  htids[ui++] = *BTreeTupleGetPostingN(origtuple, i);
979  }
980  Assert(ui == nhtids);
981  Assert(d == vacposting->ndeletedtids);
982  Assert(nhtids == 1 || _bt_posting_valid(itup));
983  Assert(nhtids > 1 || ItemPointerIsValid(&itup->t_tid));
984 
985  /* vacposting arg's itup will now point to updated version */
986  vacposting->itup = itup;
987 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
uint16 ndeletedtids
Definition: nbtree.h:908
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:530
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple itup
Definition: nbtree.h:904
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:459
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:497
unsigned int uint32
Definition: c.h:441
void * palloc0(Size size)
Definition: mcxt.c:1093
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:909
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:537
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:522
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
unsigned short t_info
Definition: itup.h:49