PostgreSQL Source Code  git master
nbtdedup.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "miscadmin.h"
#include "utils/rel.h"
Include dependency graph for nbtdedup.c:

Go to the source code of this file.

Functions

static void _bt_bottomupdel_finish_pending (Page page, BTDedupState state, TM_IndexDeleteOp *delstate)
 
static bool _bt_do_singleval (Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
 
static void _bt_singleval_fillfactor (Page page, BTDedupState state, Size newitemsz)
 
void _bt_dedup_pass (Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, Size newitemsz, bool checkingunique)
 
bool _bt_bottomupdel_pass (Relation rel, Buffer buf, Relation heapRel, Size newitemsz)
 
void _bt_dedup_start_pending (BTDedupState state, IndexTuple base, OffsetNumber baseoff)
 
bool _bt_dedup_save_htid (BTDedupState state, IndexTuple itup)
 
Size _bt_dedup_finish_pending (Page newpage, BTDedupState state)
 
IndexTuple _bt_form_posting (IndexTuple base, ItemPointer htids, int nhtids)
 
void _bt_update_posting (BTVacuumPosting vacposting)
 
IndexTuple _bt_swap_posting (IndexTuple newitem, IndexTuple oposting, int postingoff)
 

Function Documentation

◆ _bt_bottomupdel_finish_pending()

static void _bt_bottomupdel_finish_pending ( Page  page,
BTDedupState  state,
TM_IndexDeleteOp delstate 
)
static

Definition at line 635 of file nbtdedup.c.

References Assert, BTDedupInterval::baseoff, BTDedupStateData::baseoff, BTreeTupleGetHeapTID(), BTreeTupleGetMaxHeapTID(), BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPosting(), TM_IndexDeleteOp::deltids, TM_IndexStatus::freespace, i, TM_IndexDelete::id, TM_IndexStatus::idxoffnum, BTDedupStateData::intervals, ItemIdGetLength, ItemPointerGetBlockNumber, TM_IndexStatus::knowndeletable, TM_IndexDeleteOp::ndeltids, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupInterval::nitems, BTDedupStateData::nitems, PageGetItem, PageGetItemId, BTDedupStateData::phystupsize, TM_IndexStatus::promising, TM_IndexDeleteOp::status, IndexTupleData::t_tid, and TM_IndexDelete::tid.

Referenced by _bt_bottomupdel_pass().

637 {
638  bool dupinterval = (state->nitems > 1);
639 
640  Assert(state->nitems > 0);
641  Assert(state->nitems <= state->nhtids);
642  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
643 
644  for (int i = 0; i < state->nitems; i++)
645  {
646  OffsetNumber offnum = state->baseoff + i;
647  ItemId itemid = PageGetItemId(page, offnum);
648  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
649  TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids];
650  TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids];
651 
652  if (!BTreeTupleIsPosting(itup))
653  {
654  /* Simple case: A plain non-pivot tuple */
655  ideltid->tid = itup->t_tid;
656  ideltid->id = delstate->ndeltids;
657  istatus->idxoffnum = offnum;
658  istatus->knowndeletable = false; /* for now */
659  istatus->promising = dupinterval; /* simple rule */
660  istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData);
661 
662  delstate->ndeltids++;
663  }
664  else
665  {
666  /*
667  * Complicated case: A posting list tuple.
668  *
669  * We make the conservative assumption that there can only be at
670  * most one affected logical row per posting list tuple. There
671  * will be at most one promising entry in deltids to represent
672  * this presumed lone logical row. Note that this isn't even
673  * considered unless the posting list tuple is also in an interval
674  * of duplicates -- this complicated rule is just a variant of the
675  * simple rule used to decide if plain index tuples are promising.
676  */
677  int nitem = BTreeTupleGetNPosting(itup);
678  bool firstpromising = false;
679  bool lastpromising = false;
680 
681  Assert(_bt_posting_valid(itup));
682 
683  if (dupinterval)
684  {
685  /*
686  * Complicated rule: either the first or last TID in the
687  * posting list gets marked promising (if any at all)
688  */
689  BlockNumber minblocklist,
690  midblocklist,
691  maxblocklist;
692  ItemPointer mintid,
693  midtid,
694  maxtid;
695 
696  mintid = BTreeTupleGetHeapTID(itup);
697  midtid = BTreeTupleGetPostingN(itup, nitem / 2);
698  maxtid = BTreeTupleGetMaxHeapTID(itup);
699  minblocklist = ItemPointerGetBlockNumber(mintid);
700  midblocklist = ItemPointerGetBlockNumber(midtid);
701  maxblocklist = ItemPointerGetBlockNumber(maxtid);
702 
703  /* Only entry with predominant table block can be promising */
704  firstpromising = (minblocklist == midblocklist);
705  lastpromising = (!firstpromising &&
706  midblocklist == maxblocklist);
707  }
708 
709  for (int p = 0; p < nitem; p++)
710  {
711  ItemPointer htid = BTreeTupleGetPostingN(itup, p);
712 
713  ideltid->tid = *htid;
714  ideltid->id = delstate->ndeltids;
715  istatus->idxoffnum = offnum;
716  istatus->knowndeletable = false; /* for now */
717  istatus->promising = false;
718  if ((firstpromising && p == 0) ||
719  (lastpromising && p == nitem - 1))
720  istatus->promising = true;
721  istatus->freespace = sizeof(ItemPointerData); /* at worst */
722 
723  ideltid++;
724  istatus++;
725  delstate->ndeltids++;
726  }
727  }
728  }
729 
730  if (dupinterval)
731  {
732  state->intervals[state->nintervals].nitems = state->nitems;
733  state->nintervals++;
734  }
735 
736  /* Reset state for next interval */
737  state->nhtids = 0;
738  state->nitems = 0;
739  state->phystupsize = 0;
740 }
TM_IndexDelete * deltids
Definition: tableam.h:228
OffsetNumber baseoff
Definition: nbtree.h:853
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:612
ItemPointerData t_tid
Definition: itup.h:37
bool knowndeletable
Definition: tableam.h:196
uint32 BlockNumber
Definition: block.h:31
OffsetNumber idxoffnum
Definition: tableam.h:195
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
Size phystupsize
Definition: nbtree.h:860
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:869
IndexTupleData * IndexTuple
Definition: itup.h:53
struct ItemIdData ItemIdData
bool promising
Definition: tableam.h:199
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:638
TM_IndexStatus * status
Definition: tableam.h:229
ItemPointerData tid
Definition: tableam.h:189
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:518
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:466
uint16 nitems
Definition: nbtree.h:823
#define Assert(condition)
Definition: c.h:804
struct ItemPointerData ItemPointerData
int16 freespace
Definition: tableam.h:200
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:492
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
OffsetNumber baseoff
Definition: nbtree.h:822
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_bottomupdel_pass()

bool _bt_bottomupdel_pass ( Relation  rel,
Buffer  buf,
Relation  heapRel,
Size  newitemsz 
)

Definition at line 299 of file nbtdedup.c.

References _bt_bottomupdel_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_delitems_delete_check(), _bt_keep_natts_fast(), Assert, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, TM_IndexDeleteOp::bottomup, TM_IndexDeleteOp::bottomupfreespace, BufferGetPage, BTDedupStateData::deduplicate, TM_IndexDeleteOp::deltids, BTDedupStateData::htids, IndexRelationGetNumberOfKeyAttributes, InvalidOffsetNumber, ItemIdIsDead, Max, BTDedupStateData::maxpostingsize, MaxTIDsPerBTreePage, TM_IndexDeleteOp::ndeltids, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, OffsetNumberNext, P_FIRSTDATAKEY, PageGetExactFreeSpace(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, palloc(), pfree(), BTDedupStateData::phystupsize, and TM_IndexDeleteOp::status.

Referenced by _bt_delete_or_dedup_one_page().

301 {
302  OffsetNumber offnum,
303  minoff,
304  maxoff;
305  Page page = BufferGetPage(buf);
308  TM_IndexDeleteOp delstate;
309  bool neverdedup;
310  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
311 
312  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
313  newitemsz += sizeof(ItemIdData);
314 
315  /* Initialize deduplication state */
316  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
317  state->deduplicate = true;
318  state->nmaxitems = 0;
319  state->maxpostingsize = BLCKSZ; /* We're not really deduplicating */
320  state->base = NULL;
321  state->baseoff = InvalidOffsetNumber;
322  state->basetupsize = 0;
323  state->htids = palloc(state->maxpostingsize);
324  state->nhtids = 0;
325  state->nitems = 0;
326  state->phystupsize = 0;
327  state->nintervals = 0;
328 
329  /*
330  * Initialize tableam state that describes bottom-up index deletion
331  * operation.
332  *
333  * We'll go on to ask the tableam to search for TIDs whose index tuples we
334  * can safely delete. The tableam will search until our leaf page space
335  * target is satisfied, or until the cost of continuing with the tableam
336  * operation seems too high. It focuses its efforts on TIDs associated
337  * with duplicate index tuples that we mark "promising".
338  *
339  * This space target is a little arbitrary. The tableam must be able to
340  * keep the costs and benefits in balance. We provide the tableam with
341  * exhaustive information about what might work, without directly
342  * concerning ourselves with avoiding work during the tableam call. Our
343  * role in costing the bottom-up deletion process is strictly advisory.
344  */
345  delstate.bottomup = true;
346  delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz);
347  delstate.ndeltids = 0;
348  delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete));
349  delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus));
350 
351  minoff = P_FIRSTDATAKEY(opaque);
352  maxoff = PageGetMaxOffsetNumber(page);
353  for (offnum = minoff;
354  offnum <= maxoff;
355  offnum = OffsetNumberNext(offnum))
356  {
357  ItemId itemid = PageGetItemId(page, offnum);
358  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
359 
360  Assert(!ItemIdIsDead(itemid));
361 
362  if (offnum == minoff)
363  {
364  /* itup starts first pending interval */
365  _bt_dedup_start_pending(state, itup, offnum);
366  }
367  else if (_bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
368  _bt_dedup_save_htid(state, itup))
369  {
370  /* Tuple is equal; just added its TIDs to pending interval */
371  }
372  else
373  {
374  /* Finalize interval -- move its TIDs to delete state */
375  _bt_bottomupdel_finish_pending(page, state, &delstate);
376 
377  /* itup starts new pending interval */
378  _bt_dedup_start_pending(state, itup, offnum);
379  }
380  }
381  /* Finalize final interval -- move its TIDs to delete state */
382  _bt_bottomupdel_finish_pending(page, state, &delstate);
383 
384  /*
385  * We don't give up now in the event of having few (or even zero)
386  * promising tuples for the tableam because it's not up to us as the index
387  * AM to manage costs (note that the tableam might have heuristics of its
388  * own that work out what to do). We should at least avoid having our
389  * caller do a useless deduplication pass after we return in the event of
390  * zero promising tuples, though.
391  */
392  neverdedup = false;
393  if (state->nintervals == 0)
394  neverdedup = true;
395 
396  pfree(state->htids);
397  pfree(state);
398 
399  /* Ask tableam which TIDs are deletable, then physically delete them */
400  _bt_delitems_delete_check(rel, buf, heapRel, &delstate);
401 
402  pfree(delstate.deltids);
403  pfree(delstate.status);
404 
405  /* Report "success" to caller unconditionally to avoid deduplication */
406  if (neverdedup)
407  return true;
408 
409  /* Don't dedup when we won't end up back here any time soon anyway */
410  return PageGetExactFreeSpace(page) >= Max(BLCKSZ / 24, newitemsz);
411 }
TM_IndexDelete * deltids
Definition: tableam.h:228
IndexTuple base
Definition: nbtree.h:852
void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, TM_IndexDeleteOp *delstate)
Definition: nbtpage.c:1490
OffsetNumber baseoff
Definition: nbtree.h:853
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:350
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:423
#define MaxTIDsPerBTreePage
Definition: nbtree.h:184
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:71
uint16 OffsetNumber
Definition: off.h:24
ItemPointer htids
Definition: nbtree.h:857
void pfree(void *pointer)
Definition: mcxt.c:1057
Size phystupsize
Definition: nbtree.h:860
static char * buf
Definition: pg_test_fsync.c:68
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:474
IndexTupleData * IndexTuple
Definition: itup.h:53
struct ItemIdData ItemIdData
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:476
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
TM_IndexStatus * status
Definition: tableam.h:229
static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state, TM_IndexDeleteOp *delstate)
Definition: nbtdedup.c:635
#define InvalidOffsetNumber
Definition: off.h:26
#define Max(x, y)
Definition: c.h:980
#define Assert(condition)
Definition: c.h:804
Definition: regguts.h:317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
Size basetupsize
Definition: nbtree.h:854
Size maxpostingsize
Definition: nbtree.h:849
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:841
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
void * palloc(Size size)
Definition: mcxt.c:950
bool deduplicate
Definition: nbtree.h:847
int bottomupfreespace
Definition: tableam.h:224
BTDedupStateData * BTDedupState
Definition: nbtree.h:872
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78

◆ _bt_dedup_finish_pending()

Size _bt_dedup_finish_pending ( Page  newpage,
BTDedupState  state 
)

Definition at line 545 of file nbtdedup.c.

References _bt_form_posting(), Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, elog, ERROR, BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, InvalidOffsetNumber, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupInterval::nitems, BTDedupStateData::nitems, OffsetNumberNext, PageAddItem, PageGetMaxOffsetNumber, pfree(), and BTDedupStateData::phystupsize.

Referenced by _bt_dedup_pass(), and btree_xlog_dedup().

546 {
547  OffsetNumber tupoff;
548  Size tuplesz;
549  Size spacesaving;
550 
551  Assert(state->nitems > 0);
552  Assert(state->nitems <= state->nhtids);
553  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
554 
555  tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage));
556  if (state->nitems == 1)
557  {
558  /* Use original, unchanged base tuple */
559  tuplesz = IndexTupleSize(state->base);
560  if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff,
561  false, false) == InvalidOffsetNumber)
562  elog(ERROR, "deduplication failed to add tuple to page");
563 
564  spacesaving = 0;
565  }
566  else
567  {
568  IndexTuple final;
569 
570  /* Form a tuple with a posting list */
571  final = _bt_form_posting(state->base, state->htids, state->nhtids);
572  tuplesz = IndexTupleSize(final);
573  Assert(tuplesz <= state->maxpostingsize);
574 
575  /* Save final number of items for posting list */
576  state->intervals[state->nintervals].nitems = state->nitems;
577 
578  Assert(tuplesz == MAXALIGN(IndexTupleSize(final)));
579  if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false,
580  false) == InvalidOffsetNumber)
581  elog(ERROR, "deduplication failed to add tuple to page");
582 
583  pfree(final);
584  spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData));
585  /* Increment nintervals, since we wrote a new posting list tuple */
586  state->nintervals++;
587  Assert(spacesaving > 0 && spacesaving < BLCKSZ);
588  }
589 
590  /* Reset state for next pending posting list */
591  state->nhtids = 0;
592  state->nitems = 0;
593  state->phystupsize = 0;
594 
595  return spacesaving;
596 }
IndexTuple base
Definition: nbtree.h:852
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:859
OffsetNumber baseoff
Definition: nbtree.h:853
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
ItemPointer htids
Definition: nbtree.h:857
void pfree(void *pointer)
Definition: mcxt.c:1057
Size phystupsize
Definition: nbtree.h:860
#define ERROR
Definition: elog.h:45
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:869
struct ItemIdData ItemIdData
#define InvalidOffsetNumber
Definition: off.h:26
uint16 nitems
Definition: nbtree.h:823
#define Assert(condition)
Definition: c.h:804
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
#define elog(elevel,...)
Definition: elog.h:227
OffsetNumber baseoff
Definition: nbtree.h:822
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_pass()

void _bt_dedup_pass ( Relation  rel,
Buffer  buf,
Relation  heapRel,
IndexTuple  newitem,
Size  newitemsz,
bool  checkingunique 
)

Definition at line 54 of file nbtdedup.c.

References _bt_dedup_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_do_singleval(), _bt_keep_natts_fast(), _bt_singleval_fillfactor(), Assert, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTMaxItemSize, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, BTDedupStateData::deduplicate, elog, END_CRIT_SECTION, ERROR, BTDedupStateData::htids, INDEX_SIZE_MASK, IndexRelationGetNumberOfKeyAttributes, BTDedupStateData::intervals, InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, MarkBufferDirty(), BTDedupStateData::maxpostingsize, Min, BTDedupStateData::nhtids, xl_btree_dedup::nintervals, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, OffsetNumberNext, P_FIRSTDATAKEY, P_HAS_GARBAGE, P_HIKEY, P_RIGHTMOST, PageAddItem, PageGetExactFreeSpace(), PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN, palloc(), pfree(), BTDedupStateData::phystupsize, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDedup, START_CRIT_SECTION, XLOG_BTREE_DEDUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_delete_or_dedup_one_page().

56 {
57  OffsetNumber offnum,
58  minoff,
59  maxoff;
60  Page page = BufferGetPage(buf);
62  Page newpage;
64  Size pagesaving = 0;
65  bool singlevalstrat = false;
66  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
67 
68  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
69  newitemsz += sizeof(ItemIdData);
70 
71  /*
72  * Initialize deduplication state.
73  *
74  * It would be possible for maxpostingsize (limit on posting list tuple
75  * size) to be set to one third of the page. However, it seems like a
76  * good idea to limit the size of posting lists to one sixth of a page.
77  * That ought to leave us with a good split point when pages full of
78  * duplicates can be split several times.
79  */
80  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
81  state->deduplicate = true;
82  state->nmaxitems = 0;
83  state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
84  /* Metadata about base tuple of current pending posting list */
85  state->base = NULL;
87  state->basetupsize = 0;
88  /* Metadata about current pending posting list TIDs */
89  state->htids = palloc(state->maxpostingsize);
90  state->nhtids = 0;
91  state->nitems = 0;
92  /* Size of all physical tuples to be replaced by pending posting list */
93  state->phystupsize = 0;
94  /* nintervals should be initialized to zero */
95  state->nintervals = 0;
96 
97  minoff = P_FIRSTDATAKEY(opaque);
98  maxoff = PageGetMaxOffsetNumber(page);
99 
100  /* Determine if "single value" strategy should be used */
101  if (!checkingunique)
102  singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
103 
104  /*
105  * Deduplicate items from page, and write them to newpage.
106  *
107  * Copy the original page's LSN into newpage copy. This will become the
108  * updated version of the page. We need this because XLogInsert will
109  * examine the LSN and possibly dump it in a page image.
110  */
111  newpage = PageGetTempPageCopySpecial(page);
112  PageSetLSN(newpage, PageGetLSN(page));
113 
114  /* Copy high key, if any */
115  if (!P_RIGHTMOST(opaque))
116  {
117  ItemId hitemid = PageGetItemId(page, P_HIKEY);
118  Size hitemsz = ItemIdGetLength(hitemid);
119  IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid);
120 
121  if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY,
122  false, false) == InvalidOffsetNumber)
123  elog(ERROR, "deduplication failed to add highkey");
124  }
125 
126  for (offnum = minoff;
127  offnum <= maxoff;
128  offnum = OffsetNumberNext(offnum))
129  {
130  ItemId itemid = PageGetItemId(page, offnum);
131  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
132 
133  Assert(!ItemIdIsDead(itemid));
134 
135  if (offnum == minoff)
136  {
137  /*
138  * No previous/base tuple for the data item -- use the data item
139  * as base tuple of pending posting list
140  */
141  _bt_dedup_start_pending(state, itup, offnum);
142  }
143  else if (state->deduplicate &&
144  _bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
145  _bt_dedup_save_htid(state, itup))
146  {
147  /*
148  * Tuple is equal to base tuple of pending posting list. Heap
149  * TID(s) for itup have been saved in state.
150  */
151  }
152  else
153  {
154  /*
155  * Tuple is not equal to pending posting list tuple, or
156  * _bt_dedup_save_htid() opted to not merge current item into
157  * pending posting list for some other reason (e.g., adding more
158  * TIDs would have caused posting list to exceed current
159  * maxpostingsize).
160  *
161  * If state contains pending posting list with more than one item,
162  * form new posting tuple, and actually update the page. Else
163  * reset the state and move on without modifying the page.
164  */
165  pagesaving += _bt_dedup_finish_pending(newpage, state);
166 
167  if (singlevalstrat)
168  {
169  /*
170  * Single value strategy's extra steps.
171  *
172  * Lower maxpostingsize for sixth and final large posting list
173  * tuple at the point where 5 maxpostingsize-capped tuples
174  * have either been formed or observed.
175  *
176  * When a sixth maxpostingsize-capped item is formed/observed,
177  * stop merging together tuples altogether. The few tuples
178  * that remain at the end of the page won't be merged together
179  * at all (at least not until after a future page split takes
180  * place).
181  */
182  if (state->nmaxitems == 5)
183  _bt_singleval_fillfactor(page, state, newitemsz);
184  else if (state->nmaxitems == 6)
185  {
186  state->deduplicate = false;
187  singlevalstrat = false; /* won't be back here */
188  }
189  }
190 
191  /* itup starts new pending posting list */
192  _bt_dedup_start_pending(state, itup, offnum);
193  }
194  }
195 
196  /* Handle the last item */
197  pagesaving += _bt_dedup_finish_pending(newpage, state);
198 
199  /*
200  * If no items suitable for deduplication were found, newpage must be
201  * exactly the same as the original page, so just return from function.
202  *
203  * We could determine whether or not to proceed on the basis the space
204  * savings being sufficient to avoid an immediate page split instead. We
205  * don't do that because there is some small value in nbtsplitloc.c always
206  * operating against a page that is fully deduplicated (apart from
207  * newitem). Besides, most of the cost has already been paid.
208  */
209  if (state->nintervals == 0)
210  {
211  /* cannot leak memory here */
212  pfree(newpage);
213  pfree(state->htids);
214  pfree(state);
215  return;
216  }
217 
218  /*
219  * By here, it's clear that deduplication will definitely go ahead.
220  *
221  * Clear the BTP_HAS_GARBAGE page flag. The index must be a heapkeyspace
222  * index, and as such we'll never pay attention to BTP_HAS_GARBAGE anyway.
223  * But keep things tidy.
224  */
225  if (P_HAS_GARBAGE(opaque))
226  {
227  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(newpage);
228 
229  nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
230  }
231 
233 
234  PageRestoreTempPage(newpage, page);
236 
237  /* XLOG stuff */
238  if (RelationNeedsWAL(rel))
239  {
240  XLogRecPtr recptr;
241  xl_btree_dedup xlrec_dedup;
242 
243  xlrec_dedup.nintervals = state->nintervals;
244 
245  XLogBeginInsert();
247  XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup);
248 
249  /*
250  * The intervals array is not in the buffer, but pretend that it is.
251  * When XLogInsert stores the whole buffer, the array need not be
252  * stored too.
253  */
254  XLogRegisterBufData(0, (char *) state->intervals,
255  state->nintervals * sizeof(BTDedupInterval));
256 
257  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP);
258 
259  PageSetLSN(page, recptr);
260  }
261 
263 
264  /* Local space accounting should agree with page accounting */
265  Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz);
266 
267  /* cannot leak memory here */
268  pfree(state->htids);
269  pfree(state);
270 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
IndexTuple base
Definition: nbtree.h:852
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:411
uint16 nintervals
Definition: nbtxlog.h:173
OffsetNumber baseoff
Definition: nbtree.h:853
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1483
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:350
#define Min(x, y)
Definition: c.h:986
#define END_CRIT_SECTION()
Definition: miscadmin.h:135
Pointer Item
Definition: item.h:17
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define P_HAS_GARBAGE(opaque)
Definition: nbtree.h:225
#define START_CRIT_SECTION()
Definition: miscadmin.h:133
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:423
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
Size _bt_dedup_finish_pending(Page newpage, BTDedupState state)
Definition: nbtdedup.c:545
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:71
uint16 OffsetNumber
Definition: off.h:24
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:389
ItemPointer htids
Definition: nbtree.h:857
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
Size phystupsize
Definition: nbtree.h:860
#define ERROR
Definition: elog.h:45
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:869
static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
Definition: nbtdedup.c:777
static char * buf
Definition: pg_test_fsync.c:68
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:474
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
struct ItemIdData ItemIdData
#define SizeOfBtreeDedup
Definition: nbtxlog.h:178
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:476
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define XLOG_BTREE_DEDUP
Definition: nbtxlog.h:33
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define InvalidOffsetNumber
Definition: off.h:26
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
Definition: regguts.h:317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:540
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
Size basetupsize
Definition: nbtree.h:854
#define RelationNeedsWAL(relation)
Definition: rel.h:563
Size maxpostingsize
Definition: nbtree.h:849
#define PageGetLSN(page)
Definition: bufpage.h:366
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:841
#define BTMaxItemSize(page)
Definition: nbtree.h:162
#define P_HIKEY
Definition: nbtree.h:348
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
void * palloc(Size size)
Definition: mcxt.c:950
bool deduplicate
Definition: nbtree.h:847
#define elog(elevel,...)
Definition: elog.h:227
BTDedupStateData * BTDedupState
Definition: nbtree.h:872
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:67
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:218
#define BTP_HAS_GARBAGE
Definition: nbtree.h:80
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static void _bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz)
Definition: nbtdedup.c:817

◆ _bt_dedup_save_htid()

bool _bt_dedup_save_htid ( BTDedupState  state,
IndexTuple  itup 
)

Definition at line 474 of file nbtdedup.c.

References Assert, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, MAXALIGN, BTDedupStateData::maxpostingsize, BTDedupStateData::nhtids, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_bottomupdel_pass(), _bt_dedup_pass(), _bt_load(), and btree_xlog_dedup().

475 {
476  int nhtids;
477  ItemPointer htids;
478  Size mergedtupsz;
479 
480  Assert(!BTreeTupleIsPivot(itup));
481 
482  if (!BTreeTupleIsPosting(itup))
483  {
484  nhtids = 1;
485  htids = &itup->t_tid;
486  }
487  else
488  {
489  nhtids = BTreeTupleGetNPosting(itup);
490  htids = BTreeTupleGetPosting(itup);
491  }
492 
493  /*
494  * Don't append (have caller finish pending posting list as-is) if
495  * appending heap TID(s) from itup would put us over maxpostingsize limit.
496  *
497  * This calculation needs to match the code used within _bt_form_posting()
498  * for new posting list tuples.
499  */
500  mergedtupsz = MAXALIGN(state->basetupsize +
501  (state->nhtids + nhtids) * sizeof(ItemPointerData));
502 
503  if (mergedtupsz > state->maxpostingsize)
504  {
505  /*
506  * Count this as an oversized item for single value strategy, though
507  * only when there are 50 TIDs in the final posting list tuple. This
508  * limit (which is fairly arbitrary) avoids confusion about how many
509  * 1/6 of a page tuples have been encountered/created by the current
510  * deduplication pass.
511  *
512  * Note: We deliberately don't consider which deduplication pass
513  * merged together tuples to create this item (could be a previous
514  * deduplication pass, or current pass). See _bt_do_singleval()
515  * comments.
516  */
517  if (state->nhtids > 50)
518  state->nmaxitems++;
519 
520  return false;
521  }
522 
523  /*
524  * Save heap TIDs to pending posting list tuple -- itup can be merged into
525  * pending posting list
526  */
527  state->nitems++;
528  memcpy(state->htids + state->nhtids, htids,
529  sizeof(ItemPointerData) * nhtids);
530  state->nhtids += nhtids;
531  state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
532 
533  return true;
534 }
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:454
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:511
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:857
Size phystupsize
Definition: nbtree.h:860
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:466
#define Assert(condition)
Definition: c.h:804
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
Size basetupsize
Definition: nbtree.h:854
Size maxpostingsize
Definition: nbtree.h:849
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:492
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_start_pending()

void _bt_dedup_start_pending ( BTDedupState  state,
IndexTuple  base,
OffsetNumber  baseoff 
)

Definition at line 423 of file nbtdedup.c.

References Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_bottomupdel_pass(), _bt_dedup_pass(), _bt_load(), and btree_xlog_dedup().

425 {
426  Assert(state->nhtids == 0);
427  Assert(state->nitems == 0);
428  Assert(!BTreeTupleIsPivot(base));
429 
430  /*
431  * Copy heap TID(s) from new base tuple for new candidate posting list
432  * into working state's array
433  */
434  if (!BTreeTupleIsPosting(base))
435  {
436  memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData));
437  state->nhtids = 1;
438  state->basetupsize = IndexTupleSize(base);
439  }
440  else
441  {
442  int nposting;
443 
444  nposting = BTreeTupleGetNPosting(base);
445  memcpy(state->htids, BTreeTupleGetPosting(base),
446  sizeof(ItemPointerData) * nposting);
447  state->nhtids = nposting;
448  /* basetupsize should not include existing posting list */
450  }
451 
452  /*
453  * Save new base tuple itself -- it'll be needed if we actually create a
454  * new posting list from new pending posting list.
455  *
456  * Must maintain physical size of all existing tuples (including line
457  * pointer overhead) so that we can calculate space savings on page.
458  */
459  state->nitems = 1;
460  state->base = base;
461  state->baseoff = baseoff;
462  state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData);
463  /* Also save baseoff in pending state for interval */
464  state->intervals[state->nintervals].baseoff = state->baseoff;
465 }
IndexTuple base
Definition: nbtree.h:852
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:454
OffsetNumber baseoff
Definition: nbtree.h:853
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:511
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:857
Size phystupsize
Definition: nbtree.h:860
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:869
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:466
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:503
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
Size basetupsize
Definition: nbtree.h:854
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:492
OffsetNumber baseoff
Definition: nbtree.h:822
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_do_singleval()

static bool _bt_do_singleval ( Relation  rel,
Page  page,
BTDedupState  state,
OffsetNumber  minoff,
IndexTuple  newitem 
)
static

Definition at line 777 of file nbtdedup.c.

References _bt_keep_natts_fast(), IndexRelationGetNumberOfKeyAttributes, PageGetItem, PageGetItemId, and PageGetMaxOffsetNumber.

Referenced by _bt_dedup_pass().

779 {
780  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
781  ItemId itemid;
782  IndexTuple itup;
783 
784  itemid = PageGetItemId(page, minoff);
785  itup = (IndexTuple) PageGetItem(page, itemid);
786 
787  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
788  {
789  itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
790  itup = (IndexTuple) PageGetItem(page, itemid);
791 
792  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
793  return true;
794  }
795 
796  return false;
797 }
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:476
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2419
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_form_posting()

IndexTuple _bt_form_posting ( IndexTuple  base,
ItemPointer  htids,
int  nhtids 
)

Definition at line 859 of file nbtdedup.c.

References Assert, BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTreeTupleSetPosting(), INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, IndexTupleSize, ItemPointerCopy, ItemPointerIsValid, MAXALIGN, palloc0(), PG_UINT16_MAX, IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_dedup_finish_pending(), _bt_sort_dedup_finish_pending(), and bt_posting_plain_tuple().

860 {
861  uint32 keysize,
862  newsize;
863  IndexTuple itup;
864 
865  if (BTreeTupleIsPosting(base))
866  keysize = BTreeTupleGetPostingOffset(base);
867  else
868  keysize = IndexTupleSize(base);
869 
870  Assert(!BTreeTupleIsPivot(base));
871  Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX);
872  Assert(keysize == MAXALIGN(keysize));
873 
874  /* Determine final size of new tuple */
875  if (nhtids > 1)
876  newsize = MAXALIGN(keysize +
877  nhtids * sizeof(ItemPointerData));
878  else
879  newsize = keysize;
880 
881  Assert(newsize <= INDEX_SIZE_MASK);
882  Assert(newsize == MAXALIGN(newsize));
883 
884  /* Allocate memory using palloc0() (matches index_form_tuple()) */
885  itup = palloc0(newsize);
886  memcpy(itup, base, keysize);
887  itup->t_info &= ~INDEX_SIZE_MASK;
888  itup->t_info |= newsize;
889  if (nhtids > 1)
890  {
891  /* Form posting list tuple */
892  BTreeTupleSetPosting(itup, nhtids, keysize);
893  memcpy(BTreeTupleGetPosting(itup), htids,
894  sizeof(ItemPointerData) * nhtids);
895  Assert(_bt_posting_valid(itup));
896  }
897  else
898  {
899  /* Form standard non-pivot tuple */
900  itup->t_info &= ~INDEX_ALT_TID_MASK;
901  ItemPointerCopy(htids, &itup->t_tid);
903  }
904 
905  return itup;
906 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:454
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:511
ItemPointerData t_tid
Definition: itup.h:37
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:440
#define PG_UINT16_MAX
Definition: c.h:522
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:478
unsigned int uint32
Definition: c.h:441
void * palloc0(Size size)
Definition: mcxt.c:981
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:466
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:503
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
unsigned short t_info
Definition: itup.h:49
#define IndexTupleSize(itup)
Definition: itup.h:71
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_singleval_fillfactor()

static void _bt_singleval_fillfactor ( Page  page,
BTDedupState  state,
Size  newitemsz 
)
static

Definition at line 817 of file nbtdedup.c.

References BTREE_SINGLEVAL_FILLFACTOR, MAXALIGN, BTDedupStateData::maxpostingsize, PageGetPageSize, and SizeOfPageHeaderData.

Referenced by _bt_dedup_pass().

818 {
819  Size leftfree;
820  int reduction;
821 
822  /* This calculation needs to match nbtsplitloc.c */
823  leftfree = PageGetPageSize(page) - SizeOfPageHeaderData -
824  MAXALIGN(sizeof(BTPageOpaqueData));
825  /* Subtract size of new high key (includes pivot heap TID space) */
826  leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData));
827 
828  /*
829  * Reduce maxpostingsize by an amount equal to target free space on left
830  * half of page
831  */
832  reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0);
833  if (state->maxpostingsize > reduction)
834  state->maxpostingsize -= reduction;
835  else
836  state->maxpostingsize = 0;
837 }
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define PageGetPageSize(page)
Definition: bufpage.h:268
#define BTREE_SINGLEVAL_FILLFACTOR
Definition: nbtree.h:201
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
Size maxpostingsize
Definition: nbtree.h:849

◆ _bt_swap_posting()

IndexTuple _bt_swap_posting ( IndexTuple  newitem,
IndexTuple  oposting,
int  postingoff 
)

Definition at line 1017 of file nbtdedup.c.

References Assert, BTreeTupleGetHeapTID(), BTreeTupleGetMaxHeapTID(), BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), CopyIndexTuple(), i, ItemPointerCompare(), ItemPointerCopy, ItemPointerIsValid, and IndexTupleData::t_tid.

Referenced by _bt_insertonpg(), btree_xlog_insert(), and btree_xlog_split().

1018 {
1019  int nhtids;
1020  char *replacepos;
1021  char *replaceposright;
1022  Size nmovebytes;
1023  IndexTuple nposting;
1024 
1025  nhtids = BTreeTupleGetNPosting(oposting);
1026  Assert(_bt_posting_valid(oposting));
1027  Assert(postingoff > 0 && postingoff < nhtids);
1028 
1029  /*
1030  * Move item pointers in posting list to make a gap for the new item's
1031  * heap TID. We shift TIDs one place to the right, losing original
1032  * rightmost TID. (nmovebytes must not include TIDs to the left of
1033  * postingoff, nor the existing rightmost/max TID that gets overwritten.)
1034  */
1035  nposting = CopyIndexTuple(oposting);
1036  replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff);
1037  replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1);
1038  nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData);
1039  memmove(replaceposright, replacepos, nmovebytes);
1040 
1041  /* Fill the gap at postingoff with TID of new item (original new TID) */
1042  Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem));
1043  ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos);
1044 
1045  /* Now copy oposting's rightmost/max TID into new item (final new TID) */
1046  ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid);
1047 
1049  BTreeTupleGetHeapTID(newitem)) < 0);
1050  Assert(_bt_posting_valid(nposting));
1051 
1052  return nposting;
1053 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:454
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:612
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:510
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:638
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:518
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:466
#define Assert(condition)
Definition: c.h:804
size_t Size
Definition: c.h:540
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:492
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_update_posting()

void _bt_update_posting ( BTVacuumPosting  vacposting)

Definition at line 919 of file nbtdedup.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingN(), BTreeTupleGetPostingOffset(), BTreeTupleSetPosting(), BTVacuumPostingData::deletetids, i, INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, ItemPointerIsValid, BTVacuumPostingData::itup, MAXALIGN, BTVacuumPostingData::ndeletedtids, palloc0(), IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_delitems_update(), and btree_xlog_updates().

920 {
921  IndexTuple origtuple = vacposting->itup;
922  uint32 keysize,
923  newsize;
924  IndexTuple itup;
925  int nhtids;
926  int ui,
927  d;
928  ItemPointer htids;
929 
930  nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids;
931 
932  Assert(_bt_posting_valid(origtuple));
933  Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple));
934 
935  /*
936  * Determine final size of new tuple.
937  *
938  * This calculation needs to match the code used within _bt_form_posting()
939  * for new posting list tuples. We avoid calling _bt_form_posting() here
940  * to save ourselves a second memory allocation for a htids workspace.
941  */
942  keysize = BTreeTupleGetPostingOffset(origtuple);
943  if (nhtids > 1)
944  newsize = MAXALIGN(keysize +
945  nhtids * sizeof(ItemPointerData));
946  else
947  newsize = keysize;
948 
949  Assert(newsize <= INDEX_SIZE_MASK);
950  Assert(newsize == MAXALIGN(newsize));
951 
952  /* Allocate memory using palloc0() (matches index_form_tuple()) */
953  itup = palloc0(newsize);
954  memcpy(itup, origtuple, keysize);
955  itup->t_info &= ~INDEX_SIZE_MASK;
956  itup->t_info |= newsize;
957 
958  if (nhtids > 1)
959  {
960  /* Form posting list tuple */
961  BTreeTupleSetPosting(itup, nhtids, keysize);
962  htids = BTreeTupleGetPosting(itup);
963  }
964  else
965  {
966  /* Form standard non-pivot tuple */
967  itup->t_info &= ~INDEX_ALT_TID_MASK;
968  htids = &itup->t_tid;
969  }
970 
971  ui = 0;
972  d = 0;
973  for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++)
974  {
975  if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i)
976  {
977  d++;
978  continue;
979  }
980  htids[ui++] = *BTreeTupleGetPostingN(origtuple, i);
981  }
982  Assert(ui == nhtids);
983  Assert(d == vacposting->ndeletedtids);
984  Assert(nhtids == 1 || _bt_posting_valid(itup));
985  Assert(nhtids > 1 || ItemPointerIsValid(&itup->t_tid));
986 
987  /* vacposting arg's itup will now point to updated version */
988  vacposting->itup = itup;
989 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
uint16 ndeletedtids
Definition: nbtree.h:889
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:511
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple itup
Definition: nbtree.h:885
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:440
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:478
unsigned int uint32
Definition: c.h:441
void * palloc0(Size size)
Definition: mcxt.c:981
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:890
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:518
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:503
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:492
unsigned short t_info
Definition: itup.h:49