PostgreSQL Source Code  git master
nbtdedup.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "miscadmin.h"
#include "utils/rel.h"
Include dependency graph for nbtdedup.c:

Go to the source code of this file.

Functions

static bool _bt_do_singleval (Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
 
static void _bt_singleval_fillfactor (Page page, BTDedupState state, Size newitemsz)
 
void _bt_dedup_one_page (Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, Size newitemsz, bool checkingunique)
 
void _bt_dedup_start_pending (BTDedupState state, IndexTuple base, OffsetNumber baseoff)
 
bool _bt_dedup_save_htid (BTDedupState state, IndexTuple itup)
 
Size _bt_dedup_finish_pending (Page newpage, BTDedupState state)
 
IndexTuple _bt_form_posting (IndexTuple base, ItemPointer htids, int nhtids)
 
void _bt_update_posting (BTVacuumPosting vacposting)
 
IndexTuple _bt_swap_posting (IndexTuple newitem, IndexTuple oposting, int postingoff)
 

Function Documentation

◆ _bt_dedup_finish_pending()

Size _bt_dedup_finish_pending ( Page  newpage,
BTDedupState  state 
)

Definition at line 446 of file nbtdedup.c.

References _bt_form_posting(), Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, elog, ERROR, BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, InvalidOffsetNumber, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupInterval::nitems, BTDedupStateData::nitems, OffsetNumberNext, PageAddItem, PageGetMaxOffsetNumber, pfree(), and BTDedupStateData::phystupsize.

Referenced by _bt_dedup_one_page(), and btree_xlog_dedup().

447 {
448  OffsetNumber tupoff;
449  Size tuplesz;
450  Size spacesaving;
451 
452  Assert(state->nitems > 0);
453  Assert(state->nitems <= state->nhtids);
454  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
455 
456  tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage));
457  if (state->nitems == 1)
458  {
459  /* Use original, unchanged base tuple */
460  tuplesz = IndexTupleSize(state->base);
461  if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff,
462  false, false) == InvalidOffsetNumber)
463  elog(ERROR, "deduplication failed to add tuple to page");
464 
465  spacesaving = 0;
466  }
467  else
468  {
469  IndexTuple final;
470 
471  /* Form a tuple with a posting list */
472  final = _bt_form_posting(state->base, state->htids, state->nhtids);
473  tuplesz = IndexTupleSize(final);
474  Assert(tuplesz <= state->maxpostingsize);
475 
476  /* Save final number of items for posting list */
477  state->intervals[state->nintervals].nitems = state->nitems;
478 
479  Assert(tuplesz == MAXALIGN(IndexTupleSize(final)));
480  if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false,
481  false) == InvalidOffsetNumber)
482  elog(ERROR, "deduplication failed to add tuple to page");
483 
484  pfree(final);
485  spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData));
486  /* Increment nintervals, since we wrote a new posting list tuple */
487  state->nintervals++;
488  Assert(spacesaving > 0 && spacesaving < BLCKSZ);
489  }
490 
491  /* Reset state for next pending posting list */
492  state->nhtids = 0;
493  state->nitems = 0;
494  state->phystupsize = 0;
495 
496  return spacesaving;
497 }
IndexTuple base
Definition: nbtree.h:746
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:616
OffsetNumber baseoff
Definition: nbtree.h:747
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:410
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
ItemPointer htids
Definition: nbtree.h:751
void pfree(void *pointer)
Definition: mcxt.c:1057
Size phystupsize
Definition: nbtree.h:754
#define ERROR
Definition: elog.h:43
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:763
struct ItemIdData ItemIdData
#define InvalidOffsetNumber
Definition: off.h:26
uint16 nitems
Definition: nbtree.h:717
#define Assert(condition)
Definition: c.h:746
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:474
#define MAXALIGN(LEN)
Definition: c.h:699
#define elog(elevel,...)
Definition: elog.h:214
OffsetNumber baseoff
Definition: nbtree.h:716
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_one_page()

void _bt_dedup_one_page ( Relation  rel,
Buffer  buf,
Relation  heapRel,
IndexTuple  newitem,
Size  newitemsz,
bool  checkingunique 
)

Definition at line 56 of file nbtdedup.c.

References _bt_dedup_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), _bt_delitems_delete(), _bt_do_singleval(), _bt_keep_natts_fast(), _bt_singleval_fillfactor(), Assert, BTDedupStateData::base, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTMaxItemSize, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, BTDedupStateData::deduplicate, elog, END_CRIT_SECTION, ERROR, BTDedupStateData::htids, INDEX_SIZE_MASK, IndexRelationGetNumberOfKeyAttributes, BTDedupStateData::intervals, InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, MarkBufferDirty(), MaxIndexTuplesPerPage, BTDedupStateData::maxpostingsize, Min, BTDedupStateData::nhtids, xl_btree_dedup::nintervals, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, OffsetNumberNext, P_FIRSTDATAKEY, P_HAS_GARBAGE, P_HIKEY, P_RIGHTMOST, PageAddItem, PageGetExactFreeSpace(), PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN, palloc(), pfree(), BTDedupStateData::phystupsize, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDedup, START_CRIT_SECTION, XLOG_BTREE_DEDUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_findinsertloc().

58 {
59  OffsetNumber offnum,
60  minoff,
61  maxoff;
62  Page page = BufferGetPage(buf);
63  BTPageOpaque opaque;
64  Page newpage;
67  int ndeletable = 0;
68  Size pagesaving = 0;
69  bool singlevalstrat = false;
70  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
71 
72  /*
73  * We can't assume that there are no LP_DEAD items. For one thing, VACUUM
74  * will clear the BTP_HAS_GARBAGE hint without reliably removing items
75  * that are marked LP_DEAD. We don't want to unnecessarily unset LP_DEAD
76  * bits when deduplicating items. Allowing it would be correct, though
77  * wasteful.
78  */
79  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
80  minoff = P_FIRSTDATAKEY(opaque);
81  maxoff = PageGetMaxOffsetNumber(page);
82  for (offnum = minoff;
83  offnum <= maxoff;
84  offnum = OffsetNumberNext(offnum))
85  {
86  ItemId itemid = PageGetItemId(page, offnum);
87 
88  if (ItemIdIsDead(itemid))
89  deletable[ndeletable++] = offnum;
90  }
91 
92  if (ndeletable > 0)
93  {
94  _bt_delitems_delete(rel, buf, deletable, ndeletable, heapRel);
95 
96  /*
97  * Return when a split will be avoided. This is equivalent to
98  * avoiding a split using the usual _bt_vacuum_one_page() path.
99  */
100  if (PageGetFreeSpace(page) >= newitemsz)
101  return;
102 
103  /*
104  * Reconsider number of items on page, in case _bt_delitems_delete()
105  * managed to delete an item or two
106  */
107  minoff = P_FIRSTDATAKEY(opaque);
108  maxoff = PageGetMaxOffsetNumber(page);
109  }
110 
111  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
112  newitemsz += sizeof(ItemIdData);
113 
114  /*
115  * By here, it's clear that deduplication will definitely be attempted.
116  * Initialize deduplication state.
117  *
118  * It would be possible for maxpostingsize (limit on posting list tuple
119  * size) to be set to one third of the page. However, it seems like a
120  * good idea to limit the size of posting lists to one sixth of a page.
121  * That ought to leave us with a good split point when pages full of
122  * duplicates can be split several times.
123  */
124  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
125  state->deduplicate = true;
126  state->nmaxitems = 0;
127  state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
128  /* Metadata about base tuple of current pending posting list */
129  state->base = NULL;
130  state->baseoff = InvalidOffsetNumber;
131  state->basetupsize = 0;
132  /* Metadata about current pending posting list TIDs */
133  state->htids = palloc(state->maxpostingsize);
134  state->nhtids = 0;
135  state->nitems = 0;
136  /* Size of all physical tuples to be replaced by pending posting list */
137  state->phystupsize = 0;
138  /* nintervals should be initialized to zero */
139  state->nintervals = 0;
140 
141  /* Determine if "single value" strategy should be used */
142  if (!checkingunique)
143  singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
144 
145  /*
146  * Deduplicate items from page, and write them to newpage.
147  *
148  * Copy the original page's LSN into newpage copy. This will become the
149  * updated version of the page. We need this because XLogInsert will
150  * examine the LSN and possibly dump it in a page image.
151  */
152  newpage = PageGetTempPageCopySpecial(page);
153  PageSetLSN(newpage, PageGetLSN(page));
154 
155  /* Copy high key, if any */
156  if (!P_RIGHTMOST(opaque))
157  {
158  ItemId hitemid = PageGetItemId(page, P_HIKEY);
159  Size hitemsz = ItemIdGetLength(hitemid);
160  IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid);
161 
162  if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY,
163  false, false) == InvalidOffsetNumber)
164  elog(ERROR, "deduplication failed to add highkey");
165  }
166 
167  for (offnum = minoff;
168  offnum <= maxoff;
169  offnum = OffsetNumberNext(offnum))
170  {
171  ItemId itemid = PageGetItemId(page, offnum);
172  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
173 
174  Assert(!ItemIdIsDead(itemid));
175 
176  if (offnum == minoff)
177  {
178  /*
179  * No previous/base tuple for the data item -- use the data item
180  * as base tuple of pending posting list
181  */
182  _bt_dedup_start_pending(state, itup, offnum);
183  }
184  else if (state->deduplicate &&
185  _bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
186  _bt_dedup_save_htid(state, itup))
187  {
188  /*
189  * Tuple is equal to base tuple of pending posting list. Heap
190  * TID(s) for itup have been saved in state.
191  */
192  }
193  else
194  {
195  /*
196  * Tuple is not equal to pending posting list tuple, or
197  * _bt_dedup_save_htid() opted to not merge current item into
198  * pending posting list for some other reason (e.g., adding more
199  * TIDs would have caused posting list to exceed current
200  * maxpostingsize).
201  *
202  * If state contains pending posting list with more than one item,
203  * form new posting tuple, and actually update the page. Else
204  * reset the state and move on without modifying the page.
205  */
206  pagesaving += _bt_dedup_finish_pending(newpage, state);
207 
208  if (singlevalstrat)
209  {
210  /*
211  * Single value strategy's extra steps.
212  *
213  * Lower maxpostingsize for sixth and final large posting list
214  * tuple at the point where 5 maxpostingsize-capped tuples
215  * have either been formed or observed.
216  *
217  * When a sixth maxpostingsize-capped item is formed/observed,
218  * stop merging together tuples altogether. The few tuples
219  * that remain at the end of the page won't be merged together
220  * at all (at least not until after a future page split takes
221  * place).
222  */
223  if (state->nmaxitems == 5)
224  _bt_singleval_fillfactor(page, state, newitemsz);
225  else if (state->nmaxitems == 6)
226  {
227  state->deduplicate = false;
228  singlevalstrat = false; /* won't be back here */
229  }
230  }
231 
232  /* itup starts new pending posting list */
233  _bt_dedup_start_pending(state, itup, offnum);
234  }
235  }
236 
237  /* Handle the last item */
238  pagesaving += _bt_dedup_finish_pending(newpage, state);
239 
240  /*
241  * If no items suitable for deduplication were found, newpage must be
242  * exactly the same as the original page, so just return from function.
243  *
244  * We could determine whether or not to proceed on the basis the space
245  * savings being sufficient to avoid an immediate page split instead. We
246  * don't do that because there is some small value in nbtsplitloc.c always
247  * operating against a page that is fully deduplicated (apart from
248  * newitem). Besides, most of the cost has already been paid.
249  */
250  if (state->nintervals == 0)
251  {
252  /* cannot leak memory here */
253  pfree(newpage);
254  pfree(state->htids);
255  pfree(state);
256  return;
257  }
258 
259  /*
260  * By here, it's clear that deduplication will definitely go ahead.
261  *
262  * Clear the BTP_HAS_GARBAGE page flag in the unlikely event that it is
263  * still falsely set, just to keep things tidy. (We can't rely on
264  * _bt_vacuum_one_page() having done this already, and we can't rely on a
265  * page split or VACUUM getting to it in the near future.)
266  */
267  if (P_HAS_GARBAGE(opaque))
268  {
269  BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(newpage);
270 
271  nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
272  }
273 
275 
276  PageRestoreTempPage(newpage, page);
278 
279  /* XLOG stuff */
280  if (RelationNeedsWAL(rel))
281  {
282  XLogRecPtr recptr;
283  xl_btree_dedup xlrec_dedup;
284 
285  xlrec_dedup.nintervals = state->nintervals;
286 
287  XLogBeginInsert();
289  XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup);
290 
291  /*
292  * The intervals array is not in the buffer, but pretend that it is.
293  * When XLogInsert stores the whole buffer, the array need not be
294  * stored too.
295  */
296  XLogRegisterBufData(0, (char *) state->intervals,
297  state->nintervals * sizeof(BTDedupInterval));
298 
299  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP);
300 
301  PageSetLSN(page, recptr);
302  }
303 
305 
306  /* Local space accounting should agree with page accounting */
307  Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz);
308 
309  /* cannot leak memory here */
310  pfree(state->htids);
311  pfree(state);
312 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
IndexTuple base
Definition: nbtree.h:746
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:403
uint16 nintervals
Definition: nbtxlog.h:172
OffsetNumber baseoff
Definition: nbtree.h:747
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define Min(x, y)
Definition: c.h:928
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
Pointer Item
Definition: item.h:17
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define P_HAS_GARBAGE(opaque)
Definition: nbtree.h:220
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:410
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:324
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
Size _bt_dedup_finish_pending(Page newpage, BTDedupState state)
Definition: nbtdedup.c:446
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:782
uint16 OffsetNumber
Definition: off.h:24
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:381
ItemPointer htids
Definition: nbtree.h:751
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
Size phystupsize
Definition: nbtree.h:754
#define ERROR
Definition: elog.h:43
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:763
static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
Definition: nbtdedup.c:534
void _bt_delitems_delete(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, Relation heapRel)
Definition: nbtpage.c:1290
static char * buf
Definition: pg_test_fsync.c:68
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:375
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
struct ItemIdData ItemIdData
#define SizeOfBtreeDedup
Definition: nbtxlog.h:177
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:475
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define XLOG_BTREE_DEDUP
Definition: nbtxlog.h:32
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define InvalidOffsetNumber
Definition: off.h:26
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:746
Definition: regguts.h:298
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:474
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
Size basetupsize
Definition: nbtree.h:748
#define RelationNeedsWAL(relation)
Definition: rel.h:562
Size maxpostingsize
Definition: nbtree.h:743
#define PageGetLSN(page)
Definition: bufpage.h:366
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:833
#define BTMaxItemSize(page)
Definition: nbtree.h:157
#define P_HIKEY
Definition: nbtree.h:242
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2418
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:950
bool deduplicate
Definition: nbtree.h:741
#define elog(elevel,...)
Definition: elog.h:214
BTDedupStateData * BTDedupState
Definition: nbtree.h:766
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static void _bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz)
Definition: nbtdedup.c:574

◆ _bt_dedup_save_htid()

bool _bt_dedup_save_htid ( BTDedupState  state,
IndexTuple  itup 
)

Definition at line 375 of file nbtdedup.c.

References Assert, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, MAXALIGN, BTDedupStateData::maxpostingsize, BTDedupStateData::nhtids, BTDedupStateData::nitems, BTDedupStateData::nmaxitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_dedup_one_page(), _bt_load(), and btree_xlog_dedup().

376 {
377  int nhtids;
378  ItemPointer htids;
379  Size mergedtupsz;
380 
381  Assert(!BTreeTupleIsPivot(itup));
382 
383  if (!BTreeTupleIsPosting(itup))
384  {
385  nhtids = 1;
386  htids = &itup->t_tid;
387  }
388  else
389  {
390  nhtids = BTreeTupleGetNPosting(itup);
391  htids = BTreeTupleGetPosting(itup);
392  }
393 
394  /*
395  * Don't append (have caller finish pending posting list as-is) if
396  * appending heap TID(s) from itup would put us over maxpostingsize limit.
397  *
398  * This calculation needs to match the code used within _bt_form_posting()
399  * for new posting list tuples.
400  */
401  mergedtupsz = MAXALIGN(state->basetupsize +
402  (state->nhtids + nhtids) * sizeof(ItemPointerData));
403 
404  if (mergedtupsz > state->maxpostingsize)
405  {
406  /*
407  * Count this as an oversized item for single value strategy, though
408  * only when there are 50 TIDs in the final posting list tuple. This
409  * limit (which is fairly arbitrary) avoids confusion about how many
410  * 1/6 of a page tuples have been encountered/created by the current
411  * deduplication pass.
412  *
413  * Note: We deliberately don't consider which deduplication pass
414  * merged together tuples to create this item (could be a previous
415  * deduplication pass, or current pass). See _bt_do_singleval()
416  * comments.
417  */
418  if (state->nhtids > 50)
419  state->nmaxitems++;
420 
421  return false;
422  }
423 
424  /*
425  * Save heap TIDs to pending posting list tuple -- itup can be merged into
426  * pending posting list
427  */
428  state->nitems++;
429  memcpy(state->htids + state->nhtids, htids,
430  sizeof(ItemPointerData) * nhtids);
431  state->nhtids += nhtids;
432  state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
433 
434  return true;
435 }
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:751
Size phystupsize
Definition: nbtree.h:754
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
#define Assert(condition)
Definition: c.h:746
size_t Size
Definition: c.h:474
#define MAXALIGN(LEN)
Definition: c.h:699
Size basetupsize
Definition: nbtree.h:748
Size maxpostingsize
Definition: nbtree.h:743
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_dedup_start_pending()

void _bt_dedup_start_pending ( BTDedupState  state,
IndexTuple  base,
OffsetNumber  baseoff 
)

Definition at line 324 of file nbtdedup.c.

References Assert, BTDedupStateData::base, BTDedupInterval::baseoff, BTDedupStateData::baseoff, BTDedupStateData::basetupsize, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTDedupStateData::htids, IndexTupleSize, BTDedupStateData::intervals, MAXALIGN, BTDedupStateData::nhtids, BTDedupStateData::nintervals, BTDedupStateData::nitems, BTDedupStateData::phystupsize, and IndexTupleData::t_tid.

Referenced by _bt_dedup_one_page(), _bt_load(), and btree_xlog_dedup().

326 {
327  Assert(state->nhtids == 0);
328  Assert(state->nitems == 0);
329  Assert(!BTreeTupleIsPivot(base));
330 
331  /*
332  * Copy heap TID(s) from new base tuple for new candidate posting list
333  * into working state's array
334  */
335  if (!BTreeTupleIsPosting(base))
336  {
337  memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData));
338  state->nhtids = 1;
339  state->basetupsize = IndexTupleSize(base);
340  }
341  else
342  {
343  int nposting;
344 
345  nposting = BTreeTupleGetNPosting(base);
346  memcpy(state->htids, BTreeTupleGetPosting(base),
347  sizeof(ItemPointerData) * nposting);
348  state->nhtids = nposting;
349  /* basetupsize should not include existing posting list */
351  }
352 
353  /*
354  * Save new base tuple itself -- it'll be needed if we actually create a
355  * new posting list from new pending posting list.
356  *
357  * Must maintain physical size of all existing tuples (including line
358  * pointer overhead) so that we can calculate space savings on page.
359  */
360  state->nitems = 1;
361  state->base = base;
362  state->baseoff = baseoff;
363  state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData);
364  /* Also save baseoff in pending state for interval */
365  state->intervals[state->nintervals].baseoff = state->baseoff;
366 }
IndexTuple base
Definition: nbtree.h:746
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
OffsetNumber baseoff
Definition: nbtree.h:747
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
ItemPointerData t_tid
Definition: itup.h:37
ItemPointer htids
Definition: nbtree.h:751
Size phystupsize
Definition: nbtree.h:754
BTDedupInterval intervals[MaxIndexTuplesPerPage]
Definition: nbtree.h:763
struct ItemIdData ItemIdData
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:397
#define Assert(condition)
Definition: c.h:746
#define MAXALIGN(LEN)
Definition: c.h:699
Size basetupsize
Definition: nbtree.h:748
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
OffsetNumber baseoff
Definition: nbtree.h:716
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_do_singleval()

static bool _bt_do_singleval ( Relation  rel,
Page  page,
BTDedupState  state,
OffsetNumber  minoff,
IndexTuple  newitem 
)
static

Definition at line 534 of file nbtdedup.c.

References _bt_keep_natts_fast(), IndexRelationGetNumberOfKeyAttributes, PageGetItem, PageGetItemId, and PageGetMaxOffsetNumber.

Referenced by _bt_dedup_one_page().

536 {
537  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
538  ItemId itemid;
539  IndexTuple itup;
540 
541  itemid = PageGetItemId(page, minoff);
542  itup = (IndexTuple) PageGetItem(page, itemid);
543 
544  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
545  {
546  itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
547  itup = (IndexTuple) PageGetItem(page, itemid);
548 
549  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
550  return true;
551  }
552 
553  return false;
554 }
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:475
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2418
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_form_posting()

IndexTuple _bt_form_posting ( IndexTuple  base,
ItemPointer  htids,
int  nhtids 
)

Definition at line 616 of file nbtdedup.c.

References Assert, BTreeTupleGetPosting(), BTreeTupleGetPostingOffset(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), BTreeTupleSetPosting(), INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, IndexTupleSize, ItemPointerCopy, ItemPointerIsValid, MAXALIGN, palloc0(), PG_UINT16_MAX, IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_dedup_finish_pending(), _bt_sort_dedup_finish_pending(), and bt_posting_plain_tuple().

617 {
618  uint32 keysize,
619  newsize;
620  IndexTuple itup;
621 
622  if (BTreeTupleIsPosting(base))
623  keysize = BTreeTupleGetPostingOffset(base);
624  else
625  keysize = IndexTupleSize(base);
626 
627  Assert(!BTreeTupleIsPivot(base));
628  Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX);
629  Assert(keysize == MAXALIGN(keysize));
630 
631  /* Determine final size of new tuple */
632  if (nhtids > 1)
633  newsize = MAXALIGN(keysize +
634  nhtids * sizeof(ItemPointerData));
635  else
636  newsize = keysize;
637 
638  Assert(newsize <= INDEX_SIZE_MASK);
639  Assert(newsize == MAXALIGN(newsize));
640 
641  /* Allocate memory using palloc0() (matches index_form_tuple()) */
642  itup = palloc0(newsize);
643  memcpy(itup, base, keysize);
644  itup->t_info &= ~INDEX_SIZE_MASK;
645  itup->t_info |= newsize;
646  if (nhtids > 1)
647  {
648  /* Form posting list tuple */
649  BTreeTupleSetPosting(itup, nhtids, keysize);
650  memcpy(BTreeTupleGetPosting(itup), htids,
651  sizeof(ItemPointerData) * nhtids);
652  Assert(_bt_posting_valid(itup));
653  }
654  else
655  {
656  /* Form standard non-pivot tuple */
657  itup->t_info &= ~INDEX_ALT_TID_MASK;
658  ItemPointerCopy(htids, &itup->t_tid);
660  }
661 
662  return itup;
663 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
ItemPointerData t_tid
Definition: itup.h:37
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:334
#define PG_UINT16_MAX
Definition: c.h:456
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:372
unsigned int uint32
Definition: c.h:375
void * palloc0(Size size)
Definition: mcxt.c:981
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:397
#define Assert(condition)
Definition: c.h:746
#define MAXALIGN(LEN)
Definition: c.h:699
unsigned short t_info
Definition: itup.h:49
#define IndexTupleSize(itup)
Definition: itup.h:71
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_singleval_fillfactor()

static void _bt_singleval_fillfactor ( Page  page,
BTDedupState  state,
Size  newitemsz 
)
static

Definition at line 574 of file nbtdedup.c.

References BTREE_SINGLEVAL_FILLFACTOR, MAXALIGN, BTDedupStateData::maxpostingsize, PageGetPageSize, and SizeOfPageHeaderData.

Referenced by _bt_dedup_one_page().

575 {
576  Size leftfree;
577  int reduction;
578 
579  /* This calculation needs to match nbtsplitloc.c */
580  leftfree = PageGetPageSize(page) - SizeOfPageHeaderData -
581  MAXALIGN(sizeof(BTPageOpaqueData));
582  /* Subtract size of new high key (includes pivot heap TID space) */
583  leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData));
584 
585  /*
586  * Reduce maxpostingsize by an amount equal to target free space on left
587  * half of page
588  */
589  reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0);
590  if (state->maxpostingsize > reduction)
591  state->maxpostingsize -= reduction;
592  else
593  state->maxpostingsize = 0;
594 }
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define PageGetPageSize(page)
Definition: bufpage.h:268
#define BTREE_SINGLEVAL_FILLFACTOR
Definition: nbtree.h:196
size_t Size
Definition: c.h:474
#define MAXALIGN(LEN)
Definition: c.h:699
Size maxpostingsize
Definition: nbtree.h:743

◆ _bt_swap_posting()

IndexTuple _bt_swap_posting ( IndexTuple  newitem,
IndexTuple  oposting,
int  postingoff 
)

Definition at line 774 of file nbtdedup.c.

References Assert, BTreeTupleGetHeapTID(), BTreeTupleGetMaxHeapTID(), BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), CopyIndexTuple(), i, ItemPointerCompare(), ItemPointerCopy, ItemPointerIsValid, and IndexTupleData::t_tid.

Referenced by _bt_insertonpg(), btree_xlog_insert(), and btree_xlog_split().

775 {
776  int nhtids;
777  char *replacepos;
778  char *replaceposright;
779  Size nmovebytes;
780  IndexTuple nposting;
781 
782  nhtids = BTreeTupleGetNPosting(oposting);
783  Assert(_bt_posting_valid(oposting));
784  Assert(postingoff > 0 && postingoff < nhtids);
785 
786  /*
787  * Move item pointers in posting list to make a gap for the new item's
788  * heap TID. We shift TIDs one place to the right, losing original
789  * rightmost TID. (nmovebytes must not include TIDs to the left of
790  * postingoff, nor the existing rightmost/max TID that gets overwritten.)
791  */
792  nposting = CopyIndexTuple(oposting);
793  replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff);
794  replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1);
795  nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData);
796  memmove(replaceposright, replacepos, nmovebytes);
797 
798  /* Fill the gap at postingoff with TID of new item (original new TID) */
799  Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem));
800  ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos);
801 
802  /* Now copy oposting's rightmost/max TID into new item (final new TID) */
803  ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid);
804 
806  BTreeTupleGetHeapTID(newitem)) < 0);
807  Assert(_bt_posting_valid(nposting));
808 
809  return nposting;
810 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:506
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:510
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:532
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:412
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
#define Assert(condition)
Definition: c.h:746
size_t Size
Definition: c.h:474
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161

◆ _bt_update_posting()

void _bt_update_posting ( BTVacuumPosting  vacposting)

Definition at line 676 of file nbtdedup.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTreeTupleGetPostingN(), BTreeTupleGetPostingOffset(), BTreeTupleSetPosting(), BTVacuumPostingData::deletetids, i, INDEX_ALT_TID_MASK, INDEX_SIZE_MASK, ItemPointerIsValid, BTVacuumPostingData::itup, MAXALIGN, BTVacuumPostingData::ndeletedtids, palloc0(), IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_delitems_vacuum(), and btree_xlog_vacuum().

677 {
678  IndexTuple origtuple = vacposting->itup;
679  uint32 keysize,
680  newsize;
681  IndexTuple itup;
682  int nhtids;
683  int ui,
684  d;
685  ItemPointer htids;
686 
687  nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids;
688 
689  Assert(_bt_posting_valid(origtuple));
690  Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple));
691 
692  /*
693  * Determine final size of new tuple.
694  *
695  * This calculation needs to match the code used within _bt_form_posting()
696  * for new posting list tuples. We avoid calling _bt_form_posting() here
697  * to save ourselves a second memory allocation for a htids workspace.
698  */
699  keysize = BTreeTupleGetPostingOffset(origtuple);
700  if (nhtids > 1)
701  newsize = MAXALIGN(keysize +
702  nhtids * sizeof(ItemPointerData));
703  else
704  newsize = keysize;
705 
706  Assert(newsize <= INDEX_SIZE_MASK);
707  Assert(newsize == MAXALIGN(newsize));
708 
709  /* Allocate memory using palloc0() (matches index_form_tuple()) */
710  itup = palloc0(newsize);
711  memcpy(itup, origtuple, keysize);
712  itup->t_info &= ~INDEX_SIZE_MASK;
713  itup->t_info |= newsize;
714 
715  if (nhtids > 1)
716  {
717  /* Form posting list tuple */
718  BTreeTupleSetPosting(itup, nhtids, keysize);
719  htids = BTreeTupleGetPosting(itup);
720  }
721  else
722  {
723  /* Form standard non-pivot tuple */
724  itup->t_info &= ~INDEX_ALT_TID_MASK;
725  htids = &itup->t_tid;
726  }
727 
728  ui = 0;
729  d = 0;
730  for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++)
731  {
732  if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i)
733  {
734  d++;
735  continue;
736  }
737  htids[ui++] = *BTreeTupleGetPostingN(origtuple, i);
738  }
739  Assert(ui == nhtids);
740  Assert(d == vacposting->ndeletedtids);
741  Assert(nhtids == 1 || _bt_posting_valid(itup));
742  Assert(nhtids > 1 || ItemPointerIsValid(&itup->t_tid));
743 
744  /* vacposting arg's itup will now point to updated version */
745  vacposting->itup = itup;
746 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
uint16 ndeletedtids
Definition: nbtree.h:782
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple itup
Definition: nbtree.h:778
#define INDEX_SIZE_MASK
Definition: itup.h:65
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:334
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:372
unsigned int uint32
Definition: c.h:375
void * palloc0(Size size)
Definition: mcxt.c:981
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:783
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:412
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:397
#define Assert(condition)
Definition: c.h:746
#define MAXALIGN(LEN)
Definition: c.h:699
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
unsigned short t_info
Definition: itup.h:49