PostgreSQL Source Code  git master
nbtdedup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nbtdedup.c
4  * Deduplicate or bottom-up delete items in Postgres btrees.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/nbtree/nbtdedup.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/nbtree.h"
18 #include "access/nbtxlog.h"
19 #include "access/xloginsert.h"
20 #include "miscadmin.h"
21 #include "utils/rel.h"
22 
24  TM_IndexDeleteOp *delstate);
25 static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state,
26  OffsetNumber minoff, IndexTuple newitem);
28  Size newitemsz);
29 #ifdef USE_ASSERT_CHECKING
30 static bool _bt_posting_valid(IndexTuple posting);
31 #endif
32 
33 /*
34  * Perform a deduplication pass.
35  *
36  * The general approach taken here is to perform as much deduplication as
37  * possible to free as much space as possible. Note, however, that "single
38  * value" strategy is used for !bottomupdedup callers when the page is full of
39  * tuples of a single value. Deduplication passes that apply the strategy
40  * will leave behind a few untouched tuples at the end of the page, preparing
41  * the page for an anticipated page split that uses nbtsplitloc.c's own single
42  * value strategy. Our high level goal is to delay merging the untouched
43  * tuples until after the page splits.
44  *
45  * When a call to _bt_bottomupdel_pass() just took place (and failed), our
46  * high level goal is to prevent a page split entirely by buying more time.
47  * We still hope that a page split can be avoided altogether. That's why
48  * single value strategy is not even considered for bottomupdedup callers.
49  *
50  * The page will have to be split if we cannot successfully free at least
51  * newitemsz (we also need space for newitem's line pointer, which isn't
52  * included in caller's newitemsz).
53  *
54  * Note: Caller should have already deleted all existing items with their
55  * LP_DEAD bits set.
56  */
57 void
58 _bt_dedup_pass(Relation rel, Buffer buf, IndexTuple newitem, Size newitemsz,
59  bool bottomupdedup)
60 {
61  OffsetNumber offnum,
62  minoff,
63  maxoff;
64  Page page = BufferGetPage(buf);
65  BTPageOpaque opaque = BTPageGetOpaque(page);
66  Page newpage;
68  Size pagesaving PG_USED_FOR_ASSERTS_ONLY = 0;
69  bool singlevalstrat = false;
70  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
71 
72  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
73  newitemsz += sizeof(ItemIdData);
74 
75  /*
76  * Initialize deduplication state.
77  *
78  * It would be possible for maxpostingsize (limit on posting list tuple
79  * size) to be set to one third of the page. However, it seems like a
80  * good idea to limit the size of posting lists to one sixth of a page.
81  * That ought to leave us with a good split point when pages full of
82  * duplicates can be split several times.
83  */
85  state->deduplicate = true;
86  state->nmaxitems = 0;
87  state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
88  /* Metadata about base tuple of current pending posting list */
89  state->base = NULL;
90  state->baseoff = InvalidOffsetNumber;
91  state->basetupsize = 0;
92  /* Metadata about current pending posting list TIDs */
93  state->htids = palloc(state->maxpostingsize);
94  state->nhtids = 0;
95  state->nitems = 0;
96  /* Size of all physical tuples to be replaced by pending posting list */
97  state->phystupsize = 0;
98  /* nintervals should be initialized to zero */
99  state->nintervals = 0;
100 
101  minoff = P_FIRSTDATAKEY(opaque);
102  maxoff = PageGetMaxOffsetNumber(page);
103 
104  /*
105  * Consider applying "single value" strategy, though only if the page
106  * seems likely to be split in the near future
107  */
108  if (!bottomupdedup)
109  singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
110 
111  /*
112  * Deduplicate items from page, and write them to newpage.
113  *
114  * Copy the original page's LSN into newpage copy. This will become the
115  * updated version of the page. We need this because XLogInsert will
116  * examine the LSN and possibly dump it in a page image.
117  */
118  newpage = PageGetTempPageCopySpecial(page);
119  PageSetLSN(newpage, PageGetLSN(page));
120 
121  /* Copy high key, if any */
122  if (!P_RIGHTMOST(opaque))
123  {
124  ItemId hitemid = PageGetItemId(page, P_HIKEY);
125  Size hitemsz = ItemIdGetLength(hitemid);
126  IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid);
127 
128  if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY,
129  false, false) == InvalidOffsetNumber)
130  elog(ERROR, "deduplication failed to add highkey");
131  }
132 
133  for (offnum = minoff;
134  offnum <= maxoff;
135  offnum = OffsetNumberNext(offnum))
136  {
137  ItemId itemid = PageGetItemId(page, offnum);
138  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
139 
140  Assert(!ItemIdIsDead(itemid));
141 
142  if (offnum == minoff)
143  {
144  /*
145  * No previous/base tuple for the data item -- use the data item
146  * as base tuple of pending posting list
147  */
148  _bt_dedup_start_pending(state, itup, offnum);
149  }
150  else if (state->deduplicate &&
151  _bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
152  _bt_dedup_save_htid(state, itup))
153  {
154  /*
155  * Tuple is equal to base tuple of pending posting list. Heap
156  * TID(s) for itup have been saved in state.
157  */
158  }
159  else
160  {
161  /*
162  * Tuple is not equal to pending posting list tuple, or
163  * _bt_dedup_save_htid() opted to not merge current item into
164  * pending posting list for some other reason (e.g., adding more
165  * TIDs would have caused posting list to exceed current
166  * maxpostingsize).
167  *
168  * If state contains pending posting list with more than one item,
169  * form new posting tuple and add it to our temp page (newpage).
170  * Else add pending interval's base tuple to the temp page as-is.
171  */
172  pagesaving += _bt_dedup_finish_pending(newpage, state);
173 
174  if (singlevalstrat)
175  {
176  /*
177  * Single value strategy's extra steps.
178  *
179  * Lower maxpostingsize for sixth and final large posting list
180  * tuple at the point where 5 maxpostingsize-capped tuples
181  * have either been formed or observed.
182  *
183  * When a sixth maxpostingsize-capped item is formed/observed,
184  * stop merging together tuples altogether. The few tuples
185  * that remain at the end of the page won't be merged together
186  * at all (at least not until after a future page split takes
187  * place, when this page's newly allocated right sibling page
188  * gets its first deduplication pass).
189  */
190  if (state->nmaxitems == 5)
191  _bt_singleval_fillfactor(page, state, newitemsz);
192  else if (state->nmaxitems == 6)
193  {
194  state->deduplicate = false;
195  singlevalstrat = false; /* won't be back here */
196  }
197  }
198 
199  /* itup starts new pending posting list */
200  _bt_dedup_start_pending(state, itup, offnum);
201  }
202  }
203 
204  /* Handle the last item */
205  pagesaving += _bt_dedup_finish_pending(newpage, state);
206 
207  /*
208  * If no items suitable for deduplication were found, newpage must be
209  * exactly the same as the original page, so just return from function.
210  *
211  * We could determine whether or not to proceed on the basis the space
212  * savings being sufficient to avoid an immediate page split instead. We
213  * don't do that because there is some small value in nbtsplitloc.c always
214  * operating against a page that is fully deduplicated (apart from
215  * newitem). Besides, most of the cost has already been paid.
216  */
217  if (state->nintervals == 0)
218  {
219  /* cannot leak memory here */
220  pfree(newpage);
221  pfree(state->htids);
222  pfree(state);
223  return;
224  }
225 
226  /*
227  * By here, it's clear that deduplication will definitely go ahead.
228  *
229  * Clear the BTP_HAS_GARBAGE page flag. The index must be a heapkeyspace
230  * index, and as such we'll never pay attention to BTP_HAS_GARBAGE anyway.
231  * But keep things tidy.
232  */
233  if (P_HAS_GARBAGE(opaque))
234  {
235  BTPageOpaque nopaque = BTPageGetOpaque(newpage);
236 
237  nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
238  }
239 
241 
242  PageRestoreTempPage(newpage, page);
244 
245  /* XLOG stuff */
246  if (RelationNeedsWAL(rel))
247  {
248  XLogRecPtr recptr;
249  xl_btree_dedup xlrec_dedup;
250 
251  xlrec_dedup.nintervals = state->nintervals;
252 
253  XLogBeginInsert();
255  XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup);
256 
257  /*
258  * The intervals array is not in the buffer, but pretend that it is.
259  * When XLogInsert stores the whole buffer, the array need not be
260  * stored too.
261  */
262  XLogRegisterBufData(0, (char *) state->intervals,
263  state->nintervals * sizeof(BTDedupInterval));
264 
265  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP);
266 
267  PageSetLSN(page, recptr);
268  }
269 
271 
272  /* Local space accounting should agree with page accounting */
273  Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz);
274 
275  /* cannot leak memory here */
276  pfree(state->htids);
277  pfree(state);
278 }
279 
280 /*
281  * Perform bottom-up index deletion pass.
282  *
283  * See if duplicate index tuples (plus certain nearby tuples) are eligible to
284  * be deleted via bottom-up index deletion. The high level goal here is to
285  * entirely prevent "unnecessary" page splits caused by MVCC version churn
286  * from UPDATEs (when the UPDATEs don't logically modify any of the columns
287  * covered by the 'rel' index). This is qualitative, not quantitative -- we
288  * do not particularly care about once-off opportunities to delete many index
289  * tuples together.
290  *
291  * See nbtree/README for details on the design of nbtree bottom-up deletion.
292  * See access/tableam.h for a description of how we're expected to cooperate
293  * with the tableam.
294  *
295  * Returns true on success, in which case caller can assume page split will be
296  * avoided for a reasonable amount of time. Returns false when caller should
297  * deduplicate the page (if possible at all).
298  *
299  * Note: Occasionally we return true despite failing to delete enough items to
300  * avoid a split. This makes caller skip deduplication and go split the page
301  * right away. Our return value is always just advisory information.
302  *
303  * Note: Caller should have already deleted all existing items with their
304  * LP_DEAD bits set.
305  */
306 bool
308  Size newitemsz)
309 {
310  OffsetNumber offnum,
311  minoff,
312  maxoff;
313  Page page = BufferGetPage(buf);
314  BTPageOpaque opaque = BTPageGetOpaque(page);
316  TM_IndexDeleteOp delstate;
317  bool neverdedup;
318  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
319 
320  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
321  newitemsz += sizeof(ItemIdData);
322 
323  /* Initialize deduplication state */
325  state->deduplicate = true;
326  state->nmaxitems = 0;
327  state->maxpostingsize = BLCKSZ; /* We're not really deduplicating */
328  state->base = NULL;
329  state->baseoff = InvalidOffsetNumber;
330  state->basetupsize = 0;
331  state->htids = palloc(state->maxpostingsize);
332  state->nhtids = 0;
333  state->nitems = 0;
334  state->phystupsize = 0;
335  state->nintervals = 0;
336 
337  /*
338  * Initialize tableam state that describes bottom-up index deletion
339  * operation.
340  *
341  * We'll go on to ask the tableam to search for TIDs whose index tuples we
342  * can safely delete. The tableam will search until our leaf page space
343  * target is satisfied, or until the cost of continuing with the tableam
344  * operation seems too high. It focuses its efforts on TIDs associated
345  * with duplicate index tuples that we mark "promising".
346  *
347  * This space target is a little arbitrary. The tableam must be able to
348  * keep the costs and benefits in balance. We provide the tableam with
349  * exhaustive information about what might work, without directly
350  * concerning ourselves with avoiding work during the tableam call. Our
351  * role in costing the bottom-up deletion process is strictly advisory.
352  */
353  delstate.irel = rel;
354  delstate.iblknum = BufferGetBlockNumber(buf);
355  delstate.bottomup = true;
356  delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz);
357  delstate.ndeltids = 0;
358  delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete));
359  delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus));
360 
361  minoff = P_FIRSTDATAKEY(opaque);
362  maxoff = PageGetMaxOffsetNumber(page);
363  for (offnum = minoff;
364  offnum <= maxoff;
365  offnum = OffsetNumberNext(offnum))
366  {
367  ItemId itemid = PageGetItemId(page, offnum);
368  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
369 
370  Assert(!ItemIdIsDead(itemid));
371 
372  if (offnum == minoff)
373  {
374  /* itup starts first pending interval */
375  _bt_dedup_start_pending(state, itup, offnum);
376  }
377  else if (_bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
378  _bt_dedup_save_htid(state, itup))
379  {
380  /* Tuple is equal; just added its TIDs to pending interval */
381  }
382  else
383  {
384  /* Finalize interval -- move its TIDs to delete state */
385  _bt_bottomupdel_finish_pending(page, state, &delstate);
386 
387  /* itup starts new pending interval */
388  _bt_dedup_start_pending(state, itup, offnum);
389  }
390  }
391  /* Finalize final interval -- move its TIDs to delete state */
392  _bt_bottomupdel_finish_pending(page, state, &delstate);
393 
394  /*
395  * We don't give up now in the event of having few (or even zero)
396  * promising tuples for the tableam because it's not up to us as the index
397  * AM to manage costs (note that the tableam might have heuristics of its
398  * own that work out what to do). We should at least avoid having our
399  * caller do a useless deduplication pass after we return in the event of
400  * zero promising tuples, though.
401  */
402  neverdedup = false;
403  if (state->nintervals == 0)
404  neverdedup = true;
405 
406  pfree(state->htids);
407  pfree(state);
408 
409  /* Ask tableam which TIDs are deletable, then physically delete them */
410  _bt_delitems_delete_check(rel, buf, heapRel, &delstate);
411 
412  pfree(delstate.deltids);
413  pfree(delstate.status);
414 
415  /* Report "success" to caller unconditionally to avoid deduplication */
416  if (neverdedup)
417  return true;
418 
419  /* Don't dedup when we won't end up back here any time soon anyway */
420  return PageGetExactFreeSpace(page) >= Max(BLCKSZ / 24, newitemsz);
421 }
422 
423 /*
424  * Create a new pending posting list tuple based on caller's base tuple.
425  *
426  * Every tuple processed by deduplication either becomes the base tuple for a
427  * posting list, or gets its heap TID(s) accepted into a pending posting list.
428  * A tuple that starts out as the base tuple for a posting list will only
429  * actually be rewritten within _bt_dedup_finish_pending() when it turns out
430  * that there are duplicates that can be merged into the base tuple.
431  */
432 void
434  OffsetNumber baseoff)
435 {
436  Assert(state->nhtids == 0);
437  Assert(state->nitems == 0);
438  Assert(!BTreeTupleIsPivot(base));
439 
440  /*
441  * Copy heap TID(s) from new base tuple for new candidate posting list
442  * into working state's array
443  */
444  if (!BTreeTupleIsPosting(base))
445  {
446  memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData));
447  state->nhtids = 1;
448  state->basetupsize = IndexTupleSize(base);
449  }
450  else
451  {
452  int nposting;
453 
454  nposting = BTreeTupleGetNPosting(base);
455  memcpy(state->htids, BTreeTupleGetPosting(base),
456  sizeof(ItemPointerData) * nposting);
457  state->nhtids = nposting;
458  /* basetupsize should not include existing posting list */
459  state->basetupsize = BTreeTupleGetPostingOffset(base);
460  }
461 
462  /*
463  * Save new base tuple itself -- it'll be needed if we actually create a
464  * new posting list from new pending posting list.
465  *
466  * Must maintain physical size of all existing tuples (including line
467  * pointer overhead) so that we can calculate space savings on page.
468  */
469  state->nitems = 1;
470  state->base = base;
471  state->baseoff = baseoff;
472  state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData);
473  /* Also save baseoff in pending state for interval */
474  state->intervals[state->nintervals].baseoff = state->baseoff;
475 }
476 
477 /*
478  * Save itup heap TID(s) into pending posting list where possible.
479  *
480  * Returns bool indicating if the pending posting list managed by state now
481  * includes itup's heap TID(s).
482  */
483 bool
485 {
486  int nhtids;
487  ItemPointer htids;
488  Size mergedtupsz;
489 
490  Assert(!BTreeTupleIsPivot(itup));
491 
492  if (!BTreeTupleIsPosting(itup))
493  {
494  nhtids = 1;
495  htids = &itup->t_tid;
496  }
497  else
498  {
499  nhtids = BTreeTupleGetNPosting(itup);
500  htids = BTreeTupleGetPosting(itup);
501  }
502 
503  /*
504  * Don't append (have caller finish pending posting list as-is) if
505  * appending heap TID(s) from itup would put us over maxpostingsize limit.
506  *
507  * This calculation needs to match the code used within _bt_form_posting()
508  * for new posting list tuples.
509  */
510  mergedtupsz = MAXALIGN(state->basetupsize +
511  (state->nhtids + nhtids) * sizeof(ItemPointerData));
512 
513  if (mergedtupsz > state->maxpostingsize)
514  {
515  /*
516  * Count this as an oversized item for single value strategy, though
517  * only when there are 50 TIDs in the final posting list tuple. This
518  * limit (which is fairly arbitrary) avoids confusion about how many
519  * 1/6 of a page tuples have been encountered/created by the current
520  * deduplication pass.
521  *
522  * Note: We deliberately don't consider which deduplication pass
523  * merged together tuples to create this item (could be a previous
524  * deduplication pass, or current pass). See _bt_do_singleval()
525  * comments.
526  */
527  if (state->nhtids > 50)
528  state->nmaxitems++;
529 
530  return false;
531  }
532 
533  /*
534  * Save heap TIDs to pending posting list tuple -- itup can be merged into
535  * pending posting list
536  */
537  state->nitems++;
538  memcpy(state->htids + state->nhtids, htids,
539  sizeof(ItemPointerData) * nhtids);
540  state->nhtids += nhtids;
541  state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
542 
543  return true;
544 }
545 
546 /*
547  * Finalize pending posting list tuple, and add it to the page. Final tuple
548  * is based on saved base tuple, and saved list of heap TIDs.
549  *
550  * Returns space saving from deduplicating to make a new posting list tuple.
551  * Note that this includes line pointer overhead. This is zero in the case
552  * where no deduplication was possible.
553  */
554 Size
556 {
557  OffsetNumber tupoff;
558  Size tuplesz;
559  Size spacesaving;
560 
561  Assert(state->nitems > 0);
562  Assert(state->nitems <= state->nhtids);
563  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
564 
565  tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage));
566  if (state->nitems == 1)
567  {
568  /* Use original, unchanged base tuple */
569  tuplesz = IndexTupleSize(state->base);
570  Assert(tuplesz == MAXALIGN(IndexTupleSize(state->base)));
571  Assert(tuplesz <= BTMaxItemSize(newpage));
572  if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff,
573  false, false) == InvalidOffsetNumber)
574  elog(ERROR, "deduplication failed to add tuple to page");
575 
576  spacesaving = 0;
577  }
578  else
579  {
580  IndexTuple final;
581 
582  /* Form a tuple with a posting list */
583  final = _bt_form_posting(state->base, state->htids, state->nhtids);
584  tuplesz = IndexTupleSize(final);
585  Assert(tuplesz <= state->maxpostingsize);
586 
587  /* Save final number of items for posting list */
588  state->intervals[state->nintervals].nitems = state->nitems;
589 
590  Assert(tuplesz == MAXALIGN(IndexTupleSize(final)));
591  Assert(tuplesz <= BTMaxItemSize(newpage));
592  if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false,
593  false) == InvalidOffsetNumber)
594  elog(ERROR, "deduplication failed to add tuple to page");
595 
596  pfree(final);
597  spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData));
598  /* Increment nintervals, since we wrote a new posting list tuple */
599  state->nintervals++;
600  Assert(spacesaving > 0 && spacesaving < BLCKSZ);
601  }
602 
603  /* Reset state for next pending posting list */
604  state->nhtids = 0;
605  state->nitems = 0;
606  state->phystupsize = 0;
607 
608  return spacesaving;
609 }
610 
611 /*
612  * Finalize interval during bottom-up index deletion.
613  *
614  * During a bottom-up pass we expect that TIDs will be recorded in dedup state
615  * first, and then get moved over to delstate (in variable-sized batches) by
616  * calling here. Call here happens when the number of TIDs in a dedup
617  * interval is known, and interval gets finalized (i.e. when caller sees next
618  * tuple on the page is not a duplicate, or when caller runs out of tuples to
619  * process from leaf page).
620  *
621  * This is where bottom-up deletion determines and remembers which entries are
622  * duplicates. This will be important information to the tableam delete
623  * infrastructure later on. Plain index tuple duplicates are marked
624  * "promising" here, per tableam contract.
625  *
626  * Our approach to marking entries whose TIDs come from posting lists is more
627  * complicated. Posting lists can only be formed by a deduplication pass (or
628  * during an index build), so recent version churn affecting the pointed-to
629  * logical rows is not particularly likely. We may still give a weak signal
630  * about posting list tuples' entries (by marking just one of its TIDs/entries
631  * promising), though this is only a possibility in the event of further
632  * duplicate index tuples in final interval that covers posting list tuple (as
633  * in the plain tuple case). A weak signal/hint will be useful to the tableam
634  * when it has no stronger signal to go with for the deletion operation as a
635  * whole.
636  *
637  * The heuristics we use work well in practice because we only need to give
638  * the tableam the right _general_ idea about where to look. Garbage tends to
639  * naturally get concentrated in relatively few table blocks with workloads
640  * that bottom-up deletion targets. The tableam cannot possibly rank all
641  * available table blocks sensibly based on the hints we provide, but that's
642  * okay -- only the extremes matter. The tableam just needs to be able to
643  * predict which few table blocks will have the most tuples that are safe to
644  * delete for each deletion operation, with low variance across related
645  * deletion operations.
646  */
647 static void
649  TM_IndexDeleteOp *delstate)
650 {
651  bool dupinterval = (state->nitems > 1);
652 
653  Assert(state->nitems > 0);
654  Assert(state->nitems <= state->nhtids);
655  Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
656 
657  for (int i = 0; i < state->nitems; i++)
658  {
659  OffsetNumber offnum = state->baseoff + i;
660  ItemId itemid = PageGetItemId(page, offnum);
661  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
662  TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids];
663  TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids];
664 
665  if (!BTreeTupleIsPosting(itup))
666  {
667  /* Simple case: A plain non-pivot tuple */
668  ideltid->tid = itup->t_tid;
669  ideltid->id = delstate->ndeltids;
670  istatus->idxoffnum = offnum;
671  istatus->knowndeletable = false; /* for now */
672  istatus->promising = dupinterval; /* simple rule */
673  istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData);
674 
675  delstate->ndeltids++;
676  }
677  else
678  {
679  /*
680  * Complicated case: A posting list tuple.
681  *
682  * We make the conservative assumption that there can only be at
683  * most one affected logical row per posting list tuple. There
684  * will be at most one promising entry in deltids to represent
685  * this presumed lone logical row. Note that this isn't even
686  * considered unless the posting list tuple is also in an interval
687  * of duplicates -- this complicated rule is just a variant of the
688  * simple rule used to decide if plain index tuples are promising.
689  */
690  int nitem = BTreeTupleGetNPosting(itup);
691  bool firstpromising = false;
692  bool lastpromising = false;
693 
694  Assert(_bt_posting_valid(itup));
695 
696  if (dupinterval)
697  {
698  /*
699  * Complicated rule: either the first or last TID in the
700  * posting list gets marked promising (if any at all)
701  */
702  BlockNumber minblocklist,
703  midblocklist,
704  maxblocklist;
705  ItemPointer mintid,
706  midtid,
707  maxtid;
708 
709  mintid = BTreeTupleGetHeapTID(itup);
710  midtid = BTreeTupleGetPostingN(itup, nitem / 2);
711  maxtid = BTreeTupleGetMaxHeapTID(itup);
712  minblocklist = ItemPointerGetBlockNumber(mintid);
713  midblocklist = ItemPointerGetBlockNumber(midtid);
714  maxblocklist = ItemPointerGetBlockNumber(maxtid);
715 
716  /* Only entry with predominant table block can be promising */
717  firstpromising = (minblocklist == midblocklist);
718  lastpromising = (!firstpromising &&
719  midblocklist == maxblocklist);
720  }
721 
722  for (int p = 0; p < nitem; p++)
723  {
724  ItemPointer htid = BTreeTupleGetPostingN(itup, p);
725 
726  ideltid->tid = *htid;
727  ideltid->id = delstate->ndeltids;
728  istatus->idxoffnum = offnum;
729  istatus->knowndeletable = false; /* for now */
730  istatus->promising = false;
731  if ((firstpromising && p == 0) ||
732  (lastpromising && p == nitem - 1))
733  istatus->promising = true;
734  istatus->freespace = sizeof(ItemPointerData); /* at worst */
735 
736  ideltid++;
737  istatus++;
738  delstate->ndeltids++;
739  }
740  }
741  }
742 
743  if (dupinterval)
744  {
745  state->intervals[state->nintervals].nitems = state->nitems;
746  state->nintervals++;
747  }
748 
749  /* Reset state for next interval */
750  state->nhtids = 0;
751  state->nitems = 0;
752  state->phystupsize = 0;
753 }
754 
755 /*
756  * Determine if page non-pivot tuples (data items) are all duplicates of the
757  * same value -- if they are, deduplication's "single value" strategy should
758  * be applied. The general goal of this strategy is to ensure that
759  * nbtsplitloc.c (which uses its own single value strategy) will find a useful
760  * split point as further duplicates are inserted, and successive rightmost
761  * page splits occur among pages that store the same duplicate value. When
762  * the page finally splits, it should end up BTREE_SINGLEVAL_FILLFACTOR% full,
763  * just like it would if deduplication were disabled.
764  *
765  * We expect that affected workloads will require _several_ single value
766  * strategy deduplication passes (over a page that only stores duplicates)
767  * before the page is finally split. The first deduplication pass should only
768  * find regular non-pivot tuples. Later deduplication passes will find
769  * existing maxpostingsize-capped posting list tuples, which must be skipped
770  * over. The penultimate pass is generally the first pass that actually
771  * reaches _bt_singleval_fillfactor(), and so will deliberately leave behind a
772  * few untouched non-pivot tuples. The final deduplication pass won't free
773  * any space -- it will skip over everything without merging anything (it
774  * retraces the steps of the penultimate pass).
775  *
776  * Fortunately, having several passes isn't too expensive. Each pass (after
777  * the first pass) won't spend many cycles on the large posting list tuples
778  * left by previous passes. Each pass will find a large contiguous group of
779  * smaller duplicate tuples to merge together at the end of the page.
780  */
781 static bool
783  OffsetNumber minoff, IndexTuple newitem)
784 {
785  int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
786  ItemId itemid;
787  IndexTuple itup;
788 
789  itemid = PageGetItemId(page, minoff);
790  itup = (IndexTuple) PageGetItem(page, itemid);
791 
792  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
793  {
794  itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
795  itup = (IndexTuple) PageGetItem(page, itemid);
796 
797  if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
798  return true;
799  }
800 
801  return false;
802 }
803 
804 /*
805  * Lower maxpostingsize when using "single value" strategy, to avoid a sixth
806  * and final maxpostingsize-capped tuple. The sixth and final posting list
807  * tuple will end up somewhat smaller than the first five. (Note: The first
808  * five tuples could actually just be very large duplicate tuples that
809  * couldn't be merged together at all. Deduplication will simply not modify
810  * the page when that happens.)
811  *
812  * When there are six posting lists on the page (after current deduplication
813  * pass goes on to create/observe a sixth very large tuple), caller should end
814  * its deduplication pass. It isn't useful to try to deduplicate items that
815  * are supposed to end up on the new right sibling page following the
816  * anticipated page split. A future deduplication pass of future right
817  * sibling page might take care of it. (This is why the first single value
818  * strategy deduplication pass for a given leaf page will generally find only
819  * plain non-pivot tuples -- see _bt_do_singleval() comments.)
820  */
821 static void
823 {
824  Size leftfree;
825  int reduction;
826 
827  /* This calculation needs to match nbtsplitloc.c */
828  leftfree = PageGetPageSize(page) - SizeOfPageHeaderData -
829  MAXALIGN(sizeof(BTPageOpaqueData));
830  /* Subtract size of new high key (includes pivot heap TID space) */
831  leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData));
832 
833  /*
834  * Reduce maxpostingsize by an amount equal to target free space on left
835  * half of page
836  */
837  reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0);
838  if (state->maxpostingsize > reduction)
839  state->maxpostingsize -= reduction;
840  else
841  state->maxpostingsize = 0;
842 }
843 
844 /*
845  * Build a posting list tuple based on caller's "base" index tuple and list of
846  * heap TIDs. When nhtids == 1, builds a standard non-pivot tuple without a
847  * posting list. (Posting list tuples can never have a single heap TID, partly
848  * because that ensures that deduplication always reduces final MAXALIGN()'d
849  * size of entire tuple.)
850  *
851  * Convention is that posting list starts at a MAXALIGN()'d offset (rather
852  * than a SHORTALIGN()'d offset), in line with the approach taken when
853  * appending a heap TID to new pivot tuple/high key during suffix truncation.
854  * This sometimes wastes a little space that was only needed as alignment
855  * padding in the original tuple. Following this convention simplifies the
856  * space accounting used when deduplicating a page (the same convention
857  * simplifies the accounting for choosing a point to split a page at).
858  *
859  * Note: Caller's "htids" array must be unique and already in ascending TID
860  * order. Any existing heap TIDs from "base" won't automatically appear in
861  * returned posting list tuple (they must be included in htids array.)
862  */
864 _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
865 {
866  uint32 keysize,
867  newsize;
868  IndexTuple itup;
869 
870  if (BTreeTupleIsPosting(base))
871  keysize = BTreeTupleGetPostingOffset(base);
872  else
873  keysize = IndexTupleSize(base);
874 
875  Assert(!BTreeTupleIsPivot(base));
876  Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX);
877  Assert(keysize == MAXALIGN(keysize));
878 
879  /* Determine final size of new tuple */
880  if (nhtids > 1)
881  newsize = MAXALIGN(keysize +
882  nhtids * sizeof(ItemPointerData));
883  else
884  newsize = keysize;
885 
886  Assert(newsize <= INDEX_SIZE_MASK);
887  Assert(newsize == MAXALIGN(newsize));
888 
889  /* Allocate memory using palloc0() (matches index_form_tuple()) */
890  itup = palloc0(newsize);
891  memcpy(itup, base, keysize);
892  itup->t_info &= ~INDEX_SIZE_MASK;
893  itup->t_info |= newsize;
894  if (nhtids > 1)
895  {
896  /* Form posting list tuple */
897  BTreeTupleSetPosting(itup, nhtids, keysize);
898  memcpy(BTreeTupleGetPosting(itup), htids,
899  sizeof(ItemPointerData) * nhtids);
900  Assert(_bt_posting_valid(itup));
901  }
902  else
903  {
904  /* Form standard non-pivot tuple */
905  itup->t_info &= ~INDEX_ALT_TID_MASK;
906  ItemPointerCopy(htids, &itup->t_tid);
908  }
909 
910  return itup;
911 }
912 
913 /*
914  * Generate a replacement tuple by "updating" a posting list tuple so that it
915  * no longer has TIDs that need to be deleted.
916  *
917  * Used by both VACUUM and index deletion. Caller's vacposting argument
918  * points to the existing posting list tuple to be updated.
919  *
920  * On return, caller's vacposting argument will point to final "updated"
921  * tuple, which will be palloc()'d in caller's memory context.
922  */
923 void
925 {
926  IndexTuple origtuple = vacposting->itup;
927  uint32 keysize,
928  newsize;
929  IndexTuple itup;
930  int nhtids;
931  int ui,
932  d;
933  ItemPointer htids;
934 
935  nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids;
936 
937  Assert(_bt_posting_valid(origtuple));
938  Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple));
939 
940  /*
941  * Determine final size of new tuple.
942  *
943  * This calculation needs to match the code used within _bt_form_posting()
944  * for new posting list tuples. We avoid calling _bt_form_posting() here
945  * to save ourselves a second memory allocation for a htids workspace.
946  */
947  keysize = BTreeTupleGetPostingOffset(origtuple);
948  if (nhtids > 1)
949  newsize = MAXALIGN(keysize +
950  nhtids * sizeof(ItemPointerData));
951  else
952  newsize = keysize;
953 
954  Assert(newsize <= INDEX_SIZE_MASK);
955  Assert(newsize == MAXALIGN(newsize));
956 
957  /* Allocate memory using palloc0() (matches index_form_tuple()) */
958  itup = palloc0(newsize);
959  memcpy(itup, origtuple, keysize);
960  itup->t_info &= ~INDEX_SIZE_MASK;
961  itup->t_info |= newsize;
962 
963  if (nhtids > 1)
964  {
965  /* Form posting list tuple */
966  BTreeTupleSetPosting(itup, nhtids, keysize);
967  htids = BTreeTupleGetPosting(itup);
968  }
969  else
970  {
971  /* Form standard non-pivot tuple */
972  itup->t_info &= ~INDEX_ALT_TID_MASK;
973  htids = &itup->t_tid;
974  }
975 
976  ui = 0;
977  d = 0;
978  for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++)
979  {
980  if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i)
981  {
982  d++;
983  continue;
984  }
985  htids[ui++] = *BTreeTupleGetPostingN(origtuple, i);
986  }
987  Assert(ui == nhtids);
988  Assert(d == vacposting->ndeletedtids);
989  Assert(nhtids == 1 || _bt_posting_valid(itup));
990  Assert(nhtids > 1 || ItemPointerIsValid(&itup->t_tid));
991 
992  /* vacposting arg's itup will now point to updated version */
993  vacposting->itup = itup;
994 }
995 
996 /*
997  * Prepare for a posting list split by swapping heap TID in newitem with heap
998  * TID from original posting list (the 'oposting' heap TID located at offset
999  * 'postingoff'). Modifies newitem, so caller should pass their own private
1000  * copy that can safely be modified.
1001  *
1002  * Returns new posting list tuple, which is palloc()'d in caller's context.
1003  * This is guaranteed to be the same size as 'oposting'. Modified newitem is
1004  * what caller actually inserts. (This happens inside the same critical
1005  * section that performs an in-place update of old posting list using new
1006  * posting list returned here.)
1007  *
1008  * While the keys from newitem and oposting must be opclass equal, and must
1009  * generate identical output when run through the underlying type's output
1010  * function, it doesn't follow that their representations match exactly.
1011  * Caller must avoid assuming that there can't be representational differences
1012  * that make datums from oposting bigger or smaller than the corresponding
1013  * datums from newitem. For example, differences in TOAST input state might
1014  * break a faulty assumption about tuple size (the executor is entitled to
1015  * apply TOAST compression based on its own criteria). It also seems possible
1016  * that further representational variation will be introduced in the future,
1017  * in order to support nbtree features like page-level prefix compression.
1018  *
1019  * See nbtree/README for details on the design of posting list splits.
1020  */
1021 IndexTuple
1022 _bt_swap_posting(IndexTuple newitem, IndexTuple oposting, int postingoff)
1023 {
1024  int nhtids;
1025  char *replacepos;
1026  char *replaceposright;
1027  Size nmovebytes;
1028  IndexTuple nposting;
1029 
1030  nhtids = BTreeTupleGetNPosting(oposting);
1031  Assert(_bt_posting_valid(oposting));
1032 
1033  /*
1034  * The postingoff argument originated as a _bt_binsrch_posting() return
1035  * value. It will be 0 in the event of corruption that makes a leaf page
1036  * contain a non-pivot tuple that's somehow identical to newitem (no two
1037  * non-pivot tuples should ever have the same TID). This has been known
1038  * to happen in the field from time to time.
1039  *
1040  * Perform a basic sanity check to catch this case now.
1041  */
1042  if (!(postingoff > 0 && postingoff < nhtids))
1043  elog(ERROR, "posting list tuple with %d items cannot be split at offset %d",
1044  nhtids, postingoff);
1045 
1046  /*
1047  * Move item pointers in posting list to make a gap for the new item's
1048  * heap TID. We shift TIDs one place to the right, losing original
1049  * rightmost TID. (nmovebytes must not include TIDs to the left of
1050  * postingoff, nor the existing rightmost/max TID that gets overwritten.)
1051  */
1052  nposting = CopyIndexTuple(oposting);
1053  replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff);
1054  replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1);
1055  nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData);
1056  memmove(replaceposright, replacepos, nmovebytes);
1057 
1058  /* Fill the gap at postingoff with TID of new item (original new TID) */
1059  Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem));
1060  ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos);
1061 
1062  /* Now copy oposting's rightmost/max TID into new item (final new TID) */
1063  ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid);
1064 
1066  BTreeTupleGetHeapTID(newitem)) < 0);
1067  Assert(_bt_posting_valid(nposting));
1068 
1069  return nposting;
1070 }
1071 
1072 /*
1073  * Verify posting list invariants for "posting", which must be a posting list
1074  * tuple. Used within assertions.
1075  */
1076 #ifdef USE_ASSERT_CHECKING
1077 static bool
1078 _bt_posting_valid(IndexTuple posting)
1079 {
1080  ItemPointerData last;
1081  ItemPointer htid;
1082 
1083  if (!BTreeTupleIsPosting(posting) || BTreeTupleGetNPosting(posting) < 2)
1084  return false;
1085 
1086  /* Remember first heap TID for loop */
1087  ItemPointerCopy(BTreeTupleGetHeapTID(posting), &last);
1088  if (!ItemPointerIsValid(&last))
1089  return false;
1090 
1091  /* Iterate, starting from second TID */
1092  for (int i = 1; i < BTreeTupleGetNPosting(posting); i++)
1093  {
1094  htid = BTreeTupleGetPostingN(posting, i);
1095 
1096  if (!ItemPointerIsValid(htid))
1097  return false;
1098  if (ItemPointerCompare(htid, &last) <= 0)
1099  return false;
1100  ItemPointerCopy(htid, &last);
1101  }
1102 
1103  return true;
1104 }
1105 #endif
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3667
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2474
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:408
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static Size PageGetPageSize(Page page)
Definition: bufpage.h:273
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define MAXALIGN(LEN)
Definition: c.h:811
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define Max(x, y)
Definition: c.h:998
#define Assert(condition)
Definition: c.h:858
#define PG_UINT16_MAX
Definition: c.h:587
size_t Size
Definition: c.h:605
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:547
int i
Definition: isn.c:73
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
struct ItemIdData ItemIdData
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
struct ItemPointerData ItemPointerData
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
#define INDEX_SIZE_MASK
Definition: itup.h:65
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
void * palloc(Size size)
Definition: mcxt.c:1316
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting, int postingoff)
Definition: nbtdedup.c:1022
void _bt_dedup_pass(Relation rel, Buffer buf, IndexTuple newitem, Size newitemsz, bool bottomupdedup)
Definition: nbtdedup.c:58
void _bt_update_posting(BTVacuumPosting vacposting)
Definition: nbtdedup.c:924
bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, Size newitemsz)
Definition: nbtdedup.c:307
bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
Definition: nbtdedup.c:484
void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, OffsetNumber baseoff)
Definition: nbtdedup.c:433
static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem)
Definition: nbtdedup.c:782
IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
Definition: nbtdedup.c:864
static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state, TM_IndexDeleteOp *delstate)
Definition: nbtdedup.c:648
Size _bt_dedup_finish_pending(Page newpage, BTDedupState state)
Definition: nbtdedup.c:555
static void _bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz)
Definition: nbtdedup.c:822
void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, TM_IndexDeleteOp *delstate)
Definition: nbtpage.c:1513
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:518
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:480
#define BTREE_SINGLEVAL_FILLFACTOR
Definition: nbtree.h:202
#define P_HIKEY
Definition: nbtree.h:367
#define P_HAS_GARBAGE(opaque)
Definition: nbtree.h:226
static void BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
Definition: nbtree.h:504
#define BTP_HAS_GARBAGE
Definition: nbtree.h:82
#define BTPageGetOpaque(page)
Definition: nbtree.h:73
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:537
#define MaxTIDsPerBTreePage
Definition: nbtree.h:185
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:529
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:219
#define BTMaxItemSize(page)
Definition: nbtree.h:164
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:544
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:459
static ItemPointer BTreeTupleGetMaxHeapTID(IndexTuple itup)
Definition: nbtree.h:664
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:492
BTDedupStateData * BTDedupState
Definition: nbtree.h:893
static ItemPointer BTreeTupleGetHeapTID(IndexTuple itup)
Definition: nbtree.h:638
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:4874
#define XLOG_BTREE_DEDUP
Definition: nbtxlog.h:33
#define SizeOfBtreeDedup
Definition: nbtxlog.h:174
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:73
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:524
uint16 btpo_flags
Definition: nbtree.h:67
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:911
uint16 ndeletedtids
Definition: nbtree.h:910
IndexTuple itup
Definition: nbtree.h:906
ItemPointerData t_tid
Definition: itup.h:37
unsigned short t_info
Definition: itup.h:49
TM_IndexStatus * status
Definition: tableam.h:255
int bottomupfreespace
Definition: tableam.h:250
Relation irel
Definition: tableam.h:247
TM_IndexDelete * deltids
Definition: tableam.h:254
BlockNumber iblknum
Definition: tableam.h:248
ItemPointerData tid
Definition: tableam.h:213
bool knowndeletable
Definition: tableam.h:220
bool promising
Definition: tableam.h:223
int16 freespace
Definition: tableam.h:224
OffsetNumber idxoffnum
Definition: tableam.h:219
Definition: regguts.h:323
uint16 nintervals
Definition: nbtxlog.h:169
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34