PostgreSQL Source Code git master
nbtdedup.c
1/*-------------------------------------------------------------------------
2 *
3 * nbtdedup.c
4 * Deduplicate or bottom-up delete items in Postgres btrees.
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/nbtree/nbtdedup.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/nbtree.h"
18#include "access/nbtxlog.h"
19#include "access/xloginsert.h"
20#include "miscadmin.h"
21#include "utils/rel.h"
22
23static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state,
24 TM_IndexDeleteOp *delstate);
25static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state,
26 OffsetNumber minoff, IndexTuple newitem);
27static void _bt_singleval_fillfactor(Page page, BTDedupState state,
28 Size newitemsz);
29#ifdef USE_ASSERT_CHECKING
30static bool _bt_posting_valid(IndexTuple posting);
31#endif
32
33/*
34 * Perform a deduplication pass.
35 *
36 * The general approach taken here is to perform as much deduplication as
37 * possible to free as much space as possible. Note, however, that "single
38 * value" strategy is used for !bottomupdedup callers when the page is full of
39 * tuples of a single value. Deduplication passes that apply the strategy
40 * will leave behind a few untouched tuples at the end of the page, preparing
41 * the page for an anticipated page split that uses nbtsplitloc.c's own single
42 * value strategy. Our high level goal is to delay merging the untouched
43 * tuples until after the page splits.
44 *
45 * When a call to _bt_bottomupdel_pass() just took place (and failed), our
46 * high level goal is to prevent a page split entirely by buying more time.
47 * We still hope that a page split can be avoided altogether. That's why
48 * single value strategy is not even considered for bottomupdedup callers.
49 *
50 * The page will have to be split if we cannot successfully free at least
51 * newitemsz (we also need space for newitem's line pointer, which isn't
52 * included in caller's newitemsz).
53 *
54 * Note: Caller should have already deleted all existing items with their
55 * LP_DEAD bits set.
56 */
57void
58_bt_dedup_pass(Relation rel, Buffer buf, IndexTuple newitem, Size newitemsz,
59 bool bottomupdedup)
60{
61 OffsetNumber offnum,
62 minoff,
63 maxoff;
64 Page page = BufferGetPage(buf);
65 BTPageOpaque opaque = BTPageGetOpaque(page);
66 Page newpage;
67 BTDedupState state;
68 Size pagesaving PG_USED_FOR_ASSERTS_ONLY = 0;
69 bool singlevalstrat = false;
70 int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
71
72 /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
73 newitemsz += sizeof(ItemIdData);
74
75 /*
76 * Initialize deduplication state.
77 *
78 * It would be possible for maxpostingsize (limit on posting list tuple
79 * size) to be set to one third of the page. However, it seems like a
80 * good idea to limit the size of posting lists to one sixth of a page.
81 * That ought to leave us with a good split point when pages full of
82 * duplicates can be split several times.
83 */
84 state = (BTDedupState) palloc(sizeof(BTDedupStateData));
85 state->deduplicate = true;
86 state->nmaxitems = 0;
87 state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
88 /* Metadata about base tuple of current pending posting list */
89 state->base = NULL;
90 state->baseoff = InvalidOffsetNumber;
91 state->basetupsize = 0;
92 /* Metadata about current pending posting list TIDs */
93 state->htids = palloc(state->maxpostingsize);
94 state->nhtids = 0;
95 state->nitems = 0;
96 /* Size of all physical tuples to be replaced by pending posting list */
97 state->phystupsize = 0;
98 /* nintervals should be initialized to zero */
99 state->nintervals = 0;
100
101 minoff = P_FIRSTDATAKEY(opaque);
102 maxoff = PageGetMaxOffsetNumber(page);
103
104 /*
105 * Consider applying "single value" strategy, though only if the page
106 * seems likely to be split in the near future
107 */
108 if (!bottomupdedup)
109 singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
110
111 /*
112 * Deduplicate items from page, and write them to newpage.
113 *
114 * Copy the original page's LSN into newpage copy. This will become the
115 * updated version of the page. We need this because XLogInsert will
116 * examine the LSN and possibly dump it in a page image.
117 */
118 newpage = PageGetTempPageCopySpecial(page);
119 PageSetLSN(newpage, PageGetLSN(page));
120
121 /* Copy high key, if any */
122 if (!P_RIGHTMOST(opaque))
123 {
124 ItemId hitemid = PageGetItemId(page, P_HIKEY);
125 Size hitemsz = ItemIdGetLength(hitemid);
126 IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid);
127
128 if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY,
129 false, false) == InvalidOffsetNumber)
130 elog(ERROR, "deduplication failed to add highkey");
131 }
132
133 for (offnum = minoff;
134 offnum <= maxoff;
135 offnum = OffsetNumberNext(offnum))
136 {
137 ItemId itemid = PageGetItemId(page, offnum);
138 IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
139
140 Assert(!ItemIdIsDead(itemid));
141
142 if (offnum == minoff)
143 {
144 /*
145 * No previous/base tuple for the data item -- use the data item
146 * as base tuple of pending posting list
147 */
148 _bt_dedup_start_pending(state, itup, offnum);
149 }
150 else if (state->deduplicate &&
151 _bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
152 _bt_dedup_save_htid(state, itup))
153 {
154 /*
155 * Tuple is equal to base tuple of pending posting list. Heap
156 * TID(s) for itup have been saved in state.
157 */
158 }
159 else
160 {
161 /*
162 * Tuple is not equal to pending posting list tuple, or
163 * _bt_dedup_save_htid() opted to not merge current item into
164 * pending posting list for some other reason (e.g., adding more
165 * TIDs would have caused posting list to exceed current
166 * maxpostingsize).
167 *
168 * If state contains pending posting list with more than one item,
169 * form new posting tuple and add it to our temp page (newpage).
170 * Else add pending interval's base tuple to the temp page as-is.
171 */
172 pagesaving += _bt_dedup_finish_pending(newpage, state);
173
174 if (singlevalstrat)
175 {
176 /*
177 * Single value strategy's extra steps.
178 *
179 * Lower maxpostingsize for sixth and final large posting list
180 * tuple at the point where 5 maxpostingsize-capped tuples
181 * have either been formed or observed.
182 *
183 * When a sixth maxpostingsize-capped item is formed/observed,
184 * stop merging together tuples altogether. The few tuples
185 * that remain at the end of the page won't be merged together
186 * at all (at least not until after a future page split takes
187 * place, when this page's newly allocated right sibling page
188 * gets its first deduplication pass).
189 */
190 if (state->nmaxitems == 5)
191 _bt_singleval_fillfactor(page, state, newitemsz);
192 else if (state->nmaxitems == 6)
193 {
194 state->deduplicate = false;
195 singlevalstrat = false; /* won't be back here */
196 }
197 }
198
199 /* itup starts new pending posting list */
200 _bt_dedup_start_pending(state, itup, offnum);
201 }
202 }
203
204 /* Handle the last item */
205 pagesaving += _bt_dedup_finish_pending(newpage, state);
206
207 /*
208 * If no items suitable for deduplication were found, newpage must be
209 * exactly the same as the original page, so just return from function.
210 *
211 * We could determine whether or not to proceed on the basis of the space
212 * savings being sufficient to avoid an immediate page split instead. We
213 * don't do that because there is some small value in nbtsplitloc.c always
214 * operating against a page that is fully deduplicated (apart from
215 * newitem). Besides, most of the cost has already been paid.
216 */
217 if (state->nintervals == 0)
218 {
219 /* cannot leak memory here */
220 pfree(newpage);
221 pfree(state->htids);
222 pfree(state);
223 return;
224 }
225
226 /*
227 * By here, it's clear that deduplication will definitely go ahead.
228 *
229 * Clear the BTP_HAS_GARBAGE page flag. The index must be a heapkeyspace
230 * index, and as such we'll never pay attention to BTP_HAS_GARBAGE anyway.
231 * But keep things tidy.
232 */
233 if (P_HAS_GARBAGE(opaque))
234 {
235 BTPageOpaque nopaque = BTPageGetOpaque(newpage);
236
237 nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
238 }
239
240 START_CRIT_SECTION();
241
242 PageRestoreTempPage(newpage, page);
243 MarkBufferDirty(buf);
244
245 /* XLOG stuff */
246 if (RelationNeedsWAL(rel))
247 {
248 XLogRecPtr recptr;
249 xl_btree_dedup xlrec_dedup;
250
251 xlrec_dedup.nintervals = state->nintervals;
252
253 XLogBeginInsert();
254 XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
255 XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup);
256
257 /*
258 * The intervals array is not in the buffer, but pretend that it is.
259 * When XLogInsert stores the whole buffer, the array need not be
260 * stored too.
261 */
262 XLogRegisterBufData(0, (char *) state->intervals,
263 state->nintervals * sizeof(BTDedupInterval));
264
265 recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP);
266
267 PageSetLSN(page, recptr);
268 }
269
270 END_CRIT_SECTION();
271
272 /* Local space accounting should agree with page accounting */
273 Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz);
274
275 /* cannot leak memory here */
276 pfree(state->htids);
277 pfree(state);
278}
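The space accounting in _bt_dedup_pass() hinges on newitemsz: callers pass the MAXALIGN()'d size of the incoming tuple, and the function immediately adds line pointer overhead before comparing against reclaimed space. Below is a minimal standalone sketch of that relationship; it is editorial, not part of nbtdedup.c, and assumes the typical 8-byte MAXALIGN() and 4-byte ItemIdData.

#include <stdio.h>

/* Editorial stand-ins for the typical PostgreSQL values (assumptions) */
#define SKETCH_MAXALIGN(len)  (((len) + 7) & ~((size_t) 7))  /* 8-byte MAXALIGN() */
#define SKETCH_ITEMID_SIZE    4                              /* sizeof(ItemIdData) */

int
main(void)
{
    size_t rawtupsz = 20;                           /* hypothetical index tuple size */
    size_t newitemsz = SKETCH_MAXALIGN(rawtupsz);   /* what the caller passes in */

    /* _bt_dedup_pass() adds the line pointer before using it as a space target */
    size_t spacetarget = newitemsz + SKETCH_ITEMID_SIZE;

    printf("caller newitemsz = %zu, dedup space target = %zu\n",
           newitemsz, spacetarget);                 /* prints 24 and 28 */
    return 0;
}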
279
280/*
281 * Perform bottom-up index deletion pass.
282 *
283 * See if duplicate index tuples (plus certain nearby tuples) are eligible to
284 * be deleted via bottom-up index deletion. The high level goal here is to
285 * entirely prevent "unnecessary" page splits caused by MVCC version churn
286 * from UPDATEs (when the UPDATEs don't logically modify any of the columns
287 * covered by the 'rel' index). This is qualitative, not quantitative -- we
288 * do not particularly care about once-off opportunities to delete many index
289 * tuples together.
290 *
291 * See nbtree/README for details on the design of nbtree bottom-up deletion.
292 * See access/tableam.h for a description of how we're expected to cooperate
293 * with the tableam.
294 *
295 * Returns true on success, in which case caller can assume page split will be
296 * avoided for a reasonable amount of time. Returns false when caller should
297 * deduplicate the page (if possible at all).
298 *
299 * Note: Occasionally we return true despite failing to delete enough items to
300 * avoid a split. This makes caller skip deduplication and go split the page
301 * right away. Our return value is always just advisory information.
302 *
303 * Note: Caller should have already deleted all existing items with their
304 * LP_DEAD bits set.
305 */
306bool
307_bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel,
308 Size newitemsz)
309{
310 OffsetNumber offnum,
311 minoff,
312 maxoff;
313 Page page = BufferGetPage(buf);
314 BTPageOpaque opaque = BTPageGetOpaque(page);
315 BTDedupState state;
316 TM_IndexDeleteOp delstate;
317 bool neverdedup;
318 int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
319
320 /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
321 newitemsz += sizeof(ItemIdData);
322
323 /* Initialize deduplication state */
324 state = (BTDedupState) palloc(sizeof(BTDedupStateData));
325 state->deduplicate = true;
326 state->nmaxitems = 0;
327 state->maxpostingsize = BLCKSZ; /* We're not really deduplicating */
328 state->base = NULL;
329 state->baseoff = InvalidOffsetNumber;
330 state->basetupsize = 0;
331 state->htids = palloc(state->maxpostingsize);
332 state->nhtids = 0;
333 state->nitems = 0;
334 state->phystupsize = 0;
335 state->nintervals = 0;
336
337 /*
338 * Initialize tableam state that describes bottom-up index deletion
339 * operation.
340 *
341 * We'll go on to ask the tableam to search for TIDs whose index tuples we
342 * can safely delete. The tableam will search until our leaf page space
343 * target is satisfied, or until the cost of continuing with the tableam
344 * operation seems too high. It focuses its efforts on TIDs associated
345 * with duplicate index tuples that we mark "promising".
346 *
347 * This space target is a little arbitrary. The tableam must be able to
348 * keep the costs and benefits in balance. We provide the tableam with
349 * exhaustive information about what might work, without directly
350 * concerning ourselves with avoiding work during the tableam call. Our
351 * role in costing the bottom-up deletion process is strictly advisory.
352 */
353 delstate.irel = rel;
354 delstate.iblknum = BufferGetBlockNumber(buf);
355 delstate.bottomup = true;
356 delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz);
357 delstate.ndeltids = 0;
358 delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete));
359 delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus));
360
361 minoff = P_FIRSTDATAKEY(opaque);
362 maxoff = PageGetMaxOffsetNumber(page);
363 for (offnum = minoff;
364 offnum <= maxoff;
365 offnum = OffsetNumberNext(offnum))
366 {
367 ItemId itemid = PageGetItemId(page, offnum);
368 IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
369
370 Assert(!ItemIdIsDead(itemid));
371
372 if (offnum == minoff)
373 {
374 /* itup starts first pending interval */
375 _bt_dedup_start_pending(state, itup, offnum);
376 }
377 else if (_bt_keep_natts_fast(rel, state->base, itup) > nkeyatts &&
378 _bt_dedup_save_htid(state, itup))
379 {
380 /* Tuple is equal; just added its TIDs to pending interval */
381 }
382 else
383 {
384 /* Finalize interval -- move its TIDs to delete state */
385 _bt_bottomupdel_finish_pending(page, state, &delstate);
386
387 /* itup starts new pending interval */
388 _bt_dedup_start_pending(state, itup, offnum);
389 }
390 }
391 /* Finalize final interval -- move its TIDs to delete state */
392 _bt_bottomupdel_finish_pending(page, state, &delstate);
393
394 /*
395 * We don't give up now in the event of having few (or even zero)
396 * promising tuples for the tableam because it's not up to us as the index
397 * AM to manage costs (note that the tableam might have heuristics of its
398 * own that work out what to do). We should at least avoid having our
399 * caller do a useless deduplication pass after we return in the event of
400 * zero promising tuples, though.
401 */
402 neverdedup = false;
403 if (state->nintervals == 0)
404 neverdedup = true;
405
406 pfree(state->htids);
407 pfree(state);
408
409 /* Ask tableam which TIDs are deletable, then physically delete them */
410 _bt_delitems_delete_check(rel, buf, heapRel, &delstate);
411
412 pfree(delstate.deltids);
413 pfree(delstate.status);
414
415 /* Report "success" to caller unconditionally to avoid deduplication */
416 if (neverdedup)
417 return true;
418
419 /* Don't dedup when we won't end up back here any time soon anyway */
420 return PageGetExactFreeSpace(page) >= Max(BLCKSZ / 24, newitemsz);
421}
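Two different thresholds appear in the pass above: the tableam is asked to free at least Max(BLCKSZ / 16, newitemsz) bytes, while the caller is told to skip a follow-up deduplication pass only if the page ends up with at least Max(BLCKSZ / 24, newitemsz) bytes free. The following standalone sketch (editorial, not part of nbtdedup.c) evaluates both for the default 8 kB block size.

#include <stdio.h>

#define SKETCH_BLCKSZ    8192                        /* default PostgreSQL block size */
#define SKETCH_MAX(x, y) ((x) > (y) ? (x) : (y))     /* stand-in for Max() */

int
main(void)
{
    size_t newitemsz = 24 + 4;      /* hypothetical MAXALIGN'd tuple plus line pointer */

    /* Space target handed to the tableam (delstate.bottomupfreespace) */
    size_t tableam_target = SKETCH_MAX(SKETCH_BLCKSZ / 16, newitemsz);

    /* Free space the page must end up with for the caller to skip deduplication */
    size_t skip_dedup_threshold = SKETCH_MAX(SKETCH_BLCKSZ / 24, newitemsz);

    printf("tableam target = %zu, skip-dedup threshold = %zu\n",
           tableam_target, skip_dedup_threshold);    /* prints 512 and 341 */
    return 0;
}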
422
423/*
424 * Create a new pending posting list tuple based on caller's base tuple.
425 *
426 * Every tuple processed by deduplication either becomes the base tuple for a
427 * posting list, or gets its heap TID(s) accepted into a pending posting list.
428 * A tuple that starts out as the base tuple for a posting list will only
429 * actually be rewritten within _bt_dedup_finish_pending() when it turns out
430 * that there are duplicates that can be merged into the base tuple.
431 */
432void
433_bt_dedup_start_pending(BTDedupState state, IndexTuple base,
434 OffsetNumber baseoff)
435{
436 Assert(state->nhtids == 0);
437 Assert(state->nitems == 0);
438 Assert(!BTreeTupleIsPivot(base));
439
440 /*
441 * Copy heap TID(s) from new base tuple for new candidate posting list
442 * into working state's array
443 */
444 if (!BTreeTupleIsPosting(base))
445 {
446 memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData));
447 state->nhtids = 1;
448 state->basetupsize = IndexTupleSize(base);
449 }
450 else
451 {
452 int nposting;
453
454 nposting = BTreeTupleGetNPosting(base);
455 memcpy(state->htids, BTreeTupleGetPosting(base),
456 sizeof(ItemPointerData) * nposting);
457 state->nhtids = nposting;
458 /* basetupsize should not include existing posting list */
459 state->basetupsize = BTreeTupleGetPostingOffset(base);
460 }
461
462 /*
463 * Save new base tuple itself -- it'll be needed if we actually create a
464 * new posting list from new pending posting list.
465 *
466 * Must maintain physical size of all existing tuples (including line
467 * pointer overhead) so that we can calculate space savings on page.
468 */
469 state->nitems = 1;
470 state->base = base;
471 state->baseoff = baseoff;
472 state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData);
473 /* Also save baseoff in pending state for interval */
474 state->intervals[state->nintervals].baseoff = state->baseoff;
475}
476
477/*
478 * Save itup heap TID(s) into pending posting list where possible.
479 *
480 * Returns bool indicating if the pending posting list managed by state now
481 * includes itup's heap TID(s).
482 */
483bool
484_bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
485{
486 int nhtids;
487 ItemPointer htids;
488 Size mergedtupsz;
489
490 Assert(!BTreeTupleIsPivot(itup));
491
492 if (!BTreeTupleIsPosting(itup))
493 {
494 nhtids = 1;
495 htids = &itup->t_tid;
496 }
497 else
498 {
499 nhtids = BTreeTupleGetNPosting(itup);
500 htids = BTreeTupleGetPosting(itup);
501 }
502
503 /*
504 * Don't append (have caller finish pending posting list as-is) if
505 * appending heap TID(s) from itup would put us over maxpostingsize limit.
506 *
507 * This calculation needs to match the code used within _bt_form_posting()
508 * for new posting list tuples.
509 */
510 mergedtupsz = MAXALIGN(state->basetupsize +
511 (state->nhtids + nhtids) * sizeof(ItemPointerData));
512
513 if (mergedtupsz > state->maxpostingsize)
514 {
515 /*
516 * Count this as an oversized item for single value strategy, though
517 * only when there are more than 50 TIDs in the final posting list tuple. This
518 * limit (which is fairly arbitrary) avoids confusion about how many
519 * 1/6 of a page tuples have been encountered/created by the current
520 * deduplication pass.
521 *
522 * Note: We deliberately don't consider which deduplication pass
523 * merged together tuples to create this item (could be a previous
524 * deduplication pass, or current pass). See _bt_do_singleval()
525 * comments.
526 */
527 if (state->nhtids > 50)
528 state->nmaxitems++;
529
530 return false;
531 }
532
533 /*
534 * Save heap TIDs to pending posting list tuple -- itup can be merged into
535 * pending posting list
536 */
537 state->nitems++;
538 memcpy(state->htids + state->nhtids, htids,
539 sizeof(ItemPointerData) * nhtids);
540 state->nhtids += nhtids;
541 state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
542
543 return true;
544}
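The cap check above boils down to one expression: the pending posting list is only extended if MAXALIGN(basetupsize + (existing + incoming TIDs) * sizeof(ItemPointerData)) stays within maxpostingsize. A standalone sketch of that arithmetic follows (editorial, not part of nbtdedup.c; the 8-byte MAXALIGN and 6-byte ItemPointerData are assumed typical values).

#include <stdio.h>

#define SKETCH_MAXALIGN(len)      (((len) + 7) & ~((size_t) 7))
#define SKETCH_ITEMPOINTER_SIZE   6     /* typical sizeof(ItemPointerData) */

int
main(void)
{
    size_t basetupsize = 16;        /* hypothetical base tuple size (header included) */
    int    pending_nhtids = 3;      /* TIDs already saved in state->htids */
    int    itup_nhtids = 2;         /* TIDs the incoming tuple would add */
    size_t maxpostingsize = 64;     /* hypothetical cap; normally about 1/6 of a page */

    size_t mergedtupsz =
        SKETCH_MAXALIGN(basetupsize +
                        (pending_nhtids + itup_nhtids) * SKETCH_ITEMPOINTER_SIZE);

    if (mergedtupsz > maxpostingsize)
        printf("caller must finish the pending posting list as-is (%zu > %zu)\n",
               mergedtupsz, maxpostingsize);
    else
        printf("itup can be merged (%zu <= %zu)\n", mergedtupsz, maxpostingsize);
    return 0;
}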
545
546/*
547 * Finalize pending posting list tuple, and add it to the page. Final tuple
548 * is based on saved base tuple, and saved list of heap TIDs.
549 *
550 * Returns space saving from deduplicating to make a new posting list tuple.
551 * Note that this includes line pointer overhead. This is zero in the case
552 * where no deduplication was possible.
553 */
554Size
555_bt_dedup_finish_pending(Page newpage, BTDedupState state)
556{
557 OffsetNumber tupoff;
558 Size tuplesz;
559 Size spacesaving;
560
561 Assert(state->nitems > 0);
562 Assert(state->nitems <= state->nhtids);
563 Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
564
565 tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage));
566 if (state->nitems == 1)
567 {
568 /* Use original, unchanged base tuple */
569 tuplesz = IndexTupleSize(state->base);
570 Assert(tuplesz == MAXALIGN(IndexTupleSize(state->base)));
571 Assert(tuplesz <= BTMaxItemSize(newpage));
572 if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff,
573 false, false) == InvalidOffsetNumber)
574 elog(ERROR, "deduplication failed to add tuple to page");
575
576 spacesaving = 0;
577 }
578 else
579 {
580 IndexTuple final;
581
582 /* Form a tuple with a posting list */
583 final = _bt_form_posting(state->base, state->htids, state->nhtids);
584 tuplesz = IndexTupleSize(final);
585 Assert(tuplesz <= state->maxpostingsize);
586
587 /* Save final number of items for posting list */
588 state->intervals[state->nintervals].nitems = state->nitems;
589
590 Assert(tuplesz == MAXALIGN(IndexTupleSize(final)));
591 Assert(tuplesz <= BTMaxItemSize(newpage));
592 if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false,
593 false) == InvalidOffsetNumber)
594 elog(ERROR, "deduplication failed to add tuple to page");
595
596 pfree(final);
597 spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData));
598 /* Increment nintervals, since we wrote a new posting list tuple */
599 state->nintervals++;
600 Assert(spacesaving > 0 && spacesaving < BLCKSZ);
601 }
602
603 /* Reset state for next pending posting list */
604 state->nhtids = 0;
605 state->nitems = 0;
606 state->phystupsize = 0;
607
608 return spacesaving;
609}
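As a concrete illustration of the space saving computed above, consider three plain duplicates whose MAXALIGN()'d size is 24 bytes each. This standalone sketch (editorial, not part of nbtdedup.c) assumes the typical 8-byte MAXALIGN, 6-byte ItemPointerData and 4-byte ItemIdData.

#include <stdio.h>

#define SKETCH_MAXALIGN(len)      (((len) + 7) & ~((size_t) 7))
#define SKETCH_ITEMPOINTER_SIZE   6
#define SKETCH_ITEMID_SIZE        4

int
main(void)
{
    size_t basetupsz = 24;      /* MAXALIGN'd size of one plain duplicate */
    int    nitems = 3;          /* three duplicates, one heap TID each */

    /* Physical footprint of the originals, line pointers included */
    size_t phystupsize = nitems * (basetupsz + SKETCH_ITEMID_SIZE);

    /* Replacement posting list tuple: base tuple plus all three TIDs appended */
    size_t tuplesz = SKETCH_MAXALIGN(basetupsz + nitems * SKETCH_ITEMPOINTER_SIZE);

    size_t spacesaving = phystupsize - (tuplesz + SKETCH_ITEMID_SIZE);

    printf("originals = %zu bytes, posting tuple = %zu bytes, saving = %zu bytes\n",
           phystupsize, tuplesz, spacesaving);      /* prints 84, 48 and 32 */
    return 0;
}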
610
611/*
612 * Finalize interval during bottom-up index deletion.
613 *
614 * During a bottom-up pass we expect that TIDs will be recorded in dedup state
615 * first, and then get moved over to delstate (in variable-sized batches) by
616 * calling here. Call here happens when the number of TIDs in a dedup
617 * interval is known, and interval gets finalized (i.e. when caller sees next
618 * tuple on the page is not a duplicate, or when caller runs out of tuples to
619 * process from leaf page).
620 *
621 * This is where bottom-up deletion determines and remembers which entries are
622 * duplicates. This will be important information to the tableam delete
623 * infrastructure later on. Plain index tuple duplicates are marked
624 * "promising" here, per tableam contract.
625 *
626 * Our approach to marking entries whose TIDs come from posting lists is more
627 * complicated. Posting lists can only be formed by a deduplication pass (or
628 * during an index build), so recent version churn affecting the pointed-to
629 * logical rows is not particularly likely. We may still give a weak signal
630 * about posting list tuples' entries (by marking just one of their TIDs/entries
631 * promising), though this is only a possibility in the event of further
632 * duplicate index tuples in final interval that covers posting list tuple (as
633 * in the plain tuple case). A weak signal/hint will be useful to the tableam
634 * when it has no stronger signal to go with for the deletion operation as a
635 * whole.
636 *
637 * The heuristics we use work well in practice because we only need to give
638 * the tableam the right _general_ idea about where to look. Garbage tends to
639 * naturally get concentrated in relatively few table blocks with workloads
640 * that bottom-up deletion targets. The tableam cannot possibly rank all
641 * available table blocks sensibly based on the hints we provide, but that's
642 * okay -- only the extremes matter. The tableam just needs to be able to
643 * predict which few table blocks will have the most tuples that are safe to
644 * delete for each deletion operation, with low variance across related
645 * deletion operations.
646 */
647static void
648_bt_bottomupdel_finish_pending(Page page, BTDedupState state,
649 TM_IndexDeleteOp *delstate)
650{
651 bool dupinterval = (state->nitems > 1);
652
653 Assert(state->nitems > 0);
654 Assert(state->nitems <= state->nhtids);
655 Assert(state->intervals[state->nintervals].baseoff == state->baseoff);
656
657 for (int i = 0; i < state->nitems; i++)
658 {
659 OffsetNumber offnum = state->baseoff + i;
660 ItemId itemid = PageGetItemId(page, offnum);
661 IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
662 TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids];
663 TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids];
664
665 if (!BTreeTupleIsPosting(itup))
666 {
667 /* Simple case: A plain non-pivot tuple */
668 ideltid->tid = itup->t_tid;
669 ideltid->id = delstate->ndeltids;
670 istatus->idxoffnum = offnum;
671 istatus->knowndeletable = false; /* for now */
672 istatus->promising = dupinterval; /* simple rule */
673 istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData);
674
675 delstate->ndeltids++;
676 }
677 else
678 {
679 /*
680 * Complicated case: A posting list tuple.
681 *
682 * We make the conservative assumption that there can only be at
683 * most one affected logical row per posting list tuple. There
684 * will be at most one promising entry in deltids to represent
685 * this presumed lone logical row. Note that this isn't even
686 * considered unless the posting list tuple is also in an interval
687 * of duplicates -- this complicated rule is just a variant of the
688 * simple rule used to decide if plain index tuples are promising.
689 */
690 int nitem = BTreeTupleGetNPosting(itup);
691 bool firstpromising = false;
692 bool lastpromising = false;
693
694 Assert(_bt_posting_valid(itup));
695
696 if (dupinterval)
697 {
698 /*
699 * Complicated rule: either the first or last TID in the
700 * posting list gets marked promising (if any at all)
701 */
702 BlockNumber minblocklist,
703 midblocklist,
704 maxblocklist;
705 ItemPointer mintid,
706 midtid,
707 maxtid;
708
709 mintid = BTreeTupleGetHeapTID(itup);
710 midtid = BTreeTupleGetPostingN(itup, nitem / 2);
711 maxtid = BTreeTupleGetMaxHeapTID(itup);
712 minblocklist = ItemPointerGetBlockNumber(mintid);
713 midblocklist = ItemPointerGetBlockNumber(midtid);
714 maxblocklist = ItemPointerGetBlockNumber(maxtid);
715
716 /* Only entry with predominant table block can be promising */
717 firstpromising = (minblocklist == midblocklist);
718 lastpromising = (!firstpromising &&
719 midblocklist == maxblocklist);
720 }
721
722 for (int p = 0; p < nitem; p++)
723 {
724 ItemPointer htid = BTreeTupleGetPostingN(itup, p);
725
726 ideltid->tid = *htid;
727 ideltid->id = delstate->ndeltids;
728 istatus->idxoffnum = offnum;
729 istatus->knowndeletable = false; /* for now */
730 istatus->promising = false;
731 if ((firstpromising && p == 0) ||
732 (lastpromising && p == nitem - 1))
733 istatus->promising = true;
734 istatus->freespace = sizeof(ItemPointerData); /* at worst */
735
736 ideltid++;
737 istatus++;
738 delstate->ndeltids++;
739 }
740 }
741 }
742
743 if (dupinterval)
744 {
745 state->intervals[state->nintervals].nitems = state->nitems;
746 state->nintervals++;
747 }
748
749 /* Reset state for next interval */
750 state->nhtids = 0;
751 state->nitems = 0;
752 state->phystupsize = 0;
753}
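The "predominant table block" rule above can be restated in a few lines: look at the table block numbers of the posting list's first, middle and last heap TIDs, and mark at most one end of the list promising. A standalone sketch with invented block numbers (editorial, not part of nbtdedup.c):

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    unsigned minblock = 100;    /* table block of the first TID in the posting list */
    unsigned midblock = 100;    /* table block of the middle TID */
    unsigned maxblock = 107;    /* table block of the last TID */

    /* Only an entry on the predominant table block can be promising */
    bool firstpromising = (minblock == midblock);
    bool lastpromising = (!firstpromising && midblock == maxblock);

    printf("first promising: %d, last promising: %d\n",
           firstpromising, lastpromising);          /* prints 1, 0 */
    return 0;
}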
754
755/*
756 * Determine if page non-pivot tuples (data items) are all duplicates of the
757 * same value -- if they are, deduplication's "single value" strategy should
758 * be applied. The general goal of this strategy is to ensure that
759 * nbtsplitloc.c (which uses its own single value strategy) will find a useful
760 * split point as further duplicates are inserted, and successive rightmost
761 * page splits occur among pages that store the same duplicate value. When
762 * the page finally splits, it should end up BTREE_SINGLEVAL_FILLFACTOR% full,
763 * just like it would if deduplication were disabled.
764 *
765 * We expect that affected workloads will require _several_ single value
766 * strategy deduplication passes (over a page that only stores duplicates)
767 * before the page is finally split. The first deduplication pass should only
768 * find regular non-pivot tuples. Later deduplication passes will find
769 * existing maxpostingsize-capped posting list tuples, which must be skipped
770 * over. The penultimate pass is generally the first pass that actually
771 * reaches _bt_singleval_fillfactor(), and so will deliberately leave behind a
772 * few untouched non-pivot tuples. The final deduplication pass won't free
773 * any space -- it will skip over everything without merging anything (it
774 * retraces the steps of the penultimate pass).
775 *
776 * Fortunately, having several passes isn't too expensive. Each pass (after
777 * the first pass) won't spend many cycles on the large posting list tuples
778 * left by previous passes. Each pass will find a large contiguous group of
779 * smaller duplicate tuples to merge together at the end of the page.
780 */
781static bool
782_bt_do_singleval(Relation rel, Page page, BTDedupState state,
783 OffsetNumber minoff, IndexTuple newitem)
784{
785 int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
786 ItemId itemid;
787 IndexTuple itup;
788
789 itemid = PageGetItemId(page, minoff);
790 itup = (IndexTuple) PageGetItem(page, itemid);
791
792 if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
793 {
794 itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
795 itup = (IndexTuple) PageGetItem(page, itemid);
796
797 if (_bt_keep_natts_fast(rel, newitem, itup) > nkeyatts)
798 return true;
799 }
800
801 return false;
802}
803
804/*
805 * Lower maxpostingsize when using "single value" strategy, to avoid a sixth
806 * and final maxpostingsize-capped tuple. The sixth and final posting list
807 * tuple will end up somewhat smaller than the first five. (Note: The first
808 * five tuples could actually just be very large duplicate tuples that
809 * couldn't be merged together at all. Deduplication will simply not modify
810 * the page when that happens.)
811 *
812 * When there are six posting lists on the page (after current deduplication
813 * pass goes on to create/observe a sixth very large tuple), caller should end
814 * its deduplication pass. It isn't useful to try to deduplicate items that
815 * are supposed to end up on the new right sibling page following the
816 * anticipated page split. A future deduplication pass of future right
817 * sibling page might take care of it. (This is why the first single value
818 * strategy deduplication pass for a given leaf page will generally find only
819 * plain non-pivot tuples -- see _bt_do_singleval() comments.)
820 */
821static void
822_bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz)
823{
824 Size leftfree;
825 int reduction;
826
827 /* This calculation needs to match nbtsplitloc.c */
828 leftfree = PageGetPageSize(page) - SizeOfPageHeaderData -
829 MAXALIGN(sizeof(BTPageOpaqueData));
830 /* Subtract size of new high key (includes pivot heap TID space) */
831 leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData));
832
833 /*
834 * Reduce maxpostingsize by an amount equal to target free space on left
835 * half of page
836 */
837 reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0);
838 if (state->maxpostingsize > reduction)
839 state->maxpostingsize -= reduction;
840 else
841 state->maxpostingsize = 0;
842}
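BTREE_SINGLEVAL_FILLFACTOR is 96, so the reduction above reserves 4% of the anticipated left half for the final, smaller posting list tuple. A standalone sketch of the arithmetic (editorial, not part of nbtdedup.c; the byte figures are rough stand-ins for an 8 kB page):

#include <stdio.h>

#define SKETCH_SINGLEVAL_FILLFACTOR 96      /* BTREE_SINGLEVAL_FILLFACTOR in nbtree.h */

int
main(void)
{
    size_t leftfree = 8000;         /* hypothetical usable space on the left half */
    size_t maxpostingsize = 1300;   /* hypothetical current cap, roughly 1/6 of a page */

    /* Reserve (100 - fillfactor)% of the left half, i.e. 4% here */
    size_t reduction =
        (size_t) (leftfree * ((100 - SKETCH_SINGLEVAL_FILLFACTOR) / 100.0));

    if (maxpostingsize > reduction)
        maxpostingsize -= reduction;
    else
        maxpostingsize = 0;

    printf("reduction = %zu, lowered maxpostingsize = %zu\n",
           reduction, maxpostingsize);      /* prints 320 and 980 */
    return 0;
}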
843
844/*
845 * Build a posting list tuple based on caller's "base" index tuple and list of
846 * heap TIDs. When nhtids == 1, builds a standard non-pivot tuple without a
847 * posting list. (Posting list tuples can never have a single heap TID, partly
848 * because that ensures that deduplication always reduces final MAXALIGN()'d
849 * size of entire tuple.)
850 *
851 * Convention is that posting list starts at a MAXALIGN()'d offset (rather
852 * than a SHORTALIGN()'d offset), in line with the approach taken when
853 * appending a heap TID to new pivot tuple/high key during suffix truncation.
854 * This sometimes wastes a little space that was only needed as alignment
855 * padding in the original tuple. Following this convention simplifies the
856 * space accounting used when deduplicating a page (the same convention
857 * simplifies the accounting for choosing a point to split a page at).
858 *
859 * Note: Caller's "htids" array must be unique and already in ascending TID
860 * order. Any existing heap TIDs from "base" won't automatically appear in
861 * returned posting list tuple (they must be included in htids array.)
862 */
863IndexTuple
864_bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids)
865{
866 uint32 keysize,
867 newsize;
868 IndexTuple itup;
869
870 if (BTreeTupleIsPosting(base))
871 keysize = BTreeTupleGetPostingOffset(base);
872 else
873 keysize = IndexTupleSize(base);
874
875 Assert(!BTreeTupleIsPivot(base));
876 Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX);
877 Assert(keysize == MAXALIGN(keysize));
878
879 /* Determine final size of new tuple */
880 if (nhtids > 1)
881 newsize = MAXALIGN(keysize +
882 nhtids * sizeof(ItemPointerData));
883 else
884 newsize = keysize;
885
886 Assert(newsize <= INDEX_SIZE_MASK);
887 Assert(newsize == MAXALIGN(newsize));
888
889 /* Allocate memory using palloc0() (matches index_form_tuple()) */
890 itup = palloc0(newsize);
891 memcpy(itup, base, keysize);
892 itup->t_info &= ~INDEX_SIZE_MASK;
893 itup->t_info |= newsize;
894 if (nhtids > 1)
895 {
896 /* Form posting list tuple */
897 BTreeTupleSetPosting(itup, nhtids, keysize);
898 memcpy(BTreeTupleGetPosting(itup), htids,
899 sizeof(ItemPointerData) * nhtids);
900 Assert(_bt_posting_valid(itup));
901 }
902 else
903 {
904 /* Form standard non-pivot tuple */
905 itup->t_info &= ~INDEX_ALT_TID_MASK;
906 ItemPointerCopy(htids, &itup->t_tid);
907 Assert(ItemPointerIsValid(&itup->t_tid));
908 }
909
910 return itup;
911}
912
913/*
914 * Generate a replacement tuple by "updating" a posting list tuple so that it
915 * no longer has TIDs that need to be deleted.
916 *
917 * Used by both VACUUM and index deletion. Caller's vacposting argument
918 * points to the existing posting list tuple to be updated.
919 *
920 * On return, caller's vacposting argument will point to final "updated"
921 * tuple, which will be palloc()'d in caller's memory context.
922 */
923void
924_bt_update_posting(BTVacuumPosting vacposting)
925{
926 IndexTuple origtuple = vacposting->itup;
927 uint32 keysize,
928 newsize;
929 IndexTuple itup;
930 int nhtids;
931 int ui,
932 d;
933 ItemPointer htids;
934
935 nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids;
936
937 Assert(_bt_posting_valid(origtuple));
938 Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple));
939
940 /*
941 * Determine final size of new tuple.
942 *
943 * This calculation needs to match the code used within _bt_form_posting()
944 * for new posting list tuples. We avoid calling _bt_form_posting() here
945 * to save ourselves a second memory allocation for a htids workspace.
946 */
947 keysize = BTreeTupleGetPostingOffset(origtuple);
948 if (nhtids > 1)
949 newsize = MAXALIGN(keysize +
950 nhtids * sizeof(ItemPointerData));
951 else
952 newsize = keysize;
953
954 Assert(newsize <= INDEX_SIZE_MASK);
955 Assert(newsize == MAXALIGN(newsize));
956
957 /* Allocate memory using palloc0() (matches index_form_tuple()) */
958 itup = palloc0(newsize);
959 memcpy(itup, origtuple, keysize);
960 itup->t_info &= ~INDEX_SIZE_MASK;
961 itup->t_info |= newsize;
962
963 if (nhtids > 1)
964 {
965 /* Form posting list tuple */
966 BTreeTupleSetPosting(itup, nhtids, keysize);
967 htids = BTreeTupleGetPosting(itup);
968 }
969 else
970 {
971 /* Form standard non-pivot tuple */
972 itup->t_info &= ~INDEX_ALT_TID_MASK;
973 htids = &itup->t_tid;
974 }
975
976 ui = 0;
977 d = 0;
978 for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++)
979 {
980 if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i)
981 {
982 d++;
983 continue;
984 }
985 htids[ui++] = *BTreeTupleGetPostingN(origtuple, i);
986 }
987 Assert(ui == nhtids);
988 Assert(d == vacposting->ndeletedtids);
989 Assert(nhtids == 1 || _bt_posting_valid(itup));
990 Assert(nhtids > 1 || ItemPointerIsValid(&itup->t_tid));
991
992 /* vacposting arg's itup will now point to updated version */
993 vacposting->itup = itup;
994}
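The heart of _bt_update_posting() is the two-cursor loop that copies every surviving TID while walking the ascending deletetids[] array. The same pattern in isolation, with plain ints standing in for heap TIDs (editorial sketch, not part of nbtdedup.c):

#include <stdio.h>

int
main(void)
{
    int orig[6] = {10, 11, 12, 13, 14, 15};     /* stands in for the original posting list */
    int deletetids[2] = {1, 4};                 /* posting offsets to drop, ascending */
    int ndeletedtids = 2;
    int result[6];
    int ui = 0;                                 /* next slot in the output */
    int d = 0;                                  /* next entry in deletetids[] */

    for (int i = 0; i < 6; i++)
    {
        if (d < ndeletedtids && deletetids[d] == i)
        {
            d++;                                /* skip this offset entirely */
            continue;
        }
        result[ui++] = orig[i];
    }

    /* ui == 4 and d == 2 here, mirroring the Asserts in _bt_update_posting() */
    for (int i = 0; i < ui; i++)
        printf("%d ", result[i]);               /* prints 10 12 13 15 */
    printf("\n");
    return 0;
}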
995
996/*
997 * Prepare for a posting list split by swapping heap TID in newitem with heap
998 * TID from original posting list (the 'oposting' heap TID located at offset
999 * 'postingoff'). Modifies newitem, so caller should pass their own private
1000 * copy that can safely be modified.
1001 *
1002 * Returns new posting list tuple, which is palloc()'d in caller's context.
1003 * This is guaranteed to be the same size as 'oposting'. Modified newitem is
1004 * what caller actually inserts. (This happens inside the same critical
1005 * section that performs an in-place update of old posting list using new
1006 * posting list returned here.)
1007 *
1008 * While the keys from newitem and oposting must be opclass equal, and must
1009 * generate identical output when run through the underlying type's output
1010 * function, it doesn't follow that their representations match exactly.
1011 * Caller must avoid assuming that there can't be representational differences
1012 * that make datums from oposting bigger or smaller than the corresponding
1013 * datums from newitem. For example, differences in TOAST input state might
1014 * break a faulty assumption about tuple size (the executor is entitled to
1015 * apply TOAST compression based on its own criteria). It also seems possible
1016 * that further representational variation will be introduced in the future,
1017 * in order to support nbtree features like page-level prefix compression.
1018 *
1019 * See nbtree/README for details on the design of posting list splits.
1020 */
1021IndexTuple
1022_bt_swap_posting(IndexTuple newitem, IndexTuple oposting, int postingoff)
1023{
1024 int nhtids;
1025 char *replacepos;
1026 char *replaceposright;
1027 Size nmovebytes;
1028 IndexTuple nposting;
1029
1030 nhtids = BTreeTupleGetNPosting(oposting);
1031 Assert(_bt_posting_valid(oposting));
1032
1033 /*
1034 * The postingoff argument originated as a _bt_binsrch_posting() return
1035 * value. It will be 0 in the event of corruption that makes a leaf page
1036 * contain a non-pivot tuple that's somehow identical to newitem (no two
1037 * non-pivot tuples should ever have the same TID). This has been known
1038 * to happen in the field from time to time.
1039 *
1040 * Perform a basic sanity check to catch this case now.
1041 */
1042 if (!(postingoff > 0 && postingoff < nhtids))
1043 elog(ERROR, "posting list tuple with %d items cannot be split at offset %d",
1044 nhtids, postingoff);
1045
1046 /*
1047 * Move item pointers in posting list to make a gap for the new item's
1048 * heap TID. We shift TIDs one place to the right, losing original
1049 * rightmost TID. (nmovebytes must not include TIDs to the left of
1050 * postingoff, nor the existing rightmost/max TID that gets overwritten.)
1051 */
1052 nposting = CopyIndexTuple(oposting);
1053 replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff);
1054 replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1);
1055 nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData);
1056 memmove(replaceposright, replacepos, nmovebytes);
1057
1058 /* Fill the gap at postingoff with TID of new item (original new TID) */
1059 Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem));
1060 ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos);
1061
1062 /* Now copy oposting's rightmost/max TID into new item (final new TID) */
1063 ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid);
1064
1065 Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(nposting),
1066 BTreeTupleGetHeapTID(newitem)) < 0);
1067 Assert(_bt_posting_valid(nposting));
1068
1069 return nposting;
1070}
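The shift-and-swap above is easier to see with plain ints standing in for heap TIDs: the new TID is slotted in at postingoff, everything to its right moves one place, and the displaced rightmost TID is handed back to newitem. Editorial sketch, not part of nbtdedup.c:

#include <stdio.h>
#include <string.h>

int
main(void)
{
    int posting[5] = {10, 20, 30, 40, 50};      /* copy of the original posting list */
    int nhtids = 5;
    int newitem_tid = 25;                       /* belongs between 20 and 30 */
    int postingoff = 2;                         /* _bt_binsrch_posting()-style offset */

    int oldmax = posting[nhtids - 1];           /* the real code reads this from oposting */

    /* Shift TIDs at postingoff .. nhtids-2 one place right, dropping the old max */
    memmove(&posting[postingoff + 1], &posting[postingoff],
            (nhtids - postingoff - 1) * sizeof(int));

    /* Fill the gap with the new item's TID, then give the old max to newitem */
    posting[postingoff] = newitem_tid;
    newitem_tid = oldmax;

    for (int i = 0; i < nhtids; i++)
        printf("%d ", posting[i]);              /* prints 10 20 25 30 40 */
    printf("/ newitem now carries %d\n", newitem_tid);
    return 0;
}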
1071
1072/*
1073 * Verify posting list invariants for "posting", which must be a posting list
1074 * tuple. Used within assertions.
1075 */
1076#ifdef USE_ASSERT_CHECKING
1077static bool
1078_bt_posting_valid(IndexTuple posting)
1079{
1080 ItemPointerData last;
1081 ItemPointer htid;
1082
1083 if (!BTreeTupleIsPosting(posting) || BTreeTupleGetNPosting(posting) < 2)
1084 return false;
1085
1086 /* Remember first heap TID for loop */
1087 ItemPointerCopy(BTreeTupleGetHeapTID(posting), &last);
1088 if (!ItemPointerIsValid(&last))
1089 return false;
1090
1091 /* Iterate, starting from second TID */
1092 for (int i = 1; i < BTreeTupleGetNPosting(posting); i++)
1093 {
1094 htid = BTreeTupleGetPostingN(posting, i);
1095
1096 if (!ItemPointerIsValid(htid))
1097 return false;
1098 if (ItemPointerCompare(htid, &last) <= 0)
1099 return false;
1100 ItemPointerCopy(htid, &last);
1101 }
1102
1103 return true;
1104}
1105#endif