PostgreSQL Source Code git master
nbtsplitloc.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/tableam.h"
#include "common/int.h"
Include dependency graph for nbtsplitloc.c:


Data Structures

struct  SplitPoint
 
struct  FindSplitData
 

Macros

#define LEAF_SPLIT_DISTANCE   0.050
 
#define INTERNAL_SPLIT_DISTANCE   0.075
 

Enumerations

enum  FindSplitStrat { SPLIT_DEFAULT , SPLIT_MANY_DUPLICATES , SPLIT_SINGLE_VALUE }
 

Functions

static void _bt_recsplitloc (FindSplitData *state, OffsetNumber firstrightoff, bool newitemonleft, int olddataitemstoleft, Size firstrightofforigpagetuplesz)
 
static void _bt_deltasortsplits (FindSplitData *state, double fillfactormult, bool usemult)
 
static int _bt_splitcmp (const void *arg1, const void *arg2)
 
static bool _bt_afternewitemoff (FindSplitData *state, OffsetNumber maxoff, int leaffillfactor, bool *usemult)
 
static bool _bt_adjacenthtid (const ItemPointerData *lowhtid, const ItemPointerData *highhtid)
 
static OffsetNumber _bt_bestsplitloc (FindSplitData *state, int perfectpenalty, bool *newitemonleft, FindSplitStrat strategy)
 
static int _bt_defaultinterval (FindSplitData *state)
 
static int _bt_strategy (FindSplitData *state, SplitPoint *leftpage, SplitPoint *rightpage, FindSplitStrat *strategy)
 
static void _bt_interval_edges (FindSplitData *state, SplitPoint **leftinterval, SplitPoint **rightinterval)
 
static int _bt_split_penalty (FindSplitData *state, SplitPoint *split)
 
static IndexTuple _bt_split_lastleft (FindSplitData *state, SplitPoint *split)
 
static IndexTuple _bt_split_firstright (FindSplitData *state, SplitPoint *split)
 
OffsetNumber _bt_findsplitloc (Relation rel, Page origpage, OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, bool *newitemonleft)
 

Macro Definition Documentation

◆ INTERNAL_SPLIT_DISTANCE

#define INTERNAL_SPLIT_DISTANCE   0.075

Definition at line 851 of file nbtsplitloc.c.

◆ LEAF_SPLIT_DISTANCE

#define LEAF_SPLIT_DISTANCE   0.050

Definition at line 850 of file nbtsplitloc.c.
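
Both split-distance macros express a fraction of the page's total data-item size: they bound how far a candidate split point's free-space figures may drift from those of the most evenly balanced split before _bt_defaultinterval() drops it from the default split interval. The following standalone snippet (not part of nbtsplitloc.c; the byte figure is an assumption for illustration) works through that arithmetic:

#include <stdio.h>

int main(void)
{
    /* Assume roughly 8000 bytes of non-pivot tuples on an 8KB leaf page */
    int olddataitemstotal = 8000;

    int leaf_tolerance = (int) (olddataitemstotal * 0.050);     /* 400 bytes */
    int internal_tolerance = (int) (olddataitemstotal * 0.075); /* 600 bytes */

    /*
     * _bt_defaultinterval() keeps a candidate split in the default interval
     * only while its leftfree and rightfree values stay within this
     * tolerance of the values recorded for the most balanced split.
     */
    printf("leaf: %d bytes, internal: %d bytes\n",
           leaf_tolerance, internal_tolerance);
    return 0;
}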

Enumeration Type Documentation

◆ FindSplitStrat

Enumerator
SPLIT_DEFAULT 
SPLIT_MANY_DUPLICATES 
SPLIT_SINGLE_VALUE 

Definition at line 21 of file nbtsplitloc.c.

22{
23 /* strategy for searching through materialized list of split points */
24 SPLIT_DEFAULT, /* give some weight to truncation */
25 SPLIT_MANY_DUPLICATES, /* find minimally distinguishing point */
26 SPLIT_SINGLE_VALUE, /* leave left page almost full */
27} FindSplitStrat;

Function Documentation

◆ _bt_adjacenthtid()

static bool _bt_adjacenthtid ( const ItemPointerData *  lowhtid,
const ItemPointerData *  highhtid 
)
static

Definition at line 750 of file nbtsplitloc.c.

751{
752 BlockNumber lowblk,
753 highblk;
754
755 lowblk = ItemPointerGetBlockNumber(lowhtid);
756 highblk = ItemPointerGetBlockNumber(highhtid);
757
758 /* Make optimistic assumption of adjacency when heap blocks match */
759 if (lowblk == highblk)
760 return true;
761
762 /* When heap block one up, second offset should be FirstOffsetNumber */
763 if (lowblk + 1 == highblk &&
764 ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
765 return true;
766
767 return false;
768}

References FirstOffsetNumber, ItemPointerGetBlockNumber(), and ItemPointerGetOffsetNumber().

Referenced by _bt_afternewitemoff().
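
Because _bt_adjacenthtid() is static to nbtsplitloc.c it cannot be called from other code; the standalone sketch below (not part of PostgreSQL) simply restates the same adjacency rule with plain (block, offset) pairs so the two accepted cases are easy to see:

#include <stdbool.h>
#include <stdio.h>

/* Restatement of the _bt_adjacenthtid() rule; 1 stands in for FirstOffsetNumber */
static bool
adjacent_htid(unsigned lowblk, unsigned lowoff, unsigned highblk, unsigned highoff)
{
    (void) lowoff;              /* the low offset plays no part in the test */

    if (lowblk == highblk)
        return true;            /* same heap block: assume adjacency */
    if (lowblk + 1 == highblk && highoff == 1)
        return true;            /* next block, offset restarted at the first slot */
    return false;
}

int main(void)
{
    printf("%d\n", adjacent_htid(41, 17, 42, 1));   /* 1: next block, first offset */
    printf("%d\n", adjacent_htid(41, 17, 43, 1));   /* 0: a whole heap block was skipped */
    return 0;
}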

◆ _bt_afternewitemoff()

static bool _bt_afternewitemoff ( FindSplitData *  state,
OffsetNumber  maxoff,
int  leaffillfactor,
bool *  usemult 
)
static

Definition at line 631 of file nbtsplitloc.c.

633{
634 int16 nkeyatts;
635 ItemId itemid;
636 IndexTuple tup;
637 int keepnatts;
638
639 Assert(state->is_leaf && !state->is_rightmost);
640
641 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
642
643 /* Single key indexes not considered here */
644 if (nkeyatts == 1)
645 return false;
646
647 /* Ascending insertion pattern never inferred when new item is first */
648 if (state->newitemoff == P_FIRSTKEY)
649 return false;
650
651 /*
652 * Only apply optimization on pages with equisized tuples, since ordinal
653 * keys are likely to be fixed-width. Testing if the new tuple is
654 * variable width directly might also work, but that fails to apply the
655 * optimization to indexes with a numeric_ops attribute.
656 *
657 * Conclude that page has equisized tuples when the new item is the same
658 * width as the smallest item observed during pass over page, and other
659 * non-pivot tuples must be the same width as well. (Note that the
660 * possibly-truncated existing high key isn't counted in
661 * olddataitemstotal, and must be subtracted from maxoff.)
662 */
663 if (state->newitemsz != state->minfirstrightsz)
664 return false;
665 if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
666 return false;
667
668 /*
669 * Avoid applying optimization when tuples are wider than a tuple
670 * consisting of two non-NULL int8/int64 attributes (or four non-NULL
671 * int4/int32 attributes)
672 */
673 if (state->newitemsz >
674 MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
675 sizeof(ItemIdData))
676 return false;
677
678 /*
679 * At least the first attribute's value must be equal to the corresponding
680 * value in previous tuple to apply optimization. New item cannot be a
681 * duplicate, either.
682 *
683 * Handle case where new item is to the right of all items on the existing
684 * page. This is suggestive of monotonically increasing insertions in
685 * itself, so the "heap TID adjacency" test is not applied here.
686 */
687 if (state->newitemoff > maxoff)
688 {
689 itemid = PageGetItemId(state->origpage, maxoff);
690 tup = (IndexTuple) PageGetItem(state->origpage, itemid);
691 keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
692
693 if (keepnatts > 1 && keepnatts <= nkeyatts)
694 {
695 *usemult = true;
696 return true;
697 }
698
699 return false;
700 }
701
702 /*
703 * "Low cardinality leading column, high cardinality suffix column"
704 * indexes with a random insertion pattern (e.g., an index with a boolean
705 * column, such as an index on '(book_is_in_print, book_isbn)') present us
706 * with a risk of consistently misapplying the optimization. We're
707 * willing to accept very occasional misapplication of the optimization,
708 * provided the cases where we get it wrong are rare and self-limiting.
709 *
710 * Heap TID adjacency strongly suggests that the item just to the left was
711 * inserted very recently, which limits overapplication of the
712 * optimization. Besides, all inappropriate cases triggered here will
713 * still split in the middle of the page on average.
714 */
715 itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
716 tup = (IndexTuple) PageGetItem(state->origpage, itemid);
717 /* Do cheaper test first */
718 if (BTreeTupleIsPosting(tup) ||
719 !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
720 return false;
721 /* Check same conditions as rightmost item case, too */
722 keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
723
724 if (keepnatts > 1 && keepnatts <= nkeyatts)
725 {
726 double interp = (double) state->newitemoff / ((double) maxoff + 1);
727 double leaffillfactormult = (double) leaffillfactor / 100.0;
728
729 /*
730 * Don't allow caller to split after a new item when it will result in
731 * a split point to the right of the point that a leaf fillfactor
732 * split would use -- have caller apply leaf fillfactor instead
733 */
734 *usemult = interp > leaffillfactormult;
735
736 return true;
737 }
738
739 return false;
740}

References _bt_adjacenthtid(), _bt_keep_natts_fast(), Assert(), BTreeTupleIsPosting(), IndexRelationGetNumberOfKeyAttributes, MAXALIGN, OffsetNumberPrev, P_FIRSTKEY, PageGetItem(), PageGetItemId(), and IndexTupleData::t_tid.

Referenced by _bt_findsplitloc().
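
The final branch of _bt_afternewitemoff() chooses between splitting exactly after the new item and falling back to the leaf fillfactor by interpolating the new item's position on the page. A worked sketch of that comparison (not part of nbtsplitloc.c; newitemoff and maxoff are assumed values, 90 is the default btree fillfactor):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    int newitemoff = 95;        /* assumed insert position */
    int maxoff = 100;           /* assumed number of existing data items */
    int leaffillfactor = 90;    /* default btree fillfactor */

    double interp = (double) newitemoff / ((double) maxoff + 1);  /* ~0.94 */
    double leaffillfactormult = (double) leaffillfactor / 100.0;  /* 0.90 */
    bool usemult = interp > leaffillfactormult;                   /* true */

    /*
     * usemult == true: splitting right after the new item would land to the
     * right of an ordinary leaf-fillfactor split, so the caller applies the
     * fillfactor multiplier instead of the exact split point.
     */
    printf("interp=%.2f usemult=%d\n", interp, usemult);
    return 0;
}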

◆ _bt_bestsplitloc()

static OffsetNumber _bt_bestsplitloc ( FindSplitData *  state,
int  perfectpenalty,
bool *  newitemonleft,
FindSplitStrat  strategy 
)
static

Definition at line 789 of file nbtsplitloc.c.

791{
792 int bestpenalty,
793 lowsplit;
794 int highsplit = Min(state->interval, state->nsplits);
795 SplitPoint *final;
796
797 bestpenalty = INT_MAX;
798 lowsplit = 0;
799 for (int i = lowsplit; i < highsplit; i++)
800 {
801 int penalty;
802
803 penalty = _bt_split_penalty(state, state->splits + i);
804
805 if (penalty < bestpenalty)
806 {
807 bestpenalty = penalty;
808 lowsplit = i;
809 }
810
811 if (penalty <= perfectpenalty)
812 break;
813 }
814
815 final = &state->splits[lowsplit];
816
817 /*
818 * There is a risk that the "many duplicates" strategy will repeatedly do
819 * the wrong thing when there are monotonically decreasing insertions to
820 * the right of a large group of duplicates. Repeated splits could leave
821 * a succession of right half pages with free space that can never be
822 * used. This must be avoided.
823 *
824 * Consider the example of the leftmost page in a single integer attribute
825 * NULLS FIRST index which is almost filled with NULLs. Monotonically
826 * decreasing integer insertions might cause the same leftmost page to
827 * split repeatedly at the same point. Each split derives its new high
828 * key from the lowest current value to the immediate right of the large
829 * group of NULLs, which will always be higher than all future integer
830 * insertions, directing all future integer insertions to the same
831 * leftmost page.
832 */
833 if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
834 !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
835 final->firstrightoff < state->newitemoff + 9)
836 {
837 /*
838 * Avoid the problem by performing a 50:50 split when the new item is
839 * just to the right of the would-be "many duplicates" split point.
840 * (Note that the test used for an insert that is "just to the right"
841 * of the split point is conservative.)
842 */
843 final = &state->splits[0];
844 }
845
846 *newitemonleft = final->newitemonleft;
847 return final->firstrightoff;
848}

References _bt_split_penalty(), final, i, Min, and SPLIT_MANY_DUPLICATES.

Referenced by _bt_findsplitloc().

◆ _bt_defaultinterval()

static int _bt_defaultinterval ( FindSplitData *  state)
static

Definition at line 877 of file nbtsplitloc.c.

878{
879 SplitPoint *spaceoptimal;
880 int16 tolerance,
881 lowleftfree,
882 lowrightfree,
883 highleftfree,
884 highrightfree;
885
886 /*
887 * Determine leftfree and rightfree values that are higher and lower than
888 * we're willing to tolerate. Note that the final split interval will be
889 * about 10% of nsplits in the common case where all non-pivot tuples
890 * (data items) from a leaf page are uniformly sized. We're a bit more
891 * aggressive when splitting internal pages.
892 */
893 if (state->is_leaf)
894 tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
895 else
896 tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
897
898 /* First candidate split point is the most evenly balanced */
899 spaceoptimal = state->splits;
900 lowleftfree = spaceoptimal->leftfree - tolerance;
901 lowrightfree = spaceoptimal->rightfree - tolerance;
902 highleftfree = spaceoptimal->leftfree + tolerance;
903 highrightfree = spaceoptimal->rightfree + tolerance;
904
905 /*
906 * Iterate through split points, starting from the split immediately after
907 * 'spaceoptimal'. Find the first split point that divides free space so
908 * unevenly that including it in the split interval would be unacceptable.
909 */
910 for (int i = 1; i < state->nsplits; i++)
911 {
912 SplitPoint *split = state->splits + i;
913
914 /* Cannot use curdelta here, since its value is often weighted */
915 if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
916 split->leftfree > highleftfree || split->rightfree > highrightfree)
917 return i;
918 }
919
920 return state->nsplits;
921}

References i, INTERNAL_SPLIT_DISTANCE, LEAF_SPLIT_DISTANCE, SplitPoint::leftfree, and SplitPoint::rightfree.

Referenced by _bt_findsplitloc().

◆ _bt_deltasortsplits()

static void _bt_deltasortsplits ( FindSplitData *  state,
double  fillfactormult,
bool  usemult 
)
static

Definition at line 567 of file nbtsplitloc.c.

569{
570 for (int i = 0; i < state->nsplits; i++)
571 {
572 SplitPoint *split = state->splits + i;
573 int16 delta;
574
575 if (usemult)
576 delta = fillfactormult * split->leftfree -
577 (1.0 - fillfactormult) * split->rightfree;
578 else
579 delta = split->leftfree - split->rightfree;
580
581 if (delta < 0)
582 delta = -delta;
583
584 /* Save delta */
585 split->curdelta = delta;
586 }
587
588 qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
589}

References _bt_splitcmp(), SplitPoint::curdelta, i, if(), SplitPoint::leftfree, qsort, and SplitPoint::rightfree.

Referenced by _bt_findsplitloc().
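
The delta given to each candidate is either the plain absolute free-space imbalance or, when usemult is set, a fillfactor-weighted variant. A small standalone sketch (not part of nbtsplitloc.c; the free-space numbers are assumptions) shows both forms:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int leftfree = 3000, rightfree = 5000;   /* assumed bytes free in each half */
    double fillfactormult = 0.90;

    int plain = abs(leftfree - rightfree);                          /* 2000 */
    int weighted = abs((int) (fillfactormult * leftfree -
                              (1.0 - fillfactormult) * rightfree)); /* 2200 */

    /*
     * Smaller deltas sort first: after the qsort in _bt_deltasortsplits(),
     * state->splits[0] is the most balanced (or, with usemult, the most
     * fillfactor-respecting) candidate split.
     */
    printf("plain=%d weighted=%d\n", plain, weighted);
    return 0;
}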

◆ _bt_findsplitloc()

OffsetNumber _bt_findsplitloc ( Relation  rel,
Page  origpage,
OffsetNumber  newitemoff,
Size  newitemsz,
IndexTuple  newitem,
bool *  newitemonleft 
)

Definition at line 130 of file nbtsplitloc.c.

136{
137 BTPageOpaque opaque;
138 int leftspace,
139 rightspace,
140 olddataitemstotal,
141 olddataitemstoleft,
142 perfectpenalty,
143 leaffillfactor;
144 FindSplitData state;
145 FindSplitStrat strategy;
146 ItemId itemid;
147 OffsetNumber offnum,
148 maxoff,
149 firstrightoff;
150 double fillfactormult;
151 bool usemult;
152 SplitPoint leftpage,
153 rightpage;
154
155 opaque = BTPageGetOpaque(origpage);
156 maxoff = PageGetMaxOffsetNumber(origpage);
157
158 /* Total free space available on a btree page, after fixed overhead */
159 leftspace = rightspace =
160 PageGetPageSize(origpage) - SizeOfPageHeaderData -
161 MAXALIGN(sizeof(BTPageOpaqueData));
162
163 /* The right page will have the same high key as the old page */
164 if (!P_RIGHTMOST(opaque))
165 {
166 itemid = PageGetItemId(origpage, P_HIKEY);
167 rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
168 sizeof(ItemIdData));
169 }
170
171 /* Count up total space in data items before actually scanning 'em */
172 olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
173 leaffillfactor = BTGetFillFactor(rel);
174
175 /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
176 newitemsz += sizeof(ItemIdData);
177 state.rel = rel;
178 state.origpage = origpage;
179 state.newitem = newitem;
180 state.newitemsz = newitemsz;
181 state.is_leaf = P_ISLEAF(opaque);
182 state.is_rightmost = P_RIGHTMOST(opaque);
183 state.leftspace = leftspace;
184 state.rightspace = rightspace;
185 state.olddataitemstotal = olddataitemstotal;
186 state.minfirstrightsz = SIZE_MAX;
187 state.newitemoff = newitemoff;
188
189 /* newitem cannot be a posting list item */
190 Assert(!BTreeTupleIsPosting(newitem));
191
192 /*
193 * nsplits should never exceed maxoff because there will be at most as
194 * many candidate split points as there are points _between_ tuples, once
195 * you imagine that the new item is already on the original page (the
196 * final number of splits may be slightly lower because not all points
197 * between tuples will be legal).
198 */
199 state.maxsplits = maxoff;
200 state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
201 state.nsplits = 0;
202
203 /*
204 * Scan through the data items and calculate space usage for a split at
205 * each possible position
206 */
207 olddataitemstoleft = 0;
208
209 for (offnum = P_FIRSTDATAKEY(opaque);
210 offnum <= maxoff;
211 offnum = OffsetNumberNext(offnum))
212 {
213 Size itemsz;
214
215 itemid = PageGetItemId(origpage, offnum);
216 itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
217
218 /*
219 * When item offset number is not newitemoff, neither side of the
220 * split can be newitem. Record a split after the previous data item
221 * from original page, but before the current data item from original
222 * page. (_bt_recsplitloc() will reject the split when there are no
223 * previous items, which we rely on.)
224 */
225 if (offnum < newitemoff)
226 _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
227 else if (offnum > newitemoff)
228 _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
229 else
230 {
231 /*
232 * Record a split after all "offnum < newitemoff" original page
233 * data items, but before newitem
234 */
235 _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
236
237 /*
238 * Record a split after newitem, but before data item from
239 * original page at offset newitemoff/current offset
240 */
241 _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
242 }
243
244 olddataitemstoleft += itemsz;
245 }
246
247 /*
248 * Record a split after all original page data items, but before newitem.
249 * (Though only when it's possible that newitem will end up alone on new
250 * right page.)
251 */
252 Assert(olddataitemstoleft == olddataitemstotal);
253 if (newitemoff > maxoff)
254 _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
255
256 /*
257 * I believe it is not possible to fail to find a feasible split, but just
258 * in case ...
259 */
260 if (state.nsplits == 0)
261 elog(ERROR, "could not find a feasible split point for index \"%s\"",
262 RelationGetRelationName(rel));
263
264 /*
265 * Start search for a split point among list of legal split points. Give
266 * primary consideration to equalizing available free space in each half
267 * of the split initially (start with default strategy), while applying
268 * rightmost and split-after-new-item optimizations where appropriate.
269 * Either of the two other fallback strategies may be required for cases
270 * with a large number of duplicates around the original/space-optimal
271 * split point.
272 *
273 * Default strategy gives some weight to suffix truncation in deciding a
274 * split point on leaf pages. It attempts to select a split point where a
275 * distinguishing attribute appears earlier in the new high key for the
276 * left side of the split, in order to maximize the number of trailing
277 * attributes that can be truncated away. Only candidate split points
278 * that imply an acceptable balance of free space on each side are
279 * considered. See _bt_defaultinterval().
280 */
281 if (!state.is_leaf)
282 {
283 /* fillfactormult only used on rightmost page */
284 usemult = state.is_rightmost;
285 fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
286 }
287 else if (state.is_rightmost)
288 {
289 /* Rightmost leaf page -- fillfactormult always used */
290 usemult = true;
291 fillfactormult = leaffillfactor / 100.0;
292 }
293 else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
294 {
295 /*
296 * New item inserted at rightmost point among a localized grouping on
297 * a leaf page -- apply "split after new item" optimization, either by
298 * applying leaf fillfactor multiplier, or by choosing the exact split
299 * point that leaves newitem as lastleft. (usemult is set for us.)
300 */
301 if (usemult)
302 {
303 /* fillfactormult should be set based on leaf fillfactor */
304 fillfactormult = leaffillfactor / 100.0;
305 }
306 else
307 {
308 /* find precise split point after newitemoff */
309 for (int i = 0; i < state.nsplits; i++)
310 {
311 SplitPoint *split = state.splits + i;
312
313 if (split->newitemonleft &&
314 newitemoff == split->firstrightoff)
315 {
316 pfree(state.splits);
317 *newitemonleft = true;
318 return newitemoff;
319 }
320 }
321
322 /*
323 * Cannot legally split after newitemoff; proceed with split
324 * without using fillfactor multiplier. This is defensive, and
325 * should never be needed in practice.
326 */
327 fillfactormult = 0.50;
328 }
329 }
330 else
331 {
332 /* Other leaf page. 50:50 page split. */
333 usemult = false;
334 /* fillfactormult not used, but be tidy */
335 fillfactormult = 0.50;
336 }
337
338 /*
339 * Save leftmost and rightmost splits for page before original ordinal
340 * sort order is lost by delta/fillfactormult sort
341 */
342 leftpage = state.splits[0];
343 rightpage = state.splits[state.nsplits - 1];
344
345 /* Give split points a fillfactormult-wise delta, and sort on deltas */
346 _bt_deltasortsplits(&state, fillfactormult, usemult);
347
348 /* Determine split interval for default strategy */
349 state.interval = _bt_defaultinterval(&state);
350
351 /*
352 * Determine if default strategy/split interval will produce a
353 * sufficiently distinguishing split, or if we should change strategies.
354 * Alternative strategies change the range of split points that are
355 * considered acceptable (split interval), and possibly change
356 * fillfactormult, in order to deal with pages with a large number of
357 * duplicates gracefully.
358 *
359 * Pass low and high splits for the entire page (actually, they're for an
360 * imaginary version of the page that includes newitem). These are used
361 * when the initial split interval encloses split points that are full of
362 * duplicates, and we need to consider if it's even possible to avoid
363 * appending a heap TID.
364 */
365 perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
366
367 if (strategy == SPLIT_DEFAULT)
368 {
369 /*
370 * Default strategy worked out (always works out with internal page).
371 * Original split interval still stands.
372 */
373 }
374
375 /*
376 * Many duplicates strategy is used when a heap TID would otherwise be
377 * appended, but the page isn't completely full of logical duplicates.
378 *
379 * The split interval is widened to include all legal candidate split
380 * points. There might be as few as two distinct values in the whole-page
381 * split interval, though it's also possible that most of the values on
382 * the page are unique. The final split point will either be to the
383 * immediate left or to the immediate right of the group of duplicate
384 * tuples that enclose the first/delta-optimal split point (perfect
385 * penalty was set so that the lowest delta split point that avoids
386 * appending a heap TID will be chosen). Maximizing the number of
387 * attributes that can be truncated away is not a goal of the many
388 * duplicates strategy.
389 *
390 * Single value strategy is used when it is impossible to avoid appending
391 * a heap TID. It arranges to leave the left page very full. This
392 * maximizes space utilization in cases where tuples with the same
393 * attribute values span many pages. Newly inserted duplicates will tend
394 * to have higher heap TID values, so we'll end up splitting to the right
395 * consistently. (Single value strategy is harmless though not
396 * particularly useful with !heapkeyspace indexes.)
397 */
398 else if (strategy == SPLIT_MANY_DUPLICATES)
399 {
400 Assert(state.is_leaf);
401 /* Shouldn't try to truncate away extra user attributes */
402 Assert(perfectpenalty ==
403 IndexRelationGetNumberOfKeyAttributes(state.rel));
404 /* No need to resort splits -- no change in fillfactormult/deltas */
405 state.interval = state.nsplits;
406 }
407 else if (strategy == SPLIT_SINGLE_VALUE)
408 {
409 Assert(state.is_leaf);
410 /* Split near the end of the page */
411 usemult = true;
412 fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
413 /* Resort split points with new delta */
414 _bt_deltasortsplits(&state, fillfactormult, usemult);
415 /* Appending a heap TID is unavoidable, so interval of 1 is fine */
416 state.interval = 1;
417 }
418
419 /*
420 * Search among acceptable split points (using final split interval) for
421 * the entry that has the lowest penalty, and is therefore expected to
422 * maximize fan-out. Sets *newitemonleft for us.
423 */
424 firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
425 strategy);
426 pfree(state.splits);
427
428 return firstrightoff;
429}

References _bt_afternewitemoff(), _bt_bestsplitloc(), _bt_defaultinterval(), _bt_deltasortsplits(), _bt_recsplitloc(), _bt_strategy(), Assert(), BTGetFillFactor, BTPageGetOpaque, BTREE_NONLEAF_FILLFACTOR, BTREE_SINGLEVAL_FILLFACTOR, BTreeTupleIsPosting(), elog, ERROR, SplitPoint::firstrightoff, i, IndexRelationGetNumberOfKeyAttributes, ItemIdGetLength, MAXALIGN, SplitPoint::newitemonleft, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_ISLEAF, P_RIGHTMOST, PageGetExactFreeSpace(), PageGetItemId(), PageGetMaxOffsetNumber(), PageGetPageSize(), palloc(), pfree(), RelationGetRelationName, SizeOfPageHeaderData, SPLIT_DEFAULT, SPLIT_MANY_DUPLICATES, and SPLIT_SINGLE_VALUE.

Referenced by _bt_split().
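
A hedged sketch of the calling convention follows; the real caller is _bt_split() in nbtinsert.c, and the variable names here are placeholders rather than anything taken from that file. It is not runnable on its own, since it needs the PostgreSQL backend headers plus a live Relation and Page:

/* Sketch only -- assumes rel, origpage, newitemoff, newitemsz and newitem
 * are already set up the way the insertion code would have them. */
bool         newitemonleft;
OffsetNumber firstrightoff;

firstrightoff = _bt_findsplitloc(rel, origpage, newitemoff, newitemsz,
                                 newitem, &newitemonleft);

/*
 * Roughly: items at firstrightoff and beyond are destined for the new right
 * page, and newitemonleft reports whether the incoming tuple belongs in the
 * left half rather than the right.
 */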

◆ _bt_interval_edges()

static void _bt_interval_edges ( FindSplitData *  state,
SplitPoint **  leftinterval,
SplitPoint **  rightinterval 
)
static

Definition at line 1053 of file nbtsplitloc.c.

1055{
1056 int highsplit = Min(state->interval, state->nsplits);
1057 SplitPoint *deltaoptimal;
1058
1059 deltaoptimal = state->splits;
1060 *leftinterval = NULL;
1061 *rightinterval = NULL;
1062
1063 /*
1064 * Delta is an absolute distance to optimal split point, so both the
1065 * leftmost and rightmost split point will usually be at the end of the
1066 * array
1067 */
1068 for (int i = highsplit - 1; i >= 0; i--)
1069 {
1070 SplitPoint *distant = state->splits + i;
1071
1072 if (distant->firstrightoff < deltaoptimal->firstrightoff)
1073 {
1074 if (*leftinterval == NULL)
1075 *leftinterval = distant;
1076 }
1077 else if (distant->firstrightoff > deltaoptimal->firstrightoff)
1078 {
1079 if (*rightinterval == NULL)
1080 *rightinterval = distant;
1081 }
1082 else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
1083 {
1084 /*
1085 * "incoming tuple will become firstright" (distant) is to the
1086 * left of "incoming tuple will become lastleft" (delta-optimal)
1087 */
1088 Assert(distant->firstrightoff == state->newitemoff);
1089 if (*leftinterval == NULL)
1090 *leftinterval = distant;
1091 }
1092 else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
1093 {
1094 /*
1095 * "incoming tuple will become lastleft" (distant) is to the right
1096 * of "incoming tuple will become firstright" (delta-optimal)
1097 */
1098 Assert(distant->firstrightoff == state->newitemoff);
1099 if (*rightinterval == NULL)
1100 *rightinterval = distant;
1101 }
1102 else
1103 {
1104 /* There was only one or two splits in initial split interval */
1105 Assert(distant == deltaoptimal);
1106 if (*leftinterval == NULL)
1107 *leftinterval = distant;
1108 if (*rightinterval == NULL)
1109 *rightinterval = distant;
1110 }
1111
1112 if (*leftinterval && *rightinterval)
1113 return;
1114 }
1115
1116 Assert(false);
1117}

References Assert(), SplitPoint::firstrightoff, i, Min, and SplitPoint::newitemonleft.

Referenced by _bt_strategy().

◆ _bt_recsplitloc()

static void _bt_recsplitloc ( FindSplitData *  state,
OffsetNumber  firstrightoff,
bool  newitemonleft,
int  olddataitemstoleft,
Size  firstrightofforigpagetuplesz 
)
static

Definition at line 450 of file nbtsplitloc.c.

455{
456 int16 leftfree,
457 rightfree;
458 Size firstrightsz;
459 Size postingsz = 0;
460 bool newitemisfirstright;
461
462 /* Is the new item going to be split point's firstright tuple? */
463 newitemisfirstright = (firstrightoff == state->newitemoff &&
464 !newitemonleft);
465
466 if (newitemisfirstright)
467 firstrightsz = state->newitemsz;
468 else
469 {
470 firstrightsz = firstrightofforigpagetuplesz;
471
472 /*
473 * Calculate suffix truncation space saving when firstright tuple is a
474 * posting list tuple, though only when the tuple is over 64 bytes
475 * including line pointer overhead (arbitrary). This avoids accessing
476 * the tuple in cases where its posting list must be very small (if
477 * tuple has one at all).
478 *
479 * Note: We don't do this in the case where firstright tuple is
480 * newitem, since newitem cannot have a posting list.
481 */
482 if (state->is_leaf && firstrightsz > 64)
483 {
484 ItemId itemid;
485 IndexTuple newhighkey;
486
487 itemid = PageGetItemId(state->origpage, firstrightoff);
488 newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
489
490 if (BTreeTupleIsPosting(newhighkey))
491 postingsz = IndexTupleSize(newhighkey) -
492 BTreeTupleGetPostingOffset(newhighkey);
493 }
494 }
495
496 /* Account for all the old tuples */
497 leftfree = state->leftspace - olddataitemstoleft;
498 rightfree = state->rightspace -
499 (state->olddataitemstotal - olddataitemstoleft);
500
501 /*
502 * The first item on the right page becomes the high key of the left page;
503 * therefore it counts against left space as well as right space (we
504 * cannot assume that suffix truncation will make it any smaller). When
505 * index has included attributes, then those attributes of left page high
506 * key will be truncated leaving that page with slightly more free space.
507 * However, that shouldn't affect our ability to find valid split
508 * location, since we err in the direction of being pessimistic about free
509 * space on the left half. Besides, even when suffix truncation of
510 * non-TID attributes occurs, the new high key often won't even be a
511 * single MAXALIGN() quantum smaller than the firstright tuple it's based
512 * on.
513 *
514 * If we are on the leaf level, assume that suffix truncation cannot avoid
515 * adding a heap TID to the left half's new high key when splitting at the
516 * leaf level. In practice the new high key will often be smaller and
517 * will rarely be larger, but conservatively assume the worst case. We do
518 * go to the trouble of subtracting away posting list overhead, though
519 * only when it looks like it will make an appreciable difference.
520 * (Posting lists are the only case where truncation will typically make
521 * the final high key far smaller than firstright, so being a bit more
522 * precise there noticeably improves the balance of free space.)
523 */
524 if (state->is_leaf)
525 leftfree -= (int16) (firstrightsz +
526 MAXALIGN(sizeof(ItemPointerData)) -
527 postingsz);
528 else
529 leftfree -= (int16) firstrightsz;
530
531 /* account for the new item */
532 if (newitemonleft)
533 leftfree -= (int16) state->newitemsz;
534 else
535 rightfree -= (int16) state->newitemsz;
536
537 /*
538 * If we are not on the leaf level, we will be able to discard the key
539 * data from the first item that winds up on the right page.
540 */
541 if (!state->is_leaf)
542 rightfree += (int16) firstrightsz -
543 (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
544
545 /* Record split if legal */
546 if (leftfree >= 0 && rightfree >= 0)
547 {
548 Assert(state->nsplits < state->maxsplits);
549
550 /* Determine smallest firstright tuple size among legal splits */
551 state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
552
553 state->splits[state->nsplits].curdelta = 0;
554 state->splits[state->nsplits].leftfree = leftfree;
555 state->splits[state->nsplits].rightfree = rightfree;
556 state->splits[state->nsplits].firstrightoff = firstrightoff;
557 state->splits[state->nsplits].newitemonleft = newitemonleft;
558 state->nsplits++;
559 }
560}

References Assert(), BTreeTupleGetPostingOffset(), BTreeTupleIsPosting(), IndexTupleSize(), MAXALIGN, Min, PageGetItem(), and PageGetItemId().

Referenced by _bt_findsplitloc().

◆ _bt_split_firstright()

static IndexTuple _bt_split_firstright ( FindSplitData *  state,
SplitPoint *  split 
)
inline static

Definition at line 1176 of file nbtsplitloc.c.

1177{
1178 ItemId itemid;
1179
1180 if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
1181 return state->newitem;
1182
1183 itemid = PageGetItemId(state->origpage, split->firstrightoff);
1184 return (IndexTuple) PageGetItem(state->origpage, itemid);
1185}

References SplitPoint::firstrightoff, SplitPoint::newitemonleft, PageGetItem(), and PageGetItemId().

Referenced by _bt_split_penalty(), and _bt_strategy().

◆ _bt_split_lastleft()

static IndexTuple _bt_split_lastleft ( FindSplitData *  state,
SplitPoint *  split 
)
inline static

Definition at line 1160 of file nbtsplitloc.c.

1161{
1162 ItemId itemid;
1163
1164 if (split->newitemonleft && split->firstrightoff == state->newitemoff)
1165 return state->newitem;
1166
1167 itemid = PageGetItemId(state->origpage,
1168 OffsetNumberPrev(split->firstrightoff));
1169 return (IndexTuple) PageGetItem(state->origpage, itemid);
1170}

References SplitPoint::firstrightoff, SplitPoint::newitemonleft, OffsetNumberPrev, PageGetItem(), and PageGetItemId().

Referenced by _bt_split_penalty(), and _bt_strategy().

◆ _bt_split_penalty()

static int _bt_split_penalty ( FindSplitData *  state,
SplitPoint *  split 
)
inline static

Definition at line 1132 of file nbtsplitloc.c.

1133{
1134 IndexTuple lastleft;
1135 IndexTuple firstright;
1136
1137 if (!state->is_leaf)
1138 {
1139 ItemId itemid;
1140
1141 if (!split->newitemonleft &&
1142 split->firstrightoff == state->newitemoff)
1143 return state->newitemsz;
1144
1145 itemid = PageGetItemId(state->origpage, split->firstrightoff);
1146
1147 return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
1148 }
1149
1150 lastleft = _bt_split_lastleft(state, split);
1151 firstright = _bt_split_firstright(state, split);
1152
1153 return _bt_keep_natts_fast(state->rel, lastleft, firstright);
1154}

References _bt_keep_natts_fast(), _bt_split_firstright(), _bt_split_lastleft(), SplitPoint::firstrightoff, ItemIdGetLength, MAXALIGN, SplitPoint::newitemonleft, and PageGetItemId().

Referenced by _bt_bestsplitloc().
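
The penalty has different units on the two tree levels: for internal pages it is the byte size of the would-be firstright tuple including its line pointer (smaller separator keys give better fan-out), while for leaf pages it is the number of leading key attributes the new high key must keep, as reported by _bt_keep_natts_fast(). The standalone sketch below (not part of nbtsplitloc.c; the penalty values are assumed) shows how _bt_bestsplitloc() consumes those numbers:

#include <limits.h>
#include <stdio.h>

int main(void)
{
    int penalties[] = {3, 2, 2};    /* assumed leaf penalties for three candidates */
    int nsplits = 3;
    int perfectpenalty = 2;         /* e.g. what _bt_strategy() settled on */

    int bestpenalty = INT_MAX;
    int lowsplit = 0;

    for (int i = 0; i < nsplits; i++)
    {
        if (penalties[i] < bestpenalty)
        {
            bestpenalty = penalties[i];
            lowsplit = i;
        }
        if (penalties[i] <= perfectpenalty)
            break;                  /* good enough -- stop scanning early */
    }
    printf("chosen split: %d (penalty %d)\n", lowsplit, bestpenalty);  /* 1, 2 */
    return 0;
}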

◆ _bt_splitcmp()

static int _bt_splitcmp ( const void *  arg1,
const void *  arg2 
)
static

Definition at line 595 of file nbtsplitloc.c.

596{
597 SplitPoint *split1 = (SplitPoint *) arg1;
598 SplitPoint *split2 = (SplitPoint *) arg2;
599
600 return pg_cmp_s16(split1->curdelta, split2->curdelta);
601}

References SplitPoint::curdelta, and pg_cmp_s16().

Referenced by _bt_deltasortsplits().

◆ _bt_strategy()

static int _bt_strategy ( FindSplitData *  state,
SplitPoint *  leftpage,
SplitPoint *  rightpage,
FindSplitStrat *  strategy 
)
static

Definition at line 935 of file nbtsplitloc.c.

937{
938 IndexTuple leftmost,
939 rightmost;
940 SplitPoint *leftinterval,
941 *rightinterval;
942 int perfectpenalty;
943 int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
944
945 /* Assume that alternative strategy won't be used for now */
946 *strategy = SPLIT_DEFAULT;
947
948 /*
949 * Use smallest observed firstright item size for entire page (actually,
950 * entire imaginary version of page that includes newitem) as perfect
951 * penalty on internal pages. This can save cycles in the common case
952 * where most or all splits (not just splits within interval) have
953 * firstright tuples that are the same size.
954 */
955 if (!state->is_leaf)
956 return state->minfirstrightsz;
957
958 /*
959 * Use leftmost and rightmost tuples from leftmost and rightmost splits in
960 * current split interval
961 */
962 _bt_interval_edges(state, &leftinterval, &rightinterval);
963 leftmost = _bt_split_lastleft(state, leftinterval);
964 rightmost = _bt_split_firstright(state, rightinterval);
965
966 /*
967 * If initial split interval can produce a split point that will at least
968 * avoid appending a heap TID in new high key, we're done. Finish split
969 * with default strategy and initial split interval.
970 */
971 perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
972 if (perfectpenalty <= indnkeyatts)
973 return perfectpenalty;
974
975 /*
976 * Work out how caller should finish split when even their "perfect"
977 * penalty for initial/default split interval indicates that the interval
978 * does not contain even a single split that avoids appending a heap TID.
979 *
980 * Use the leftmost split's lastleft tuple and the rightmost split's
981 * firstright tuple to assess every possible split.
982 */
983 leftmost = _bt_split_lastleft(state, leftpage);
984 rightmost = _bt_split_firstright(state, rightpage);
985
986 /*
987 * If page (including new item) has many duplicates but is not entirely
988 * full of duplicates, a many duplicates strategy split will be performed.
989 * If page is entirely full of duplicates, a single value strategy split
990 * will be performed.
991 */
992 perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
993 if (perfectpenalty <= indnkeyatts)
994 {
995 *strategy = SPLIT_MANY_DUPLICATES;
996
997 /*
998 * Many duplicates strategy should split at either side of the group of
999 * duplicates that enclose the delta-optimal split point. Return
1000 * indnkeyatts rather than the true perfect penalty to make that
1001 * happen. (If perfectpenalty was returned here then low cardinality
1002 * composite indexes could have continual unbalanced splits.)
1003 *
1004 * Note that caller won't go through with a many duplicates split in
1005 * rare cases where it looks like there are ever-decreasing insertions
1006 * to the immediate right of the split point. This must happen just
1007 * before a final decision is made, within _bt_bestsplitloc().
1008 */
1009 return indnkeyatts;
1010 }
1011
1012 /*
1013 * Single value strategy is only appropriate with ever-increasing heap
1014 * TIDs; otherwise, original default strategy split should proceed to
1015 * avoid pathological performance. Use page high key to infer if this is
1016 * the rightmost page among pages that store the same duplicate value.
1017 * This should not prevent insertions of heap TIDs that are slightly out
1018 * of order from using single value strategy, since that's expected with
1019 * concurrent inserters of the same duplicate value.
1020 */
1021 else if (state->is_rightmost)
1022 *strategy = SPLIT_SINGLE_VALUE;
1023 else
1024 {
1025 ItemId itemid;
1026 IndexTuple hikey;
1027
1028 itemid = PageGetItemId(state->origpage, P_HIKEY);
1029 hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
1030 perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
1031 state->newitem);
1032 if (perfectpenalty <= indnkeyatts)
1033 *strategy = SPLIT_SINGLE_VALUE;
1034 else
1035 {
1036 /*
1037 * Have caller finish split using default strategy, since page
1038 * does not appear to be the rightmost page for duplicates of the
1039 * value the page is filled with
1040 */
1041 }
1042 }
1043
1044 return perfectpenalty;
1045}

References _bt_interval_edges(), _bt_keep_natts_fast(), _bt_split_firstright(), _bt_split_lastleft(), IndexRelationGetNumberOfKeyAttributes, P_HIKEY, PageGetItem(), PageGetItemId(), SPLIT_DEFAULT, SPLIT_MANY_DUPLICATES, and SPLIT_SINGLE_VALUE.

Referenced by _bt_findsplitloc().