PostgreSQL Source Code  git master
nbtsplitloc.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "storage/lmgr.h"
Include dependency graph for nbtsplitloc.c:

Go to the source code of this file.

Data Structures

struct  SplitPoint
 
struct  FindSplitData
 

Macros

#define MAX_LEAF_INTERVAL   9
 
#define MAX_INTERNAL_INTERVAL   18
 

Enumerations

enum  FindSplitStrat { SPLIT_DEFAULT, SPLIT_MANY_DUPLICATES, SPLIT_SINGLE_VALUE }
 

Functions

static void _bt_recsplitloc (FindSplitData *state, OffsetNumber firstoldonright, bool newitemonleft, int olddataitemstoleft, Size firstoldonrightsz)
 
static void _bt_deltasortsplits (FindSplitData *state, double fillfactormult, bool usemult)
 
static int _bt_splitcmp (const void *arg1, const void *arg2)
 
static bool _bt_afternewitemoff (FindSplitData *state, OffsetNumber maxoff, int leaffillfactor, bool *usemult)
 
static bool _bt_adjacenthtid (ItemPointer lowhtid, ItemPointer highhtid)
 
static OffsetNumber _bt_bestsplitloc (FindSplitData *state, int perfectpenalty, bool *newitemonleft, FindSplitStrat strategy)
 
static int _bt_strategy (FindSplitData *state, SplitPoint *leftpage, SplitPoint *rightpage, FindSplitStrat *strategy)
 
static void _bt_interval_edges (FindSplitData *state, SplitPoint **leftinterval, SplitPoint **rightinterval)
 
static int _bt_split_penalty (FindSplitData *state, SplitPoint *split)
 
static IndexTuple _bt_split_lastleft (FindSplitData *state, SplitPoint *split)
 
static IndexTuple _bt_split_firstright (FindSplitData *state, SplitPoint *split)
 
OffsetNumber _bt_findsplitloc (Relation rel, Page page, OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, bool *newitemonleft)
 

Macro Definition Documentation

◆ MAX_INTERNAL_INTERVAL

#define MAX_INTERNAL_INTERVAL   18

Definition at line 22 of file nbtsplitloc.c.

Referenced by _bt_findsplitloc().

◆ MAX_LEAF_INTERVAL

#define MAX_LEAF_INTERVAL   9

Definition at line 21 of file nbtsplitloc.c.

Referenced by _bt_bestsplitloc(), and _bt_findsplitloc().

Enumeration Type Documentation

◆ FindSplitStrat

Enumerator
SPLIT_DEFAULT 
SPLIT_MANY_DUPLICATES 
SPLIT_SINGLE_VALUE 

Definition at line 24 of file nbtsplitloc.c.

25 {
26  /* strategy for searching through materialized list of split points */
27  SPLIT_DEFAULT, /* give some weight to truncation */
28  SPLIT_MANY_DUPLICATES, /* find minimally distinguishing point */
29  SPLIT_SINGLE_VALUE /* leave left page almost full */
FindSplitStrat
Definition: nbtsplitloc.c:24

Function Documentation

◆ _bt_adjacenthtid()

static bool _bt_adjacenthtid ( ItemPointer  lowhtid,
ItemPointer  highhtid 
)
static

Definition at line 758 of file nbtsplitloc.c.

References FirstOffsetNumber, ItemPointerGetBlockNumber, and ItemPointerGetOffsetNumber.

Referenced by _bt_afternewitemoff().

759 {
760  BlockNumber lowblk,
761  highblk;
762 
763  lowblk = ItemPointerGetBlockNumber(lowhtid);
764  highblk = ItemPointerGetBlockNumber(highhtid);
765 
766  /* Make optimistic assumption of adjacency when heap blocks match */
767  if (lowblk == highblk)
768  return true;
769 
770  /* When heap block one up, second offset should be FirstOffsetNumber */
771  if (lowblk + 1 == highblk &&
773  return true;
774 
775  return false;
776 }
uint32 BlockNumber
Definition: block.h:31
#define FirstOffsetNumber
Definition: off.h:27
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98

◆ _bt_afternewitemoff()

static bool _bt_afternewitemoff ( FindSplitData state,
OffsetNumber  maxoff,
int  leaffillfactor,
bool usemult 
)
static

Definition at line 639 of file nbtsplitloc.c.

References _bt_adjacenthtid(), _bt_keep_natts_fast(), Assert, BTreeTupleIsPosting(), IndexRelationGetNumberOfKeyAttributes, FindSplitData::is_leaf, FindSplitData::is_rightmost, MAXALIGN, FindSplitData::minfirstrightsz, FindSplitData::newitem, FindSplitData::newitemoff, FindSplitData::newitemsz, OffsetNumberPrev, FindSplitData::olddataitemstotal, P_FIRSTKEY, FindSplitData::page, PageGetItem, PageGetItemId, FindSplitData::rel, and IndexTupleData::t_tid.

Referenced by _bt_findsplitloc().

641 {
642  int16 nkeyatts;
643  ItemId itemid;
644  IndexTuple tup;
645  int keepnatts;
646 
647  Assert(state->is_leaf && !state->is_rightmost);
648 
649  nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
650 
651  /* Single key indexes not considered here */
652  if (nkeyatts == 1)
653  return false;
654 
655  /* Ascending insertion pattern never inferred when new item is first */
656  if (state->newitemoff == P_FIRSTKEY)
657  return false;
658 
659  /*
660  * Only apply optimization on pages with equisized tuples, since ordinal
661  * keys are likely to be fixed-width. Testing if the new tuple is
662  * variable width directly might also work, but that fails to apply the
663  * optimization to indexes with a numeric_ops attribute.
664  *
665  * Conclude that page has equisized tuples when the new item is the same
666  * width as the smallest item observed during pass over page, and other
667  * non-pivot tuples must be the same width as well. (Note that the
668  * possibly-truncated existing high key isn't counted in
669  * olddataitemstotal, and must be subtracted from maxoff.)
670  */
671  if (state->newitemsz != state->minfirstrightsz)
672  return false;
673  if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
674  return false;
675 
676  /*
677  * Avoid applying optimization when tuples are wider than a tuple
678  * consisting of two non-NULL int8/int64 attributes (or four non-NULL
679  * int4/int32 attributes)
680  */
681  if (state->newitemsz >
682  MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
683  sizeof(ItemIdData))
684  return false;
685 
686  /*
687  * At least the first attribute's value must be equal to the corresponding
688  * value in previous tuple to apply optimization. New item cannot be a
689  * duplicate, either.
690  *
691  * Handle case where new item is to the right of all items on the existing
692  * page. This is suggestive of monotonically increasing insertions in
693  * itself, so the "heap TID adjacency" test is not applied here.
694  */
695  if (state->newitemoff > maxoff)
696  {
697  itemid = PageGetItemId(state->page, maxoff);
698  tup = (IndexTuple) PageGetItem(state->page, itemid);
699  keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
700 
701  if (keepnatts > 1 && keepnatts <= nkeyatts)
702  {
703  *usemult = true;
704  return true;
705  }
706 
707  return false;
708  }
709 
710  /*
711  * "Low cardinality leading column, high cardinality suffix column"
712  * indexes with a random insertion pattern (e.g., an index with a boolean
713  * column, such as an index on '(book_is_in_print, book_isbn)') present us
714  * with a risk of consistently misapplying the optimization. We're
715  * willing to accept very occasional misapplication of the optimization,
716  * provided the cases where we get it wrong are rare and self-limiting.
717  *
718  * Heap TID adjacency strongly suggests that the item just to the left was
719  * inserted very recently, which limits overapplication of the
720  * optimization. Besides, all inappropriate cases triggered here will
721  * still split in the middle of the page on average.
722  */
723  itemid = PageGetItemId(state->page, OffsetNumberPrev(state->newitemoff));
724  tup = (IndexTuple) PageGetItem(state->page, itemid);
725  /* Do cheaper test first */
726  if (BTreeTupleIsPosting(tup) ||
727  !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
728  return false;
729  /* Check same conditions as rightmost item case, too */
730  keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
731 
732  if (keepnatts > 1 && keepnatts <= nkeyatts)
733  {
734  double interp = (double) state->newitemoff / ((double) maxoff + 1);
735  double leaffillfactormult = (double) leaffillfactor / 100.0;
736 
737  /*
738  * Don't allow caller to split after a new item when it will result in
739  * a split point to the right of the point that a leaf fillfactor
740  * split would use -- have caller apply leaf fillfactor instead
741  */
742  *usemult = interp > leaffillfactormult;
743 
744  return true;
745  }
746 
747  return false;
748 }
signed short int16
Definition: c.h:354
bool is_rightmost
Definition: nbtsplitloc.c:53
Size minfirstrightsz
Definition: nbtsplitloc.c:58
IndexTuple newitem
Definition: nbtsplitloc.c:50
ItemPointerData t_tid
Definition: itup.h:37
IndexTupleData * IndexTuple
Definition: itup.h:53
#define P_FIRSTKEY
Definition: nbtree.h:243
Relation rel
Definition: nbtsplitloc.c:48
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:447
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:369
int olddataitemstotal
Definition: nbtsplitloc.c:57
#define Assert(condition)
Definition: c.h:738
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
#define MAXALIGN(LEN)
Definition: c.h:691
static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
Definition: nbtsplitloc.c:758
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2437
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_bestsplitloc()

static OffsetNumber _bt_bestsplitloc ( FindSplitData state,
int  perfectpenalty,
bool newitemonleft,
FindSplitStrat  strategy 
)
static

Definition at line 797 of file nbtsplitloc.c.

References _bt_split_penalty(), final, i, FindSplitData::interval, FindSplitData::is_rightmost, MAX_LEAF_INTERVAL, Min, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::nsplits, SPLIT_MANY_DUPLICATES, and FindSplitData::splits.

Referenced by _bt_findsplitloc().

799 {
800  int bestpenalty,
801  lowsplit;
802  int highsplit = Min(state->interval, state->nsplits);
803  SplitPoint *final;
804 
805  bestpenalty = INT_MAX;
806  lowsplit = 0;
807  for (int i = lowsplit; i < highsplit; i++)
808  {
809  int penalty;
810 
811  penalty = _bt_split_penalty(state, state->splits + i);
812 
813  if (penalty <= perfectpenalty)
814  {
815  bestpenalty = penalty;
816  lowsplit = i;
817  break;
818  }
819 
820  if (penalty < bestpenalty)
821  {
822  bestpenalty = penalty;
823  lowsplit = i;
824  }
825  }
826 
827  final = &state->splits[lowsplit];
828 
829  /*
830  * There is a risk that the "many duplicates" strategy will repeatedly do
831  * the wrong thing when there are monotonically decreasing insertions to
832  * the right of a large group of duplicates. Repeated splits could leave
833  * a succession of right half pages with free space that can never be
834  * used. This must be avoided.
835  *
836  * Consider the example of the leftmost page in a single integer attribute
837  * NULLS FIRST index which is almost filled with NULLs. Monotonically
838  * decreasing integer insertions might cause the same leftmost page to
839  * split repeatedly at the same point. Each split derives its new high
840  * key from the lowest current value to the immediate right of the large
841  * group of NULLs, which will always be higher than all future integer
842  * insertions, directing all future integer insertions to the same
843  * leftmost page.
844  */
845  if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
846  !final->newitemonleft && final->firstoldonright >= state->newitemoff &&
847  final->firstoldonright < state->newitemoff + MAX_LEAF_INTERVAL)
848  {
849  /*
850  * Avoid the problem by performing a 50:50 split when the new item is
851  * just to the right of the would-be "many duplicates" split point.
852  */
853  final = &state->splits[0];
854  }
855 
856  *newitemonleft = final->newitemonleft;
857  return final->firstoldonright;
858 }
bool is_rightmost
Definition: nbtsplitloc.c:53
#define MAX_LEAF_INTERVAL
Definition: nbtsplitloc.c:21
static int _bt_split_penalty(FindSplitData *state, SplitPoint *split)
Definition: nbtsplitloc.c:1070
#define Min(x, y)
Definition: c.h:920
bool newitemonleft
Definition: nbtsplitloc.c:41
#define final(a, b, c)
Definition: hashfn.c:116
SplitPoint * splits
Definition: nbtsplitloc.c:63
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
int i

◆ _bt_deltasortsplits()

static void _bt_deltasortsplits ( FindSplitData state,
double  fillfactormult,
bool  usemult 
)
static

Definition at line 570 of file nbtsplitloc.c.

References _bt_splitcmp(), SplitPoint::curdelta, i, SplitPoint::leftfree, FindSplitData::nsplits, qsort, SplitPoint::rightfree, and FindSplitData::splits.

Referenced by _bt_findsplitloc().

572 {
573  for (int i = 0; i < state->nsplits; i++)
574  {
575  SplitPoint *split = state->splits + i;
576  int16 delta;
577 
578  if (usemult)
579  delta = fillfactormult * split->leftfree -
580  (1.0 - fillfactormult) * split->rightfree;
581  else
582  delta = split->leftfree - split->rightfree;
583 
584  if (delta < 0)
585  delta = -delta;
586 
587  /* Save delta */
588  split->curdelta = delta;
589  }
590 
591  qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
592 }
signed short int16
Definition: c.h:354
static int _bt_splitcmp(const void *arg1, const void *arg2)
Definition: nbtsplitloc.c:598
int16 leftfree
Definition: nbtsplitloc.c:36
SplitPoint * splits
Definition: nbtsplitloc.c:63
int16 rightfree
Definition: nbtsplitloc.c:37
int16 curdelta
Definition: nbtsplitloc.c:35
int i
#define qsort(a, b, c, d)
Definition: port.h:474

◆ _bt_findsplitloc()

OffsetNumber _bt_findsplitloc ( Relation  rel,
Page  page,
OffsetNumber  newitemoff,
Size  newitemsz,
IndexTuple  newitem,
bool newitemonleft 
)

Definition at line 127 of file nbtsplitloc.c.

References _bt_afternewitemoff(), _bt_bestsplitloc(), _bt_deltasortsplits(), _bt_recsplitloc(), _bt_strategy(), Assert, BTGetFillFactor, BTREE_NONLEAF_FILLFACTOR, BTREE_SINGLEVAL_FILLFACTOR, BTreeTupleIsPosting(), elog, ERROR, SplitPoint::firstoldonright, i, IndexRelationGetNumberOfKeyAttributes, FindSplitData::interval, FindSplitData::is_leaf, FindSplitData::is_rightmost, ItemIdGetLength, FindSplitData::leftspace, Max, MAX_INTERNAL_INTERVAL, MAX_LEAF_INTERVAL, MAXALIGN, FindSplitData::maxsplits, Min, FindSplitData::minfirstrightsz, FindSplitData::newitem, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::newitemsz, FindSplitData::nsplits, OffsetNumberNext, FindSplitData::olddataitemstotal, P_FIRSTDATAKEY, P_HIKEY, P_ISLEAF, P_RIGHTMOST, FindSplitData::page, PageGetExactFreeSpace(), PageGetItemId, PageGetMaxOffsetNumber, PageGetPageSize, PageGetSpecialPointer, palloc(), pfree(), FindSplitData::rel, RelationGetRelationName, FindSplitData::rightspace, SizeOfPageHeaderData, SPLIT_DEFAULT, SPLIT_MANY_DUPLICATES, SPLIT_SINGLE_VALUE, and FindSplitData::splits.

Referenced by _bt_split().

133 {
134  BTPageOpaque opaque;
135  int leftspace,
136  rightspace,
137  olddataitemstotal,
138  olddataitemstoleft,
139  perfectpenalty,
140  leaffillfactor;
142  FindSplitStrat strategy;
143  ItemId itemid;
144  OffsetNumber offnum,
145  maxoff,
146  foundfirstright;
147  double fillfactormult;
148  bool usemult;
149  SplitPoint leftpage,
150  rightpage;
151 
152  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
153  maxoff = PageGetMaxOffsetNumber(page);
154 
155  /* Total free space available on a btree page, after fixed overhead */
156  leftspace = rightspace =
158  MAXALIGN(sizeof(BTPageOpaqueData));
159 
160  /* The right page will have the same high key as the old page */
161  if (!P_RIGHTMOST(opaque))
162  {
163  itemid = PageGetItemId(page, P_HIKEY);
164  rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
165  sizeof(ItemIdData));
166  }
167 
168  /* Count up total space in data items before actually scanning 'em */
169  olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(page);
170  leaffillfactor = BTGetFillFactor(rel);
171 
172  /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
173  newitemsz += sizeof(ItemIdData);
174  state.rel = rel;
175  state.page = page;
176  state.newitem = newitem;
177  state.newitemsz = newitemsz;
178  state.is_leaf = P_ISLEAF(opaque);
179  state.is_rightmost = P_RIGHTMOST(opaque);
180  state.leftspace = leftspace;
181  state.rightspace = rightspace;
182  state.olddataitemstotal = olddataitemstotal;
183  state.minfirstrightsz = SIZE_MAX;
184  state.newitemoff = newitemoff;
185 
186  /* newitem cannot be a posting list item */
187  Assert(!BTreeTupleIsPosting(newitem));
188 
189  /*
190  * maxsplits should never exceed maxoff because there will be at most as
191  * many candidate split points as there are points _between_ tuples, once
192  * you imagine that the new item is already on the original page (the
193  * final number of splits may be slightly lower because not all points
194  * between tuples will be legal).
195  */
196  state.maxsplits = maxoff;
197  state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
198  state.nsplits = 0;
199 
200  /*
201  * Scan through the data items and calculate space usage for a split at
202  * each possible position
203  */
204  olddataitemstoleft = 0;
205 
206  for (offnum = P_FIRSTDATAKEY(opaque);
207  offnum <= maxoff;
208  offnum = OffsetNumberNext(offnum))
209  {
210  Size itemsz;
211 
212  itemid = PageGetItemId(page, offnum);
213  itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
214 
215  /*
216  * When item offset number is not newitemoff, neither side of the
217  * split can be newitem. Record a split after the previous data item
218  * from original page, but before the current data item from original
219  * page. (_bt_recsplitloc() will reject the split when there are no
220  * previous items, which we rely on.)
221  */
222  if (offnum < newitemoff)
223  _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
224  else if (offnum > newitemoff)
225  _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
226  else
227  {
228  /*
229  * Record a split after all "offnum < newitemoff" original page
230  * data items, but before newitem
231  */
232  _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
233 
234  /*
235  * Record a split after newitem, but before data item from
236  * original page at offset newitemoff/current offset
237  */
238  _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
239  }
240 
241  olddataitemstoleft += itemsz;
242  }
243 
244  /*
245  * Record a split after all original page data items, but before newitem.
246  * (Though only when it's possible that newitem will end up alone on new
247  * right page.)
248  */
249  Assert(olddataitemstoleft == olddataitemstotal);
250  if (newitemoff > maxoff)
251  _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
252 
253  /*
254  * I believe it is not possible to fail to find a feasible split, but just
255  * in case ...
256  */
257  if (state.nsplits == 0)
258  elog(ERROR, "could not find a feasible split point for index \"%s\"",
260 
261  /*
262  * Start search for a split point among list of legal split points. Give
263  * primary consideration to equalizing available free space in each half
264  * of the split initially (start with default strategy), while applying
265  * rightmost and split-after-new-item optimizations where appropriate.
266  * Either of the two other fallback strategies may be required for cases
267  * with a large number of duplicates around the original/space-optimal
268  * split point.
269  *
270  * Default strategy gives some weight to suffix truncation in deciding a
271  * split point on leaf pages. It attempts to select a split point where a
272  * distinguishing attribute appears earlier in the new high key for the
273  * left side of the split, in order to maximize the number of trailing
274  * attributes that can be truncated away. Only candidate split points
275  * that imply an acceptable balance of free space on each side are
276  * considered.
277  */
278  if (!state.is_leaf)
279  {
280  /* fillfactormult only used on rightmost page */
281  usemult = state.is_rightmost;
282  fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
283  }
284  else if (state.is_rightmost)
285  {
286  /* Rightmost leaf page -- fillfactormult always used */
287  usemult = true;
288  fillfactormult = leaffillfactor / 100.0;
289  }
290  else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
291  {
292  /*
293  * New item inserted at rightmost point among a localized grouping on
294  * a leaf page -- apply "split after new item" optimization, either by
295  * applying leaf fillfactor multiplier, or by choosing the exact split
296  * point that leaves the new item as last on the left. (usemult is set
297  * for us.)
298  */
299  if (usemult)
300  {
301  /* fillfactormult should be set based on leaf fillfactor */
302  fillfactormult = leaffillfactor / 100.0;
303  }
304  else
305  {
306  /* find precise split point after newitemoff */
307  for (int i = 0; i < state.nsplits; i++)
308  {
309  SplitPoint *split = state.splits + i;
310 
311  if (split->newitemonleft &&
312  newitemoff == split->firstoldonright)
313  {
314  pfree(state.splits);
315  *newitemonleft = true;
316  return newitemoff;
317  }
318  }
319 
320  /*
321  * Cannot legally split after newitemoff; proceed with split
322  * without using fillfactor multiplier. This is defensive, and
323  * should never be needed in practice.
324  */
325  fillfactormult = 0.50;
326  }
327  }
328  else
329  {
330  /* Other leaf page. 50:50 page split. */
331  usemult = false;
332  /* fillfactormult not used, but be tidy */
333  fillfactormult = 0.50;
334  }
335 
336  /*
337  * Set an initial limit on the split interval/number of candidate split
338  * points as appropriate. The "Prefix B-Trees" paper refers to this as
339  * sigma l for leaf splits and sigma b for internal ("branch") splits.
340  * It's hard to provide a theoretical justification for the initial size
341  * of the split interval, though it's clear that a small split interval
342  * makes suffix truncation much more effective without noticeably
343  * affecting space utilization over time.
344  */
345  state.interval = Min(Max(1, state.nsplits * 0.05),
346  state.is_leaf ? MAX_LEAF_INTERVAL :
348 
349  /*
350  * Save leftmost and rightmost splits for page before original ordinal
351  * sort order is lost by delta/fillfactormult sort
352  */
353  leftpage = state.splits[0];
354  rightpage = state.splits[state.nsplits - 1];
355 
356  /* Give split points a fillfactormult-wise delta, and sort on deltas */
357  _bt_deltasortsplits(&state, fillfactormult, usemult);
358 
359  /*
360  * Determine if default strategy/split interval will produce a
361  * sufficiently distinguishing split, or if we should change strategies.
362  * Alternative strategies change the range of split points that are
363  * considered acceptable (split interval), and possibly change
364  * fillfactormult, in order to deal with pages with a large number of
365  * duplicates gracefully.
366  *
367  * Pass low and high splits for the entire page (actually, they're for an
368  * imaginary version of the page that includes newitem). These are used
369  * when the initial split interval encloses split points that are full of
370  * duplicates, and we need to consider if it's even possible to avoid
371  * appending a heap TID.
372  */
373  perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
374 
375  if (strategy == SPLIT_DEFAULT)
376  {
377  /*
378  * Default strategy worked out (always works out with internal page).
379  * Original split interval still stands.
380  */
381  }
382 
383  /*
384  * Many duplicates strategy is used when a heap TID would otherwise be
385  * appended, but the page isn't completely full of logical duplicates.
386  *
387  * The split interval is widened to include all legal candidate split
388  * points. There might be a few as two distinct values in the whole-page
389  * split interval, though it's also possible that most of the values on
390  * the page are unique. The final split point will either be to the
391  * immediate left or to the immediate right of the group of duplicate
392  * tuples that enclose the first/delta-optimal split point (perfect
393  * penalty was set so that the lowest delta split point that avoids
394  * appending a heap TID will be chosen). Maximizing the number of
395  * attributes that can be truncated away is not a goal of the many
396  * duplicates strategy.
397  *
398  * Single value strategy is used when it is impossible to avoid appending
399  * a heap TID. It arranges to leave the left page very full. This
400  * maximizes space utilization in cases where tuples with the same
401  * attribute values span many pages. Newly inserted duplicates will tend
402  * to have higher heap TID values, so we'll end up splitting to the right
403  * consistently. (Single value strategy is harmless though not
404  * particularly useful with !heapkeyspace indexes.)
405  */
406  else if (strategy == SPLIT_MANY_DUPLICATES)
407  {
408  Assert(state.is_leaf);
409  /* Shouldn't try to truncate away extra user attributes */
410  Assert(perfectpenalty ==
412  /* No need to resort splits -- no change in fillfactormult/deltas */
413  state.interval = state.nsplits;
414  }
415  else if (strategy == SPLIT_SINGLE_VALUE)
416  {
417  Assert(state.is_leaf);
418  /* Split near the end of the page */
419  usemult = true;
420  fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
421  /* Resort split points with new delta */
422  _bt_deltasortsplits(&state, fillfactormult, usemult);
423  /* Appending a heap TID is unavoidable, so interval of 1 is fine */
424  state.interval = 1;
425  }
426 
427  /*
428  * Search among acceptable split points (using final split interval) for
429  * the entry that has the lowest penalty, and is therefore expected to
430  * maximize fan-out. Sets *newitemonleft for us.
431  */
432  foundfirstright = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
433  strategy);
434  pfree(state.splits);
435 
436  return foundfirstright;
437 }
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
bool is_rightmost
Definition: nbtsplitloc.c:53
Size minfirstrightsz
Definition: nbtsplitloc.c:58
#define MAX_LEAF_INTERVAL
Definition: nbtsplitloc.c:21
IndexTuple newitem
Definition: nbtsplitloc.c:50
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define Min(x, y)
Definition: c.h:920
bool newitemonleft
Definition: nbtsplitloc.c:41
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static int _bt_strategy(FindSplitData *state, SplitPoint *leftpage, SplitPoint *rightpage, FindSplitStrat *strategy)
Definition: nbtsplitloc.c:872
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
SplitPoint * splits
Definition: nbtsplitloc.c:63
uint16 OffsetNumber
Definition: off.h:24
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define BTREE_NONLEAF_FILLFACTOR
Definition: nbtree.h:195
#define ERROR
Definition: elog.h:43
static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty, bool *newitemonleft, FindSplitStrat strategy)
Definition: nbtsplitloc.c:797
#define PageGetPageSize(page)
Definition: bufpage.h:268
static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff, int leaffillfactor, bool *usemult)
Definition: nbtsplitloc.c:639
#define RelationGetRelationName(relation)
Definition: rel.h:462
struct ItemIdData ItemIdData
Relation rel
Definition: nbtsplitloc.c:48
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:447
#define BTREE_SINGLEVAL_FILLFACTOR
Definition: nbtree.h:196
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define BTGetFillFactor(relation)
Definition: nbtree.h:977
FindSplitStrat
Definition: nbtsplitloc.c:24
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:369
#define Max(x, y)
Definition: c.h:914
#define MAX_INTERNAL_INTERVAL
Definition: nbtsplitloc.c:22
int olddataitemstotal
Definition: nbtsplitloc.c:57
#define Assert(condition)
Definition: c.h:738
Definition: regguts.h:298
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:466
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define MAXALIGN(LEN)
Definition: c.h:691
static void _bt_recsplitloc(FindSplitData *state, OffsetNumber firstoldonright, bool newitemonleft, int olddataitemstoleft, Size firstoldonrightsz)
Definition: nbtsplitloc.c:456
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:625
#define P_HIKEY
Definition: nbtree.h:242
void * palloc(Size size)
Definition: mcxt.c:949
#define elog(elevel,...)
Definition: elog.h:228
int i
static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult, bool usemult)
Definition: nbtsplitloc.c:570
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_interval_edges()

static void _bt_interval_edges ( FindSplitData state,
SplitPoint **  leftinterval,
SplitPoint **  rightinterval 
)
static

Definition at line 989 of file nbtsplitloc.c.

References Assert, SplitPoint::firstoldonright, i, FindSplitData::interval, Min, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::nsplits, and FindSplitData::splits.

Referenced by _bt_strategy().

991 {
992  int highsplit = Min(state->interval, state->nsplits);
993  SplitPoint *deltaoptimal;
994 
995  deltaoptimal = state->splits;
996  *leftinterval = NULL;
997  *rightinterval = NULL;
998 
999  /*
1000  * Delta is an absolute distance to optimal split point, so both the
1001  * leftmost and rightmost split point will usually be at the end of the
1002  * array
1003  */
1004  for (int i = highsplit - 1; i >= 0; i--)
1005  {
1006  SplitPoint *distant = state->splits + i;
1007 
1008  if (distant->firstoldonright < deltaoptimal->firstoldonright)
1009  {
1010  if (*leftinterval == NULL)
1011  *leftinterval = distant;
1012  }
1013  else if (distant->firstoldonright > deltaoptimal->firstoldonright)
1014  {
1015  if (*rightinterval == NULL)
1016  *rightinterval = distant;
1017  }
1018  else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
1019  {
1020  /*
1021  * "incoming tuple will become first on right page" (distant) is
1022  * to the left of "incoming tuple will become last on left page"
1023  * (delta-optimal)
1024  */
1025  Assert(distant->firstoldonright == state->newitemoff);
1026  if (*leftinterval == NULL)
1027  *leftinterval = distant;
1028  }
1029  else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
1030  {
1031  /*
1032  * "incoming tuple will become last on left page" (distant) is to
1033  * the right of "incoming tuple will become first on right page"
1034  * (delta-optimal)
1035  */
1036  Assert(distant->firstoldonright == state->newitemoff);
1037  if (*rightinterval == NULL)
1038  *rightinterval = distant;
1039  }
1040  else
1041  {
1042  /* There was only one or two splits in initial split interval */
1043  Assert(distant == deltaoptimal);
1044  if (*leftinterval == NULL)
1045  *leftinterval = distant;
1046  if (*rightinterval == NULL)
1047  *rightinterval = distant;
1048  }
1049 
1050  if (*leftinterval && *rightinterval)
1051  return;
1052  }
1053 
1054  Assert(false);
1055 }
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
#define Min(x, y)
Definition: c.h:920
bool newitemonleft
Definition: nbtsplitloc.c:41
SplitPoint * splits
Definition: nbtsplitloc.c:63
#define Assert(condition)
Definition: c.h:738
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
int i

◆ _bt_recsplitloc()

static void _bt_recsplitloc ( FindSplitData state,
OffsetNumber  firstoldonright,
bool  newitemonleft,
int  olddataitemstoleft,
Size  firstoldonrightsz 
)
static

Definition at line 456 of file nbtsplitloc.c.

References Assert, BTreeTupleGetPostingOffset(), BTreeTupleIsPosting(), SplitPoint::curdelta, SplitPoint::firstoldonright, IndexTupleSize, FindSplitData::is_leaf, SplitPoint::leftfree, FindSplitData::leftspace, MAXALIGN, FindSplitData::maxsplits, Min, FindSplitData::minfirstrightsz, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::newitemsz, FindSplitData::nsplits, FindSplitData::olddataitemstotal, FindSplitData::page, PageGetItem, PageGetItemId, SplitPoint::rightfree, FindSplitData::rightspace, and FindSplitData::splits.

Referenced by _bt_findsplitloc().

461 {
462  int16 leftfree,
463  rightfree;
464  Size firstrightitemsz;
465  Size postingsz = 0;
466  bool newitemisfirstonright;
467 
468  /* Is the new item going to be the first item on the right page? */
469  newitemisfirstonright = (firstoldonright == state->newitemoff
470  && !newitemonleft);
471 
472  if (newitemisfirstonright)
473  firstrightitemsz = state->newitemsz;
474  else
475  {
476  firstrightitemsz = firstoldonrightsz;
477 
478  /*
479  * Calculate suffix truncation space saving when firstright is a
480  * posting list tuple, though only when the firstright is over 64
481  * bytes including line pointer overhead (arbitrary). This avoids
482  * accessing the tuple in cases where its posting list must be very
483  * small (if firstright has one at all).
484  */
485  if (state->is_leaf && firstrightitemsz > 64)
486  {
487  ItemId itemid;
488  IndexTuple newhighkey;
489 
490  itemid = PageGetItemId(state->page, firstoldonright);
491  newhighkey = (IndexTuple) PageGetItem(state->page, itemid);
492 
493  if (BTreeTupleIsPosting(newhighkey))
494  postingsz = IndexTupleSize(newhighkey) -
495  BTreeTupleGetPostingOffset(newhighkey);
496  }
497  }
498 
499  /* Account for all the old tuples */
500  leftfree = state->leftspace - olddataitemstoleft;
501  rightfree = state->rightspace -
502  (state->olddataitemstotal - olddataitemstoleft);
503 
504  /*
505  * The first item on the right page becomes the high key of the left page;
506  * therefore it counts against left space as well as right space (we
507  * cannot assume that suffix truncation will make it any smaller). When
508  * index has included attributes, then those attributes of left page high
509  * key will be truncated leaving that page with slightly more free space.
510  * However, that shouldn't affect our ability to find valid split
511  * location, since we err in the direction of being pessimistic about free
512  * space on the left half. Besides, even when suffix truncation of
513  * non-TID attributes occurs, the new high key often won't even be a
514  * single MAXALIGN() quantum smaller than the firstright tuple it's based
515  * on.
516  *
517  * If we are on the leaf level, assume that suffix truncation cannot avoid
518  * adding a heap TID to the left half's new high key when splitting at the
519  * leaf level. In practice the new high key will often be smaller and
520  * will rarely be larger, but conservatively assume the worst case. We do
521  * go to the trouble of subtracting away posting list overhead, though
522  * only when it looks like it will make an appreciable difference.
523  * (Posting lists are the only case where truncation will typically make
524  * the final high key far smaller than firstright, so being a bit more
525  * precise there noticeably improves the balance of free space.)
526  */
527  if (state->is_leaf)
528  leftfree -= (int16) (firstrightitemsz +
529  MAXALIGN(sizeof(ItemPointerData)) -
530  postingsz);
531  else
532  leftfree -= (int16) firstrightitemsz;
533 
534  /* account for the new item */
535  if (newitemonleft)
536  leftfree -= (int16) state->newitemsz;
537  else
538  rightfree -= (int16) state->newitemsz;
539 
540  /*
541  * If we are not on the leaf level, we will be able to discard the key
542  * data from the first item that winds up on the right page.
543  */
544  if (!state->is_leaf)
545  rightfree += (int16) firstrightitemsz -
546  (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
547 
548  /* Record split if legal */
549  if (leftfree >= 0 && rightfree >= 0)
550  {
551  Assert(state->nsplits < state->maxsplits);
552 
553  /* Determine smallest firstright item size on page */
554  state->minfirstrightsz = Min(state->minfirstrightsz, firstrightitemsz);
555 
556  state->splits[state->nsplits].curdelta = 0;
557  state->splits[state->nsplits].leftfree = leftfree;
558  state->splits[state->nsplits].rightfree = rightfree;
559  state->splits[state->nsplits].firstoldonright = firstoldonright;
560  state->splits[state->nsplits].newitemonleft = newitemonleft;
561  state->nsplits++;
562  }
563 }
signed short int16
Definition: c.h:354
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
Size minfirstrightsz
Definition: nbtsplitloc.c:58
int16 leftfree
Definition: nbtsplitloc.c:36
#define Min(x, y)
Definition: c.h:920
bool newitemonleft
Definition: nbtsplitloc.c:41
SplitPoint * splits
Definition: nbtsplitloc.c:63
IndexTupleData * IndexTuple
Definition: itup.h:53
struct ItemIdData ItemIdData
int16 rightfree
Definition: nbtsplitloc.c:37
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int16 curdelta
Definition: nbtsplitloc.c:35
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:369
static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)
Definition: nbtree.h:404
int olddataitemstotal
Definition: nbtsplitloc.c:57
#define Assert(condition)
Definition: c.h:738
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:691
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_split_firstright()

static IndexTuple _bt_split_firstright ( FindSplitData state,
SplitPoint split 
)
inlinestatic

Definition at line 1115 of file nbtsplitloc.c.

References SplitPoint::firstoldonright, FindSplitData::newitem, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::page, PageGetItem, and PageGetItemId.

Referenced by _bt_split_penalty(), and _bt_strategy().

1116 {
1117  ItemId itemid;
1118 
1119  if (!split->newitemonleft && split->firstoldonright == state->newitemoff)
1120  return state->newitem;
1121 
1122  itemid = PageGetItemId(state->page, split->firstoldonright);
1123  return (IndexTuple) PageGetItem(state->page, itemid);
1124 }
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
IndexTuple newitem
Definition: nbtsplitloc.c:50
bool newitemonleft
Definition: nbtsplitloc.c:41
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_split_lastleft()

static IndexTuple _bt_split_lastleft ( FindSplitData state,
SplitPoint split 
)
inlinestatic

Definition at line 1099 of file nbtsplitloc.c.

References SplitPoint::firstoldonright, FindSplitData::newitem, FindSplitData::newitemoff, SplitPoint::newitemonleft, OffsetNumberPrev, FindSplitData::page, PageGetItem, and PageGetItemId.

Referenced by _bt_split_penalty(), and _bt_strategy().

1100 {
1101  ItemId itemid;
1102 
1103  if (split->newitemonleft && split->firstoldonright == state->newitemoff)
1104  return state->newitem;
1105 
1106  itemid = PageGetItemId(state->page,
1108  return (IndexTuple) PageGetItem(state->page, itemid);
1109 }
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
IndexTuple newitem
Definition: nbtsplitloc.c:50
bool newitemonleft
Definition: nbtsplitloc.c:41
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ _bt_split_penalty()

static int _bt_split_penalty ( FindSplitData state,
SplitPoint split 
)
inlinestatic

Definition at line 1070 of file nbtsplitloc.c.

References _bt_keep_natts_fast(), _bt_split_firstright(), _bt_split_lastleft(), Assert, SplitPoint::firstoldonright, FindSplitData::is_leaf, ItemIdGetLength, MAXALIGN, FindSplitData::newitemoff, SplitPoint::newitemonleft, FindSplitData::newitemsz, FindSplitData::page, PageGetItemId, and FindSplitData::rel.

Referenced by _bt_bestsplitloc().

1071 {
1072  IndexTuple lastleftuple;
1073  IndexTuple firstrighttuple;
1074 
1075  if (!state->is_leaf)
1076  {
1077  ItemId itemid;
1078 
1079  if (!split->newitemonleft &&
1080  split->firstoldonright == state->newitemoff)
1081  return state->newitemsz;
1082 
1083  itemid = PageGetItemId(state->page, split->firstoldonright);
1084 
1085  return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
1086  }
1087 
1088  lastleftuple = _bt_split_lastleft(state, split);
1089  firstrighttuple = _bt_split_firstright(state, split);
1090 
1091  Assert(lastleftuple != firstrighttuple);
1092  return _bt_keep_natts_fast(state->rel, lastleftuple, firstrighttuple);
1093 }
OffsetNumber firstoldonright
Definition: nbtsplitloc.c:40
bool newitemonleft
Definition: nbtsplitloc.c:41
static IndexTuple _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
Definition: nbtsplitloc.c:1099
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
struct ItemIdData ItemIdData
Relation rel
Definition: nbtsplitloc.c:48
static IndexTuple _bt_split_firstright(FindSplitData *state, SplitPoint *split)
Definition: nbtsplitloc.c:1115
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define Assert(condition)
Definition: c.h:738
OffsetNumber newitemoff
Definition: nbtsplitloc.c:54
#define MAXALIGN(LEN)
Definition: c.h:691
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2437

◆ _bt_splitcmp()

static int _bt_splitcmp ( const void *  arg1,
const void *  arg2 
)
static

Definition at line 598 of file nbtsplitloc.c.

References SplitPoint::curdelta.

Referenced by _bt_deltasortsplits().

599 {
600  SplitPoint *split1 = (SplitPoint *) arg1;
601  SplitPoint *split2 = (SplitPoint *) arg2;
602 
603  if (split1->curdelta > split2->curdelta)
604  return 1;
605  if (split1->curdelta < split2->curdelta)
606  return -1;
607 
608  return 0;
609 }
int16 curdelta
Definition: nbtsplitloc.c:35

◆ _bt_strategy()

static int _bt_strategy ( FindSplitData state,
SplitPoint leftpage,
SplitPoint rightpage,
FindSplitStrat strategy 
)
static

Definition at line 872 of file nbtsplitloc.c.

References _bt_interval_edges(), _bt_keep_natts_fast(), _bt_split_firstright(), _bt_split_lastleft(), IndexRelationGetNumberOfKeyAttributes, FindSplitData::is_leaf, FindSplitData::is_rightmost, FindSplitData::minfirstrightsz, FindSplitData::newitem, P_HIKEY, FindSplitData::page, PageGetItem, PageGetItemId, FindSplitData::rel, SPLIT_DEFAULT, SPLIT_MANY_DUPLICATES, and SPLIT_SINGLE_VALUE.

Referenced by _bt_findsplitloc().

874 {
875  IndexTuple leftmost,
876  rightmost;
877  SplitPoint *leftinterval,
878  *rightinterval;
879  int perfectpenalty;
880  int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
881 
882  /* Assume that alternative strategy won't be used for now */
883  *strategy = SPLIT_DEFAULT;
884 
885  /*
886  * Use smallest observed first right item size for entire page as perfect
887  * penalty on internal pages. This can save cycles in the common case
888  * where most or all splits (not just splits within interval) have first
889  * right tuples that are the same size.
890  */
891  if (!state->is_leaf)
892  return state->minfirstrightsz;
893 
894  /*
895  * Use leftmost and rightmost tuples from leftmost and rightmost splits in
896  * current split interval
897  */
898  _bt_interval_edges(state, &leftinterval, &rightinterval);
899  leftmost = _bt_split_lastleft(state, leftinterval);
900  rightmost = _bt_split_firstright(state, rightinterval);
901 
902  /*
903  * If initial split interval can produce a split point that will at least
904  * avoid appending a heap TID in new high key, we're done. Finish split
905  * with default strategy and initial split interval.
906  */
907  perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
908  if (perfectpenalty <= indnkeyatts)
909  return perfectpenalty;
910 
911  /*
912  * Work out how caller should finish split when even their "perfect"
913  * penalty for initial/default split interval indicates that the interval
914  * does not contain even a single split that avoids appending a heap TID.
915  *
916  * Use the leftmost split's lastleft tuple and the rightmost split's
917  * firstright tuple to assess every possible split.
918  */
919  leftmost = _bt_split_lastleft(state, leftpage);
920  rightmost = _bt_split_firstright(state, rightpage);
921 
922  /*
923  * If page (including new item) has many duplicates but is not entirely
924  * full of duplicates, a many duplicates strategy split will be performed.
925  * If page is entirely full of duplicates, a single value strategy split
926  * will be performed.
927  */
928  perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
929  if (perfectpenalty <= indnkeyatts)
930  {
931  *strategy = SPLIT_MANY_DUPLICATES;
932 
933  /*
934  * Many duplicates strategy should split at either side the group of
935  * duplicates that enclose the delta-optimal split point. Return
936  * indnkeyatts rather than the true perfect penalty to make that
937  * happen. (If perfectpenalty was returned here then low cardinality
938  * composite indexes could have continual unbalanced splits.)
939  *
940  * Note that caller won't go through with a many duplicates split in
941  * rare cases where it looks like there are ever-decreasing insertions
942  * to the immediate right of the split point. This must happen just
943  * before a final decision is made, within _bt_bestsplitloc().
944  */
945  return indnkeyatts;
946  }
947 
948  /*
949  * Single value strategy is only appropriate with ever-increasing heap
950  * TIDs; otherwise, original default strategy split should proceed to
951  * avoid pathological performance. Use page high key to infer if this is
952  * the rightmost page among pages that store the same duplicate value.
953  * This should not prevent insertions of heap TIDs that are slightly out
954  * of order from using single value strategy, since that's expected with
955  * concurrent inserters of the same duplicate value.
956  */
957  else if (state->is_rightmost)
958  *strategy = SPLIT_SINGLE_VALUE;
959  else
960  {
961  ItemId itemid;
962  IndexTuple hikey;
963 
964  itemid = PageGetItemId(state->page, P_HIKEY);
965  hikey = (IndexTuple) PageGetItem(state->page, itemid);
966  perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
967  state->newitem);
968  if (perfectpenalty <= indnkeyatts)
969  *strategy = SPLIT_SINGLE_VALUE;
970  else
971  {
972  /*
973  * Have caller finish split using default strategy, since page
974  * does not appear to be the rightmost page for duplicates of the
975  * value the page is filled with
976  */
977  }
978  }
979 
980  return perfectpenalty;
981 }
bool is_rightmost
Definition: nbtsplitloc.c:53
Size minfirstrightsz
Definition: nbtsplitloc.c:58
IndexTuple newitem
Definition: nbtsplitloc.c:50
static IndexTuple _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
Definition: nbtsplitloc.c:1099
static void _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval, SplitPoint **rightinterval)
Definition: nbtsplitloc.c:989
IndexTupleData * IndexTuple
Definition: itup.h:53
Relation rel
Definition: nbtsplitloc.c:48
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:447
static IndexTuple _bt_split_firstright(FindSplitData *state, SplitPoint *split)
Definition: nbtsplitloc.c:1115
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define P_HIKEY
Definition: nbtree.h:242
int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
Definition: nbtutils.c:2437
#define PageGetItem(page, itemId)
Definition: bufpage.h:340