PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nbtsort.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
Include dependency graph for nbtsort.c:

Go to the source code of this file.

Data Structures

struct  BTSpool
 
struct  BTPageState
 
struct  BTWriteState
 

Typedefs

typedef struct BTPageState BTPageState
 
typedef struct BTWriteState BTWriteState
 

Functions

static Page _bt_blnewpage (uint32 level)
 
static BTPageState_bt_pagestate (BTWriteState *wstate, uint32 level)
 
static void _bt_slideleft (Page page)
 
static void _bt_sortaddtup (Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
 
static void _bt_buildadd (BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 
static void _bt_uppershutdown (BTWriteState *wstate, BTPageState *state)
 
static void _bt_load (BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
BTSpool_bt_spoolinit (Relation heap, Relation index, bool isunique, bool isdead)
 
void _bt_spooldestroy (BTSpool *btspool)
 
void _bt_spool (BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull)
 
void _bt_leafbuild (BTSpool *btspool, BTSpool *btspool2)
 
static void _bt_blwritepage (BTWriteState *wstate, Page page, BlockNumber blkno)
 

Typedef Documentation

Function Documentation

static Page _bt_blnewpage ( uint32  level)
static

Definition at line 244 of file nbtsort.c.

References _bt_pageinit(), BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageOpaqueData::level, P_NONE, PageGetSpecialPointer, and palloc().

Referenced by _bt_buildadd(), and _bt_pagestate().

245 {
246  Page page;
247  BTPageOpaque opaque;
248 
249  page = (Page) palloc(BLCKSZ);
250 
251  /* Zero the page and set up standard page header info */
252  _bt_pageinit(page, BLCKSZ);
253 
254  /* Initialize BT opaque state */
255  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
256  opaque->btpo_prev = opaque->btpo_next = P_NONE;
257  opaque->btpo.level = level;
258  opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
259  opaque->btpo_cycleid = 0;
260 
261  /* Make the P_HIKEY line pointer appear allocated */
262  ((PageHeader) page)->pd_lower += sizeof(ItemIdData);
263 
264  return page;
265 }
BlockNumber btpo_next
Definition: nbtree.h:57
#define BTP_LEAF
Definition: nbtree.h:70
union BTPageOpaqueData::@46 btpo
#define P_NONE
Definition: nbtree.h:168
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
BTCycleId btpo_cycleid
Definition: nbtree.h:64
BlockNumber btpo_prev
Definition: nbtree.h:56
struct ItemIdData ItemIdData
uint32 level
Definition: nbtree.h:60
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
void * palloc(Size size)
Definition: mcxt.c:849
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
uint16 btpo_flags
Definition: nbtree.h:63
Pointer Page
Definition: bufpage.h:74
static void _bt_blwritepage ( BTWriteState wstate,
Page  page,
BlockNumber  blkno 
)
static

Definition at line 271 of file nbtsort.c.

References BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTWriteState::index, log_newpage(), MAIN_FORKNUM, PageSetChecksumInplace(), palloc0(), pfree(), RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, smgrextend(), and smgrwrite().

Referenced by _bt_buildadd(), and _bt_uppershutdown().

272 {
273  /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
274  RelationOpenSmgr(wstate->index);
275 
276  /* XLOG stuff */
277  if (wstate->btws_use_wal)
278  {
279  /* We use the heap NEWPAGE record type for this */
280  log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
281  }
282 
283  /*
284  * If we have to write pages nonsequentially, fill in the space with
285  * zeroes until we come back and overwrite. This is not logically
286  * necessary on standard Unix filesystems (unwritten space will read as
287  * zeroes anyway), but it should help to avoid fragmentation. The dummy
288  * pages aren't WAL-logged though.
289  */
290  while (blkno > wstate->btws_pages_written)
291  {
292  if (!wstate->btws_zeropage)
293  wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
294  /* don't set checksum for all-zero page */
296  wstate->btws_pages_written++,
297  (char *) wstate->btws_zeropage,
298  true);
299  }
300 
301  PageSetChecksumInplace(page, blkno);
302 
303  /*
304  * Now write the page. There's no need for smgr to schedule an fsync for
305  * this write; we'll do it ourselves before ending the build.
306  */
307  if (blkno == wstate->btws_pages_written)
308  {
309  /* extending the file... */
310  smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
311  (char *) page, true);
312  wstate->btws_pages_written++;
313  }
314  else
315  {
316  /* overwriting a block we zero-filled before */
317  smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
318  (char *) page, true);
319  }
320 
321  pfree(page);
322 }
Page btws_zeropage
Definition: nbtsort.c:127
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
bool btws_use_wal
Definition: nbtsort.c:124
#define RelationOpenSmgr(relation)
Definition: rel.h:461
void pfree(void *pointer)
Definition: mcxt.c:950
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:647
Relation index
Definition: nbtsort.c:123
BlockNumber btws_pages_written
Definition: nbtsort.c:126
void * palloc0(Size size)
Definition: mcxt.c:878
RelFileNode rd_node
Definition: rel.h:85
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1199
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:600
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:972
Pointer Page
Definition: bufpage.h:74
static void _bt_buildadd ( BTWriteState wstate,
BTPageState state,
IndexTuple  itup 
)
static

Definition at line 452 of file nbtsort.c.

References _bt_blnewpage(), _bt_blwritepage(), _bt_pagestate(), _bt_sortaddtup(), Assert, BTMaxItemSize, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTPageState::btps_blkno, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTWriteState::btws_pages_alloced, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), ERROR, errtableconstraint(), BTWriteState::heap, BTWriteState::index, IndexTupleDSize, ItemIdGetLength, ItemIdSetUnused, ItemPointerSet, MAXALIGN, NULL, OffsetNumberNext, P_FIRSTKEY, P_HIKEY, P_NONE, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetSpecialPointer, pfree(), RelationGetRelationName, and IndexTupleData::t_tid.

Referenced by _bt_load(), and _bt_uppershutdown().

453 {
454  Page npage;
455  BlockNumber nblkno;
456  OffsetNumber last_off;
457  Size pgspc;
458  Size itupsz;
459 
460  /*
461  * This is a handy place to check for cancel interrupts during the btree
462  * load phase of index creation.
463  */
465 
466  npage = state->btps_page;
467  nblkno = state->btps_blkno;
468  last_off = state->btps_lastoff;
469 
470  pgspc = PageGetFreeSpace(npage);
471  itupsz = IndexTupleDSize(*itup);
472  itupsz = MAXALIGN(itupsz);
473 
474  /*
475  * Check whether the item can fit on a btree page at all. (Eventually, we
476  * ought to try to apply TOAST methods if not.) We actually need to be
477  * able to fit three items on every page, so restrict any one item to 1/3
478  * the per-page available space. Note that at this point, itupsz doesn't
479  * include the ItemId.
480  *
481  * NOTE: similar code appears in _bt_insertonpg() to defend against
482  * oversize items being inserted into an already-existing index. But
483  * during creation of an index, we don't go through there.
484  */
485  if (itupsz > BTMaxItemSize(npage))
486  ereport(ERROR,
487  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
488  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
489  itupsz, BTMaxItemSize(npage),
490  RelationGetRelationName(wstate->index)),
491  errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
492  "Consider a function index of an MD5 hash of the value, "
493  "or use full text indexing."),
494  errtableconstraint(wstate->heap,
495  RelationGetRelationName(wstate->index))));
496 
497  /*
498  * Check to see if page is "full". It's definitely full if the item won't
499  * fit. Otherwise, compare to the target freespace derived from the
500  * fillfactor. However, we must put at least two items on each page, so
501  * disregard fillfactor if we don't have that many.
502  */
503  if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
504  {
505  /*
506  * Finish off the page and write it out.
507  */
508  Page opage = npage;
509  BlockNumber oblkno = nblkno;
510  ItemId ii;
511  ItemId hii;
512  IndexTuple oitup;
513 
514  /* Create new page of same level */
515  npage = _bt_blnewpage(state->btps_level);
516 
517  /* and assign it a page position */
518  nblkno = wstate->btws_pages_alloced++;
519 
520  /*
521  * We copy the last item on the page into the new page, and then
522  * rearrange the old page so that the 'last item' becomes its high key
523  * rather than a true data item. There had better be at least two
524  * items on the page already, else the page would be empty of useful
525  * data.
526  */
527  Assert(last_off > P_FIRSTKEY);
528  ii = PageGetItemId(opage, last_off);
529  oitup = (IndexTuple) PageGetItem(opage, ii);
530  _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
531 
532  /*
533  * Move 'last' into the high key position on opage
534  */
535  hii = PageGetItemId(opage, P_HIKEY);
536  *hii = *ii;
537  ItemIdSetUnused(ii); /* redundant */
538  ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
539 
540  /*
541  * Link the old page into its parent, using its minimum key. If we
542  * don't have a parent, we have to create one; this adds a new btree
543  * level.
544  */
545  if (state->btps_next == NULL)
546  state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
547 
548  Assert(state->btps_minkey != NULL);
549  ItemPointerSet(&(state->btps_minkey->t_tid), oblkno, P_HIKEY);
550  _bt_buildadd(wstate, state->btps_next, state->btps_minkey);
551  pfree(state->btps_minkey);
552 
553  /*
554  * Save a copy of the minimum key for the new page. We have to copy
555  * it off the old page, not the new one, in case we are not at leaf
556  * level.
557  */
558  state->btps_minkey = CopyIndexTuple(oitup);
559 
560  /*
561  * Set the sibling links for both pages.
562  */
563  {
566 
567  oopaque->btpo_next = nblkno;
568  nopaque->btpo_prev = oblkno;
569  nopaque->btpo_next = P_NONE; /* redundant */
570  }
571 
572  /*
573  * Write out the old page. We never need to touch it again, so we can
574  * free the opage workspace too.
575  */
576  _bt_blwritepage(wstate, opage, oblkno);
577 
578  /*
579  * Reset last_off to point to new page
580  */
581  last_off = P_FIRSTKEY;
582  }
583 
584  /*
585  * If the new item is the first for its page, stash a copy for later. Note
586  * this will only happen for the first item on a level; on later pages,
587  * the first item for a page is copied from the prior page in the code
588  * above.
589  */
590  if (last_off == P_HIKEY)
591  {
592  Assert(state->btps_minkey == NULL);
593  state->btps_minkey = CopyIndexTuple(itup);
594  }
595 
596  /*
597  * Add the new item into the current page.
598  */
599  last_off = OffsetNumberNext(last_off);
600  _bt_sortaddtup(npage, itupsz, itup, last_off);
601 
602  state->btps_page = npage;
603  state->btps_blkno = nblkno;
604  state->btps_lastoff = last_off;
605 }
BlockNumber btpo_next
Definition: nbtree.h:57
int errhint(const char *fmt,...)
Definition: elog.c:987
BlockNumber btws_pages_alloced
Definition: nbtsort.c:125
ItemPointerData t_tid
Definition: itup.h:37
#define P_NONE
Definition: nbtree.h:168
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
BlockNumber btps_blkno
Definition: nbtsort.c:109
static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
Definition: nbtsort.c:271
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:582
IndexTuple btps_minkey
Definition: nbtsort.c:110
uint16 OffsetNumber
Definition: off.h:24
int errtableconstraint(Relation rel, const char *conname)
Definition: relcache.c:5297
Page btps_page
Definition: nbtsort.c:108
void pfree(void *pointer)
Definition: mcxt.c:950
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
#define ERROR
Definition: elog.h:43
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
Definition: nbtsort.c:452
BlockNumber btpo_prev
Definition: nbtree.h:56
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:434
#define IndexTupleDSize(itup)
Definition: itup.h:71
OffsetNumber btps_lastoff
Definition: nbtsort.c:111
IndexTupleData * IndexTuple
Definition: itup.h:53
#define P_FIRSTKEY
Definition: nbtree.h:204
#define RelationGetRelationName(relation)
Definition: rel.h:437
struct ItemIdData ItemIdData
#define ereport(elevel, rest)
Definition: elog.h:122
Relation index
Definition: nbtsort.c:123
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
uint32 btps_level
Definition: nbtsort.c:112
static BTPageState * _bt_pagestate(BTWriteState *wstate, uint32 level)
Definition: nbtsort.c:329
struct BTPageState * btps_next
Definition: nbtsort.c:114
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:356
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
#define MAXALIGN(LEN)
Definition: c.h:588
#define BTMaxItemSize(page)
Definition: nbtree.h:119
#define P_HIKEY
Definition: nbtree.h:203
int errmsg(const char *fmt,...)
Definition: elog.c:797
static Page _bt_blnewpage(uint32 level)
Definition: nbtsort.c:244
static void _bt_sortaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off)
Definition: nbtsort.c:397
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
Relation heap
Definition: nbtsort.c:122
#define ItemIdSetUnused(itemId)
Definition: itemid.h:127
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:104
void _bt_leafbuild ( BTSpool btspool,
BTSpool btspool2 
)

Definition at line 201 of file nbtsort.c.

References _bt_load(), BTREE_METAPAGE, BTWriteState::btws_pages_alloced, BTWriteState::btws_pages_written, BTWriteState::btws_use_wal, BTWriteState::btws_zeropage, BTSpool::heap, BTWriteState::heap, BTSpool::index, BTWriteState::index, log_btree_build_stats, NULL, RelationNeedsWAL, ResetUsage(), ShowUsage(), BTSpool::sortstate, tuplesort_performsort(), and XLogIsNeeded.

Referenced by btbuild().

202 {
203  BTWriteState wstate;
204 
205 #ifdef BTREE_BUILD_STATS
207  {
208  ShowUsage("BTREE BUILD (Spool) STATISTICS");
209  ResetUsage();
210  }
211 #endif /* BTREE_BUILD_STATS */
212 
214  if (btspool2)
215  tuplesort_performsort(btspool2->sortstate);
216 
217  wstate.heap = btspool->heap;
218  wstate.index = btspool->index;
219 
220  /*
221  * We need to log index creation in WAL iff WAL archiving/streaming is
222  * enabled UNLESS the index isn't WAL-logged anyway.
223  */
224  wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
225 
226  /* reserve the metapage */
227  wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
228  wstate.btws_pages_written = 0;
229  wstate.btws_zeropage = NULL; /* until needed */
230 
231  _bt_load(&wstate, btspool, btspool2);
232 }
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1773
Page btws_zeropage
Definition: nbtsort.c:127
static void _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
Definition: nbtsort.c:677
void ShowUsage(const char *title)
Definition: postgres.c:4380
BlockNumber btws_pages_alloced
Definition: nbtsort.c:125
#define XLogIsNeeded()
Definition: xlog.h:145
Tuplesortstate * sortstate
Definition: nbtsort.c:87
bool btws_use_wal
Definition: nbtsort.c:124
Relation heap
Definition: nbtsort.c:88
void ResetUsage(void)
Definition: postgres.c:4373
#define BTREE_METAPAGE
Definition: nbtree.h:109
Relation index
Definition: nbtsort.c:89
Relation index
Definition: nbtsort.c:123
BlockNumber btws_pages_written
Definition: nbtsort.c:126
bool log_btree_build_stats
Definition: guc.c:443
#define NULL
Definition: c.h:229
#define RelationNeedsWAL(relation)
Definition: rel.h:506
Relation heap
Definition: nbtsort.c:122
static void _bt_load ( BTWriteState wstate,
BTSpool btspool,
BTSpool btspool2 
)
static

Definition at line 677 of file nbtsort.c.

References _bt_buildadd(), _bt_freeskey(), _bt_mkscankey_nodata(), _bt_pagestate(), _bt_uppershutdown(), SortSupportData::abbreviate, ApplySortComparator(), AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, compare(), CurrentMemoryContext, i, BTWriteState::index, index_getattr, MAIN_FORKNUM, merge(), NULL, palloc0(), pfree(), PrepareSortSupportFromIndexRel(), RelationData::rd_smgr, RelationGetDescr, RelationGetNumberOfAttributes, RelationNeedsWAL, RelationOpenSmgr, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, smgrimmedsync(), BTSpool::sortstate, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, and tuplesort_getindextuple().

Referenced by _bt_leafbuild().

678 {
680  bool merge = (btspool2 != NULL);
681  IndexTuple itup,
682  itup2 = NULL;
683  bool load1;
684  TupleDesc tupdes = RelationGetDescr(wstate->index);
685  int i,
686  keysz = RelationGetNumberOfAttributes(wstate->index);
687  ScanKey indexScanKey = NULL;
688  SortSupport sortKeys;
689 
690  if (merge)
691  {
692  /*
693  * Another BTSpool for dead tuples exists. Now we have to merge
694  * btspool and btspool2.
695  */
696 
697  /* the preparation of merge */
698  itup = tuplesort_getindextuple(btspool->sortstate, true);
699  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
700  indexScanKey = _bt_mkscankey_nodata(wstate->index);
701 
702  /* Prepare SortSupport data for each column */
703  sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
704 
705  for (i = 0; i < keysz; i++)
706  {
707  SortSupport sortKey = sortKeys + i;
708  ScanKey scanKey = indexScanKey + i;
709  int16 strategy;
710 
711  sortKey->ssup_cxt = CurrentMemoryContext;
712  sortKey->ssup_collation = scanKey->sk_collation;
713  sortKey->ssup_nulls_first =
714  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
715  sortKey->ssup_attno = scanKey->sk_attno;
716  /* Abbreviation is not supported here */
717  sortKey->abbreviate = false;
718 
719  AssertState(sortKey->ssup_attno != 0);
720 
721  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
723 
724  PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
725  }
726 
727  _bt_freeskey(indexScanKey);
728 
729  for (;;)
730  {
731  load1 = true; /* load BTSpool next ? */
732  if (itup2 == NULL)
733  {
734  if (itup == NULL)
735  break;
736  }
737  else if (itup != NULL)
738  {
739  for (i = 1; i <= keysz; i++)
740  {
741  SortSupport entry;
742  Datum attrDatum1,
743  attrDatum2;
744  bool isNull1,
745  isNull2;
746  int32 compare;
747 
748  entry = sortKeys + i - 1;
749  attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
750  attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
751 
752  compare = ApplySortComparator(attrDatum1, isNull1,
753  attrDatum2, isNull2,
754  entry);
755  if (compare > 0)
756  {
757  load1 = false;
758  break;
759  }
760  else if (compare < 0)
761  break;
762  }
763  }
764  else
765  load1 = false;
766 
767  /* When we see first tuple, create first index page */
768  if (state == NULL)
769  state = _bt_pagestate(wstate, 0);
770 
771  if (load1)
772  {
773  _bt_buildadd(wstate, state, itup);
774  itup = tuplesort_getindextuple(btspool->sortstate, true);
775  }
776  else
777  {
778  _bt_buildadd(wstate, state, itup2);
779  itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
780  }
781  }
782  pfree(sortKeys);
783  }
784  else
785  {
786  /* merge is unnecessary */
787  while ((itup = tuplesort_getindextuple(btspool->sortstate,
788  true)) != NULL)
789  {
790  /* When we see first tuple, create first index page */
791  if (state == NULL)
792  state = _bt_pagestate(wstate, 0);
793 
794  _bt_buildadd(wstate, state, itup);
795  }
796  }
797 
798  /* Close down final pages and write the metapage */
799  _bt_uppershutdown(wstate, state);
800 
801  /*
802  * If the index is WAL-logged, we must fsync it down to disk before it's
803  * safe to commit the transaction. (For a non-WAL-logged index we don't
804  * care since the index will be uninteresting after a crash anyway.)
805  *
806  * It's obvious that we must do this when not WAL-logging the build. It's
807  * less obvious that we have to do it even if we did WAL-log the index
808  * pages. The reason is that since we're building outside shared buffers,
809  * a CHECKPOINT occurring during the build has no way to flush the
810  * previously written data to disk (indeed it won't know the index even
811  * exists). A crash later on would replay WAL from the checkpoint,
812  * therefore it wouldn't replay our earlier WAL entries. If we do not
813  * fsync those pages here, they might still not be on disk when the crash
814  * occurs.
815  */
816  if (RelationNeedsWAL(wstate->index))
817  {
818  RelationOpenSmgr(wstate->index);
820  }
821 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward)
Definition: tuplesort.c:2170
signed short int16
Definition: c.h:255
bool ssup_nulls_first
Definition: sortsupport.h:75
ScanKey _bt_mkscankey_nodata(Relation rel)
Definition: nbtutils.c:115
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define AssertState(condition)
Definition: c.h:678
#define RelationGetDescr(relation)
Definition: rel.h:429
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:423
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
void _bt_freeskey(ScanKey skey)
Definition: nbtutils.c:155
Tuplesortstate * sortstate
Definition: nbtsort.c:87
static pairingheap_node * merge(pairingheap *heap, pairingheap_node *a, pairingheap_node *b)
Definition: pairingheap.c:79
signed int int32
Definition: c.h:256
#define RelationOpenSmgr(relation)
Definition: rel.h:461
void pfree(void *pointer)
Definition: mcxt.c:950
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145
MemoryContext ssup_cxt
Definition: sortsupport.h:66
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
Definition: nbtsort.c:452
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy, SortSupport ssup)
Definition: sortsupport.c:160
Relation index
Definition: nbtsort.c:123
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:428
void * palloc0(Size size)
Definition: mcxt.c:878
uintptr_t Datum
Definition: postgres.h:372
static BTPageState * _bt_pagestate(BTWriteState *wstate, uint32 level)
Definition: nbtsort.c:329
AttrNumber ssup_attno
Definition: sortsupport.h:81
int sk_flags
Definition: skey.h:66
#define NULL
Definition: c.h:229
#define SK_BT_DESC
Definition: nbtree.h:427
Definition: regguts.h:298
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
#define RelationNeedsWAL(relation)
Definition: rel.h:506
Oid sk_collation
Definition: skey.h:70
int i
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
Definition: nbtsort.c:611
#define BTLessStrategyNumber
Definition: stratnum.h:29
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:201
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:734
AttrNumber sk_attno
Definition: skey.h:67
static BTPageState * _bt_pagestate ( BTWriteState wstate,
uint32  level 
)
static

Definition at line 329 of file nbtsort.c.

References _bt_blnewpage(), BTPageState::btps_blkno, BTPageState::btps_full, BTPageState::btps_lastoff, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_DEFAULT_FILLFACTOR, BTREE_NONLEAF_FILLFACTOR, BTWriteState::btws_pages_alloced, BTWriteState::index, NULL, P_HIKEY, palloc0(), and RelationGetTargetPageFreeSpace.

Referenced by _bt_buildadd(), and _bt_load().

330 {
332 
333  /* create initial page for level */
334  state->btps_page = _bt_blnewpage(level);
335 
336  /* and assign it a page position */
337  state->btps_blkno = wstate->btws_pages_alloced++;
338 
339  state->btps_minkey = NULL;
340  /* initialize lastoff so first item goes into P_FIRSTKEY */
341  state->btps_lastoff = P_HIKEY;
342  state->btps_level = level;
343  /* set "full" threshold based on level. See notes at head of file. */
344  if (level > 0)
345  state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
346  else
349  /* no parent level, yet */
350  state->btps_next = NULL;
351 
352  return state;
353 }
BlockNumber btws_pages_alloced
Definition: nbtsort.c:125
BlockNumber btps_blkno
Definition: nbtsort.c:109
IndexTuple btps_minkey
Definition: nbtsort.c:110
Page btps_page
Definition: nbtsort.c:108
#define BTREE_NONLEAF_FILLFACTOR
Definition: nbtree.h:133
Size btps_full
Definition: nbtsort.c:113
OffsetNumber btps_lastoff
Definition: nbtsort.c:111
#define BTREE_DEFAULT_FILLFACTOR
Definition: nbtree.h:132
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:308
Relation index
Definition: nbtsort.c:123
void * palloc0(Size size)
Definition: mcxt.c:878
uint32 btps_level
Definition: nbtsort.c:112
struct BTPageState * btps_next
Definition: nbtsort.c:114
#define NULL
Definition: c.h:229
Definition: regguts.h:298
#define P_HIKEY
Definition: nbtree.h:203
static Page _bt_blnewpage(uint32 level)
Definition: nbtsort.c:244
static void _bt_slideleft ( Page  page)
static

Definition at line 362 of file nbtsort.c.

References OffsetNumberNext, P_FIRSTKEY, P_HIKEY, PageGetItemId, PageGetMaxOffsetNumber, and PageIsEmpty.

Referenced by _bt_uppershutdown().

363 {
364  OffsetNumber off;
365  OffsetNumber maxoff;
366  ItemId previi;
367  ItemId thisii;
368 
369  if (!PageIsEmpty(page))
370  {
371  maxoff = PageGetMaxOffsetNumber(page);
372  previi = PageGetItemId(page, P_HIKEY);
373  for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
374  {
375  thisii = PageGetItemId(page, off);
376  *previi = *thisii;
377  previi = thisii;
378  }
379  ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
380  }
381 }
#define PageIsEmpty(page)
Definition: bufpage.h:219
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
uint16 OffsetNumber
Definition: off.h:24
#define P_FIRSTKEY
Definition: nbtree.h:204
struct ItemIdData ItemIdData
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define P_HIKEY
Definition: nbtree.h:203
static void _bt_sortaddtup ( Page  page,
Size  itemsize,
IndexTuple  itup,
OffsetNumber  itup_off 
)
static

Definition at line 397 of file nbtsort.c.

References elog, ERROR, InvalidOffsetNumber, P_FIRSTKEY, P_ISLEAF, PageAddItem, PageGetSpecialPointer, and IndexTupleData::t_info.

Referenced by _bt_buildadd().

401 {
403  IndexTupleData trunctuple;
404 
405  if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
406  {
407  trunctuple = *itup;
408  trunctuple.t_info = sizeof(IndexTupleData);
409  itup = &trunctuple;
410  itemsize = sizeof(IndexTupleData);
411  }
412 
413  if (PageAddItem(page, (Item) itup, itemsize, itup_off,
414  false, false) == InvalidOffsetNumber)
415  elog(ERROR, "failed to add item to the index page");
416 }
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:413
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
#define ERROR
Definition: elog.h:43
#define P_FIRSTKEY
Definition: nbtree.h:204
struct IndexTupleData IndexTupleData
#define InvalidOffsetNumber
Definition: off.h:26
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
#define elog
Definition: elog.h:219
unsigned short t_info
Definition: itup.h:49
#define P_ISLEAF(opaque)
Definition: nbtree.h:176
void _bt_spool ( BTSpool btspool,
ItemPointer  self,
Datum values,
bool isnull 
)

Definition at line 190 of file nbtsort.c.

References BTSpool::index, BTSpool::sortstate, and tuplesort_putindextuplevalues().

Referenced by btbuildCallback().

191 {
192  tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
193  self, values, isnull);
194 }
Tuplesortstate * sortstate
Definition: nbtsort.c:87
Relation index
Definition: nbtsort.c:89
void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, ItemPointer self, Datum *values, bool *isnull)
Definition: tuplesort.c:1407
static Datum values[MAXATTR]
Definition: bootstrap.c:163
void _bt_spooldestroy ( BTSpool btspool)

Definition at line 180 of file nbtsort.c.

References pfree(), BTSpool::sortstate, and tuplesort_end().

Referenced by btbuild().

181 {
182  tuplesort_end(btspool->sortstate);
183  pfree(btspool);
184 }
Tuplesortstate * sortstate
Definition: nbtsort.c:87
void pfree(void *pointer)
Definition: mcxt.c:950
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1167
BTSpool* _bt_spoolinit ( Relation  heap,
Relation  index,
bool  isunique,
bool  isdead 
)

Definition at line 152 of file nbtsort.c.

References BTSpool::heap, BTSpool::index, BTSpool::isunique, maintenance_work_mem, palloc0(), BTSpool::sortstate, tuplesort_begin_index_btree(), and work_mem.

Referenced by btbuild().

153 {
154  BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
155  int btKbytes;
156 
157  btspool->heap = heap;
158  btspool->index = index;
159  btspool->isunique = isunique;
160 
161  /*
162  * We size the sort area as maintenance_work_mem rather than work_mem to
163  * speed index creation. This should be OK since a single backend can't
164  * run multiple index creations in parallel. Note that creation of a
165  * unique index actually requires two BTSpool objects. We expect that the
166  * second one (for dead tuples) won't get very full, so we give it only
167  * work_mem.
168  */
169  btKbytes = isdead ? work_mem : maintenance_work_mem;
170  btspool->sortstate = tuplesort_begin_index_btree(heap, index, isunique,
171  btKbytes, false);
172 
173  return btspool;
174 }
Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, bool randomAccess)
Definition: tuplesort.c:920
Tuplesortstate * sortstate
Definition: nbtsort.c:87
Relation heap
Definition: nbtsort.c:88
bool isunique
Definition: nbtsort.c:90
Relation index
Definition: nbtsort.c:89
void * palloc0(Size size)
Definition: mcxt.c:878
int work_mem
Definition: globals.c:112
int maintenance_work_mem
Definition: globals.c:113
static void _bt_uppershutdown ( BTWriteState wstate,
BTPageState state 
)
static

Definition at line 611 of file nbtsort.c.

References _bt_blwritepage(), _bt_buildadd(), _bt_initmetapage(), _bt_slideleft(), Assert, BTP_ROOT, BTPageOpaqueData::btpo_flags, BTPageState::btps_blkno, BTPageState::btps_level, BTPageState::btps_minkey, BTPageState::btps_next, BTPageState::btps_page, BTREE_METAPAGE, ItemPointerSet, NULL, P_HIKEY, P_NONE, PageGetSpecialPointer, palloc(), pfree(), and IndexTupleData::t_tid.

Referenced by _bt_load().

612 {
613  BTPageState *s;
614  BlockNumber rootblkno = P_NONE;
615  uint32 rootlevel = 0;
616  Page metapage;
617 
618  /*
619  * Each iteration of this loop completes one more level of the tree.
620  */
621  for (s = state; s != NULL; s = s->btps_next)
622  {
623  BlockNumber blkno;
624  BTPageOpaque opaque;
625 
626  blkno = s->btps_blkno;
628 
629  /*
630  * We have to link the last page on this level to somewhere.
631  *
632  * If we're at the top, it's the root, so attach it to the metapage.
633  * Otherwise, add an entry for it to its parent using its minimum key.
634  * This may cause the last page of the parent level to split, but
635  * that's not a problem -- we haven't gotten to it yet.
636  */
637  if (s->btps_next == NULL)
638  {
639  opaque->btpo_flags |= BTP_ROOT;
640  rootblkno = blkno;
641  rootlevel = s->btps_level;
642  }
643  else
644  {
645  Assert(s->btps_minkey != NULL);
646  ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY);
647  _bt_buildadd(wstate, s->btps_next, s->btps_minkey);
648  pfree(s->btps_minkey);
649  s->btps_minkey = NULL;
650  }
651 
652  /*
653  * This is the rightmost page, so the ItemId array needs to be slid
654  * back one slot. Then we can dump out the page.
655  */
657  _bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
658  s->btps_page = NULL; /* writepage freed the workspace */
659  }
660 
661  /*
662  * As the last step in the process, construct the metapage and make it
663  * point to the new root (unless we had no data at all, in which case it's
664  * set to point to "P_NONE"). This changes the index to the "valid" state
665  * by filling in a valid magic number in the metapage.
666  */
667  metapage = (Page) palloc(BLCKSZ);
668  _bt_initmetapage(metapage, rootblkno, rootlevel);
669  _bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
670 }
#define BTP_ROOT
Definition: nbtree.h:71
ItemPointerData t_tid
Definition: itup.h:37
#define P_NONE
Definition: nbtree.h:168
uint32 BlockNumber
Definition: block.h:31
BlockNumber btps_blkno
Definition: nbtsort.c:109
static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
Definition: nbtsort.c:271
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
IndexTuple btps_minkey
Definition: nbtsort.c:110
Page btps_page
Definition: nbtsort.c:108
void pfree(void *pointer)
Definition: mcxt.c:950
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
Definition: nbtsort.c:452
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
Definition: nbtpage.c:49
unsigned int uint32
Definition: c.h:268
#define BTREE_METAPAGE
Definition: nbtree.h:109
uint32 btps_level
Definition: nbtsort.c:112
struct BTPageState * btps_next
Definition: nbtsort.c:114
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
#define P_HIKEY
Definition: nbtree.h:203
static void _bt_slideleft(Page page)
Definition: nbtsort.c:362
void * palloc(Size size)
Definition: mcxt.c:849
uint16 btpo_flags
Definition: nbtree.h:63
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:104