PostgreSQL Source Code  git master
brin.c
Go to the documentation of this file.
1 /*
2  * brin.c
3  * Implementation of BRIN indexes for Postgres
4  *
5  * See src/backend/access/brin/README for details.
6  *
7  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/brin/brin.c
12  *
13  * TODO
14  * * ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
15  */
16 #include "postgres.h"
17 
18 #include "access/brin.h"
19 #include "access/brin_page.h"
20 #include "access/brin_pageops.h"
21 #include "access/brin_xlog.h"
22 #include "access/relation.h"
23 #include "access/reloptions.h"
24 #include "access/relscan.h"
25 #include "access/table.h"
26 #include "access/tableam.h"
27 #include "access/xloginsert.h"
28 #include "catalog/index.h"
29 #include "catalog/pg_am.h"
30 #include "commands/vacuum.h"
31 #include "miscadmin.h"
32 #include "pgstat.h"
33 #include "postmaster/autovacuum.h"
34 #include "storage/bufmgr.h"
35 #include "storage/freespace.h"
36 #include "utils/acl.h"
37 #include "utils/builtins.h"
38 #include "utils/datum.h"
39 #include "utils/index_selfuncs.h"
40 #include "utils/memutils.h"
41 #include "utils/rel.h"
42 
43 
44 /*
45  * We use a BrinBuildState during initial construction of a BRIN index.
46  * The running state is kept in a BrinMemTuple.
47  */
48 typedef struct BrinBuildState
49 {
59 
60 /*
61  * Struct used as "opaque" during index scans
62  */
63 typedef struct BrinOpaque
64 {
69 
70 #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
71 
73  BrinRevmap *revmap, BlockNumber pagesPerRange);
75 static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
76  bool include_partial, double *numSummarized, double *numExisting);
78 static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
79  BrinTuple *b);
80 static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
81 static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc,
82  BrinMemTuple *dtup, Datum *values, bool *nulls);
83 static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);
84 
85 /*
86  * BRIN handler function: return IndexAmRoutine with access method parameters
87  * and callbacks.
88  */
89 Datum
91 {
93 
94  amroutine->amstrategies = 0;
97  amroutine->amcanorder = false;
98  amroutine->amcanorderbyop = false;
99  amroutine->amcanbackward = false;
100  amroutine->amcanunique = false;
101  amroutine->amcanmulticol = true;
102  amroutine->amoptionalkey = true;
103  amroutine->amsearcharray = false;
104  amroutine->amsearchnulls = true;
105  amroutine->amstorage = true;
106  amroutine->amclusterable = false;
107  amroutine->ampredlocks = false;
108  amroutine->amcanparallel = false;
109  amroutine->amcaninclude = false;
110  amroutine->amusemaintenanceworkmem = false;
111  amroutine->amhotblocking = false;
112  amroutine->amparallelvacuumoptions =
114  amroutine->amkeytype = InvalidOid;
115 
116  amroutine->ambuild = brinbuild;
117  amroutine->ambuildempty = brinbuildempty;
118  amroutine->aminsert = brininsert;
119  amroutine->ambulkdelete = brinbulkdelete;
120  amroutine->amvacuumcleanup = brinvacuumcleanup;
121  amroutine->amcanreturn = NULL;
122  amroutine->amcostestimate = brincostestimate;
123  amroutine->amoptions = brinoptions;
124  amroutine->amproperty = NULL;
125  amroutine->ambuildphasename = NULL;
126  amroutine->amvalidate = brinvalidate;
127  amroutine->amadjustmembers = NULL;
128  amroutine->ambeginscan = brinbeginscan;
129  amroutine->amrescan = brinrescan;
130  amroutine->amgettuple = NULL;
131  amroutine->amgetbitmap = bringetbitmap;
132  amroutine->amendscan = brinendscan;
133  amroutine->ammarkpos = NULL;
134  amroutine->amrestrpos = NULL;
135  amroutine->amestimateparallelscan = NULL;
136  amroutine->aminitparallelscan = NULL;
137  amroutine->amparallelrescan = NULL;
138 
139  PG_RETURN_POINTER(amroutine);
140 }
141 
142 /*
143  * A tuple in the heap is being inserted. To keep a brin index up to date,
144  * we need to obtain the relevant index tuple and compare its stored values
145  * with those of the new tuple. If the tuple values are not consistent with
146  * the summary tuple, we need to update the index tuple.
147  *
148  * If autosummarization is enabled, check if we need to summarize the previous
149  * page range.
150  *
151  * If the range is not currently summarized (i.e. the revmap returns NULL for
152  * it), there's nothing to do for this tuple.
153  */
154 bool
155 brininsert(Relation idxRel, Datum *values, bool *nulls,
156  ItemPointer heaptid, Relation heapRel,
157  IndexUniqueCheck checkUnique,
158  bool indexUnchanged,
159  IndexInfo *indexInfo)
160 {
161  BlockNumber pagesPerRange;
162  BlockNumber origHeapBlk;
163  BlockNumber heapBlk;
164  BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
165  BrinRevmap *revmap;
167  MemoryContext tupcxt = NULL;
169  bool autosummarize = BrinGetAutoSummarize(idxRel);
170 
171  revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
172 
173  /*
174  * origHeapBlk is the block number where the insertion occurred. heapBlk
175  * is the first block in the corresponding page range.
176  */
177  origHeapBlk = ItemPointerGetBlockNumber(heaptid);
178  heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
179 
180  for (;;)
181  {
182  bool need_insert = false;
183  OffsetNumber off;
184  BrinTuple *brtup;
185  BrinMemTuple *dtup;
186 
188 
189  /*
190  * If auto-summarization is enabled and we just inserted the first
191  * tuple into the first block of a new non-first page range, request a
192  * summarization run of the previous range.
193  */
194  if (autosummarize &&
195  heapBlk > 0 &&
196  heapBlk == origHeapBlk &&
198  {
199  BlockNumber lastPageRange = heapBlk - 1;
200  BrinTuple *lastPageTuple;
201 
202  lastPageTuple =
203  brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
204  NULL, BUFFER_LOCK_SHARE, NULL);
205  if (!lastPageTuple)
206  {
207  bool recorded;
208 
210  RelationGetRelid(idxRel),
211  lastPageRange);
212  if (!recorded)
213  ereport(LOG,
214  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
215  errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
216  RelationGetRelationName(idxRel),
217  lastPageRange)));
218  }
219  else
221  }
222 
223  brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
224  NULL, BUFFER_LOCK_SHARE, NULL);
225 
226  /* if range is unsummarized, there's nothing to do */
227  if (!brtup)
228  break;
229 
230  /* First time through in this statement? */
231  if (bdesc == NULL)
232  {
233  MemoryContextSwitchTo(indexInfo->ii_Context);
234  bdesc = brin_build_desc(idxRel);
235  indexInfo->ii_AmCache = (void *) bdesc;
236  MemoryContextSwitchTo(oldcxt);
237  }
238  /* First time through in this brininsert call? */
239  if (tupcxt == NULL)
240  {
242  "brininsert cxt",
244  MemoryContextSwitchTo(tupcxt);
245  }
246 
247  dtup = brin_deform_tuple(bdesc, brtup, NULL);
248 
249  need_insert = add_values_to_range(idxRel, bdesc, dtup, values, nulls);
250 
251  if (!need_insert)
252  {
253  /*
254  * The tuple is consistent with the new values, so there's nothing
255  * to do.
256  */
258  }
259  else
260  {
261  Page page = BufferGetPage(buf);
262  ItemId lp = PageGetItemId(page, off);
263  Size origsz;
264  BrinTuple *origtup;
265  Size newsz;
266  BrinTuple *newtup;
267  bool samepage;
268 
269  /*
270  * Make a copy of the old tuple, so that we can compare it after
271  * re-acquiring the lock.
272  */
273  origsz = ItemIdGetLength(lp);
274  origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
275 
276  /*
277  * Before releasing the lock, check if we can attempt a same-page
278  * update. Another process could insert a tuple concurrently in
279  * the same page though, so downstream we must be prepared to cope
280  * if this turns out to not be possible after all.
281  */
282  newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
283  samepage = brin_can_do_samepage_update(buf, origsz, newsz);
285 
286  /*
287  * Try to update the tuple. If this doesn't work for whatever
288  * reason, we need to restart from the top; the revmap might be
289  * pointing at a different tuple for this block now, so we need to
290  * recompute to ensure both our new heap tuple and the other
291  * inserter's are covered by the combined tuple. It might be that
292  * we don't need to update at all.
293  */
294  if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
295  buf, off, origtup, origsz, newtup, newsz,
296  samepage))
297  {
298  /* no luck; start over */
300  continue;
301  }
302  }
303 
304  /* success! */
305  break;
306  }
307 
308  brinRevmapTerminate(revmap);
309  if (BufferIsValid(buf))
311  MemoryContextSwitchTo(oldcxt);
312  if (tupcxt != NULL)
313  MemoryContextDelete(tupcxt);
314 
315  return false;
316 }
317 
318 /*
319  * Initialize state for a BRIN index scan.
320  *
321  * We read the metapage here to determine the pages-per-range number that this
322  * index was built with. Note that since this cannot be changed while we're
323  * holding lock on index, it's not necessary to recompute it during brinrescan.
324  */
326 brinbeginscan(Relation r, int nkeys, int norderbys)
327 {
328  IndexScanDesc scan;
329  BrinOpaque *opaque;
330 
331  scan = RelationGetIndexScan(r, nkeys, norderbys);
332 
333  opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque));
334  opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
335  scan->xs_snapshot);
336  opaque->bo_bdesc = brin_build_desc(r);
337  scan->opaque = opaque;
338 
339  return scan;
340 }
341 
342 /*
343  * Execute the index scan.
344  *
345  * This works by reading index TIDs from the revmap, and obtaining the index
346  * tuples pointed to by them; the summary values in the index tuples are
347  * compared to the scan keys. We return into the TID bitmap all the pages in
348  * ranges corresponding to index tuples that match the scan keys.
349  *
350  * If a TID from the revmap is read as InvalidTID, we know that range is
351  * unsummarized. Pages in those ranges need to be returned regardless of scan
352  * keys.
353  */
354 int64
356 {
357  Relation idxRel = scan->indexRelation;
359  BrinDesc *bdesc;
360  Oid heapOid;
361  Relation heapRel;
362  BrinOpaque *opaque;
363  BlockNumber nblocks;
364  BlockNumber heapBlk;
365  int totalpages = 0;
366  FmgrInfo *consistentFn;
367  MemoryContext oldcxt;
368  MemoryContext perRangeCxt;
369  BrinMemTuple *dtup;
370  BrinTuple *btup = NULL;
371  Size btupsz = 0;
372  ScanKey **keys,
373  **nullkeys;
374  int *nkeys,
375  *nnullkeys;
376  int keyno;
377  char *ptr;
378  Size len;
379  char *tmp PG_USED_FOR_ASSERTS_ONLY;
380 
381  opaque = (BrinOpaque *) scan->opaque;
382  bdesc = opaque->bo_bdesc;
383  pgstat_count_index_scan(idxRel);
384 
385  /*
386  * We need to know the size of the table so that we know how long to
387  * iterate on the revmap.
388  */
389  heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
390  heapRel = table_open(heapOid, AccessShareLock);
391  nblocks = RelationGetNumberOfBlocks(heapRel);
392  table_close(heapRel, AccessShareLock);
393 
394  /*
395  * Make room for the consistent support procedures of indexed columns. We
396  * don't look them up here; we do that lazily the first time we see a scan
397  * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
398  */
399  consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
400 
401  /*
402  * Make room for per-attribute lists of scan keys that we'll pass to the
403  * consistent support procedure. We don't know which attributes have scan
404  * keys, so we allocate space for all attributes. That may use more memory
405  * but it's probably cheaper than determining which attributes are used.
406  *
407  * We keep null and regular keys separate, so that we can pass just the
408  * regular keys to the consistent function easily.
409  *
410  * To reduce the allocation overhead, we allocate one big chunk and then
411  * carve it into smaller arrays ourselves. All the pieces have exactly the
412  * same lifetime, so that's OK.
413  *
414  * XXX The widest index can have 32 attributes, so the amount of wasted
415  * memory is negligible. We could invent a more compact approach (with
416  * just space for used attributes) but that would make the matching more
417  * complex so it's not a good trade-off.
418  */
419  len =
420  MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* regular keys */
421  MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
422  MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
423  MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* NULL keys */
424  MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
425  MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
426 
427  ptr = palloc(len);
428  tmp = ptr;
429 
430  keys = (ScanKey **) ptr;
431  ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
432 
433  nullkeys = (ScanKey **) ptr;
434  ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
435 
436  nkeys = (int *) ptr;
437  ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
438 
439  nnullkeys = (int *) ptr;
440  ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
441 
442  for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
443  {
444  keys[i] = (ScanKey *) ptr;
445  ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
446 
447  nullkeys[i] = (ScanKey *) ptr;
448  ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
449  }
450 
451  Assert(tmp + len == ptr);
452 
453  /* zero the number of keys */
454  memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
455  memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
456 
457  /* Preprocess the scan keys - split them into per-attribute arrays. */
458  for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
459  {
460  ScanKey key = &scan->keyData[keyno];
461  AttrNumber keyattno = key->sk_attno;
462 
463  /*
464  * The collation of the scan key must match the collation used in the
465  * index column (but only if the search is not IS NULL/ IS NOT NULL).
466  * Otherwise we shouldn't be using this index ...
467  */
468  Assert((key->sk_flags & SK_ISNULL) ||
469  (key->sk_collation ==
470  TupleDescAttr(bdesc->bd_tupdesc,
471  keyattno - 1)->attcollation));
472 
473  /*
474  * First time we see this index attribute, so init as needed.
475  *
476  * This is a bit of an overkill - we don't know how many scan keys are
477  * there for this attribute, so we simply allocate the largest number
478  * possible (as if all keys were for this attribute). This may waste a
479  * bit of memory, but we only expect small number of scan keys in
480  * general, so this should be negligible, and repeated repalloc calls
481  * are not free either.
482  */
483  if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
484  {
485  FmgrInfo *tmp;
486 
487  /* First time we see this attribute, so no key/null keys. */
488  Assert(nkeys[keyattno - 1] == 0);
489  Assert(nnullkeys[keyattno - 1] == 0);
490 
491  tmp = index_getprocinfo(idxRel, keyattno,
493  fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
495  }
496 
497  /* Add key to the proper per-attribute array. */
498  if (key->sk_flags & SK_ISNULL)
499  {
500  nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
501  nnullkeys[keyattno - 1]++;
502  }
503  else
504  {
505  keys[keyattno - 1][nkeys[keyattno - 1]] = key;
506  nkeys[keyattno - 1]++;
507  }
508  }
509 
510  /* allocate an initial in-memory tuple, out of the per-range memcxt */
511  dtup = brin_new_memtuple(bdesc);
512 
513  /*
514  * Setup and use a per-range memory context, which is reset every time we
515  * loop below. This avoids having to free the tuples within the loop.
516  */
518  "bringetbitmap cxt",
520  oldcxt = MemoryContextSwitchTo(perRangeCxt);
521 
522  /*
523  * Now scan the revmap. We start by querying for heap page 0,
524  * incrementing by the number of pages per range; this gives us a full
525  * view of the table.
526  */
527  for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
528  {
529  bool addrange;
530  bool gottuple = false;
531  BrinTuple *tup;
532  OffsetNumber off;
533  Size size;
534 
536 
538 
539  tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
540  &off, &size, BUFFER_LOCK_SHARE,
541  scan->xs_snapshot);
542  if (tup)
543  {
544  gottuple = true;
545  btup = brin_copy_tuple(tup, size, btup, &btupsz);
547  }
548 
549  /*
550  * For page ranges with no indexed tuple, we must return the whole
551  * range; otherwise, compare it to the scan keys.
552  */
553  if (!gottuple)
554  {
555  addrange = true;
556  }
557  else
558  {
559  dtup = brin_deform_tuple(bdesc, btup, dtup);
560  if (dtup->bt_placeholder)
561  {
562  /*
563  * Placeholder tuples are always returned, regardless of the
564  * values stored in them.
565  */
566  addrange = true;
567  }
568  else
569  {
570  int attno;
571 
572  /*
573  * Compare scan keys with summary values stored for the range.
574  * If scan keys are matched, the page range must be added to
575  * the bitmap. We initially assume the range needs to be
576  * added; in particular this serves the case where there are
577  * no keys.
578  */
579  addrange = true;
580  for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
581  {
582  BrinValues *bval;
583  Datum add;
584  Oid collation;
585 
586  /*
587  * skip attributes without any scan keys (both regular and
588  * IS [NOT] NULL)
589  */
590  if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
591  continue;
592 
593  bval = &dtup->bt_columns[attno - 1];
594 
595  /*
596  * First check if there are any IS [NOT] NULL scan keys,
597  * and if we're violating them. In that case we can
598  * terminate early, without invoking the support function.
599  *
600  * As there may be more keys, we can only determine
601  * mismatch within this loop.
602  */
603  if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
604  !check_null_keys(bval, nullkeys[attno - 1],
605  nnullkeys[attno - 1]))
606  {
607  /*
608  * If any of the IS [NOT] NULL keys failed, the page
609  * range as a whole can't pass. So terminate the loop.
610  */
611  addrange = false;
612  break;
613  }
614 
615  /*
616  * So either there are no IS [NOT] NULL keys, or all
617  * passed. If there are no regular scan keys, we're done -
618  * the page range matches. If there are regular keys, but
619  * the page range is marked as 'all nulls' it can't
620  * possibly pass (we're assuming the operators are
621  * strict).
622  */
623 
624  /* No regular scan keys - page range as a whole passes. */
625  if (!nkeys[attno - 1])
626  continue;
627 
628  Assert((nkeys[attno - 1] > 0) &&
629  (nkeys[attno - 1] <= scan->numberOfKeys));
630 
631  /* If it is all nulls, it cannot possibly be consistent. */
632  if (bval->bv_allnulls)
633  {
634  addrange = false;
635  break;
636  }
637 
638  /*
639  * Collation from the first key (has to be the same for
640  * all keys for the same attribute).
641  */
642  collation = keys[attno - 1][0]->sk_collation;
643 
644  /*
645  * Check whether the scan key is consistent with the page
646  * range values; if so, have the pages in the range added
647  * to the output bitmap.
648  *
649  * The opclass may or may not support processing of
650  * multiple scan keys. We can determine that based on the
651  * number of arguments - functions with extra parameter
652  * (number of scan keys) do support this, otherwise we
653  * have to simply pass the scan keys one by one.
654  */
655  if (consistentFn[attno - 1].fn_nargs >= 4)
656  {
657  /* Check all keys at once */
658  add = FunctionCall4Coll(&consistentFn[attno - 1],
659  collation,
660  PointerGetDatum(bdesc),
661  PointerGetDatum(bval),
662  PointerGetDatum(keys[attno - 1]),
663  Int32GetDatum(nkeys[attno - 1]));
664  addrange = DatumGetBool(add);
665  }
666  else
667  {
668  /*
669  * Check keys one by one
670  *
671  * When there are multiple scan keys, failure to meet
672  * the criteria for a single one of them is enough to
673  * discard the range as a whole, so break out of the
674  * loop as soon as a false return value is obtained.
675  */
676  int keyno;
677 
678  for (keyno = 0; keyno < nkeys[attno - 1]; keyno++)
679  {
680  add = FunctionCall3Coll(&consistentFn[attno - 1],
681  keys[attno - 1][keyno]->sk_collation,
682  PointerGetDatum(bdesc),
683  PointerGetDatum(bval),
684  PointerGetDatum(keys[attno - 1][keyno]));
685  addrange = DatumGetBool(add);
686  if (!addrange)
687  break;
688  }
689  }
690  }
691  }
692  }
693 
694  /* add the pages in the range to the output bitmap, if needed */
695  if (addrange)
696  {
697  BlockNumber pageno;
698 
699  for (pageno = heapBlk;
700  pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
701  pageno++)
702  {
703  MemoryContextSwitchTo(oldcxt);
704  tbm_add_page(tbm, pageno);
705  totalpages++;
706  MemoryContextSwitchTo(perRangeCxt);
707  }
708  }
709  }
710 
711  MemoryContextSwitchTo(oldcxt);
712  MemoryContextDelete(perRangeCxt);
713 
714  if (buf != InvalidBuffer)
716 
717  /*
718  * XXX We have an approximation of the number of *pages* that our scan
719  * returns, but we don't have a precise idea of the number of heap tuples
720  * involved.
721  */
722  return totalpages * 10;
723 }
724 
725 /*
726  * Re-initialize state for a BRIN index scan
727  */
728 void
729 brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
730  ScanKey orderbys, int norderbys)
731 {
732  /*
733  * Other index AMs preprocess the scan keys at this point, or sometime
734  * early during the scan; this lets them optimize by removing redundant
735  * keys, or doing early returns when they are impossible to satisfy; see
736  * _bt_preprocess_keys for an example. Something like that could be added
737  * here someday, too.
738  */
739 
740  if (scankey && scan->numberOfKeys > 0)
741  memmove(scan->keyData, scankey,
742  scan->numberOfKeys * sizeof(ScanKeyData));
743 }
744 
745 /*
746  * Close down a BRIN index scan
747  */
748 void
750 {
751  BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
752 
754  brin_free_desc(opaque->bo_bdesc);
755  pfree(opaque);
756 }
757 
758 /*
759  * Per-heap-tuple callback for table_index_build_scan.
760  *
761  * Note we don't worry about the page range at the end of the table here; it is
762  * present in the build state struct after we're called the last time, but not
763  * inserted into the index. Caller must ensure to do so, if appropriate.
764  */
765 static void
767  ItemPointer tid,
768  Datum *values,
769  bool *isnull,
770  bool tupleIsAlive,
771  void *brstate)
772 {
773  BrinBuildState *state = (BrinBuildState *) brstate;
774  BlockNumber thisblock;
775 
776  thisblock = ItemPointerGetBlockNumber(tid);
777 
778  /*
779  * If we're in a block that belongs to a future range, summarize what
780  * we've got and start afresh. Note the scan might have skipped many
781  * pages, if they were devoid of live tuples; make sure to insert index
782  * tuples for those too.
783  */
784  while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
785  {
786 
787  BRIN_elog((DEBUG2,
788  "brinbuildCallback: completed a range: %u--%u",
789  state->bs_currRangeStart,
790  state->bs_currRangeStart + state->bs_pagesPerRange));
791 
792  /* create the index tuple and insert it */
794 
795  /* set state to correspond to the next range */
796  state->bs_currRangeStart += state->bs_pagesPerRange;
797 
798  /* re-initialize state for it */
799  brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
800  }
801 
802  /* Accumulate the current tuple into the running state */
803  (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
804  values, isnull);
805 }
806 
807 /*
808  * brinbuild() -- build a new BRIN index.
809  */
812 {
813  IndexBuildResult *result;
814  double reltuples;
815  double idxtuples;
816  BrinRevmap *revmap;
818  Buffer meta;
819  BlockNumber pagesPerRange;
820 
821  /*
822  * We expect to be called exactly once for any index relation.
823  */
825  elog(ERROR, "index \"%s\" already contains data",
827 
828  /*
829  * Critical section not required, because on error the creation of the
830  * whole relation will be rolled back.
831  */
832 
833  meta = ReadBuffer(index, P_NEW);
836 
839  MarkBufferDirty(meta);
840 
841  if (RelationNeedsWAL(index))
842  {
843  xl_brin_createidx xlrec;
844  XLogRecPtr recptr;
845  Page page;
846 
849 
850  XLogBeginInsert();
851  XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
853 
854  recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
855 
856  page = BufferGetPage(meta);
857  PageSetLSN(page, recptr);
858  }
859 
860  UnlockReleaseBuffer(meta);
861 
862  /*
863  * Initialize our state, including the deformed tuple state.
864  */
865  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
866  state = initialize_brin_buildstate(index, revmap, pagesPerRange);
867 
868  /*
869  * Now scan the relation. No syncscan allowed here because we want the
870  * heap blocks in physical order.
871  */
872  reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
873  brinbuildCallback, (void *) state, NULL);
874 
875  /* process the final batch */
877 
878  /* release resources */
879  idxtuples = state->bs_numtuples;
880  brinRevmapTerminate(state->bs_rmAccess);
882 
883  /*
884  * Return statistics
885  */
886  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
887 
888  result->heap_tuples = reltuples;
889  result->index_tuples = idxtuples;
890 
891  return result;
892 }
893 
894 void
896 {
897  Buffer metabuf;
898 
899  /* An empty BRIN index has a metapage only. */
900  metabuf =
903 
904  /* Initialize and xlog metabuffer. */
908  MarkBufferDirty(metabuf);
909  log_newpage_buffer(metabuf, true);
911 
912  UnlockReleaseBuffer(metabuf);
913 }
914 
915 /*
916  * brinbulkdelete
917  * Since there are no per-heap-tuple index tuples in BRIN indexes,
918  * there's not a lot we can do here.
919  *
920  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
921  * tuple is deleted), meaning the need to re-run summarization on the affected
922  * range. Would need to add an extra flag in brintuples for that.
923  */
926  IndexBulkDeleteCallback callback, void *callback_state)
927 {
928  /* allocate stats if first time through, else re-use existing struct */
929  if (stats == NULL)
931 
932  return stats;
933 }
934 
935 /*
936  * This routine is in charge of "vacuuming" a BRIN index: we just summarize
937  * ranges that are currently unsummarized.
938  */
941 {
942  Relation heapRel;
943 
944  /* No-op in ANALYZE ONLY mode */
945  if (info->analyze_only)
946  return stats;
947 
948  if (!stats)
950  stats->num_pages = RelationGetNumberOfBlocks(info->index);
951  /* rest of stats is initialized by zeroing */
952 
953  heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
955 
956  brin_vacuum_scan(info->index, info->strategy);
957 
958  brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
959  &stats->num_index_tuples, &stats->num_index_tuples);
960 
961  table_close(heapRel, AccessShareLock);
962 
963  return stats;
964 }
965 
966 /*
967  * reloptions processor for BRIN indexes
968  */
969 bytea *
970 brinoptions(Datum reloptions, bool validate)
971 {
972  static const relopt_parse_elt tab[] = {
973  {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
974  {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
975  };
976 
977  return (bytea *) build_reloptions(reloptions, validate,
979  sizeof(BrinOptions),
980  tab, lengthof(tab));
981 }
982 
983 /*
984  * SQL-callable function to scan through an index and summarize all ranges
985  * that are not currently summarized.
986  */
987 Datum
989 {
990  Datum relation = PG_GETARG_DATUM(0);
991 
993  relation,
995 }
996 
997 /*
998  * SQL-callable function to summarize the indicated page range, if not already
999  * summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
1000  * unsummarized ranges are summarized.
1001  */
1002 Datum
1004 {
1005  Oid indexoid = PG_GETARG_OID(0);
1006  int64 heapBlk64 = PG_GETARG_INT64(1);
1007  BlockNumber heapBlk;
1008  Oid heapoid;
1009  Relation indexRel;
1010  Relation heapRel;
1011  Oid save_userid;
1012  int save_sec_context;
1013  int save_nestlevel;
1014  double numSummarized = 0;
1015 
1016  if (RecoveryInProgress())
1017  ereport(ERROR,
1018  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1019  errmsg("recovery is in progress"),
1020  errhint("BRIN control functions cannot be executed during recovery.")));
1021 
1022  if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
1023  ereport(ERROR,
1024  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1025  errmsg("block number out of range: %lld",
1026  (long long) heapBlk64)));
1027  heapBlk = (BlockNumber) heapBlk64;
1028 
1029  /*
1030  * We must lock table before index to avoid deadlocks. However, if the
1031  * passed indexoid isn't an index then IndexGetRelation() will fail.
1032  * Rather than emitting a not-very-helpful error message, postpone
1033  * complaining, expecting that the is-it-an-index test below will fail.
1034  */
1035  heapoid = IndexGetRelation(indexoid, true);
1036  if (OidIsValid(heapoid))
1037  {
1038  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
1039 
1040  /*
1041  * Autovacuum calls us. For its benefit, switch to the table owner's
1042  * userid, so that any index functions are run as that user. Also
1043  * lock down security-restricted operations and arrange to make GUC
1044  * variable changes local to this command. This is harmless, albeit
1045  * unnecessary, when called from SQL, because we fail shortly if the
1046  * user does not own the index.
1047  */
1048  GetUserIdAndSecContext(&save_userid, &save_sec_context);
1049  SetUserIdAndSecContext(heapRel->rd_rel->relowner,
1050  save_sec_context | SECURITY_RESTRICTED_OPERATION);
1051  save_nestlevel = NewGUCNestLevel();
1052  }
1053  else
1054  heapRel = NULL;
1055 
1056  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1057 
1058  /* Must be a BRIN index */
1059  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1060  indexRel->rd_rel->relam != BRIN_AM_OID)
1061  ereport(ERROR,
1062  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1063  errmsg("\"%s\" is not a BRIN index",
1064  RelationGetRelationName(indexRel))));
1065 
1066  /* User must own the index (comparable to privileges needed for VACUUM) */
1067  if (heapRel != NULL && !pg_class_ownercheck(indexoid, save_userid))
1069  RelationGetRelationName(indexRel));
1070 
1071  /*
1072  * Since we did the IndexGetRelation call above without any lock, it's
1073  * barely possible that a race against an index drop/recreation could have
1074  * netted us the wrong table. Recheck.
1075  */
1076  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1077  ereport(ERROR,
1079  errmsg("could not open parent table of index \"%s\"",
1080  RelationGetRelationName(indexRel))));
1081 
1082  /* OK, do it */
1083  brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
1084 
1085  /* Roll back any GUC changes executed by index functions */
1086  AtEOXact_GUC(false, save_nestlevel);
1087 
1088  /* Restore userid and security context */
1089  SetUserIdAndSecContext(save_userid, save_sec_context);
1090 
1093 
1094  PG_RETURN_INT32((int32) numSummarized);
1095 }
1096 
1097 /*
1098  * SQL-callable interface to mark a range as no longer summarized
1099  */
1100 Datum
1102 {
1103  Oid indexoid = PG_GETARG_OID(0);
1104  int64 heapBlk64 = PG_GETARG_INT64(1);
1105  BlockNumber heapBlk;
1106  Oid heapoid;
1107  Relation heapRel;
1108  Relation indexRel;
1109  bool done;
1110 
1111  if (RecoveryInProgress())
1112  ereport(ERROR,
1113  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1114  errmsg("recovery is in progress"),
1115  errhint("BRIN control functions cannot be executed during recovery.")));
1116 
1117  if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
1118  ereport(ERROR,
1119  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1120  errmsg("block number out of range: %lld",
1121  (long long) heapBlk64)));
1122  heapBlk = (BlockNumber) heapBlk64;
1123 
1124  /*
1125  * We must lock table before index to avoid deadlocks. However, if the
1126  * passed indexoid isn't an index then IndexGetRelation() will fail.
1127  * Rather than emitting a not-very-helpful error message, postpone
1128  * complaining, expecting that the is-it-an-index test below will fail.
1129  *
1130  * Unlike brin_summarize_range(), autovacuum never calls this. Hence, we
1131  * don't switch userid.
1132  */
1133  heapoid = IndexGetRelation(indexoid, true);
1134  if (OidIsValid(heapoid))
1135  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
1136  else
1137  heapRel = NULL;
1138 
1139  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1140 
1141  /* Must be a BRIN index */
1142  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1143  indexRel->rd_rel->relam != BRIN_AM_OID)
1144  ereport(ERROR,
1145  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1146  errmsg("\"%s\" is not a BRIN index",
1147  RelationGetRelationName(indexRel))));
1148 
1149  /* User must own the index (comparable to privileges needed for VACUUM) */
1150  if (!pg_class_ownercheck(indexoid, GetUserId()))
1152  RelationGetRelationName(indexRel));
1153 
1154  /*
1155  * Since we did the IndexGetRelation call above without any lock, it's
1156  * barely possible that a race against an index drop/recreation could have
1157  * netted us the wrong table. Recheck.
1158  */
1159  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1160  ereport(ERROR,
1162  errmsg("could not open parent table of index \"%s\"",
1163  RelationGetRelationName(indexRel))));
1164 
1165  /* the revmap does the hard work */
1166  do
1167  {
1168  done = brinRevmapDesummarizeRange(indexRel, heapBlk);
1169  }
1170  while (!done);
1171 
1174 
1175  PG_RETURN_VOID();
1176 }
1177 
1178 /*
1179  * Build a BrinDesc used to create or scan a BRIN index
1180  */
1181 BrinDesc *
1183 {
1184  BrinOpcInfo **opcinfo;
1185  BrinDesc *bdesc;
1186  TupleDesc tupdesc;
1187  int totalstored = 0;
1188  int keyno;
1189  long totalsize;
1190  MemoryContext cxt;
1191  MemoryContext oldcxt;
1192 
1194  "brin desc cxt",
1196  oldcxt = MemoryContextSwitchTo(cxt);
1197  tupdesc = RelationGetDescr(rel);
1198 
1199  /*
1200  * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1201  * the number of columns stored, since the number is opclass-defined.
1202  */
1203  opcinfo = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * tupdesc->natts);
1204  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1205  {
1206  FmgrInfo *opcInfoFn;
1207  Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1208 
1209  opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);
1210 
1211  opcinfo[keyno] = (BrinOpcInfo *)
1212  DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
1213  totalstored += opcinfo[keyno]->oi_nstored;
1214  }
1215 
1216  /* Allocate our result struct and fill it in */
1217  totalsize = offsetof(BrinDesc, bd_info) +
1218  sizeof(BrinOpcInfo *) * tupdesc->natts;
1219 
1220  bdesc = palloc(totalsize);
1221  bdesc->bd_context = cxt;
1222  bdesc->bd_index = rel;
1223  bdesc->bd_tupdesc = tupdesc;
1224  bdesc->bd_disktdesc = NULL; /* generated lazily */
1225  bdesc->bd_totalstored = totalstored;
1226 
1227  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1228  bdesc->bd_info[keyno] = opcinfo[keyno];
1229  pfree(opcinfo);
1230 
1231  MemoryContextSwitchTo(oldcxt);
1232 
1233  return bdesc;
1234 }
1235 
1236 void
1238 {
1239  /* make sure the tupdesc is still valid */
1240  Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1241  /* no need for retail pfree */
1243 }
1244 
1245 /*
1246  * Fetch index's statistical data into *stats
1247  */
1248 void
1250 {
1251  Buffer metabuffer;
1252  Page metapage;
1253  BrinMetaPageData *metadata;
1254 
1255  metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
1256  LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
1257  metapage = BufferGetPage(metabuffer);
1258  metadata = (BrinMetaPageData *) PageGetContents(metapage);
1259 
1260  stats->pagesPerRange = metadata->pagesPerRange;
1261  stats->revmapNumPages = metadata->lastRevmapPage - 1;
1262 
1263  UnlockReleaseBuffer(metabuffer);
1264 }
1265 
1266 /*
1267  * Initialize a BrinBuildState appropriate to create tuples on the given index.
1268  */
1269 static BrinBuildState *
1271  BlockNumber pagesPerRange)
1272 {
1274 
1275  state = palloc(sizeof(BrinBuildState));
1276 
1277  state->bs_irel = idxRel;
1278  state->bs_numtuples = 0;
1279  state->bs_currentInsertBuf = InvalidBuffer;
1280  state->bs_pagesPerRange = pagesPerRange;
1281  state->bs_currRangeStart = 0;
1282  state->bs_rmAccess = revmap;
1283  state->bs_bdesc = brin_build_desc(idxRel);
1284  state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1285 
1286  return state;
1287 }
1288 
1289 /*
1290  * Release resources associated with a BrinBuildState.
1291  */
1292 static void
1294 {
1295  /*
1296  * Release the last index buffer used. We might as well ensure that
1297  * whatever free space remains in that page is available in FSM, too.
1298  */
1299  if (!BufferIsInvalid(state->bs_currentInsertBuf))
1300  {
1301  Page page;
1302  Size freespace;
1303  BlockNumber blk;
1304 
1305  page = BufferGetPage(state->bs_currentInsertBuf);
1306  freespace = PageGetFreeSpace(page);
1307  blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
1308  ReleaseBuffer(state->bs_currentInsertBuf);
1309  RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1310  FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1311  }
1312 
1313  brin_free_desc(state->bs_bdesc);
1314  pfree(state->bs_dtuple);
1315  pfree(state);
1316 }
1317 
1318 /*
1319  * On the given BRIN index, summarize the heap page range that corresponds
1320  * to the heap block number given.
1321  *
1322  * This routine can run in parallel with insertions into the heap. To avoid
1323  * missing those values from the summary tuple, we first insert a placeholder
1324  * index tuple into the index, then execute the heap scan; transactions
1325  * concurrent with the scan update the placeholder tuple. After the scan, we
1326  * union the placeholder tuple with the one computed by this routine. The
1327  * update of the index value happens in a loop, so that if somebody updates
1328  * the placeholder tuple after we read it, we detect the case and try again.
1329  * This ensures that the concurrently inserted tuples are not lost.
1330  *
1331  * A further corner case is this routine being asked to summarize the partial
1332  * range at the end of the table. heapNumBlocks is the (possibly outdated)
1333  * table size; if we notice that the requested range lies beyond that size,
1334  * we re-compute the table size after inserting the placeholder tuple, to
1335  * avoid missing pages that were appended recently.
1336  */
1337 static void
1339  BlockNumber heapBlk, BlockNumber heapNumBlks)
1340 {
1341  Buffer phbuf;
1342  BrinTuple *phtup;
1343  Size phsz;
1344  OffsetNumber offset;
1345  BlockNumber scanNumBlks;
1346 
1347  /*
1348  * Insert the placeholder tuple
1349  */
1350  phbuf = InvalidBuffer;
1351  phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
1352  offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
1353  state->bs_rmAccess, &phbuf,
1354  heapBlk, phtup, phsz);
1355 
1356  /*
1357  * Compute range end. We hold ShareUpdateExclusive lock on table, so it
1358  * cannot shrink concurrently (but it can grow).
1359  */
1360  Assert(heapBlk % state->bs_pagesPerRange == 0);
1361  if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1362  {
1363  /*
1364  * If we're asked to scan what we believe to be the final range on the
1365  * table (i.e. a range that might be partial) we need to recompute our
1366  * idea of what the latest page is after inserting the placeholder
1367  * tuple. Anyone that grows the table later will update the
1368  * placeholder tuple, so it doesn't matter that we won't scan these
1369  * pages ourselves. Careful: the table might have been extended
1370  * beyond the current range, so clamp our result.
1371  *
1372  * Fortunately, this should occur infrequently.
1373  */
1374  scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1375  state->bs_pagesPerRange);
1376  }
1377  else
1378  {
1379  /* Easy case: range is known to be complete */
1380  scanNumBlks = state->bs_pagesPerRange;
1381  }
1382 
1383  /*
1384  * Execute the partial heap scan covering the heap blocks in the specified
1385  * page range, summarizing the heap tuples in it. This scan stops just
1386  * short of brinbuildCallback creating the new index entry.
1387  *
1388  * Note that it is critical we use the "any visible" mode of
1389  * table_index_build_range_scan here: otherwise, we would miss tuples
1390  * inserted by transactions that are still in progress, among other corner
1391  * cases.
1392  */
1393  state->bs_currRangeStart = heapBlk;
1394  table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
1395  heapBlk, scanNumBlks,
1396  brinbuildCallback, (void *) state, NULL);
1397 
1398  /*
1399  * Now we update the values obtained by the scan with the placeholder
1400  * tuple. We do this in a loop which only terminates if we're able to
1401  * update the placeholder tuple successfully; if we are not, this means
1402  * somebody else modified the placeholder tuple after we read it.
1403  */
1404  for (;;)
1405  {
1406  BrinTuple *newtup;
1407  Size newsize;
1408  bool didupdate;
1409  bool samepage;
1410 
1412 
1413  /*
1414  * Update the summary tuple and try to update.
1415  */
1416  newtup = brin_form_tuple(state->bs_bdesc,
1417  heapBlk, state->bs_dtuple, &newsize);
1418  samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
1419  didupdate =
1420  brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
1421  state->bs_rmAccess, heapBlk, phbuf, offset,
1422  phtup, phsz, newtup, newsize, samepage);
1423  brin_free_tuple(phtup);
1424  brin_free_tuple(newtup);
1425 
1426  /* If the update succeeded, we're done. */
1427  if (didupdate)
1428  break;
1429 
1430  /*
1431  * If the update didn't work, it might be because somebody updated the
1432  * placeholder tuple concurrently. Extract the new version, union it
1433  * with the values we have from the scan, and start over. (There are
1434  * other reasons for the update to fail, but it's simple to treat them
1435  * the same.)
1436  */
1437  phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
1438  &offset, &phsz, BUFFER_LOCK_SHARE,
1439  NULL);
1440  /* the placeholder tuple must exist */
1441  if (phtup == NULL)
1442  elog(ERROR, "missing placeholder tuple");
1443  phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
1445 
1446  /* merge it into the tuple from the heap scan */
1447  union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
1448  }
1449 
1450  ReleaseBuffer(phbuf);
1451 }
1452 
1453 /*
1454  * Summarize page ranges that are not already summarized. If pageRange is
1455  * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
1456  * page range containing the given heap page number is scanned.
1457  * If include_partial is true, then the partial range at the end of the table
1458  * is summarized, otherwise not.
1459  *
1460  * For each new index tuple inserted, *numSummarized (if not NULL) is
1461  * incremented; for each existing tuple, *numExisting (if not NULL) is
1462  * incremented.
1463  */
1464 static void
1466  bool include_partial, double *numSummarized, double *numExisting)
1467 {
1468  BrinRevmap *revmap;
1469  BrinBuildState *state = NULL;
1470  IndexInfo *indexInfo = NULL;
1471  BlockNumber heapNumBlocks;
1472  BlockNumber pagesPerRange;
1473  Buffer buf;
1474  BlockNumber startBlk;
1475 
1476  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
1477 
1478  /* determine range of pages to process */
1479  heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1480  if (pageRange == BRIN_ALL_BLOCKRANGES)
1481  startBlk = 0;
1482  else
1483  {
1484  startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1485  heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
1486  }
1487  if (startBlk > heapNumBlocks)
1488  {
1489  /* Nothing to do if start point is beyond end of table */
1490  brinRevmapTerminate(revmap);
1491  return;
1492  }
1493 
1494  /*
1495  * Scan the revmap to find unsummarized items.
1496  */
1497  buf = InvalidBuffer;
1498  for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
1499  {
1500  BrinTuple *tup;
1501  OffsetNumber off;
1502 
1503  /*
1504  * Unless requested to summarize even a partial range, go away now if
1505  * we think the next range is partial. Caller would pass true when it
1506  * is typically run once bulk data loading is done
1507  * (brin_summarize_new_values), and false when it is typically the
1508  * result of arbitrarily-scheduled maintenance command (vacuuming).
1509  */
1510  if (!include_partial &&
1511  (startBlk + pagesPerRange > heapNumBlocks))
1512  break;
1513 
1515 
1516  tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
1517  BUFFER_LOCK_SHARE, NULL);
1518  if (tup == NULL)
1519  {
1520  /* no revmap entry for this heap range. Summarize it. */
1521  if (state == NULL)
1522  {
1523  /* first time through */
1524  Assert(!indexInfo);
1526  pagesPerRange);
1527  indexInfo = BuildIndexInfo(index);
1528  }
1529  summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
1530 
1531  /* and re-initialize state for the next range */
1532  brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1533 
1534  if (numSummarized)
1535  *numSummarized += 1.0;
1536  }
1537  else
1538  {
1539  if (numExisting)
1540  *numExisting += 1.0;
1542  }
1543  }
1544 
1545  if (BufferIsValid(buf))
1546  ReleaseBuffer(buf);
1547 
1548  /* free resources */
1549  brinRevmapTerminate(revmap);
1550  if (state)
1551  {
1553  pfree(indexInfo);
1554  }
1555 }
1556 
1557 /*
1558  * Given a deformed tuple in the build state, convert it into the on-disk
1559  * format and insert it into the index, making the revmap point to it.
1560  */
1561 static void
1563 {
1564  BrinTuple *tup;
1565  Size size;
1566 
1567  tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1568  state->bs_dtuple, &size);
1569  brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1570  &state->bs_currentInsertBuf, state->bs_currRangeStart,
1571  tup, size);
1572  state->bs_numtuples++;
1573 
1574  pfree(tup);
1575 }
1576 
1577 /*
1578  * Given two deformed tuples, adjust the first one so that it's consistent
1579  * with the summary values in both.
1580  */
1581 static void
1583 {
1584  int keyno;
1585  BrinMemTuple *db;
1586  MemoryContext cxt;
1587  MemoryContext oldcxt;
1588 
1589  /* Use our own memory context to avoid retail pfree */
1591  "brin union",
1593  oldcxt = MemoryContextSwitchTo(cxt);
1594  db = brin_deform_tuple(bdesc, b, NULL);
1595  MemoryContextSwitchTo(oldcxt);
1596 
1597  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
1598  {
1599  FmgrInfo *unionFn;
1600  BrinValues *col_a = &a->bt_columns[keyno];
1601  BrinValues *col_b = &db->bt_columns[keyno];
1602  BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
1603 
1604  if (opcinfo->oi_regular_nulls)
1605  {
1606  /* Adjust "hasnulls". */
1607  if (!col_a->bv_hasnulls && col_b->bv_hasnulls)
1608  col_a->bv_hasnulls = true;
1609 
1610  /* If there are no values in B, there's nothing left to do. */
1611  if (col_b->bv_allnulls)
1612  continue;
1613 
1614  /*
1615  * Adjust "allnulls". If A doesn't have values, just copy the
1616  * values from B into A, and we're done. We cannot run the
1617  * operators in this case, because values in A might contain
1618  * garbage. Note we already established that B contains values.
1619  */
1620  if (col_a->bv_allnulls)
1621  {
1622  int i;
1623 
1624  col_a->bv_allnulls = false;
1625 
1626  for (i = 0; i < opcinfo->oi_nstored; i++)
1627  col_a->bv_values[i] =
1628  datumCopy(col_b->bv_values[i],
1629  opcinfo->oi_typcache[i]->typbyval,
1630  opcinfo->oi_typcache[i]->typlen);
1631 
1632  continue;
1633  }
1634  }
1635 
1636  unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
1638  FunctionCall3Coll(unionFn,
1639  bdesc->bd_index->rd_indcollation[keyno],
1640  PointerGetDatum(bdesc),
1641  PointerGetDatum(col_a),
1642  PointerGetDatum(col_b));
1643  }
1644 
1645  MemoryContextDelete(cxt);
1646 }
1647 
1648 /*
1649  * brin_vacuum_scan
1650  * Do a complete scan of the index during VACUUM.
1651  *
1652  * This routine scans the complete index looking for uncatalogued index pages,
1653  * i.e. those that might have been lost due to a crash after index extension
1654  * and such.
1655  */
1656 static void
1658 {
1659  BlockNumber nblocks;
1660  BlockNumber blkno;
1661 
1662  /*
1663  * Scan the index in physical order, and clean up any possible mess in
1664  * each page.
1665  */
1666  nblocks = RelationGetNumberOfBlocks(idxrel);
1667  for (blkno = 0; blkno < nblocks; blkno++)
1668  {
1669  Buffer buf;
1670 
1672 
1673  buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
1674  RBM_NORMAL, strategy);
1675 
1676  brin_page_cleanup(idxrel, buf);
1677 
1678  ReleaseBuffer(buf);
1679  }
1680 
1681  /*
1682  * Update all upper pages in the index's FSM, as well. This ensures not
1683  * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
1684  * but also that any pre-existing damage or out-of-dateness is repaired.
1685  */
1686  FreeSpaceMapVacuum(idxrel);
1687 }
1688 
1689 static bool
1691  Datum *values, bool *nulls)
1692 {
1693  int keyno;
1694  bool modified = false;
1695 
1696  /*
1697  * Compare the key values of the new tuple to the stored index values; our
1698  * deformed tuple will get updated if the new tuple doesn't fit the
1699  * original range (note this means we can't break out of the loop early).
1700  * Make a note of whether this happens, so that we know to insert the
1701  * modified tuple later.
1702  */
1703  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
1704  {
1705  Datum result;
1706  BrinValues *bval;
1707  FmgrInfo *addValue;
1708 
1709  bval = &dtup->bt_columns[keyno];
1710 
1711  if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno])
1712  {
1713  /*
1714  * If the new value is null, we record that we saw it if it's the
1715  * first one; otherwise, there's nothing to do.
1716  */
1717  if (!bval->bv_hasnulls)
1718  {
1719  bval->bv_hasnulls = true;
1720  modified = true;
1721  }
1722 
1723  continue;
1724  }
1725 
1726  addValue = index_getprocinfo(idxRel, keyno + 1,
1728  result = FunctionCall4Coll(addValue,
1729  idxRel->rd_indcollation[keyno],
1730  PointerGetDatum(bdesc),
1731  PointerGetDatum(bval),
1732  values[keyno],
1733  nulls[keyno]);
1734  /* if that returned true, we need to insert the updated tuple */
1735  modified |= DatumGetBool(result);
1736  }
1737 
1738  return modified;
1739 }
1740 
1741 static bool
1742 check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
1743 {
1744  int keyno;
1745 
1746  /*
1747  * First check if there are any IS [NOT] NULL scan keys, and if we're
1748  * violating them.
1749  */
1750  for (keyno = 0; keyno < nnullkeys; keyno++)
1751  {
1752  ScanKey key = nullkeys[keyno];
1753 
1754  Assert(key->sk_attno == bval->bv_attno);
1755 
1756  /* Handle only IS NULL/IS NOT NULL tests */
1757  if (!(key->sk_flags & SK_ISNULL))
1758  continue;
1759 
1760  if (key->sk_flags & SK_SEARCHNULL)
1761  {
1762  /* IS NULL scan key, but range has no NULLs */
1763  if (!bval->bv_allnulls && !bval->bv_hasnulls)
1764  return false;
1765  }
1766  else if (key->sk_flags & SK_SEARCHNOTNULL)
1767  {
1768  /*
1769  * For IS NOT NULL, we can only skip ranges that are known to have
1770  * only nulls.
1771  */
1772  if (bval->bv_allnulls)
1773  return false;
1774  }
1775  else
1776  {
1777  /*
1778  * Neither IS NULL nor IS NOT NULL was used; assume all indexable
1779  * operators are strict and thus return false with NULL value in
1780  * the scan key.
1781  */
1782  return false;
1783  }
1784  }
1785 
1786  return true;
1787 }
@ ACLCHECK_NOT_OWNER
Definition: acl.h:184
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:5171
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3512
int16 AttrNumber
Definition: attnum.h:21
bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber blkno)
Definition: autovacuum.c:3247
@ AVW_BRINSummarizeRange
Definition: autovacuum.h:25
uint32 BlockNumber
Definition: block.h:31
#define MaxBlockNumber
Definition: block.h:35
static Datum values[MAXATTR]
Definition: bootstrap.c:156
IndexBulkDeleteResult * brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: brin.c:940
static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
Definition: brin.c:1657
static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange)
Definition: brin.c:1270
Datum brin_desummarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:1101
void brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: brin.c:729
static void terminate_brin_buildstate(BrinBuildState *state)
Definition: brin.c:1293
Datum brin_summarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:1003
#define BRIN_ALL_BLOCKRANGES
Definition: brin.c:70
Datum brin_summarize_new_values(PG_FUNCTION_ARGS)
Definition: brin.c:988
IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys)
Definition: brin.c:326
IndexBuildResult * brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: brin.c:811
int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: brin.c:355
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting)
Definition: brin.c:1465
static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, Datum *values, bool *nulls)
Definition: brin.c:1690
static void form_and_insert_tuple(BrinBuildState *state)
Definition: brin.c:1562
void brinbuildempty(Relation index)
Definition: brin.c:895
void brin_free_desc(BrinDesc *bdesc)
Definition: brin.c:1237
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
Definition: brin.c:1582
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1249
BrinDesc * brin_build_desc(Relation rel)
Definition: brin.c:1182
struct BrinBuildState BrinBuildState
struct BrinOpaque BrinOpaque
static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks)
Definition: brin.c:1338
bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: brin.c:155
Datum brinhandler(PG_FUNCTION_ARGS)
Definition: brin.c:90
bytea * brinoptions(Datum reloptions, bool validate)
Definition: brin.c:970
IndexBulkDeleteResult * brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: brin.c:925
static void brinbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
Definition: brin.c:766
void brinendscan(IndexScanDesc scan)
Definition: brin.c:749
static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
Definition: brin.c:1742
#define BrinGetPagesPerRange(relation)
Definition: brin.h:39
#define BrinGetAutoSummarize(relation)
Definition: brin.h:45
#define BRIN_LAST_OPTIONAL_PROCNUM
Definition: brin_internal.h:78
#define BRIN_PROCNUM_UNION
Definition: brin_internal.h:73
#define BRIN_PROCNUM_OPTIONS
Definition: brin_internal.h:75
#define BRIN_PROCNUM_OPCINFO
Definition: brin_internal.h:70
#define BRIN_PROCNUM_CONSISTENT
Definition: brin_internal.h:72
#define BRIN_elog(args)
Definition: brin_internal.h:85
#define BRIN_PROCNUM_ADDVALUE
Definition: brin_internal.h:71
#define BRIN_CURRENT_VERSION
Definition: brin_page.h:72
#define BRIN_METAPAGE_BLKNO
Definition: brin_page.h:75
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:54
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:620
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:343
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:487
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:324
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
Definition: brin_revmap.c:328
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode, Snapshot snapshot)
Definition: brin_revmap.c:197
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, Snapshot snapshot)
Definition: brin_revmap.c:71
void brinRevmapTerminate(BrinRevmap *revmap)
Definition: brin_revmap.c:103
BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size *size)
Definition: brin_tuple.c:99
BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc)
Definition: brin_tuple.c:479
BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
Definition: brin_tuple.c:546
BrinMemTuple * brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
Definition: brin_tuple.c:506
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:443
void brin_free_tuple(BrinTuple *tuple)
Definition: brin_tuple.c:430
BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
Definition: brin_tuple.c:385
bool brinvalidate(Oid opclassoid)
Definition: brin_validate.c:37
#define SizeOfBrinCreateIdx
Definition: brin_xlog.h:55
#define XLOG_BRIN_CREATE_INDEX
Definition: brin_xlog.h:31
int Buffer
Definition: buf.h:23
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2755
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3915
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3938
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1573
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4156
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:749
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:702
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
#define P_NEW
Definition: bufmgr.h:91
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:216
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
@ RBM_NORMAL
Definition: bufmgr.h:39
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
Pointer Page
Definition: bufpage.h:78
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:234
#define PageSetLSN(page, lsn)
Definition: bufpage.h:367
#define PageGetContents(page)
Definition: bufpage.h:245
#define Min(x, y)
Definition: c.h:986
#define MAXALIGN(LEN)
Definition: c.h:757
#define offsetof(type, field)
Definition: c.h:727
signed int int32
Definition: c.h:429
#define lengthof(array)
Definition: c.h:734
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
#define OidIsValid(objectId)
Definition: c.h:710
size_t Size
Definition: c.h:540
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define LOG
Definition: elog.h:25
#define DEBUG2
Definition: elog.h:23
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1181
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1683
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1156
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:594
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:633
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:649
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:335
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:81
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:86
IndexUniqueCheck
Definition: genam.h:115
int NewGUCNestLevel(void)
Definition: guc.c:6467
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition: guc.c:6481
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition: index.c:3520
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2418
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:803
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
Assert(fmt[strlen(fmt) - 1] !='\n')
#define AccessShareLock
Definition: lockdefs.h:36
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc0(Size size)
Definition: mcxt.c:1099
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
void * palloc(Size size)
Definition: mcxt.c:1068
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:197
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:207
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:67
#define SECURITY_RESTRICTED_OPERATION
Definition: miscadmin.h:313
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:603
Oid GetUserId(void)
Definition: miscinit.c:492
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:610
#define makeNode(_type_)
Definition: nodes.h:621
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
@ OBJECT_INDEX
Definition: parsenodes.h:2154
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:207
const void size_t len
static char * buf
Definition: pg_test_fsync.c:67
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:81
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:540
uintptr_t Datum
Definition: postgres.h:411
#define DatumGetBool(X)
Definition: postgres.h:437
#define DatumGetPointer(X)
Definition: postgres.h:593
#define Int32GetDatum(X)
Definition: postgres.h:523
#define PointerGetDatum(X)
Definition: postgres.h:600
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static void addrange(struct cvec *cv, chr from, chr to)
Definition: regc_cvec.c:90
#define RelationGetRelid(relation)
Definition: rel.h:489
#define RelationGetDescr(relation)
Definition: rel.h:515
#define RelationGetRelationName(relation)
Definition: rel.h:523
#define RelationNeedsWAL(relation)
Definition: rel.h:613
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1913
@ RELOPT_KIND_BRIN
Definition: reloptions.h:52
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
@ RELOPT_TYPE_BOOL
Definition: reloptions.h:31
@ MAIN_FORKNUM
Definition: relpath.h:43
@ INIT_FORKNUM
Definition: relpath.h:46
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7734
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_SEARCHNULL
Definition: skey.h:121
#define SK_ISNULL
Definition: skey.h:115
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
BrinMemTuple * bs_dtuple
Definition: brin.c:57
Relation bs_irel
Definition: brin.c:50
BlockNumber bs_pagesPerRange
Definition: brin.c:53
Buffer bs_currentInsertBuf
Definition: brin.c:52
int bs_numtuples
Definition: brin.c:51
BrinRevmap * bs_rmAccess
Definition: brin.c:55
BlockNumber bs_currRangeStart
Definition: brin.c:54
BrinDesc * bs_bdesc
Definition: brin.c:56
int bd_totalstored
Definition: brin_internal.h:59
TupleDesc bd_tupdesc
Definition: brin_internal.h:53
BrinOpcInfo * bd_info[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:62
Relation bd_index
Definition: brin_internal.h:50
MemoryContext bd_context
Definition: brin_internal.h:47
TupleDesc bd_disktdesc
Definition: brin_internal.h:56
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_tuple.h:54
bool bt_placeholder
Definition: brin_tuple.h:46
BlockNumber lastRevmapPage
Definition: brin_page.h:69
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bo_pagesPerRange
Definition: brin.c:65
BrinDesc * bo_bdesc
Definition: brin.c:67
BrinRevmap * bo_rmAccess
Definition: brin.c:66
TypeCacheEntry * oi_typcache[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:37
uint16 oi_nstored
Definition: brin_internal.h:28
bool oi_regular_nulls
Definition: brin_internal.h:31
BlockNumber revmapNumPages
Definition: brin.h:34
BlockNumber pagesPerRange
Definition: brin.h:33
bool bv_hasnulls
Definition: brin_tuple.h:32
Datum * bv_values
Definition: brin_tuple.h:34
AttrNumber bv_attno
Definition: brin_tuple.h:31
bool bv_allnulls
Definition: brin_tuple.h:33
Definition: fmgr.h:57
ambuildphasename_function ambuildphasename
Definition: amapi.h:270
ambuildempty_function ambuildempty
Definition: amapi.h:262
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:265
bool amclusterable
Definition: amapi.h:238
amoptions_function amoptions
Definition: amapi.h:268
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:282
amrestrpos_function amrestrpos
Definition: amapi.h:279
aminsert_function aminsert
Definition: amapi.h:263
amendscan_function amendscan
Definition: amapi.h:277
uint16 amoptsprocnum
Definition: amapi.h:218
amparallelrescan_function amparallelrescan
Definition: amapi.h:284
Oid amkeytype
Definition: amapi.h:252
bool ampredlocks
Definition: amapi.h:240
uint16 amsupport
Definition: amapi.h:216
amcostestimate_function amcostestimate
Definition: amapi.h:267
bool amcanorderbyop
Definition: amapi.h:222
amadjustmembers_function amadjustmembers
Definition: amapi.h:272
ambuild_function ambuild
Definition: amapi.h:261
bool amstorage
Definition: amapi.h:236
uint16 amstrategies
Definition: amapi.h:214
bool amoptionalkey
Definition: amapi.h:230
amgettuple_function amgettuple
Definition: amapi.h:275
amcanreturn_function amcanreturn
Definition: amapi.h:266
bool amcanunique
Definition: amapi.h:226
amgetbitmap_function amgetbitmap
Definition: amapi.h:276
amproperty_function amproperty
Definition: amapi.h:269
ambulkdelete_function ambulkdelete
Definition: amapi.h:264
bool amsearcharray
Definition: amapi.h:232
amvalidate_function amvalidate
Definition: amapi.h:271
ammarkpos_function ammarkpos
Definition: amapi.h:278
bool amcanmulticol
Definition: amapi.h:228
bool amusemaintenanceworkmem
Definition: amapi.h:246
ambeginscan_function ambeginscan
Definition: amapi.h:273
bool amcanparallel
Definition: amapi.h:242
amrescan_function amrescan
Definition: amapi.h:274
bool amcanorder
Definition: amapi.h:220
bool amhotblocking
Definition: amapi.h:248
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:283
uint8 amparallelvacuumoptions
Definition: amapi.h:250
bool amcanbackward
Definition: amapi.h:224
bool amcaninclude
Definition: amapi.h:244
bool amsearchnulls
Definition: amapi.h:234
double heap_tuples
Definition: genam.h:32
double index_tuples
Definition: genam.h:33
BlockNumber num_pages
Definition: genam.h:76
double num_index_tuples
Definition: genam.h:78
void * ii_AmCache
Definition: execnodes.h:184
MemoryContext ii_Context
Definition: execnodes.h:185
struct ScanKeyData * keyData
Definition: relscan.h:122
Relation indexRelation
Definition: relscan.h:118
struct SnapshotData * xs_snapshot
Definition: relscan.h:119
Relation index
Definition: genam.h:46
bool analyze_only
Definition: genam.h:47
BufferAccessStrategy strategy
Definition: genam.h:52
Oid * rd_indcollation
Definition: rel.h:213
Form_pg_class rd_rel
Definition: rel.h:109
Oid sk_collation
Definition: skey.h:70
int tdrefcount
Definition: tupdesc.h:84
bool typbyval
Definition: typcache.h:40
int16 typlen
Definition: typcache.h:39
Definition: type.h:90
Definition: regguts.h:318
Definition: c.h:622
BlockNumber pagesPerRange
Definition: brin_xlog.h:52
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1780
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1747
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:442
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:62
bool RecoveryInProgress(void)
Definition: xlog.c:5753
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:443
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1177
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:351
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33