/*
 * brin.c
 *		Implementation of BRIN indexes for Postgres
 *
 * See src/backend/access/brin/README for details.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin.c
 *
 * TODO
 *		* ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
 */
#include "postgres.h"

#include "access/brin.h"
#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_xlog.h"
#include "access/relation.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/guc.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/*
 * We use a BrinBuildState during initial construction of a BRIN index.
 * The running state is kept in a BrinMemTuple.
 */
typedef struct BrinBuildState
{
    Relation    bs_irel;
    int         bs_numtuples;
    Buffer      bs_currentInsertBuf;
    BlockNumber bs_pagesPerRange;
    BlockNumber bs_currRangeStart;
    BrinRevmap *bs_rmAccess;
    BrinDesc   *bs_bdesc;
    BrinMemTuple *bs_dtuple;
} BrinBuildState;

/*
 * Struct used as "opaque" during index scans
 */
typedef struct BrinOpaque
{
    BlockNumber bo_pagesPerRange;
    BrinRevmap *bo_rmAccess;
    BrinDesc   *bo_bdesc;
} BrinOpaque;

#define BRIN_ALL_BLOCKRANGES	InvalidBlockNumber

static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
                                                  BrinRevmap *revmap, BlockNumber pagesPerRange);
static void terminate_brin_buildstate(BrinBuildState *state);
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
                          bool include_partial, double *numSummarized, double *numExisting);
static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state,
                            Relation heapRel, BlockNumber heapBlk,
                            BlockNumber heapNumBlks);
static void form_and_insert_tuple(BrinBuildState *state);
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
                         BrinTuple *b);
static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc,
                                BrinMemTuple *dtup, Datum *values, bool *nulls);
static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);

/*
 * BRIN handler function: return IndexAmRoutine with access method parameters
 * and callbacks.
 */
Datum
brinhandler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    amroutine->amstrategies = 0;
    amroutine->amsupport = BRIN_LAST_OPTIONAL_PROCNUM;
    amroutine->amoptsprocnum = BRIN_PROCNUM_OPTIONS;
    amroutine->amcanorder = false;
    amroutine->amcanorderbyop = false;
    amroutine->amcanbackward = false;
    amroutine->amcanunique = false;
    amroutine->amcanmulticol = true;
    amroutine->amoptionalkey = true;
    amroutine->amsearcharray = false;
    amroutine->amsearchnulls = true;
    amroutine->amstorage = true;
    amroutine->amclusterable = false;
    amroutine->ampredlocks = false;
    amroutine->amcanparallel = false;
    amroutine->amcaninclude = false;
    amroutine->amusemaintenanceworkmem = false;
    amroutine->amsummarizing = true;
    amroutine->amparallelvacuumoptions =
        VACUUM_OPTION_PARALLEL_CLEANUP;
    amroutine->amkeytype = InvalidOid;

    amroutine->ambuild = brinbuild;
    amroutine->ambuildempty = brinbuildempty;
    amroutine->aminsert = brininsert;
    amroutine->ambulkdelete = brinbulkdelete;
    amroutine->amvacuumcleanup = brinvacuumcleanup;
    amroutine->amcanreturn = NULL;
    amroutine->amcostestimate = brincostestimate;
    amroutine->amoptions = brinoptions;
    amroutine->amproperty = NULL;
    amroutine->ambuildphasename = NULL;
    amroutine->amvalidate = brinvalidate;
    amroutine->amadjustmembers = NULL;
    amroutine->ambeginscan = brinbeginscan;
    amroutine->amrescan = brinrescan;
    amroutine->amgettuple = NULL;
    amroutine->amgetbitmap = bringetbitmap;
    amroutine->amendscan = brinendscan;
    amroutine->ammarkpos = NULL;
    amroutine->amrestrpos = NULL;
    amroutine->amestimateparallelscan = NULL;
    amroutine->aminitparallelscan = NULL;
    amroutine->amparallelrescan = NULL;

    PG_RETURN_POINTER(amroutine);
}

/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If autosummarization is enabled, check if we need to summarize the previous
 * page range.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do for this tuple.
 */
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
           ItemPointer heaptid, Relation heapRel,
           IndexUniqueCheck checkUnique,
           bool indexUnchanged,
           IndexInfo *indexInfo)
{
    BlockNumber pagesPerRange;
    BlockNumber origHeapBlk;
    BlockNumber heapBlk;
    BrinDesc   *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
    BrinRevmap *revmap;
    Buffer      buf = InvalidBuffer;
    MemoryContext tupcxt = NULL;
    MemoryContext oldcxt = CurrentMemoryContext;
    bool        autosummarize = BrinGetAutoSummarize(idxRel);

    revmap = brinRevmapInitialize(idxRel, &pagesPerRange);

    /*
     * origHeapBlk is the block number where the insertion occurred.  heapBlk
     * is the first block in the corresponding page range.
     */
    origHeapBlk = ItemPointerGetBlockNumber(heaptid);
    heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
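
    /*
     * Worked example (illustrative numbers only): with pagesPerRange = 128,
     * an insertion into heap block 300 gives heapBlk = (300 / 128) * 128 =
     * 256, i.e. the start of the range covering blocks 256..383.
     */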
    for (;;)
    {
        bool        need_insert = false;
        OffsetNumber off;
        BrinTuple  *brtup;
        BrinMemTuple *dtup;

        CHECK_FOR_INTERRUPTS();

        /*
         * If auto-summarization is enabled and we just inserted the first
         * tuple into the first block of a new non-first page range, request a
         * summarization run of the previous range.
         */
        if (autosummarize &&
            heapBlk > 0 &&
            heapBlk == origHeapBlk &&
            ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
        {
            BlockNumber lastPageRange = heapBlk - 1;
            BrinTuple  *lastPageTuple;

            lastPageTuple =
                brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
                                         NULL, BUFFER_LOCK_SHARE);
            if (!lastPageTuple)
            {
                bool        recorded;

                recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
                                                 RelationGetRelid(idxRel),
                                                 lastPageRange);
                if (!recorded)
                    ereport(LOG,
                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                             errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
                                    RelationGetRelationName(idxRel),
                                    lastPageRange)));
            }
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
                                         NULL, BUFFER_LOCK_SHARE);

        /* if range is unsummarized, there's nothing to do */
        if (!brtup)
            break;

        /* First time through in this statement? */
        if (bdesc == NULL)
        {
            MemoryContextSwitchTo(indexInfo->ii_Context);
            bdesc = brin_build_desc(idxRel);
            indexInfo->ii_AmCache = (void *) bdesc;
            MemoryContextSwitchTo(oldcxt);
        }
        /* First time through in this brininsert call? */
        if (tupcxt == NULL)
        {
            tupcxt = AllocSetContextCreate(CurrentMemoryContext,
                                           "brininsert cxt",
                                           ALLOCSET_DEFAULT_SIZES);
            MemoryContextSwitchTo(tupcxt);
        }

        dtup = brin_deform_tuple(bdesc, brtup, NULL);

        need_insert = add_values_to_range(idxRel, bdesc, dtup, values, nulls);

        if (!need_insert)
        {
            /*
             * The tuple is consistent with the new values, so there's nothing
             * to do.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
        else
        {
            Page        page = BufferGetPage(buf);
            ItemId      lp = PageGetItemId(page, off);
            Size        origsz;
            BrinTuple  *origtup;
            Size        newsz;
            BrinTuple  *newtup;
            bool        samepage;

            /*
             * Make a copy of the old tuple, so that we can compare it after
             * re-acquiring the lock.
             */
            origsz = ItemIdGetLength(lp);
            origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);

            /*
             * Before releasing the lock, check if we can attempt a same-page
             * update.  Another process could insert a tuple concurrently in
             * the same page though, so downstream we must be prepared to cope
             * if this turns out to not be possible after all.
             */
            newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
            samepage = brin_can_do_samepage_update(buf, origsz, newsz);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /*
             * Try to update the tuple.  If this doesn't work for whatever
             * reason, we need to restart from the top; the revmap might be
             * pointing at a different tuple for this block now, so we need to
             * recompute to ensure both our new heap tuple and the other
             * inserter's are covered by the combined tuple.  It might be that
             * we don't need to update at all.
             */
            if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
                               buf, off, origtup, origsz, newtup, newsz,
                               samepage))
            {
                /* no luck; start over */
                MemoryContextReset(tupcxt);
                continue;
            }
        }

        /* success! */
        break;
    }

    brinRevmapTerminate(revmap);
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
    MemoryContextSwitchTo(oldcxt);
    if (tupcxt != NULL)
        MemoryContextDelete(tupcxt);

    return false;
}

/*
 * Initialize state for a BRIN index scan.
 *
 * We read the metapage here to determine the pages-per-range number that this
 * index was built with.  Note that since this cannot be changed while we're
 * holding lock on index, it's not necessary to recompute it during brinrescan.
 */
IndexScanDesc
brinbeginscan(Relation r, int nkeys, int norderbys)
{
    IndexScanDesc scan;
    BrinOpaque *opaque;

    scan = RelationGetIndexScan(r, nkeys, norderbys);

    opaque = palloc_object(BrinOpaque);
    opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange);
    opaque->bo_bdesc = brin_build_desc(r);
    scan->opaque = opaque;

    return scan;
}

/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of scan
 * keys.
 */
int64
bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    Relation    idxRel = scan->indexRelation;
    Buffer      buf = InvalidBuffer;
    BrinDesc   *bdesc;
    Oid         heapOid;
    Relation    heapRel;
    BrinOpaque *opaque;
    BlockNumber nblocks;
    BlockNumber heapBlk;
    int         totalpages = 0;
    FmgrInfo   *consistentFn;
    MemoryContext oldcxt;
    MemoryContext perRangeCxt;
    BrinMemTuple *dtup;
    BrinTuple  *btup = NULL;
    Size        btupsz = 0;
    ScanKey   **keys,
              **nullkeys;
    int        *nkeys,
               *nnullkeys;
    char       *ptr;
    Size        len;
    char       *tmp PG_USED_FOR_ASSERTS_ONLY;

    opaque = (BrinOpaque *) scan->opaque;
    bdesc = opaque->bo_bdesc;
    pgstat_count_index_scan(idxRel);

    /*
     * We need to know the size of the table so that we know how long to
     * iterate on the revmap.
     */
    heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
    heapRel = table_open(heapOid, AccessShareLock);
    nblocks = RelationGetNumberOfBlocks(heapRel);
    table_close(heapRel, AccessShareLock);

    /*
     * Make room for the consistent support procedures of indexed columns.  We
     * don't look them up here; we do that lazily the first time we see a scan
     * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
     */
    consistentFn = palloc0_array(FmgrInfo, bdesc->bd_tupdesc->natts);

    /*
     * Make room for per-attribute lists of scan keys that we'll pass to the
     * consistent support procedure.  We don't know which attributes have scan
     * keys, so we allocate space for all attributes.  That may use more
     * memory but it's probably cheaper than determining which attributes are
     * used.
     *
     * We keep null and regular keys separate, so that we can pass just the
     * regular keys to the consistent function easily.
     *
     * To reduce the allocation overhead, we allocate one big chunk and then
     * carve it into smaller arrays ourselves.  All the pieces have exactly
     * the same lifetime, so that's OK.
     *
     * XXX The widest index can have 32 attributes, so the amount of wasted
     * memory is negligible.  We could invent a more compact approach (with
     * just space for used attributes) but that would make the matching more
     * complex so it's not a good trade-off.
     */
    len =
        MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +   /* regular keys */
        MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
        MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
        MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +   /* NULL keys */
        MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
        MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);

    ptr = palloc(len);
    tmp = ptr;

    keys = (ScanKey **) ptr;
    ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);

    nullkeys = (ScanKey **) ptr;
    ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);

    nkeys = (int *) ptr;
    ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);

    nnullkeys = (int *) ptr;
    ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);

    for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
    {
        keys[i] = (ScanKey *) ptr;
        ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);

        nullkeys[i] = (ScanKey *) ptr;
        ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
    }

    Assert(tmp + len == ptr);
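
    /*
     * Sketch of the chunk carved up above (natts-sized header arrays first,
     * then one pair of per-attribute ScanKey arrays for each attribute):
     *
     *   keys[natts] | nullkeys[natts] | nkeys[natts] | nnullkeys[natts] |
     *   keys[0][numberOfKeys] | nullkeys[0][numberOfKeys] | keys[1][...] ...
     */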

    /* zero the number of keys */
    memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
    memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);

    /* Preprocess the scan keys - split them into per-attribute arrays. */
    for (int keyno = 0; keyno < scan->numberOfKeys; keyno++)
    {
        ScanKey     key = &scan->keyData[keyno];
        AttrNumber  keyattno = key->sk_attno;

        /*
         * The collation of the scan key must match the collation used in the
         * index column (but only if the search is not IS NULL / IS NOT NULL).
         * Otherwise we shouldn't be using this index ...
         */
        Assert((key->sk_flags & SK_ISNULL) ||
               (key->sk_collation ==
                TupleDescAttr(bdesc->bd_tupdesc,
                              keyattno - 1)->attcollation));

        /*
         * First time we see this index attribute, so init as needed.
         *
         * This is a bit of an overkill - we don't know how many scan keys are
         * there for this attribute, so we simply allocate the largest number
         * possible (as if all keys were for this attribute).  This may waste
         * a bit of memory, but we only expect a small number of scan keys in
         * general, so this should be negligible, and repeated repalloc calls
         * are not free either.
         */
        if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
        {
            FmgrInfo   *tmp;

            /* First time we see this attribute, so no key/null keys. */
            Assert(nkeys[keyattno - 1] == 0);
            Assert(nnullkeys[keyattno - 1] == 0);

            tmp = index_getprocinfo(idxRel, keyattno,
                                    BRIN_PROCNUM_CONSISTENT);
            fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
                           CurrentMemoryContext);
        }

        /* Add key to the proper per-attribute array. */
        if (key->sk_flags & SK_ISNULL)
        {
            nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
            nnullkeys[keyattno - 1]++;
        }
        else
        {
            keys[keyattno - 1][nkeys[keyattno - 1]] = key;
            nkeys[keyattno - 1]++;
        }
    }

    /* allocate an initial in-memory tuple, out of the per-range memcxt */
    dtup = brin_new_memtuple(bdesc);

    /*
     * Setup and use a per-range memory context, which is reset every time we
     * loop below.  This avoids having to free the tuples within the loop.
     */
    perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                                        "bringetbitmap cxt",
                                        ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(perRangeCxt);

    /*
     * Now scan the revmap.  We start by querying for heap page 0,
     * incrementing by the number of pages per range; this gives us a full
     * view of the table.
     */
    for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
    {
        bool        addrange;
        bool        gottuple = false;
        BrinTuple  *tup;
        OffsetNumber off;
        Size        size;

        CHECK_FOR_INTERRUPTS();

        MemoryContextReset(perRangeCxt);

        tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
                                       &off, &size, BUFFER_LOCK_SHARE);
        if (tup)
        {
            gottuple = true;
            btup = brin_copy_tuple(tup, size, btup, &btupsz);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        /*
         * For page ranges with no indexed tuple, we must return the whole
         * range; otherwise, compare it to the scan keys.
         */
        if (!gottuple)
        {
            addrange = true;
        }
        else
        {
            dtup = brin_deform_tuple(bdesc, btup, dtup);
            if (dtup->bt_placeholder)
            {
                /*
                 * Placeholder tuples are always returned, regardless of the
                 * values stored in them.
                 */
                addrange = true;
            }
            else
            {
                int         attno;

                /*
                 * Compare scan keys with summary values stored for the range.
                 * If scan keys are matched, the page range must be added to
                 * the bitmap.  We initially assume the range needs to be
                 * added; in particular this serves the case where there are
                 * no keys.
                 */
                addrange = true;
                for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
                {
                    BrinValues *bval;
                    Datum       add;
                    Oid         collation;

                    /*
                     * skip attributes without any scan keys (both regular and
                     * IS [NOT] NULL)
                     */
                    if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
                        continue;

                    bval = &dtup->bt_columns[attno - 1];

                    /*
                     * If the BRIN tuple indicates that this range is empty,
                     * we can skip it: there's nothing to match.  We don't
                     * need to examine the next columns.
                     */
                    if (dtup->bt_empty_range)
                    {
                        addrange = false;
                        break;
                    }

                    /*
                     * First check if there are any IS [NOT] NULL scan keys,
                     * and if we're violating them.  In that case we can
                     * terminate early, without invoking the support function.
                     *
                     * As there may be more keys, we can only determine
                     * mismatch within this loop.
                     */
                    if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
                        !check_null_keys(bval, nullkeys[attno - 1],
                                         nnullkeys[attno - 1]))
                    {
                        /*
                         * If any of the IS [NOT] NULL keys failed, the page
                         * range as a whole can't pass.  So terminate the
                         * loop.
                         */
                        addrange = false;
                        break;
                    }

                    /*
                     * So either there are no IS [NOT] NULL keys, or all
                     * passed.  If there are no regular scan keys, we're done
                     * - the page range matches.  If there are regular keys,
                     * but the page range is marked as 'all nulls' it can't
                     * possibly pass (we're assuming the operators are
                     * strict).
                     */

                    /* No regular scan keys - page range as a whole passes. */
                    if (!nkeys[attno - 1])
                        continue;

                    Assert((nkeys[attno - 1] > 0) &&
                           (nkeys[attno - 1] <= scan->numberOfKeys));

                    /* If it is all nulls, it cannot possibly be consistent. */
                    if (bval->bv_allnulls)
                    {
                        addrange = false;
                        break;
                    }

                    /*
                     * Collation from the first key (has to be the same for
                     * all keys for the same attribute).
                     */
                    collation = keys[attno - 1][0]->sk_collation;

                    /*
                     * Check whether the scan key is consistent with the page
                     * range values; if so, have the pages in the range added
                     * to the output bitmap.
                     *
                     * The opclass may or may not support processing of
                     * multiple scan keys.  We can determine that based on the
                     * number of arguments - functions with an extra parameter
                     * (number of scan keys) do support this, otherwise we
                     * have to simply pass the scan keys one by one.
                     */
                    if (consistentFn[attno - 1].fn_nargs >= 4)
                    {
                        /* Check all keys at once */
                        add = FunctionCall4Coll(&consistentFn[attno - 1],
                                                collation,
                                                PointerGetDatum(bdesc),
                                                PointerGetDatum(bval),
                                                PointerGetDatum(keys[attno - 1]),
                                                Int32GetDatum(nkeys[attno - 1]));
                        addrange = DatumGetBool(add);
                    }
                    else
                    {
                        /*
                         * Check keys one by one
                         *
                         * When there are multiple scan keys, failure to meet
                         * the criteria for a single one of them is enough to
                         * discard the range as a whole, so break out of the
                         * loop as soon as a false return value is obtained.
                         */
                        int         keyno;

                        for (keyno = 0; keyno < nkeys[attno - 1]; keyno++)
                        {
                            add = FunctionCall3Coll(&consistentFn[attno - 1],
                                                    keys[attno - 1][keyno]->sk_collation,
                                                    PointerGetDatum(bdesc),
                                                    PointerGetDatum(bval),
                                                    PointerGetDatum(keys[attno - 1][keyno]));
                            addrange = DatumGetBool(add);
                            if (!addrange)
                                break;
                        }
                    }

                    /*
                     * If we found a scan key eliminating the range, no need
                     * to check additional ones.
                     */
                    if (!addrange)
                        break;
                }
            }
        }

        /* add the pages in the range to the output bitmap, if needed */
        if (addrange)
        {
            BlockNumber pageno;

            for (pageno = heapBlk;
                 pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
                 pageno++)
            {
                MemoryContextSwitchTo(oldcxt);
                tbm_add_page(tbm, pageno);
                totalpages++;
                MemoryContextSwitchTo(perRangeCxt);
            }
        }
    }

    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(perRangeCxt);

    if (buf != InvalidBuffer)
        ReleaseBuffer(buf);

    /*
     * XXX We have an approximation of the number of *pages* that our scan
     * returns, but we don't have a precise idea of the number of heap tuples
     * involved.
     */
    return totalpages * 10;
}

/*
 * Re-initialize state for a BRIN index scan
 */
void
brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
           ScanKey orderbys, int norderbys)
{
    /*
     * Other index AMs preprocess the scan keys at this point, or sometime
     * early during the scan; this lets them optimize by removing redundant
     * keys, or doing early returns when they are impossible to satisfy; see
     * _bt_preprocess_keys for an example.  Something like that could be added
     * here someday, too.
     */

    if (scankey && scan->numberOfKeys > 0)
        memmove(scan->keyData, scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
}

/*
 * Close down a BRIN index scan
 */
void
brinendscan(IndexScanDesc scan)
{
    BrinOpaque *opaque = (BrinOpaque *) scan->opaque;

    brinRevmapTerminate(opaque->bo_rmAccess);
    brin_free_desc(opaque->bo_bdesc);
    pfree(opaque);
}

/*
 * Per-heap-tuple callback for table_index_build_scan.
 *
 * Note we don't worry about the page range at the end of the table here; it
 * is present in the build state struct after we're called the last time, but
 * not inserted into the index.  Caller must ensure to do so, if appropriate.
 */
static void
brinbuildCallback(Relation index,
                  ItemPointer tid,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *brstate)
{
    BrinBuildState *state = (BrinBuildState *) brstate;
    BlockNumber thisblock;

    thisblock = ItemPointerGetBlockNumber(tid);

    /*
     * If we're in a block that belongs to a future range, summarize what
     * we've got and start afresh.  Note the scan might have skipped many
     * pages, if they were devoid of live tuples; make sure to insert index
     * tuples for those too.
     */
    while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
    {
        BRIN_elog((DEBUG2,
                   "brinbuildCallback: completed a range: %u--%u",
                   state->bs_currRangeStart,
                   state->bs_currRangeStart + state->bs_pagesPerRange));

        /* create the index tuple and insert it */
        form_and_insert_tuple(state);

        /* set state to correspond to the next range */
        state->bs_currRangeStart += state->bs_pagesPerRange;

        /* re-initialize state for it */
        brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
    }

    /* Accumulate the current tuple into the running state */
    (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
                               values, isnull);
}

/*
 * brinbuild() -- build a new BRIN index.
 */
IndexBuildResult *
brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
    IndexBuildResult *result;
    double      reltuples;
    double      idxtuples;
    BrinRevmap *revmap;
    BrinBuildState *state;
    Buffer      meta;
    BlockNumber pagesPerRange;

    /*
     * We expect to be called exactly once for any index relation.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    /*
     * Critical section not required, because on error the creation of the
     * whole relation will be rolled back.
     */

    meta = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
                             EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
    Assert(BufferGetBlockNumber(meta) == BRIN_METAPAGE_BLKNO);

    brin_metapage_init(BufferGetPage(meta), BrinGetPagesPerRange(index),
                       BRIN_CURRENT_VERSION);
    MarkBufferDirty(meta);

    if (RelationNeedsWAL(index))
    {
        xl_brin_createidx xlrec;
        XLogRecPtr  recptr;
        Page        page;

        xlrec.version = BRIN_CURRENT_VERSION;
        xlrec.pagesPerRange = BrinGetPagesPerRange(index);

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
        XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT | REGBUF_STANDARD);

        recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);

        page = BufferGetPage(meta);
        PageSetLSN(page, recptr);
    }

    UnlockReleaseBuffer(meta);

    /*
     * Initialize our state, including the deformed tuple state.
     */
    revmap = brinRevmapInitialize(index, &pagesPerRange);
    state = initialize_brin_buildstate(index, revmap, pagesPerRange);

    /*
     * Now scan the relation.  No syncscan allowed here because we want the
     * heap blocks in physical order.
     */
    reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
                                       brinbuildCallback, (void *) state, NULL);

    /* process the final batch */
    form_and_insert_tuple(state);

    /* release resources */
    idxtuples = state->bs_numtuples;
    brinRevmapTerminate(state->bs_rmAccess);
    terminate_brin_buildstate(state);

    /*
     * Return statistics
     */
    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

    result->heap_tuples = reltuples;
    result->index_tuples = idxtuples;

    return result;
}

void
brinbuildempty(Relation index)
{
    Buffer      metabuf;

    /* An empty BRIN index has a metapage only. */
    metabuf = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
                                EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);

    /* Initialize and xlog metabuffer. */
    START_CRIT_SECTION();
    brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index),
                       BRIN_CURRENT_VERSION);
    MarkBufferDirty(metabuf);
    log_newpage_buffer(metabuf, true);
    END_CRIT_SECTION();

    UnlockReleaseBuffer(metabuf);
}

/*
 * brinbulkdelete
 *		Since there are no per-heap-tuple index tuples in BRIN indexes,
 *		there's not a lot we can do here.
 *
 * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
 * tuple is deleted), meaning the need to re-run summarization on the affected
 * range.  Would need to add an extra flag in brintuples for that.
 */
IndexBulkDeleteResult *
brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    return stats;
}

/*
 * This routine is in charge of "vacuuming" a BRIN index: we just summarize
 * ranges that are currently unsummarized.
 */
IndexBulkDeleteResult *
brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    Relation    heapRel;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    if (!stats)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    stats->num_pages = RelationGetNumberOfBlocks(info->index);
    /* rest of stats is initialized by zeroing */

    heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
                         AccessShareLock);

    brin_vacuum_scan(info->index, info->strategy);

    brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
                  &stats->num_index_tuples, &stats->num_index_tuples);

    table_close(heapRel, AccessShareLock);

    return stats;
}

/*
 * reloptions processor for BRIN indexes
 */
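/*
 * These correspond to the storage parameters accepted by CREATE INDEX,
 * e.g. (illustrative statement only):
 *
 *   CREATE INDEX brinidx ON tab USING brin (col)
 *       WITH (pages_per_range = 32, autosummarize = on);
 */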
bytea *
brinoptions(Datum reloptions, bool validate)
{
    static const relopt_parse_elt tab[] = {
        {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
        {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
    };

    return (bytea *) build_reloptions(reloptions, validate,
                                      RELOPT_KIND_BRIN,
                                      sizeof(BrinOptions),
                                      tab, lengthof(tab));
}

/*
 * SQL-callable function to scan through an index and summarize all ranges
 * that are not currently summarized.
 */
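/*
 * Typical invocation, from SQL (illustrative index name):
 *
 *   SELECT brin_summarize_new_values('brinidx'::regclass);
 *
 * The return value is the number of ranges that were summarized.
 */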
Datum
brin_summarize_new_values(PG_FUNCTION_ARGS)
{
    Datum       relation = PG_GETARG_DATUM(0);

    return DirectFunctionCall2(brin_summarize_range,
                               relation,
                               Int64GetDatum((int64) BRIN_ALL_BLOCKRANGES));
}

/*
 * SQL-callable function to summarize the indicated page range, if not already
 * summarized.  If the second argument is BRIN_ALL_BLOCKRANGES, all
 * unsummarized ranges are summarized.
 */
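/*
 * For example, to summarize the range containing heap block 0 of the table
 * indexed by "brinidx" (illustrative name):
 *
 *   SELECT brin_summarize_range('brinidx'::regclass, 0);
 */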
Datum
brin_summarize_range(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    int64       heapBlk64 = PG_GETARG_INT64(1);
    BlockNumber heapBlk;
    Oid         heapoid;
    Relation    indexRel;
    Relation    heapRel;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;
    double      numSummarized = 0;

    if (RecoveryInProgress())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("recovery is in progress"),
                 errhint("BRIN control functions cannot be executed during recovery.")));

    if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
        ereport(ERROR,
                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                 errmsg("block number out of range: %lld",
                        (long long) heapBlk64)));
    heapBlk = (BlockNumber) heapBlk64;

    /*
     * We must lock table before index to avoid deadlocks.  However, if the
     * passed indexoid isn't an index then IndexGetRelation() will fail.
     * Rather than emitting a not-very-helpful error message, postpone
     * complaining, expecting that the is-it-an-index test below will fail.
     */
    heapoid = IndexGetRelation(indexoid, true);
    if (OidIsValid(heapoid))
    {
        heapRel = table_open(heapoid, ShareUpdateExclusiveLock);

        /*
         * Autovacuum calls us.  For its benefit, switch to the table owner's
         * userid, so that any index functions are run as that user.  Also
         * lock down security-restricted operations and arrange to make GUC
         * variable changes local to this command.  This is harmless, albeit
         * unnecessary, when called from SQL, because we fail shortly if the
         * user does not own the index.
         */
        GetUserIdAndSecContext(&save_userid, &save_sec_context);
        SetUserIdAndSecContext(heapRel->rd_rel->relowner,
                               save_sec_context | SECURITY_RESTRICTED_OPERATION);
        save_nestlevel = NewGUCNestLevel();
    }
    else
    {
        heapRel = NULL;
        /* Set these just to suppress "uninitialized variable" warnings */
        save_userid = InvalidOid;
        save_sec_context = -1;
        save_nestlevel = -1;
    }

    indexRel = index_open(indexoid, ShareUpdateExclusiveLock);

    /* Must be a BRIN index */
    if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
        indexRel->rd_rel->relam != BRIN_AM_OID)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a BRIN index",
                        RelationGetRelationName(indexRel))));

    /* User must own the index (comparable to privileges needed for VACUUM) */
    if (heapRel != NULL && !object_ownercheck(RelationRelationId, indexoid, save_userid))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
                       RelationGetRelationName(indexRel));

    /*
     * Since we did the IndexGetRelation call above without any lock, it's
     * barely possible that a race against an index drop/recreation could have
     * netted us the wrong table.  Recheck.
     */
    if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("could not open parent table of index \"%s\"",
                        RelationGetRelationName(indexRel))));

    /* OK, do it */
    brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);

    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);

    /* Restore userid and security context */
    SetUserIdAndSecContext(save_userid, save_sec_context);

    relation_close(indexRel, ShareUpdateExclusiveLock);
    relation_close(heapRel, ShareUpdateExclusiveLock);

    PG_RETURN_INT32((int32) numSummarized);
}

/*
 * SQL-callable interface to mark a range as no longer summarized
 */
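/*
 * For example, to drop the summary for the range containing heap block 0
 * (illustrative index name):
 *
 *   SELECT brin_desummarize_range('brinidx'::regclass, 0);
 */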
Datum
brin_desummarize_range(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    int64       heapBlk64 = PG_GETARG_INT64(1);
    BlockNumber heapBlk;
    Oid         heapoid;
    Relation    heapRel;
    Relation    indexRel;
    bool        done;

    if (RecoveryInProgress())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("recovery is in progress"),
                 errhint("BRIN control functions cannot be executed during recovery.")));

    if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
        ereport(ERROR,
                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                 errmsg("block number out of range: %lld",
                        (long long) heapBlk64)));
    heapBlk = (BlockNumber) heapBlk64;

    /*
     * We must lock table before index to avoid deadlocks.  However, if the
     * passed indexoid isn't an index then IndexGetRelation() will fail.
     * Rather than emitting a not-very-helpful error message, postpone
     * complaining, expecting that the is-it-an-index test below will fail.
     *
     * Unlike brin_summarize_range(), autovacuum never calls this.  Hence, we
     * don't switch userid.
     */
    heapoid = IndexGetRelation(indexoid, true);
    if (OidIsValid(heapoid))
        heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
    else
        heapRel = NULL;

    indexRel = index_open(indexoid, ShareUpdateExclusiveLock);

    /* Must be a BRIN index */
    if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
        indexRel->rd_rel->relam != BRIN_AM_OID)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a BRIN index",
                        RelationGetRelationName(indexRel))));

    /* User must own the index (comparable to privileges needed for VACUUM) */
    if (!object_ownercheck(RelationRelationId, indexoid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
                       RelationGetRelationName(indexRel));

    /*
     * Since we did the IndexGetRelation call above without any lock, it's
     * barely possible that a race against an index drop/recreation could have
     * netted us the wrong table.  Recheck.
     */
    if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("could not open parent table of index \"%s\"",
                        RelationGetRelationName(indexRel))));

    /* the revmap does the hard work */
    do
    {
        done = brinRevmapDesummarizeRange(indexRel, heapBlk);
    }
    while (!done);

    relation_close(indexRel, ShareUpdateExclusiveLock);
    relation_close(heapRel, ShareUpdateExclusiveLock);

    PG_RETURN_VOID();
}

/*
 * Build a BrinDesc used to create or scan a BRIN index
 */
BrinDesc *
brin_build_desc(Relation rel)
{
    BrinOpcInfo **opcinfo;
    BrinDesc   *bdesc;
    TupleDesc   tupdesc;
    int         totalstored = 0;
    int         keyno;
    long        totalsize;
    MemoryContext cxt;
    MemoryContext oldcxt;

    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "brin desc cxt",
                                ALLOCSET_SMALL_SIZES);
    oldcxt = MemoryContextSwitchTo(cxt);
    tupdesc = RelationGetDescr(rel);

    /*
     * Obtain BrinOpcInfo for each indexed column.  While at it, accumulate
     * the number of columns stored, since the number is opclass-defined.
     */
    opcinfo = palloc_array(BrinOpcInfo *, tupdesc->natts);
    for (keyno = 0; keyno < tupdesc->natts; keyno++)
    {
        FmgrInfo   *opcInfoFn;
        Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);

        opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);

        opcinfo[keyno] = (BrinOpcInfo *)
            DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
        totalstored += opcinfo[keyno]->oi_nstored;
    }

    /* Allocate our result struct and fill it in */
    totalsize = offsetof(BrinDesc, bd_info) +
        sizeof(BrinOpcInfo *) * tupdesc->natts;

    bdesc = palloc(totalsize);
    bdesc->bd_context = cxt;
    bdesc->bd_index = rel;
    bdesc->bd_tupdesc = tupdesc;
    bdesc->bd_disktdesc = NULL; /* generated lazily */
    bdesc->bd_totalstored = totalstored;

    for (keyno = 0; keyno < tupdesc->natts; keyno++)
        bdesc->bd_info[keyno] = opcinfo[keyno];
    pfree(opcinfo);

    MemoryContextSwitchTo(oldcxt);

    return bdesc;
}

void
brin_free_desc(BrinDesc *bdesc)
{
    /* make sure the tupdesc is still valid */
    Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
    /* no need for retail pfree */
    MemoryContextDelete(bdesc->bd_context);
}

/*
 * Fetch index's statistical data into *stats
 */
void
brinGetStats(Relation index, BrinStatsData *stats)
{
    Buffer      metabuffer;
    Page        metapage;
    BrinMetaPageData *metadata;

    metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
    LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
    metapage = BufferGetPage(metabuffer);
    metadata = (BrinMetaPageData *) PageGetContents(metapage);

    stats->pagesPerRange = metadata->pagesPerRange;
    stats->revmapNumPages = metadata->lastRevmapPage - 1;

    UnlockReleaseBuffer(metabuffer);
}

/*
 * Initialize a BrinBuildState appropriate to create tuples on the given index.
 */
static BrinBuildState *
initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
                           BlockNumber pagesPerRange)
{
    BrinBuildState *state;

    state = palloc_object(BrinBuildState);

    state->bs_irel = idxRel;
    state->bs_numtuples = 0;
    state->bs_currentInsertBuf = InvalidBuffer;
    state->bs_pagesPerRange = pagesPerRange;
    state->bs_currRangeStart = 0;
    state->bs_rmAccess = revmap;
    state->bs_bdesc = brin_build_desc(idxRel);
    state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);

    return state;
}

/*
 * Release resources associated with a BrinBuildState.
 */
static void
terminate_brin_buildstate(BrinBuildState *state)
{
    /*
     * Release the last index buffer used.  We might as well ensure that
     * whatever free space remains in that page is available in FSM, too.
     */
    if (!BufferIsInvalid(state->bs_currentInsertBuf))
    {
        Page        page;
        Size        freespace;
        BlockNumber blk;

        page = BufferGetPage(state->bs_currentInsertBuf);
        freespace = PageGetFreeSpace(page);
        blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
        ReleaseBuffer(state->bs_currentInsertBuf);
        RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
        FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
    }

    brin_free_desc(state->bs_bdesc);
    pfree(state->bs_dtuple);
    pfree(state);
}

/*
 * On the given BRIN index, summarize the heap page range that corresponds
 * to the heap block number given.
 *
 * This routine can run in parallel with insertions into the heap.  To avoid
 * missing those values from the summary tuple, we first insert a placeholder
 * index tuple into the index, then execute the heap scan; transactions
 * concurrent with the scan update the placeholder tuple.  After the scan, we
 * union the placeholder tuple with the one computed by this routine.  The
 * update of the index value happens in a loop, so that if somebody updates
 * the placeholder tuple after we read it, we detect the case and try again.
 * This ensures that the concurrently inserted tuples are not lost.
 *
 * A further corner case is this routine being asked to summarize the partial
 * range at the end of the table.  heapNumBlocks is the (possibly outdated)
 * table size; if we notice that the requested range lies beyond that size,
 * we re-compute the table size after inserting the placeholder tuple, to
 * avoid missing pages that were appended recently.
 */
static void
summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
                BlockNumber heapBlk, BlockNumber heapNumBlks)
{
    Buffer      phbuf;
    BrinTuple  *phtup;
    Size        phsz;
    OffsetNumber offset;
    BlockNumber scanNumBlks;

    /*
     * Insert the placeholder tuple
     */
    phbuf = InvalidBuffer;
    phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
    offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
                           state->bs_rmAccess, &phbuf,
                           heapBlk, phtup, phsz);

    /*
     * Compute range end.  We hold ShareUpdateExclusive lock on table, so it
     * cannot shrink concurrently (but it can grow).
     */
    Assert(heapBlk % state->bs_pagesPerRange == 0);
    if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
    {
        /*
         * If we're asked to scan what we believe to be the final range on the
         * table (i.e. a range that might be partial) we need to recompute our
         * idea of what the latest page is after inserting the placeholder
         * tuple.  Anyone that grows the table later will update the
         * placeholder tuple, so it doesn't matter that we won't scan these
         * pages ourselves.  Careful: the table might have been extended
         * beyond the current range, so clamp our result.
         *
         * Fortunately, this should occur infrequently.
         */
        scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
                          state->bs_pagesPerRange);
    }
    else
    {
        /* Easy case: range is known to be complete */
        scanNumBlks = state->bs_pagesPerRange;
    }

    /*
     * Execute the partial heap scan covering the heap blocks in the specified
     * page range, summarizing the heap tuples in it.  This scan stops just
     * short of brinbuildCallback creating the new index entry.
     *
     * Note that it is critical we use the "any visible" mode of
     * table_index_build_range_scan here: otherwise, we would miss tuples
     * inserted by transactions that are still in progress, among other corner
     * cases.
     */
    state->bs_currRangeStart = heapBlk;
    table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
                                 heapBlk, scanNumBlks,
                                 brinbuildCallback, (void *) state, NULL);

    /*
     * Now we update the values obtained by the scan with the placeholder
     * tuple.  We do this in a loop which only terminates if we're able to
     * update the placeholder tuple successfully; if we are not, this means
     * somebody else modified the placeholder tuple after we read it.
     */
    for (;;)
    {
        BrinTuple  *newtup;
        Size        newsize;
        bool        didupdate;
        bool        samepage;

        CHECK_FOR_INTERRUPTS();

        /*
         * Update the summary tuple and try to update.
         */
        newtup = brin_form_tuple(state->bs_bdesc,
                                 heapBlk, state->bs_dtuple, &newsize);
        samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
        didupdate =
            brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
                          state->bs_rmAccess, heapBlk, phbuf, offset,
                          phtup, phsz, newtup, newsize, samepage);
        brin_free_tuple(phtup);
        brin_free_tuple(newtup);

        /* If the update succeeded, we're done. */
        if (didupdate)
            break;

        /*
         * If the update didn't work, it might be because somebody updated the
         * placeholder tuple concurrently.  Extract the new version, union it
         * with the values we have from the scan, and start over.  (There are
         * other reasons for the update to fail, but it's simple to treat them
         * the same.)
         */
        phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
                                         &offset, &phsz, BUFFER_LOCK_SHARE);
        /* the placeholder tuple must exist */
        if (phtup == NULL)
            elog(ERROR, "missing placeholder tuple");
        phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
        LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);

        /* merge it into the tuple from the heap scan */
        union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
    }

    ReleaseBuffer(phbuf);
}

/*
 * Summarize page ranges that are not already summarized.  If pageRange is
 * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
 * page range containing the given heap page number is scanned.
 * If include_partial is true, then the partial range at the end of the table
 * is summarized, otherwise not.
 *
 * For each new index tuple inserted, *numSummarized (if not NULL) is
 * incremented; for each existing tuple, *numExisting (if not NULL) is
 * incremented.
 */
static void
brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
              bool include_partial, double *numSummarized, double *numExisting)
{
    BrinRevmap *revmap;
    BrinBuildState *state = NULL;
    IndexInfo  *indexInfo = NULL;
    BlockNumber heapNumBlocks;
    BlockNumber pagesPerRange;
    Buffer      buf;
    BlockNumber startBlk;

    revmap = brinRevmapInitialize(index, &pagesPerRange);

    /* determine range of pages to process */
    heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
    if (pageRange == BRIN_ALL_BLOCKRANGES)
        startBlk = 0;
    else
    {
        startBlk = (pageRange / pagesPerRange) * pagesPerRange;
        heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
    }
    if (startBlk > heapNumBlocks)
    {
        /* Nothing to do if start point is beyond end of table */
        brinRevmapTerminate(revmap);
        return;
    }

    /*
     * Scan the revmap to find unsummarized items.
     */
    buf = InvalidBuffer;
    for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
    {
        BrinTuple  *tup;
        OffsetNumber off;

        /*
         * Unless requested to summarize even a partial range, go away now if
         * we think the next range is partial.  The caller passes true when
         * the call typically follows bulk data loading
         * (brin_summarize_new_values), and false when it is typically the
         * result of an arbitrarily-scheduled maintenance command (vacuuming).
         */
        if (!include_partial &&
            (startBlk + pagesPerRange > heapNumBlocks))
            break;

        CHECK_FOR_INTERRUPTS();

        tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
                                       BUFFER_LOCK_SHARE);
        if (tup == NULL)
        {
            /* no revmap entry for this heap range. Summarize it. */
            if (state == NULL)
            {
                /* first time through */
                Assert(!indexInfo);
                state = initialize_brin_buildstate(index, revmap,
                                                   pagesPerRange);
                indexInfo = BuildIndexInfo(index);
            }
            summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);

            /* and re-initialize state for the next range */
            brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);

            if (numSummarized)
                *numSummarized += 1.0;
        }
        else
        {
            if (numExisting)
                *numExisting += 1.0;
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
    }

    if (BufferIsValid(buf))
        ReleaseBuffer(buf);

    /* free resources */
    brinRevmapTerminate(revmap);
    if (state)
    {
        terminate_brin_buildstate(state);
        pfree(indexInfo);
    }
}

/*
 * Given a deformed tuple in the build state, convert it into the on-disk
 * format and insert it into the index, making the revmap point to it.
 */
static void
form_and_insert_tuple(BrinBuildState *state)
{
    BrinTuple  *tup;
    Size        size;

    tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
                          state->bs_dtuple, &size);
    brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
                  &state->bs_currentInsertBuf, state->bs_currRangeStart,
                  tup, size);
    state->bs_numtuples++;

    pfree(tup);
}

/*
 * Given two deformed tuples, adjust the first one so that it's consistent
 * with the summary values in both.
 */
static void
union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
{
    int         keyno;
    BrinMemTuple *db;
    MemoryContext cxt;
    MemoryContext oldcxt;

    /* Use our own memory context to avoid retail pfree */
    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "brin union",
                                ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(cxt);
    db = brin_deform_tuple(bdesc, b, NULL);
    MemoryContextSwitchTo(oldcxt);

    /*
     * Check if the ranges are empty.
     *
     * If at least one of them is empty, we don't need to call per-key union
     * functions at all.  If "b" is empty, we just use "a" as the result (it
     * might be empty too, but that's fine).  If "a" is empty but "b" is not,
     * we use "b" as the result (but we have to copy the data into "a" first).
     *
     * Only when both ranges are non-empty do we actually do the per-key
     * merge.
     */

    /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */
    if (db->bt_empty_range)
    {
        /* skip the per-key merge */
        MemoryContextDelete(cxt);
        return;
    }

    /*
     * Now we know "b" is not empty.  If "a" is empty, then "b" is the result.
     * But we need to copy the data from "b" to "a" first, because that's how
     * we pass the result out.
     *
     * We have to copy all the global/per-key flags etc. too.
     */
    if (a->bt_empty_range)
    {
        for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
        {
            int         i;
            BrinValues *col_a = &a->bt_columns[keyno];
            BrinValues *col_b = &db->bt_columns[keyno];
            BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];

            col_a->bv_allnulls = col_b->bv_allnulls;
            col_a->bv_hasnulls = col_b->bv_hasnulls;

            /* If "b" has no data, we're done. */
            if (col_b->bv_allnulls)
                continue;

            for (i = 0; i < opcinfo->oi_nstored; i++)
                col_a->bv_values[i] =
                    datumCopy(col_b->bv_values[i],
                              opcinfo->oi_typcache[i]->typbyval,
                              opcinfo->oi_typcache[i]->typlen);
        }

        /* "a" started empty, but "b" was not empty, so remember that */
        a->bt_empty_range = false;

        /* skip the per-key merge */
        MemoryContextDelete(cxt);
        return;
    }

    /* Now we know neither range is empty. */
    for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
    {
        FmgrInfo   *unionFn;
        BrinValues *col_a = &a->bt_columns[keyno];
        BrinValues *col_b = &db->bt_columns[keyno];
        BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];

        if (opcinfo->oi_regular_nulls)
        {
            /* Does the "b" summary represent any NULL values? */
            bool        b_has_nulls = (col_b->bv_hasnulls || col_b->bv_allnulls);

            /* Adjust "hasnulls". */
            if (!col_a->bv_allnulls && b_has_nulls)
                col_a->bv_hasnulls = true;

            /* If there are no values in B, there's nothing left to do. */
            if (col_b->bv_allnulls)
                continue;

            /*
             * Adjust "allnulls".  If A doesn't have values, just copy the
             * values from B into A, and we're done.  We cannot run the
             * operators in this case, because values in A might contain
             * garbage.  Note we already established that B contains values.
             *
             * Also adjust "hasnulls" in order not to forget the summary
             * represents NULL values.  This is not redundant with the earlier
             * update, because that only happens when allnulls=false.
             */
            if (col_a->bv_allnulls)
            {
                int         i;

                col_a->bv_allnulls = false;
                col_a->bv_hasnulls = true;

                for (i = 0; i < opcinfo->oi_nstored; i++)
                    col_a->bv_values[i] =
                        datumCopy(col_b->bv_values[i],
                                  opcinfo->oi_typcache[i]->typbyval,
                                  opcinfo->oi_typcache[i]->typlen);

                continue;
            }
        }

        unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
                                    BRIN_PROCNUM_UNION);
        FunctionCall3Coll(unionFn,
                          bdesc->bd_index->rd_indcollation[keyno],
                          PointerGetDatum(bdesc),
                          PointerGetDatum(col_a),
                          PointerGetDatum(col_b));
    }

    MemoryContextDelete(cxt);
}

/*
 * brin_vacuum_scan
 *		Do a complete scan of the index during VACUUM.
 *
 * This routine scans the complete index looking for uncatalogued index pages,
 * i.e. those that might have been lost due to a crash after index extension
 * and such.
 */
static void
brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
{
    BlockNumber nblocks;
    BlockNumber blkno;

    /*
     * Scan the index in physical order, and clean up any possible mess in
     * each page.
     */
    nblocks = RelationGetNumberOfBlocks(idxrel);
    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;

        CHECK_FOR_INTERRUPTS();

        buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
                                 RBM_NORMAL, strategy);

        brin_page_cleanup(idxrel, buf);

        ReleaseBuffer(buf);
    }

    /*
     * Update all upper pages in the index's FSM, as well.  This ensures not
     * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
     * but also that any pre-existing damage or out-of-dateness is repaired.
     */
    FreeSpaceMapVacuum(idxrel);
}

static bool
add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup,
                    Datum *values, bool *nulls)
{
    int         keyno;

    /* If the range starts empty, we're certainly going to modify it. */
    bool        modified = dtup->bt_empty_range;

    /*
     * Compare the key values of the new tuple to the stored index values; our
     * deformed tuple will get updated if the new tuple doesn't fit the
     * original range (note this means we can't break out of the loop early).
     * Make a note of whether this happens, so that we know to insert the
     * modified tuple later.
     */
    for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
    {
        Datum       result;
        BrinValues *bval;
        FmgrInfo   *addValue;
        bool        has_nulls;

        bval = &dtup->bt_columns[keyno];

        /*
         * Does the range have actual NULL values?  Either of the flags can be
         * set, but we ignore the state before adding the first row.
         *
         * We have to remember this, because we'll modify the flags and we
         * need to know if the range started as empty.
         */
        has_nulls = ((!dtup->bt_empty_range) &&
                     (bval->bv_hasnulls || bval->bv_allnulls));

        /*
         * If the value we're adding is NULL, handle it locally.  Otherwise
         * call the BRIN_PROCNUM_ADDVALUE procedure.
         */
        if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno])
        {
            /*
             * If the new value is null, we record that we saw it if it's the
             * first one; otherwise, there's nothing to do.
             */
            if (!bval->bv_hasnulls)
            {
                bval->bv_hasnulls = true;
                modified = true;
            }

            continue;
        }

        addValue = index_getprocinfo(idxRel, keyno + 1,
                                     BRIN_PROCNUM_ADDVALUE);
        result = FunctionCall4Coll(addValue,
                                   idxRel->rd_indcollation[keyno],
                                   PointerGetDatum(bdesc),
                                   PointerGetDatum(bval),
                                   values[keyno],
                                   nulls[keyno]);
        /* if that returned true, we need to insert the updated tuple */
        modified |= DatumGetBool(result);

        /*
         * If the range had actual NULL values (i.e. did not start empty),
         * make sure we don't forget about the NULL values.  Either the
         * allnulls flag is still set to true, or (if the opclass cleared it)
         * we need to set hasnulls=true.
         *
         * XXX This can only happen when the opclass modified the tuple, so
         * the modified flag should be set.
         */
        if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls))
        {
            Assert(modified);
            bval->bv_hasnulls = true;
        }
    }

    /*
     * After updating summaries for all the keys, mark it as not empty.
     *
     * If we're actually changing the flag value (i.e. the tuple started as
     * empty), we should have modified the tuple.  So we should not see an
     * empty range that was not modified.
     */
    Assert(!dtup->bt_empty_range || modified);
    dtup->bt_empty_range = false;

    return modified;
}

static bool
check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
{
    int         keyno;

    /*
     * First check if there are any IS [NOT] NULL scan keys, and if we're
     * violating them.
     */
    for (keyno = 0; keyno < nnullkeys; keyno++)
    {
        ScanKey     key = nullkeys[keyno];

        Assert(key->sk_attno == bval->bv_attno);

        /* Handle only IS NULL/IS NOT NULL tests */
        if (!(key->sk_flags & SK_ISNULL))
            continue;

        if (key->sk_flags & SK_SEARCHNULL)
        {
            /* IS NULL scan key, but range has no NULLs */
            if (!bval->bv_allnulls && !bval->bv_hasnulls)
                return false;
        }
        else if (key->sk_flags & SK_SEARCHNOTNULL)
        {
            /*
             * For IS NOT NULL, we can only skip ranges that are known to have
             * only nulls.
             */
            if (bval->bv_allnulls)
                return false;
        }
        else
        {
            /*
             * Neither IS NULL nor IS NOT NULL was used; assume all indexable
             * operators are strict and thus return false with NULL value in
             * the scan key.
             */
            return false;
        }
    }

    return true;
}
Definition: brin.h:45
#define BRIN_LAST_OPTIONAL_PROCNUM
Definition: brin_internal.h:78
#define BRIN_PROCNUM_UNION
Definition: brin_internal.h:73
#define BRIN_PROCNUM_OPTIONS
Definition: brin_internal.h:75
#define BRIN_PROCNUM_OPCINFO
Definition: brin_internal.h:70
#define BRIN_PROCNUM_CONSISTENT
Definition: brin_internal.h:72
#define BRIN_elog(args)
Definition: brin_internal.h:85
#define BRIN_PROCNUM_ADDVALUE
Definition: brin_internal.h:71
#define BRIN_CURRENT_VERSION
Definition: brin_page.h:72
#define BRIN_METAPAGE_BLKNO
Definition: brin_page.h:75
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:54
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:625
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:343
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:487
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:324
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
Definition: brin_revmap.c:324
void brinRevmapTerminate(BrinRevmap *revmap)
Definition: brin_revmap.c:101
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode)
Definition: brin_revmap.c:195
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
Definition: brin_revmap.c:71
BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size *size)
Definition: brin_tuple.c:99
BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc)
Definition: brin_tuple.c:482
BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
Definition: brin_tuple.c:553
BrinMemTuple * brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
Definition: brin_tuple.c:511
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:446
void brin_free_tuple(BrinTuple *tuple)
Definition: brin_tuple.c:433
BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
Definition: brin_tuple.c:388
bool brinvalidate(Oid opclassoid)
Definition: brin_validate.c:37
#define SizeOfBrinCreateIdx
Definition: brin_xlog.h:55
#define XLOG_BRIN_CREATE_INDEX
Definition: brin_xlog.h:31
int Buffer
Definition: buf.h:23
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3290
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:812
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4480
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4497
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2111
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4715
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:755
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:708
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:227
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:73
@ EB_LOCK_FIRST
Definition: bufmgr.h:85
@ RBM_NORMAL
Definition: bufmgr.h:44
#define BMR_REL(p_rel)
Definition: bufmgr.h:106
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
static char * PageGetContents(Page page)
Definition: bufpage.h:254
Pointer Page
Definition: bufpage.h:78
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define Min(x, y)
Definition: c.h:993
#define MAXALIGN(LEN)
Definition: c.h:800
signed int int32
Definition: c.h:483
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
#define lengthof(array)
Definition: c.h:777
#define OidIsValid(objectId)
Definition: c.h:764
size_t Size
Definition: c.h:594
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define palloc_object(type)
Definition: fe_memutils.h:62
#define palloc_array(type, count)
Definition: fe_memutils.h:64
#define palloc0_array(type, count)
Definition: fe_memutils.h:65
#define palloc0_object(type)
Definition: fe_memutils.h:63
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1179
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1790
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1154
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:580
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:644
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:660
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:335
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:81
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:87
IndexUniqueCheck
Definition: genam.h:116
int NewGUCNestLevel(void)
Definition: guc.c:2201
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition: guc.c:2215
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition: index.c:3539
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2426
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:811
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
Assert(fmt[strlen(fmt) - 1] !='\n')
#define AccessShareLock
Definition: lockdefs.h:36
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
void pfree(void *pointer)
Definition: mcxt.c:1456
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:403
void * palloc(Size size)
Definition: mcxt.c:1226
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:163
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:70
#define SECURITY_RESTRICTED_OPERATION
Definition: miscadmin.h:314
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:630
Oid GetUserId(void)
Definition: miscinit.c:509
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:637
#define makeNode(_type_)
Definition: nodes.h:176
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
@ OBJECT_INDEX
Definition: parsenodes.h:2140
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
const void size_t len
static char * buf
Definition: pg_test_fsync.c:67
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:78
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:623
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static void addrange(struct cvec *cv, chr from, chr to)
Definition: regc_cvec.c:90
#define RelationGetRelid(relation)
Definition: rel.h:504
#define RelationGetDescr(relation)
Definition: rel.h:530
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define RelationNeedsWAL(relation)
Definition: rel.h:629
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1910
@ RELOPT_KIND_BRIN
Definition: reloptions.h:52
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
@ RELOPT_TYPE_BOOL
Definition: reloptions.h:31
@ MAIN_FORKNUM
Definition: relpath.h:50
@ INIT_FORKNUM
Definition: relpath.h:53
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7815
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_SEARCHNULL
Definition: skey.h:121
#define SK_ISNULL
Definition: skey.h:115
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
BrinMemTuple * bs_dtuple
Definition: brin.c:58
Relation bs_irel
Definition: brin.c:51
BlockNumber bs_pagesPerRange
Definition: brin.c:54
Buffer bs_currentInsertBuf
Definition: brin.c:53
int bs_numtuples
Definition: brin.c:52
BrinRevmap * bs_rmAccess
Definition: brin.c:56
BlockNumber bs_currRangeStart
Definition: brin.c:55
BrinDesc * bs_bdesc
Definition: brin.c:57
int bd_totalstored
Definition: brin_internal.h:59
TupleDesc bd_tupdesc
Definition: brin_internal.h:53
BrinOpcInfo * bd_info[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:62
Relation bd_index
Definition: brin_internal.h:50
MemoryContext bd_context
Definition: brin_internal.h:47
TupleDesc bd_disktdesc
Definition: brin_internal.h:56
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_tuple.h:55
bool bt_placeholder
Definition: brin_tuple.h:46
bool bt_empty_range
Definition: brin_tuple.h:47
BlockNumber lastRevmapPage
Definition: brin_page.h:69
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bo_pagesPerRange
Definition: brin.c:66
BrinDesc * bo_bdesc
Definition: brin.c:68
BrinRevmap * bo_rmAccess
Definition: brin.c:67
TypeCacheEntry * oi_typcache[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:37
uint16 oi_nstored
Definition: brin_internal.h:28
bool oi_regular_nulls
Definition: brin_internal.h:31
BlockNumber revmapNumPages
Definition: brin.h:34
BlockNumber pagesPerRange
Definition: brin.h:33
bool bv_hasnulls
Definition: brin_tuple.h:32
Datum * bv_values
Definition: brin_tuple.h:34
AttrNumber bv_attno
Definition: brin_tuple.h:31
bool bv_allnulls
Definition: brin_tuple.h:33
Definition: fmgr.h:57
ambuildphasename_function ambuildphasename
Definition: amapi.h:270
ambuildempty_function ambuildempty
Definition: amapi.h:262
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:265
bool amclusterable
Definition: amapi.h:238
amoptions_function amoptions
Definition: amapi.h:268
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:282
amrestrpos_function amrestrpos
Definition: amapi.h:279
aminsert_function aminsert
Definition: amapi.h:263
amendscan_function amendscan
Definition: amapi.h:277
uint16 amoptsprocnum
Definition: amapi.h:218
amparallelrescan_function amparallelrescan
Definition: amapi.h:284
Oid amkeytype
Definition: amapi.h:252
bool ampredlocks
Definition: amapi.h:240
uint16 amsupport
Definition: amapi.h:216
amcostestimate_function amcostestimate
Definition: amapi.h:267
bool amcanorderbyop
Definition: amapi.h:222
amadjustmembers_function amadjustmembers
Definition: amapi.h:272
ambuild_function ambuild
Definition: amapi.h:261
bool amstorage
Definition: amapi.h:236
uint16 amstrategies
Definition: amapi.h:214
bool amoptionalkey
Definition: amapi.h:230
amgettuple_function amgettuple
Definition: amapi.h:275
amcanreturn_function amcanreturn
Definition: amapi.h:266
bool amcanunique
Definition: amapi.h:226
amgetbitmap_function amgetbitmap
Definition: amapi.h:276
amproperty_function amproperty
Definition: amapi.h:269
ambulkdelete_function ambulkdelete
Definition: amapi.h:264
bool amsearcharray
Definition: amapi.h:232
bool amsummarizing
Definition: amapi.h:248
amvalidate_function amvalidate
Definition: amapi.h:271
ammarkpos_function ammarkpos
Definition: amapi.h:278
bool amcanmulticol
Definition: amapi.h:228
bool amusemaintenanceworkmem
Definition: amapi.h:246
ambeginscan_function ambeginscan
Definition: amapi.h:273
bool amcanparallel
Definition: amapi.h:242
amrescan_function amrescan
Definition: amapi.h:274
bool amcanorder
Definition: amapi.h:220
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:283
uint8 amparallelvacuumoptions
Definition: amapi.h:250
bool amcanbackward
Definition: amapi.h:224
bool amcaninclude
Definition: amapi.h:244
bool amsearchnulls
Definition: amapi.h:234
double heap_tuples
Definition: genam.h:32
double index_tuples
Definition: genam.h:33
BlockNumber num_pages
Definition: genam.h:77
double num_index_tuples
Definition: genam.h:79
void * ii_AmCache
Definition: execnodes.h:201
MemoryContext ii_Context
Definition: execnodes.h:202
struct ScanKeyData * keyData
Definition: relscan.h:122
Relation indexRelation
Definition: relscan.h:118
Relation index
Definition: genam.h:46
bool analyze_only
Definition: genam.h:48
BufferAccessStrategy strategy
Definition: genam.h:53
Oid * rd_indcollation
Definition: rel.h:216
Form_pg_class rd_rel
Definition: rel.h:111
Oid sk_collation
Definition: skey.h:70
int tdrefcount
Definition: tupdesc.h:84
bool typbyval
Definition: typcache.h:40
int16 typlen
Definition: typcache.h:39
Definition: type.h:95
Definition: regguts.h:323
Definition: c.h:676
BlockNumber pagesPerRange
Definition: brin_xlog.h:52
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1805
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1772
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:442
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:62
bool RecoveryInProgress(void)
Definition: xlog.c:5948
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:461
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1225
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33