PostgreSQL Source Code  git master
brin.c
Go to the documentation of this file.
1 /*
2  * brin.c
3  * Implementation of BRIN indexes for Postgres
4  *
5  * See src/backend/access/brin/README for details.
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/brin/brin.c
12  *
13  * TODO
14  * * ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
15  */
16 #include "postgres.h"
17 
18 #include "access/brin.h"
19 #include "access/brin_page.h"
20 #include "access/brin_pageops.h"
21 #include "access/brin_xlog.h"
22 #include "access/relation.h"
23 #include "access/reloptions.h"
24 #include "access/relscan.h"
25 #include "access/table.h"
26 #include "access/tableam.h"
27 #include "access/xloginsert.h"
28 #include "catalog/index.h"
29 #include "catalog/pg_am.h"
30 #include "commands/vacuum.h"
31 #include "miscadmin.h"
32 #include "pgstat.h"
33 #include "postmaster/autovacuum.h"
34 #include "storage/bufmgr.h"
35 #include "storage/freespace.h"
36 #include "utils/builtins.h"
37 #include "utils/index_selfuncs.h"
38 #include "utils/memutils.h"
39 #include "utils/rel.h"
40 
41 
42 /*
43  * We use a BrinBuildState during initial construction of a BRIN index.
44  * The running state is kept in a BrinMemTuple.
45  */
46 typedef struct BrinBuildState
47 {
57 
58 /*
59  * Struct used as "opaque" during index scans
60  */
61 typedef struct BrinOpaque
62 {
66 } BrinOpaque;
67 
68 #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
69 
71  BrinRevmap *revmap, BlockNumber pagesPerRange);
73 static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
74  bool include_partial, double *numSummarized, double *numExisting);
76 static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
77  BrinTuple *b);
78 static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
79 
80 
81 /*
82  * BRIN handler function: return IndexAmRoutine with access method parameters
83  * and callbacks.
84  */
85 Datum
87 {
89 
90  amroutine->amstrategies = 0;
92  amroutine->amcanorder = false;
93  amroutine->amcanorderbyop = false;
94  amroutine->amcanbackward = false;
95  amroutine->amcanunique = false;
96  amroutine->amcanmulticol = true;
97  amroutine->amoptionalkey = true;
98  amroutine->amsearcharray = false;
99  amroutine->amsearchnulls = true;
100  amroutine->amstorage = true;
101  amroutine->amclusterable = false;
102  amroutine->ampredlocks = false;
103  amroutine->amcanparallel = false;
104  amroutine->amcaninclude = false;
105  amroutine->amusemaintenanceworkmem = false;
106  amroutine->amparallelvacuumoptions =
108  amroutine->amkeytype = InvalidOid;
109 
110  amroutine->ambuild = brinbuild;
111  amroutine->ambuildempty = brinbuildempty;
112  amroutine->aminsert = brininsert;
113  amroutine->ambulkdelete = brinbulkdelete;
114  amroutine->amvacuumcleanup = brinvacuumcleanup;
115  amroutine->amcanreturn = NULL;
116  amroutine->amcostestimate = brincostestimate;
117  amroutine->amoptions = brinoptions;
118  amroutine->amproperty = NULL;
119  amroutine->ambuildphasename = NULL;
120  amroutine->amvalidate = brinvalidate;
121  amroutine->ambeginscan = brinbeginscan;
122  amroutine->amrescan = brinrescan;
123  amroutine->amgettuple = NULL;
124  amroutine->amgetbitmap = bringetbitmap;
125  amroutine->amendscan = brinendscan;
126  amroutine->ammarkpos = NULL;
127  amroutine->amrestrpos = NULL;
128  amroutine->amestimateparallelscan = NULL;
129  amroutine->aminitparallelscan = NULL;
130  amroutine->amparallelrescan = NULL;
131 
132  PG_RETURN_POINTER(amroutine);
133 }
134 
135 /*
136  * A tuple in the heap is being inserted. To keep a brin index up to date,
137  * we need to obtain the relevant index tuple and compare its stored values
138  * with those of the new tuple. If the tuple values are not consistent with
139  * the summary tuple, we need to update the index tuple.
140  *
141  * If autosummarization is enabled, check if we need to summarize the previous
142  * page range.
143  *
144  * If the range is not currently summarized (i.e. the revmap returns NULL for
145  * it), there's nothing to do for this tuple.
146  */
147 bool
148 brininsert(Relation idxRel, Datum *values, bool *nulls,
149  ItemPointer heaptid, Relation heapRel,
150  IndexUniqueCheck checkUnique,
151  IndexInfo *indexInfo)
152 {
153  BlockNumber pagesPerRange;
154  BlockNumber origHeapBlk;
155  BlockNumber heapBlk;
156  BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
157  BrinRevmap *revmap;
159  MemoryContext tupcxt = NULL;
161  bool autosummarize = BrinGetAutoSummarize(idxRel);
162 
163  revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
164 
165  /*
166  * origHeapBlk is the block number where the insertion occurred. heapBlk
167  * is the first block in the corresponding page range.
168  */
169  origHeapBlk = ItemPointerGetBlockNumber(heaptid);
170  heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
171 
172  for (;;)
173  {
174  bool need_insert = false;
175  OffsetNumber off;
176  BrinTuple *brtup;
177  BrinMemTuple *dtup;
178  int keyno;
179 
181 
182  /*
183  * If auto-summarization is enabled and we just inserted the first
184  * tuple into the first block of a new non-first page range, request a
185  * summarization run of the previous range.
186  */
187  if (autosummarize &&
188  heapBlk > 0 &&
189  heapBlk == origHeapBlk &&
191  {
192  BlockNumber lastPageRange = heapBlk - 1;
193  BrinTuple *lastPageTuple;
194 
195  lastPageTuple =
196  brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
197  NULL, BUFFER_LOCK_SHARE, NULL);
198  if (!lastPageTuple)
199  {
200  bool recorded;
201 
203  RelationGetRelid(idxRel),
204  lastPageRange);
205  if (!recorded)
206  ereport(LOG,
207  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
208  errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
209  RelationGetRelationName(idxRel),
210  lastPageRange)));
211  }
212  else
214  }
215 
216  brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
217  NULL, BUFFER_LOCK_SHARE, NULL);
218 
219  /* if range is unsummarized, there's nothing to do */
220  if (!brtup)
221  break;
222 
223  /* First time through in this statement? */
224  if (bdesc == NULL)
225  {
226  MemoryContextSwitchTo(indexInfo->ii_Context);
227  bdesc = brin_build_desc(idxRel);
228  indexInfo->ii_AmCache = (void *) bdesc;
229  MemoryContextSwitchTo(oldcxt);
230  }
231  /* First time through in this brininsert call? */
232  if (tupcxt == NULL)
233  {
235  "brininsert cxt",
237  MemoryContextSwitchTo(tupcxt);
238  }
239 
240  dtup = brin_deform_tuple(bdesc, brtup, NULL);
241 
242  /*
243  * Compare the key values of the new tuple to the stored index values;
244  * our deformed tuple will get updated if the new tuple doesn't fit
245  * the original range (note this means we can't break out of the loop
246  * early). Make a note of whether this happens, so that we know to
247  * insert the modified tuple later.
248  */
249  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
250  {
251  Datum result;
252  BrinValues *bval;
253  FmgrInfo *addValue;
254 
255  bval = &dtup->bt_columns[keyno];
256  addValue = index_getprocinfo(idxRel, keyno + 1,
258  result = FunctionCall4Coll(addValue,
259  idxRel->rd_indcollation[keyno],
260  PointerGetDatum(bdesc),
261  PointerGetDatum(bval),
262  values[keyno],
263  nulls[keyno]);
264  /* if that returned true, we need to insert the updated tuple */
265  need_insert |= DatumGetBool(result);
266  }
267 
268  if (!need_insert)
269  {
270  /*
271  * The tuple is consistent with the new values, so there's nothing
272  * to do.
273  */
275  }
276  else
277  {
278  Page page = BufferGetPage(buf);
279  ItemId lp = PageGetItemId(page, off);
280  Size origsz;
281  BrinTuple *origtup;
282  Size newsz;
283  BrinTuple *newtup;
284  bool samepage;
285 
286  /*
287  * Make a copy of the old tuple, so that we can compare it after
288  * re-acquiring the lock.
289  */
290  origsz = ItemIdGetLength(lp);
291  origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
292 
293  /*
294  * Before releasing the lock, check if we can attempt a same-page
295  * update. Another process could insert a tuple concurrently in
296  * the same page though, so downstream we must be prepared to cope
297  * if this turns out to not be possible after all.
298  */
299  newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
300  samepage = brin_can_do_samepage_update(buf, origsz, newsz);
302 
303  /*
304  * Try to update the tuple. If this doesn't work for whatever
305  * reason, we need to restart from the top; the revmap might be
306  * pointing at a different tuple for this block now, so we need to
307  * recompute to ensure both our new heap tuple and the other
308  * inserter's are covered by the combined tuple. It might be that
309  * we don't need to update at all.
310  */
311  if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
312  buf, off, origtup, origsz, newtup, newsz,
313  samepage))
314  {
315  /* no luck; start over */
317  continue;
318  }
319  }
320 
321  /* success! */
322  break;
323  }
324 
325  brinRevmapTerminate(revmap);
326  if (BufferIsValid(buf))
328  MemoryContextSwitchTo(oldcxt);
329  if (tupcxt != NULL)
330  MemoryContextDelete(tupcxt);
331 
332  return false;
333 }
334 
335 /*
336  * Initialize state for a BRIN index scan.
337  *
338  * We read the metapage here to determine the pages-per-range number that this
339  * index was built with. Note that since this cannot be changed while we're
340  * holding lock on index, it's not necessary to recompute it during brinrescan.
341  */
343 brinbeginscan(Relation r, int nkeys, int norderbys)
344 {
345  IndexScanDesc scan;
346  BrinOpaque *opaque;
347 
348  scan = RelationGetIndexScan(r, nkeys, norderbys);
349 
350  opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque));
351  opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
352  scan->xs_snapshot);
353  opaque->bo_bdesc = brin_build_desc(r);
354  scan->opaque = opaque;
355 
356  return scan;
357 }
358 
359 /*
360  * Execute the index scan.
361  *
362  * This works by reading index TIDs from the revmap, and obtaining the index
363  * tuples pointed to by them; the summary values in the index tuples are
364  * compared to the scan keys. We return into the TID bitmap all the pages in
365  * ranges corresponding to index tuples that match the scan keys.
366  *
367  * If a TID from the revmap is read as InvalidTID, we know that range is
368  * unsummarized. Pages in those ranges need to be returned regardless of scan
369  * keys.
370  */
371 int64
373 {
374  Relation idxRel = scan->indexRelation;
376  BrinDesc *bdesc;
377  Oid heapOid;
378  Relation heapRel;
379  BrinOpaque *opaque;
380  BlockNumber nblocks;
381  BlockNumber heapBlk;
382  int totalpages = 0;
383  FmgrInfo *consistentFn;
384  MemoryContext oldcxt;
385  MemoryContext perRangeCxt;
386  BrinMemTuple *dtup;
387  BrinTuple *btup = NULL;
388  Size btupsz = 0;
389 
390  opaque = (BrinOpaque *) scan->opaque;
391  bdesc = opaque->bo_bdesc;
392  pgstat_count_index_scan(idxRel);
393 
394  /*
395  * We need to know the size of the table so that we know how long to
396  * iterate on the revmap.
397  */
398  heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
399  heapRel = table_open(heapOid, AccessShareLock);
400  nblocks = RelationGetNumberOfBlocks(heapRel);
401  table_close(heapRel, AccessShareLock);
402 
403  /*
404  * Make room for the consistent support procedures of indexed columns. We
405  * don't look them up here; we do that lazily the first time we see a scan
406  * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
407  */
408  consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
409 
410  /* allocate an initial in-memory tuple, out of the per-range memcxt */
411  dtup = brin_new_memtuple(bdesc);
412 
413  /*
414  * Setup and use a per-range memory context, which is reset every time we
415  * loop below. This avoids having to free the tuples within the loop.
416  */
418  "bringetbitmap cxt",
420  oldcxt = MemoryContextSwitchTo(perRangeCxt);
421 
422  /*
423  * Now scan the revmap. We start by querying for heap page 0,
424  * incrementing by the number of pages per range; this gives us a full
425  * view of the table.
426  */
427  for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
428  {
429  bool addrange;
430  bool gottuple = false;
431  BrinTuple *tup;
432  OffsetNumber off;
433  Size size;
434 
436 
438 
439  tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
440  &off, &size, BUFFER_LOCK_SHARE,
441  scan->xs_snapshot);
442  if (tup)
443  {
444  gottuple = true;
445  btup = brin_copy_tuple(tup, size, btup, &btupsz);
447  }
448 
449  /*
450  * For page ranges with no indexed tuple, we must return the whole
451  * range; otherwise, compare it to the scan keys.
452  */
453  if (!gottuple)
454  {
455  addrange = true;
456  }
457  else
458  {
459  dtup = brin_deform_tuple(bdesc, btup, dtup);
460  if (dtup->bt_placeholder)
461  {
462  /*
463  * Placeholder tuples are always returned, regardless of the
464  * values stored in them.
465  */
466  addrange = true;
467  }
468  else
469  {
470  int keyno;
471 
472  /*
473  * Compare scan keys with summary values stored for the range.
474  * If scan keys are matched, the page range must be added to
475  * the bitmap. We initially assume the range needs to be
476  * added; in particular this serves the case where there are
477  * no keys.
478  */
479  addrange = true;
480  for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
481  {
482  ScanKey key = &scan->keyData[keyno];
483  AttrNumber keyattno = key->sk_attno;
484  BrinValues *bval = &dtup->bt_columns[keyattno - 1];
485  Datum add;
486 
487  /*
488  * The collation of the scan key must match the collation
489  * used in the index column (but only if the search is not
490  * IS NULL/ IS NOT NULL). Otherwise we shouldn't be using
491  * this index ...
492  */
493  Assert((key->sk_flags & SK_ISNULL) ||
494  (key->sk_collation ==
495  TupleDescAttr(bdesc->bd_tupdesc,
496  keyattno - 1)->attcollation));
497 
498  /* First time this column? look up consistent function */
499  if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
500  {
501  FmgrInfo *tmp;
502 
503  tmp = index_getprocinfo(idxRel, keyattno,
505  fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
507  }
508 
509  /*
510  * Check whether the scan key is consistent with the page
511  * range values; if so, have the pages in the range added
512  * to the output bitmap.
513  *
514  * When there are multiple scan keys, failure to meet the
515  * criteria for a single one of them is enough to discard
516  * the range as a whole, so break out of the loop as soon
517  * as a false return value is obtained.
518  */
519  add = FunctionCall3Coll(&consistentFn[keyattno - 1],
520  key->sk_collation,
521  PointerGetDatum(bdesc),
522  PointerGetDatum(bval),
523  PointerGetDatum(key));
524  addrange = DatumGetBool(add);
525  if (!addrange)
526  break;
527  }
528  }
529  }
530 
531  /* add the pages in the range to the output bitmap, if needed */
532  if (addrange)
533  {
534  BlockNumber pageno;
535 
536  for (pageno = heapBlk;
537  pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
538  pageno++)
539  {
540  MemoryContextSwitchTo(oldcxt);
541  tbm_add_page(tbm, pageno);
542  totalpages++;
543  MemoryContextSwitchTo(perRangeCxt);
544  }
545  }
546  }
547 
548  MemoryContextSwitchTo(oldcxt);
549  MemoryContextDelete(perRangeCxt);
550 
551  if (buf != InvalidBuffer)
552  ReleaseBuffer(buf);
553 
554  /*
555  * XXX We have an approximation of the number of *pages* that our scan
556  * returns, but we don't have a precise idea of the number of heap tuples
557  * involved.
558  */
559  return totalpages * 10;
560 }
561 
562 /*
563  * Re-initialize state for a BRIN index scan
564  */
565 void
566 brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
567  ScanKey orderbys, int norderbys)
568 {
569  /*
570  * Other index AMs preprocess the scan keys at this point, or sometime
571  * early during the scan; this lets them optimize by removing redundant
572  * keys, or doing early returns when they are impossible to satisfy; see
573  * _bt_preprocess_keys for an example. Something like that could be added
574  * here someday, too.
575  */
576 
577  if (scankey && scan->numberOfKeys > 0)
578  memmove(scan->keyData, scankey,
579  scan->numberOfKeys * sizeof(ScanKeyData));
580 }
581 
582 /*
583  * Close down a BRIN index scan
584  */
585 void
587 {
588  BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
589 
591  brin_free_desc(opaque->bo_bdesc);
592  pfree(opaque);
593 }
594 
595 /*
596  * Per-heap-tuple callback for table_index_build_scan.
597  *
598  * Note we don't worry about the page range at the end of the table here; it is
599  * present in the build state struct after we're called the last time, but not
600  * inserted into the index. Caller must ensure to do so, if appropriate.
601  */
602 static void
604  ItemPointer tid,
605  Datum *values,
606  bool *isnull,
607  bool tupleIsAlive,
608  void *brstate)
609 {
610  BrinBuildState *state = (BrinBuildState *) brstate;
611  BlockNumber thisblock;
612  int i;
613 
614  thisblock = ItemPointerGetBlockNumber(tid);
615 
616  /*
617  * If we're in a block that belongs to a future range, summarize what
618  * we've got and start afresh. Note the scan might have skipped many
619  * pages, if they were devoid of live tuples; make sure to insert index
620  * tuples for those too.
621  */
622  while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
623  {
624 
625  BRIN_elog((DEBUG2,
626  "brinbuildCallback: completed a range: %u--%u",
627  state->bs_currRangeStart,
628  state->bs_currRangeStart + state->bs_pagesPerRange));
629 
630  /* create the index tuple and insert it */
631  form_and_insert_tuple(state);
632 
633  /* set state to correspond to the next range */
634  state->bs_currRangeStart += state->bs_pagesPerRange;
635 
636  /* re-initialize state for it */
638  }
639 
640  /* Accumulate the current tuple into the running state */
641  for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++)
642  {
643  FmgrInfo *addValue;
644  BrinValues *col;
646 
647  col = &state->bs_dtuple->bt_columns[i];
648  addValue = index_getprocinfo(index, i + 1,
650 
651  /*
652  * Update dtuple state, if and as necessary.
653  */
654  FunctionCall4Coll(addValue,
655  attr->attcollation,
656  PointerGetDatum(state->bs_bdesc),
657  PointerGetDatum(col),
658  values[i], isnull[i]);
659  }
660 }
661 
662 /*
663  * brinbuild() -- build a new BRIN index.
664  */
667 {
668  IndexBuildResult *result;
669  double reltuples;
670  double idxtuples;
671  BrinRevmap *revmap;
673  Buffer meta;
674  BlockNumber pagesPerRange;
675 
676  /*
677  * We expect to be called exactly once for any index relation.
678  */
679  if (RelationGetNumberOfBlocks(index) != 0)
680  elog(ERROR, "index \"%s\" already contains data",
681  RelationGetRelationName(index));
682 
683  /*
684  * Critical section not required, because on error the creation of the
685  * whole relation will be rolled back.
686  */
687 
688  meta = ReadBuffer(index, P_NEW);
691 
694  MarkBufferDirty(meta);
695 
696  if (RelationNeedsWAL(index))
697  {
698  xl_brin_createidx xlrec;
699  XLogRecPtr recptr;
700  Page page;
701 
703  xlrec.pagesPerRange = BrinGetPagesPerRange(index);
704 
705  XLogBeginInsert();
706  XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
708 
709  recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
710 
711  page = BufferGetPage(meta);
712  PageSetLSN(page, recptr);
713  }
714 
715  UnlockReleaseBuffer(meta);
716 
717  /*
718  * Initialize our state, including the deformed tuple state.
719  */
720  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
721  state = initialize_brin_buildstate(index, revmap, pagesPerRange);
722 
723  /*
724  * Now scan the relation. No syncscan allowed here because we want the
725  * heap blocks in physical order.
726  */
727  reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
728  brinbuildCallback, (void *) state, NULL);
729 
730  /* process the final batch */
731  form_and_insert_tuple(state);
732 
733  /* release resources */
734  idxtuples = state->bs_numtuples;
735  brinRevmapTerminate(state->bs_rmAccess);
737 
738  /*
739  * Return statistics
740  */
741  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
742 
743  result->heap_tuples = reltuples;
744  result->index_tuples = idxtuples;
745 
746  return result;
747 }
748 
749 void
751 {
752  Buffer metabuf;
753 
754  /* An empty BRIN index has a metapage only. */
755  metabuf =
758 
759  /* Initialize and xlog metabuffer. */
763  MarkBufferDirty(metabuf);
764  log_newpage_buffer(metabuf, true);
766 
767  UnlockReleaseBuffer(metabuf);
768 }
769 
770 /*
771  * brinbulkdelete
772  * Since there are no per-heap-tuple index tuples in BRIN indexes,
773  * there's not a lot we can do here.
774  *
775  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
776  * tuple is deleted), meaning the need to re-run summarization on the affected
777  * range. Would need to add an extra flag in brintuples for that.
778  */
781  IndexBulkDeleteCallback callback, void *callback_state)
782 {
783  /* allocate stats if first time through, else re-use existing struct */
784  if (stats == NULL)
786 
787  return stats;
788 }
789 
790 /*
791  * This routine is in charge of "vacuuming" a BRIN index: we just summarize
792  * ranges that are currently unsummarized.
793  */
796 {
797  Relation heapRel;
798 
799  /* No-op in ANALYZE ONLY mode */
800  if (info->analyze_only)
801  return stats;
802 
803  if (!stats)
805  stats->num_pages = RelationGetNumberOfBlocks(info->index);
806  /* rest of stats is initialized by zeroing */
807 
808  heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
810 
811  brin_vacuum_scan(info->index, info->strategy);
812 
813  brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
814  &stats->num_index_tuples, &stats->num_index_tuples);
815 
816  table_close(heapRel, AccessShareLock);
817 
818  return stats;
819 }
820 
821 /*
822  * reloptions processor for BRIN indexes
823  */
824 bytea *
825 brinoptions(Datum reloptions, bool validate)
826 {
827  static const relopt_parse_elt tab[] = {
828  {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
829  {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
830  };
831 
832  return (bytea *) build_reloptions(reloptions, validate,
834  sizeof(BrinOptions),
835  tab, lengthof(tab));
836 }
837 
838 /*
839  * SQL-callable function to scan through an index and summarize all ranges
840  * that are not currently summarized.
841  */
842 Datum
844 {
845  Datum relation = PG_GETARG_DATUM(0);
846 
848  relation,
850 }
851 
852 /*
853  * SQL-callable function to summarize the indicated page range, if not already
854  * summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
855  * unsummarized ranges are summarized.
856  */
857 Datum
859 {
860  Oid indexoid = PG_GETARG_OID(0);
861  int64 heapBlk64 = PG_GETARG_INT64(1);
862  BlockNumber heapBlk;
863  Oid heapoid;
864  Relation indexRel;
865  Relation heapRel;
866  double numSummarized = 0;
867 
868  if (RecoveryInProgress())
869  ereport(ERROR,
870  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
871  errmsg("recovery is in progress"),
872  errhint("BRIN control functions cannot be executed during recovery.")));
873 
874  if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
875  {
876  char *blk = psprintf(INT64_FORMAT, heapBlk64);
877 
878  ereport(ERROR,
879  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
880  errmsg("block number out of range: %s", blk)));
881  }
882  heapBlk = (BlockNumber) heapBlk64;
883 
884  /*
885  * We must lock table before index to avoid deadlocks. However, if the
886  * passed indexoid isn't an index then IndexGetRelation() will fail.
887  * Rather than emitting a not-very-helpful error message, postpone
888  * complaining, expecting that the is-it-an-index test below will fail.
889  */
890  heapoid = IndexGetRelation(indexoid, true);
891  if (OidIsValid(heapoid))
892  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
893  else
894  heapRel = NULL;
895 
896  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
897 
898  /* Must be a BRIN index */
899  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
900  indexRel->rd_rel->relam != BRIN_AM_OID)
901  ereport(ERROR,
902  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
903  errmsg("\"%s\" is not a BRIN index",
904  RelationGetRelationName(indexRel))));
905 
906  /* User must own the index (comparable to privileges needed for VACUUM) */
907  if (!pg_class_ownercheck(indexoid, GetUserId()))
909  RelationGetRelationName(indexRel));
910 
911  /*
912  * Since we did the IndexGetRelation call above without any lock, it's
913  * barely possible that a race against an index drop/recreation could have
914  * netted us the wrong table. Recheck.
915  */
916  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
917  ereport(ERROR,
919  errmsg("could not open parent table of index %s",
920  RelationGetRelationName(indexRel))));
921 
922  /* OK, do it */
923  brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
924 
927 
928  PG_RETURN_INT32((int32) numSummarized);
929 }
930 
931 /*
932  * SQL-callable interface to mark a range as no longer summarized
933  */
934 Datum
936 {
937  Oid indexoid = PG_GETARG_OID(0);
938  int64 heapBlk64 = PG_GETARG_INT64(1);
939  BlockNumber heapBlk;
940  Oid heapoid;
941  Relation heapRel;
942  Relation indexRel;
943  bool done;
944 
945  if (RecoveryInProgress())
946  ereport(ERROR,
947  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
948  errmsg("recovery is in progress"),
949  errhint("BRIN control functions cannot be executed during recovery.")));
950 
951  if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
952  {
953  char *blk = psprintf(INT64_FORMAT, heapBlk64);
954 
955  ereport(ERROR,
956  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
957  errmsg("block number out of range: %s", blk)));
958  }
959  heapBlk = (BlockNumber) heapBlk64;
960 
961  /*
962  * We must lock table before index to avoid deadlocks. However, if the
963  * passed indexoid isn't an index then IndexGetRelation() will fail.
964  * Rather than emitting a not-very-helpful error message, postpone
965  * complaining, expecting that the is-it-an-index test below will fail.
966  */
967  heapoid = IndexGetRelation(indexoid, true);
968  if (OidIsValid(heapoid))
969  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
970  else
971  heapRel = NULL;
972 
973  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
974 
975  /* Must be a BRIN index */
976  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
977  indexRel->rd_rel->relam != BRIN_AM_OID)
978  ereport(ERROR,
979  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
980  errmsg("\"%s\" is not a BRIN index",
981  RelationGetRelationName(indexRel))));
982 
983  /* User must own the index (comparable to privileges needed for VACUUM) */
984  if (!pg_class_ownercheck(indexoid, GetUserId()))
986  RelationGetRelationName(indexRel));
987 
988  /*
989  * Since we did the IndexGetRelation call above without any lock, it's
990  * barely possible that a race against an index drop/recreation could have
991  * netted us the wrong table. Recheck.
992  */
993  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
994  ereport(ERROR,
996  errmsg("could not open parent table of index %s",
997  RelationGetRelationName(indexRel))));
998 
999  /* the revmap does the hard work */
1000  do
1001  {
1002  done = brinRevmapDesummarizeRange(indexRel, heapBlk);
1003  }
1004  while (!done);
1005 
1008 
1009  PG_RETURN_VOID();
1010 }
1011 
1012 /*
1013  * Build a BrinDesc used to create or scan a BRIN index
1014  */
1015 BrinDesc *
1017 {
1018  BrinOpcInfo **opcinfo;
1019  BrinDesc *bdesc;
1020  TupleDesc tupdesc;
1021  int totalstored = 0;
1022  int keyno;
1023  long totalsize;
1024  MemoryContext cxt;
1025  MemoryContext oldcxt;
1026 
1028  "brin desc cxt",
1030  oldcxt = MemoryContextSwitchTo(cxt);
1031  tupdesc = RelationGetDescr(rel);
1032 
1033  /*
1034  * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1035  * the number of columns stored, since the number is opclass-defined.
1036  */
1037  opcinfo = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * tupdesc->natts);
1038  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1039  {
1040  FmgrInfo *opcInfoFn;
1041  Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1042 
1043  opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);
1044 
1045  opcinfo[keyno] = (BrinOpcInfo *)
1046  DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
1047  totalstored += opcinfo[keyno]->oi_nstored;
1048  }
1049 
1050  /* Allocate our result struct and fill it in */
1051  totalsize = offsetof(BrinDesc, bd_info) +
1052  sizeof(BrinOpcInfo *) * tupdesc->natts;
1053 
1054  bdesc = palloc(totalsize);
1055  bdesc->bd_context = cxt;
1056  bdesc->bd_index = rel;
1057  bdesc->bd_tupdesc = tupdesc;
1058  bdesc->bd_disktdesc = NULL; /* generated lazily */
1059  bdesc->bd_totalstored = totalstored;
1060 
1061  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1062  bdesc->bd_info[keyno] = opcinfo[keyno];
1063  pfree(opcinfo);
1064 
1065  MemoryContextSwitchTo(oldcxt);
1066 
1067  return bdesc;
1068 }
1069 
1070 void
1072 {
1073  /* make sure the tupdesc is still valid */
1074  Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1075  /* no need for retail pfree */
1077 }
1078 
1079 /*
1080  * Fetch index's statistical data into *stats
1081  */
1082 void
1084 {
1085  Buffer metabuffer;
1086  Page metapage;
1087  BrinMetaPageData *metadata;
1088 
1089  metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
1090  LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
1091  metapage = BufferGetPage(metabuffer);
1092  metadata = (BrinMetaPageData *) PageGetContents(metapage);
1093 
1094  stats->pagesPerRange = metadata->pagesPerRange;
1095  stats->revmapNumPages = metadata->lastRevmapPage - 1;
1096 
1097  UnlockReleaseBuffer(metabuffer);
1098 }
1099 
1100 /*
1101  * Initialize a BrinBuildState appropriate to create tuples on the given index.
1102  */
1103 static BrinBuildState *
1105  BlockNumber pagesPerRange)
1106 {
1108 
1109  state = palloc(sizeof(BrinBuildState));
1110 
1111  state->bs_irel = idxRel;
1112  state->bs_numtuples = 0;
1114  state->bs_pagesPerRange = pagesPerRange;
1115  state->bs_currRangeStart = 0;
1116  state->bs_rmAccess = revmap;
1117  state->bs_bdesc = brin_build_desc(idxRel);
1118  state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1119 
1121 
1122  return state;
1123 }
1124 
1125 /*
1126  * Release resources associated with a BrinBuildState.
1127  */
1128 static void
1130 {
1131  /*
1132  * Release the last index buffer used. We might as well ensure that
1133  * whatever free space remains in that page is available in FSM, too.
1134  */
1135  if (!BufferIsInvalid(state->bs_currentInsertBuf))
1136  {
1137  Page page;
1138  Size freespace;
1139  BlockNumber blk;
1140 
1141  page = BufferGetPage(state->bs_currentInsertBuf);
1142  freespace = PageGetFreeSpace(page);
1145  RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1146  FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1147  }
1148 
1149  brin_free_desc(state->bs_bdesc);
1150  pfree(state->bs_dtuple);
1151  pfree(state);
1152 }
1153 
/*
 * On the given BRIN index, summarize the heap page range that corresponds
 * to the heap block number given.
 *
 * This routine can run in parallel with insertions into the heap.  To avoid
 * missing those values from the summary tuple, we first insert a placeholder
 * index tuple into the index, then execute the heap scan; transactions
 * concurrent with the scan update the placeholder tuple.  After the scan, we
 * union the placeholder tuple with the one computed by this routine.  The
 * update of the index value happens in a loop, so that if somebody updates
 * the placeholder tuple after we read it, we detect the case and try again.
 * This ensures that the concurrently inserted tuples are not lost.
 *
 * A further corner case is this routine being asked to summarize the partial
 * range at the end of the table.  heapNumBlocks is the (possibly outdated)
 * table size; if we notice that the requested range lies beyond that size,
 * we re-compute the table size after inserting the placeholder tuple, to
 * avoid missing pages that were appended recently.
 */
static void
summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
				BlockNumber heapBlk, BlockNumber heapNumBlks)
{
	Buffer		phbuf;			/* buffer holding the placeholder tuple */
	BrinTuple  *phtup;			/* most recent copy of the placeholder tuple */
	Size		phsz;
	OffsetNumber offset;
	BlockNumber scanNumBlks;	/* number of heap blocks to actually scan */

	/*
	 * Insert the placeholder tuple
	 */
	phbuf = InvalidBuffer;
	phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
	offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
						   state->bs_rmAccess, &phbuf,
						   heapBlk, phtup, phsz);

	/*
	 * Compute range end.  We hold ShareUpdateExclusive lock on table, so it
	 * cannot shrink concurrently (but it can grow).
	 */
	Assert(heapBlk % state->bs_pagesPerRange == 0);
	if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
	{
		/*
		 * If we're asked to scan what we believe to be the final range on the
		 * table (i.e. a range that might be partial) we need to recompute our
		 * idea of what the latest page is after inserting the placeholder
		 * tuple.  Anyone that grows the table later will update the
		 * placeholder tuple, so it doesn't matter that we won't scan these
		 * pages ourselves.  Careful: the table might have been extended
		 * beyond the current range, so clamp our result.
		 *
		 * Fortunately, this should occur infrequently.
		 */
		scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
						  state->bs_pagesPerRange);
	}
	else
	{
		/* Easy case: range is known to be complete */
		scanNumBlks = state->bs_pagesPerRange;
	}

	/*
	 * Execute the partial heap scan covering the heap blocks in the specified
	 * page range, summarizing the heap tuples in it.  This scan stops just
	 * short of brinbuildCallback creating the new index entry.
	 *
	 * Note that it is critical we use the "any visible" mode of
	 * table_index_build_range_scan here: otherwise, we would miss tuples
	 * inserted by transactions that are still in progress, among other corner
	 * cases.
	 */
	state->bs_currRangeStart = heapBlk;
	table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
								 heapBlk, scanNumBlks,
								 brinbuildCallback, (void *) state, NULL);

	/*
	 * Now we update the values obtained by the scan with the placeholder
	 * tuple.  We do this in a loop which only terminates if we're able to
	 * update the placeholder tuple successfully; if we are not, this means
	 * somebody else modified the placeholder tuple after we read it.
	 */
	for (;;)
	{
		BrinTuple  *newtup;
		Size		newsize;
		bool		didupdate;
		bool		samepage;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Update the summary tuple and try to update.
		 */
		newtup = brin_form_tuple(state->bs_bdesc,
								 heapBlk, state->bs_dtuple, &newsize);
		samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
		didupdate =
			brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
						  state->bs_rmAccess, heapBlk, phbuf, offset,
						  phtup, phsz, newtup, newsize, samepage);
		brin_free_tuple(phtup);
		brin_free_tuple(newtup);

		/* If the update succeeded, we're done. */
		if (didupdate)
			break;

		/*
		 * If the update didn't work, it might be because somebody updated the
		 * placeholder tuple concurrently.  Extract the new version, union it
		 * with the values we have from the scan, and start over.  (There are
		 * other reasons for the update to fail, but it's simple to treat them
		 * the same.)
		 */
		phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
										 &offset, &phsz, BUFFER_LOCK_SHARE,
										 NULL);
		/* the placeholder tuple must exist */
		if (phtup == NULL)
			elog(ERROR, "missing placeholder tuple");
		/* copy the tuple before dropping the buffer lock on its page */
		phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
		LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);

		/* merge it into the tuple from the heap scan */
		union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
	}

	ReleaseBuffer(phbuf);
}
1288 
/*
 * Summarize page ranges that are not already summarized.  If pageRange is
 * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
 * page range containing the given heap page number is scanned.
 * If include_partial is true, then the partial range at the end of the table
 * is summarized, otherwise not.
 *
 * For each new index tuple inserted, *numSummarized (if not NULL) is
 * incremented; for each existing tuple, *numExisting (if not NULL) is
 * incremented.
 */
static void
brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
			  bool include_partial, double *numSummarized, double *numExisting)
{
	BrinRevmap *revmap;
	BrinBuildState *state = NULL;	/* created lazily, on first missing range */
	IndexInfo  *indexInfo = NULL;
	BlockNumber heapNumBlocks;
	BlockNumber pagesPerRange;
	Buffer		buf;
	BlockNumber startBlk;

	revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);

	/* determine range of pages to process */
	heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
	if (pageRange == BRIN_ALL_BLOCKRANGES)
		startBlk = 0;
	else
	{
		/* round down to the start of the range containing pageRange */
		startBlk = (pageRange / pagesPerRange) * pagesPerRange;
		heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
	}
	if (startBlk > heapNumBlocks)
	{
		/* Nothing to do if start point is beyond end of table */
		brinRevmapTerminate(revmap);
		return;
	}

	/*
	 * Scan the revmap to find unsummarized items.
	 */
	buf = InvalidBuffer;
	for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
	{
		BrinTuple  *tup;
		OffsetNumber off;

		/*
		 * Unless requested to summarize even a partial range, go away now if
		 * we think the next range is partial.  Caller would pass true when it
		 * is typically run once bulk data loading is done
		 * (brin_summarize_new_values), and false when it is typically the
		 * result of arbitrarily-scheduled maintenance command (vacuuming).
		 */
		if (!include_partial &&
			(startBlk + pagesPerRange > heapNumBlocks))
			break;

		CHECK_FOR_INTERRUPTS();

		tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
									   BUFFER_LOCK_SHARE, NULL);
		if (tup == NULL)
		{
			/* no revmap entry for this heap range. Summarize it. */
			if (state == NULL)
			{
				/* first time through */
				Assert(!indexInfo);
				state = initialize_brin_buildstate(index, revmap,
												   pagesPerRange);
				indexInfo = BuildIndexInfo(index);
			}
			summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);

			/* and re-initialize state for the next range */
			brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);

			if (numSummarized)
				*numSummarized += 1.0;
		}
		else
		{
			/* range already summarized; drop the lock taken to read it */
			if (numExisting)
				*numExisting += 1.0;
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
	}

	if (BufferIsValid(buf))
		ReleaseBuffer(buf);

	/* free resources */
	brinRevmapTerminate(revmap);
	if (state)
	{
		terminate_brin_buildstate(state);
		pfree(indexInfo);
	}
}
1392 
1393 /*
1394  * Given a deformed tuple in the build state, convert it into the on-disk
1395  * format and insert it into the index, making the revmap point to it.
1396  */
1397 static void
1399 {
1400  BrinTuple *tup;
1401  Size size;
1402 
1403  tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1404  state->bs_dtuple, &size);
1405  brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1406  &state->bs_currentInsertBuf, state->bs_currRangeStart,
1407  tup, size);
1408  state->bs_numtuples++;
1409 
1410  pfree(tup);
1411 }
1412 
/*
 * Given two deformed tuples, adjust the first one so that it's consistent
 * with the summary values in both.
 *
 * "a" is modified in place; "b" (an on-disk tuple) is deformed into a
 * throwaway memory context and left untouched.
 */
static void
union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
{
	int			keyno;
	BrinMemTuple *db;
	MemoryContext cxt;
	MemoryContext oldcxt;

	/* Use our own memory context to avoid retail pfree */
	cxt = AllocSetContextCreate(CurrentMemoryContext,
								"brin union",
								ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(cxt);
	db = brin_deform_tuple(bdesc, b, NULL);
	MemoryContextSwitchTo(oldcxt);

	/* Invoke the opclass "union" support proc for each indexed column */
	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
	{
		FmgrInfo   *unionFn;
		BrinValues *col_a = &a->bt_columns[keyno];
		BrinValues *col_b = &db->bt_columns[keyno];

		unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
									BRIN_PROCNUM_UNION);
		/* the union proc updates col_a in place; its result is ignored */
		FunctionCall3Coll(unionFn,
						  bdesc->bd_index->rd_indcollation[keyno],
						  PointerGetDatum(bdesc),
						  PointerGetDatum(col_a),
						  PointerGetDatum(col_b));
	}

	/* reclaim everything allocated while deforming "b" */
	MemoryContextDelete(cxt);
}
1450 
1451 /*
1452  * brin_vacuum_scan
1453  * Do a complete scan of the index during VACUUM.
1454  *
1455  * This routine scans the complete index looking for uncatalogued index pages,
1456  * i.e. those that might have been lost due to a crash after index extension
1457  * and such.
1458  */
1459 static void
1461 {
1462  BlockNumber nblocks;
1463  BlockNumber blkno;
1464 
1465  /*
1466  * Scan the index in physical order, and clean up any possible mess in
1467  * each page.
1468  */
1469  nblocks = RelationGetNumberOfBlocks(idxrel);
1470  for (blkno = 0; blkno < nblocks; blkno++)
1471  {
1472  Buffer buf;
1473 
1475 
1476  buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
1477  RBM_NORMAL, strategy);
1478 
1479  brin_page_cleanup(idxrel, buf);
1480 
1481  ReleaseBuffer(buf);
1482  }
1483 
1484  /*
1485  * Update all upper pages in the index's FSM, as well. This ensures not
1486  * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
1487  * but also that any pre-existing damage or out-of-dateness is repaired.
1488  */
1489  FreeSpaceMapVacuum(idxrel);
1490 }
void brin_free_desc(BrinDesc *bdesc)
Definition: brin.c:1071
IndexBulkDeleteResult * brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: brin.c:780
ambeginscan_function ambeginscan
Definition: amapi.h:225
uint8 amparallelvacuumoptions
Definition: amapi.h:203
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber blkno)
Definition: autovacuum.c:3214
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:86
MemoryContext ii_Context
Definition: execnodes.h:176
Definition: fmgr.h:56
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition: index.c:3379
ambulkdelete_function ambulkdelete
Definition: amapi.h:217
#define BRIN_CURRENT_VERSION
Definition: brin_page.h:72
bool amcanmulticol
Definition: amapi.h:183
uint16 amsupport
Definition: amapi.h:173
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define BRIN_elog(args)
Definition: brin_internal.h:81
#define AllocSetContextCreate
Definition: memutils.h:170
#define BRIN_METAPAGE_BLKNO
Definition: brin_page.h:75
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
IndexBuildResult * brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: brin.c:666
int errhint(const char *fmt,...)
Definition: elog.c:1069
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1009
bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: brin.c:148
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:73
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:794
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:320
#define SizeOfBrinCreateIdx
Definition: brin_xlog.h:55
amgettuple_function amgettuple
Definition: amapi.h:227
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define RelationGetDescr(relation)
Definition: rel.h:454
void brinRevmapTerminate(BrinRevmap *revmap)
Definition: brin_revmap.c:103
Oid GetUserId(void)
Definition: miscinit.c:380
bool amcanorderbyop
Definition: amapi.h:177
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
Definition: brin.c:1418
amproperty_function amproperty
Definition: amapi.h:222
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode, Snapshot snapshot)
Definition: brin_revmap.c:197
bool brinvalidate(Oid opclassoid)
Definition: brin_validate.c:37
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define PointerGetDatum(X)
Definition: postgres.h:556
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
static void addrange(struct cvec *cv, chr from, chr to)
Definition: regc_cvec.c:90
Buffer bs_currentInsertBuf
Definition: brin.c:50
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:202
static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks)
Definition: brin.c:1174
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:642
amparallelrescan_function amparallelrescan
Definition: amapi.h:236
#define Min(x, y)
Definition: c.h:911
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
bool amstorage
Definition: amapi.h:191
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define XLOG_BRIN_CREATE_INDEX
Definition: brin_xlog.h:31
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define BRIN_PROCNUM_OPCINFO
Definition: brin_internal.h:67
int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: brin.c:372
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:608
Relation index
Definition: genam.h:46
bool ampredlocks
Definition: amapi.h:195
struct SnapshotData * xs_snapshot
Definition: relscan.h:104
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1197
uint32 BlockNumber
Definition: block.h:31
static void form_and_insert_tuple(BrinBuildState *state)
Definition: brin.c:1398
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3375
#define P_NEW
Definition: bufmgr.h:81
aminsert_function aminsert
Definition: amapi.h:216
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2284
#define lengthof(array)
Definition: c.h:669
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:88
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:84
unsigned int Oid
Definition: postgres_ext.h:31
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:54
bool RecoveryInProgress(void)
Definition: xlog.c:7930
BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc)
Definition: brin_tuple.c:356
Oid amkeytype
Definition: amapi.h:205
#define BrinGetPagesPerRange(relation)
Definition: brin.h:39
#define OidIsValid(objectId)
Definition: c.h:645
Relation bs_irel
Definition: brin.c:48
bool amoptionalkey
Definition: amapi.h:185
BlockNumber bs_currRangeStart
Definition: brin.c:52
void brinendscan(IndexScanDesc scan)
Definition: brin.c:586
amvalidate_function amvalidate
Definition: amapi.h:224
BlockNumber bo_pagesPerRange
Definition: brin.c:63
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:581
signed int int32
Definition: c.h:347
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1552
Relation indexRelation
Definition: relscan.h:103
bytea * brinoptions(Datum reloptions, bool validate)
Definition: brin.c:825
uint16 OffsetNumber
Definition: off.h:24
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:620
Definition: type.h:89
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:343
Datum brin_summarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:858
BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
Definition: brin_tuple.c:421
IndexUniqueCheck
Definition: genam.h:112
struct BrinOpaque BrinOpaque
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3352
#define BRIN_PROCNUM_ADDVALUE
Definition: brin_internal.h:68
BlockNumber bs_pagesPerRange
Definition: brin.c:51
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
amgetbitmap_function amgetbitmap
Definition: amapi.h:228
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:487
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3398
bool bt_placeholder
Definition: brin_tuple.h:38
Oid * rd_indcollation
Definition: rel.h:174
#define ERROR
Definition: elog.h:43
#define MaxBlockNumber
Definition: block.h:35
Relation bd_index
Definition: brin_internal.h:47
ambuild_function ambuild
Definition: amapi.h:214
amoptions_function amoptions
Definition: amapi.h:221
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1517
BlockNumber num_pages
Definition: genam.h:74
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
bool amcaninclude
Definition: amapi.h:199
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:610
Datum brin_desummarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:935
#define DEBUG2
Definition: elog.h:24
amcostestimate_function amcostestimate
Definition: amapi.h:220
uint16 oi_nstored
Definition: brin_internal.h:28
bool amcanunique
Definition: amapi.h:181
int bd_totalstored
Definition: brin_internal.h:56
struct BrinBuildState BrinBuildState
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_tuple.h:46
#define BufferIsInvalid(buffer)
Definition: buf.h:31
static char * buf
Definition: pg_test_fsync.c:67
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:218
BrinRevmap * bo_rmAccess
Definition: brin.c:64
amendscan_function amendscan
Definition: amapi.h:229
#define memmove(d, s, c)
Definition: c.h:1266
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1550
bool amcanbackward
Definition: amapi.h:179
#define FirstOffsetNumber
Definition: off.h:27
#define REGBUF_STANDARD
Definition: xloginsert.h:35
static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
Definition: brin.c:1460
BrinRevmap * bs_rmAccess
Definition: brin.c:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define RelationGetRelationName(relation)
Definition: rel.h:462
static void brinbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
Definition: brin.c:603
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:1375
TupleDesc bd_tupdesc
Definition: brin_internal.h:50
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
void brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: brin.c:566
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1699
#define SK_ISNULL
Definition: skey.h:115
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting)
Definition: brin.c:1301
#define ereport(elevel, rest)
Definition: elog.h:141
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:324
amrescan_function amrescan
Definition: amapi.h:226
bool amcanparallel
Definition: amapi.h:197
IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys)
Definition: brin.c:343
void * ii_AmCache
Definition: execnodes.h:175
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:333
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
BlockNumber lastRevmapPage
Definition: brin_page.h:69
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:67
bool amsearchnulls
Definition: amapi.h:189
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
BrinDesc * bo_bdesc
Definition: brin.c:65
void * palloc0(Size size)
Definition: mcxt.c:980
#define PageGetContents(page)
Definition: bufpage.h:246
uintptr_t Datum
Definition: postgres.h:367
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3612
bool amclusterable
Definition: amapi.h:193
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:198
BrinOpcInfo * bd_info[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:59
bool amsearcharray
Definition: amapi.h:187
#define InvalidOid
Definition: postgres_ext.h:36
BlockNumber pagesPerRange
Definition: brin.h:33
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1172
bool amusemaintenanceworkmem
Definition: amapi.h:201
BlockNumber pagesPerRange
Definition: brin_page.h:68
void brin_free_tuple(BrinTuple *tuple)
Definition: brin_tuple.c:307
#define PG_RETURN_VOID()
Definition: fmgr.h:339
#define makeNode(_type_)
Definition: nodes.h:573
BrinDesc * brin_build_desc(Relation rel)
Definition: brin.c:1016
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
int sk_flags
Definition: skey.h:66
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
Definition: regguts.h:298
#define BRIN_PROCNUM_CONSISTENT
Definition: brin_internal.h:69
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4753
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
Definition: brin_revmap.c:321
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:596
size_t Size
Definition: c.h:467
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6954
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, Snapshot snapshot)
Definition: brin_revmap.c:71
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
ammarkpos_function ammarkpos
Definition: amapi.h:230
bool amcanorder
Definition: amapi.h:175
ambuildphasename_function ambuildphasename
Definition: amapi.h:223
#define RelationNeedsWAL(relation)
Definition: rel.h:530
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:234
#define INT64_FORMAT
Definition: c.h:401
#define DatumGetPointer(X)
Definition: postgres.h:549
struct ScanKeyData * keyData
Definition: relscan.h:107
#define BRIN_ALL_BLOCKRANGES
Definition: brin.c:68
int bs_numtuples
Definition: brin.c:49
MemoryContext bd_context
Definition: brin_internal.h:44
#define BRIN_LAST_OPTIONAL_PROCNUM
Definition: brin_internal.h:74
static Datum values[MAXATTR]
Definition: bootstrap.c:167
uint16 amstrategies
Definition: amapi.h:171
#define BRIN_PROCNUM_UNION
Definition: brin_internal.h:70
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2623
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:442
#define BrinGetAutoSummarize(relation)
Definition: brin.h:45
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
TupleDesc bd_disktdesc
Definition: brin_internal.h:53
BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size *size)
Definition: brin_tuple.c:88
Oid sk_collation
Definition: skey.h:70
#define elog(elevel,...)
Definition: elog.h:228
ambuildempty_function ambuildempty
Definition: amapi.h:215
int i
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:633
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:87
IndexBulkDeleteResult * brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: brin.c:795
BlockNumber pagesPerRange
Definition: brin_xlog.h:52
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80
int tdrefcount
Definition: tupdesc.h:84
Definition: c.h:556
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
Datum brin_summarize_new_values(PG_FUNCTION_ARGS)
Definition: brin.c:843
BrinMemTuple * bs_dtuple
Definition: brin.c:55
static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange)
Definition: brin.c:1104
void brinbuildempty(Relation index)
Definition: brin.c:750
BrinDesc * bs_bdesc
Definition: brin.c:54
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_GETARG_INT64(n)
Definition: fmgr.h:277
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:60
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
double num_index_tuples
Definition: genam.h:77
int Buffer
Definition: buf.h:23
BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
Definition: brin_tuple.c:262
amcanreturn_function amcanreturn
Definition: amapi.h:219
int16 AttrNumber
Definition: attnum.h:21
#define RelationGetRelid(relation)
Definition: rel.h:428
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:126
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:352
float4 reltuples
Definition: pg_class.h:63
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:617
Datum brinhandler(PG_FUNCTION_ARGS)
Definition: brin.c:86
#define offsetof(type, field)
Definition: c.h:662
AttrNumber sk_attno
Definition: skey.h:67
Pointer Page
Definition: bufpage.h:78
double index_tuples
Definition: genam.h:33
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:84
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:235
double heap_tuples
Definition: genam.h:32
BrinMemTuple * brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
Definition: brin_tuple.c:383
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1083
static void terminate_brin_buildstate(BrinBuildState *state)
Definition: brin.c:1129
BlockNumber revmapNumPages
Definition: brin.h:34
amrestrpos_function amrestrpos
Definition: amapi.h:231