PostgreSQL source code (doxygen listing, git master)
File: src/backend/access/brin/brin.c
Note: this listing was extracted from generated documentation; some source lines were dropped by the extractor.
1 /*
2  * brin.c
3  * Implementation of BRIN indexes for Postgres
4  *
5  * See src/backend/access/brin/README for details.
6  *
7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/brin/brin.c
12  *
13  * TODO
14  * * ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
15  */
16 #include "postgres.h"
17 
18 #include "access/brin.h"
19 #include "access/brin_page.h"
20 #include "access/brin_pageops.h"
21 #include "access/brin_xlog.h"
22 #include "access/relation.h"
23 #include "access/reloptions.h"
24 #include "access/relscan.h"
25 #include "access/table.h"
26 #include "access/tableam.h"
27 #include "access/xloginsert.h"
28 #include "catalog/index.h"
29 #include "catalog/pg_am.h"
30 #include "miscadmin.h"
31 #include "pgstat.h"
32 #include "postmaster/autovacuum.h"
33 #include "storage/bufmgr.h"
34 #include "storage/freespace.h"
35 #include "utils/builtins.h"
36 #include "utils/index_selfuncs.h"
37 #include "utils/memutils.h"
38 #include "utils/rel.h"
39 
40 
41 /*
42  * We use a BrinBuildState during initial construction of a BRIN index.
43  * The running state is kept in a BrinMemTuple.
44  */
45 typedef struct BrinBuildState
46 {
56 
57 /*
58  * Struct used as "opaque" during index scans
59  */
60 typedef struct BrinOpaque
61 {
65 } BrinOpaque;
66 
67 #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
68 
70  BrinRevmap *revmap, BlockNumber pagesPerRange);
72 static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
73  bool include_partial, double *numSummarized, double *numExisting);
75 static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
76  BrinTuple *b);
77 static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
78 
79 
80 /*
81  * BRIN handler function: return IndexAmRoutine with access method parameters
82  * and callbacks.
83  */
84 Datum
86 {
88 
89  amroutine->amstrategies = 0;
91  amroutine->amcanorder = false;
92  amroutine->amcanorderbyop = false;
93  amroutine->amcanbackward = false;
94  amroutine->amcanunique = false;
95  amroutine->amcanmulticol = true;
96  amroutine->amoptionalkey = true;
97  amroutine->amsearcharray = false;
98  amroutine->amsearchnulls = true;
99  amroutine->amstorage = true;
100  amroutine->amclusterable = false;
101  amroutine->ampredlocks = false;
102  amroutine->amcanparallel = false;
103  amroutine->amcaninclude = false;
104  amroutine->amkeytype = InvalidOid;
105 
106  amroutine->ambuild = brinbuild;
107  amroutine->ambuildempty = brinbuildempty;
108  amroutine->aminsert = brininsert;
109  amroutine->ambulkdelete = brinbulkdelete;
110  amroutine->amvacuumcleanup = brinvacuumcleanup;
111  amroutine->amcanreturn = NULL;
112  amroutine->amcostestimate = brincostestimate;
113  amroutine->amoptions = brinoptions;
114  amroutine->amproperty = NULL;
115  amroutine->ambuildphasename = NULL;
116  amroutine->amvalidate = brinvalidate;
117  amroutine->ambeginscan = brinbeginscan;
118  amroutine->amrescan = brinrescan;
119  amroutine->amgettuple = NULL;
120  amroutine->amgetbitmap = bringetbitmap;
121  amroutine->amendscan = brinendscan;
122  amroutine->ammarkpos = NULL;
123  amroutine->amrestrpos = NULL;
124  amroutine->amestimateparallelscan = NULL;
125  amroutine->aminitparallelscan = NULL;
126  amroutine->amparallelrescan = NULL;
127 
128  PG_RETURN_POINTER(amroutine);
129 }
130 
131 /*
132  * A tuple in the heap is being inserted. To keep a brin index up to date,
133  * we need to obtain the relevant index tuple and compare its stored values
134  * with those of the new tuple. If the tuple values are not consistent with
135  * the summary tuple, we need to update the index tuple.
136  *
137  * If autosummarization is enabled, check if we need to summarize the previous
138  * page range.
139  *
140  * If the range is not currently summarized (i.e. the revmap returns NULL for
141  * it), there's nothing to do for this tuple.
142  */
143 bool
144 brininsert(Relation idxRel, Datum *values, bool *nulls,
145  ItemPointer heaptid, Relation heapRel,
146  IndexUniqueCheck checkUnique,
147  IndexInfo *indexInfo)
148 {
149  BlockNumber pagesPerRange;
150  BlockNumber origHeapBlk;
151  BlockNumber heapBlk;
152  BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
153  BrinRevmap *revmap;
155  MemoryContext tupcxt = NULL;
157  bool autosummarize = BrinGetAutoSummarize(idxRel);
158 
159  revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
160 
161  /*
162  * origHeapBlk is the block number where the insertion occurred. heapBlk
163  * is the first block in the corresponding page range.
164  */
165  origHeapBlk = ItemPointerGetBlockNumber(heaptid);
166  heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
167 
168  for (;;)
169  {
170  bool need_insert = false;
171  OffsetNumber off;
172  BrinTuple *brtup;
173  BrinMemTuple *dtup;
174  int keyno;
175 
177 
178  /*
179  * If auto-summarization is enabled and we just inserted the first
180  * tuple into the first block of a new non-first page range, request a
181  * summarization run of the previous range.
182  */
183  if (autosummarize &&
184  heapBlk > 0 &&
185  heapBlk == origHeapBlk &&
187  {
188  BlockNumber lastPageRange = heapBlk - 1;
189  BrinTuple *lastPageTuple;
190 
191  lastPageTuple =
192  brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
193  NULL, BUFFER_LOCK_SHARE, NULL);
194  if (!lastPageTuple)
195  {
196  bool recorded;
197 
199  RelationGetRelid(idxRel),
200  lastPageRange);
201  if (!recorded)
202  ereport(LOG,
203  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
204  errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
205  RelationGetRelationName(idxRel),
206  lastPageRange)));
207  }
208  else
210  }
211 
212  brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
213  NULL, BUFFER_LOCK_SHARE, NULL);
214 
215  /* if range is unsummarized, there's nothing to do */
216  if (!brtup)
217  break;
218 
219  /* First time through in this statement? */
220  if (bdesc == NULL)
221  {
222  MemoryContextSwitchTo(indexInfo->ii_Context);
223  bdesc = brin_build_desc(idxRel);
224  indexInfo->ii_AmCache = (void *) bdesc;
225  MemoryContextSwitchTo(oldcxt);
226  }
227  /* First time through in this brininsert call? */
228  if (tupcxt == NULL)
229  {
231  "brininsert cxt",
233  MemoryContextSwitchTo(tupcxt);
234  }
235 
236  dtup = brin_deform_tuple(bdesc, brtup, NULL);
237 
238  /*
239  * Compare the key values of the new tuple to the stored index values;
240  * our deformed tuple will get updated if the new tuple doesn't fit
241  * the original range (note this means we can't break out of the loop
242  * early). Make a note of whether this happens, so that we know to
243  * insert the modified tuple later.
244  */
245  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
246  {
247  Datum result;
248  BrinValues *bval;
249  FmgrInfo *addValue;
250 
251  bval = &dtup->bt_columns[keyno];
252  addValue = index_getprocinfo(idxRel, keyno + 1,
254  result = FunctionCall4Coll(addValue,
255  idxRel->rd_indcollation[keyno],
256  PointerGetDatum(bdesc),
257  PointerGetDatum(bval),
258  values[keyno],
259  nulls[keyno]);
260  /* if that returned true, we need to insert the updated tuple */
261  need_insert |= DatumGetBool(result);
262  }
263 
264  if (!need_insert)
265  {
266  /*
267  * The tuple is consistent with the new values, so there's nothing
268  * to do.
269  */
271  }
272  else
273  {
274  Page page = BufferGetPage(buf);
275  ItemId lp = PageGetItemId(page, off);
276  Size origsz;
277  BrinTuple *origtup;
278  Size newsz;
279  BrinTuple *newtup;
280  bool samepage;
281 
282  /*
283  * Make a copy of the old tuple, so that we can compare it after
284  * re-acquiring the lock.
285  */
286  origsz = ItemIdGetLength(lp);
287  origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
288 
289  /*
290  * Before releasing the lock, check if we can attempt a same-page
291  * update. Another process could insert a tuple concurrently in
292  * the same page though, so downstream we must be prepared to cope
293  * if this turns out to not be possible after all.
294  */
295  newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
296  samepage = brin_can_do_samepage_update(buf, origsz, newsz);
298 
299  /*
300  * Try to update the tuple. If this doesn't work for whatever
301  * reason, we need to restart from the top; the revmap might be
302  * pointing at a different tuple for this block now, so we need to
303  * recompute to ensure both our new heap tuple and the other
304  * inserter's are covered by the combined tuple. It might be that
305  * we don't need to update at all.
306  */
307  if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
308  buf, off, origtup, origsz, newtup, newsz,
309  samepage))
310  {
311  /* no luck; start over */
313  continue;
314  }
315  }
316 
317  /* success! */
318  break;
319  }
320 
321  brinRevmapTerminate(revmap);
322  if (BufferIsValid(buf))
324  MemoryContextSwitchTo(oldcxt);
325  if (tupcxt != NULL)
326  MemoryContextDelete(tupcxt);
327 
328  return false;
329 }
330 
331 /*
332  * Initialize state for a BRIN index scan.
333  *
334  * We read the metapage here to determine the pages-per-range number that this
335  * index was built with. Note that since this cannot be changed while we're
336  * holding lock on index, it's not necessary to recompute it during brinrescan.
337  */
339 brinbeginscan(Relation r, int nkeys, int norderbys)
340 {
341  IndexScanDesc scan;
342  BrinOpaque *opaque;
343 
344  scan = RelationGetIndexScan(r, nkeys, norderbys);
345 
346  opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque));
347  opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
348  scan->xs_snapshot);
349  opaque->bo_bdesc = brin_build_desc(r);
350  scan->opaque = opaque;
351 
352  return scan;
353 }
354 
355 /*
356  * Execute the index scan.
357  *
358  * This works by reading index TIDs from the revmap, and obtaining the index
359  * tuples pointed to by them; the summary values in the index tuples are
360  * compared to the scan keys. We return into the TID bitmap all the pages in
361  * ranges corresponding to index tuples that match the scan keys.
362  *
363  * If a TID from the revmap is read as InvalidTID, we know that range is
364  * unsummarized. Pages in those ranges need to be returned regardless of scan
365  * keys.
366  */
367 int64
369 {
370  Relation idxRel = scan->indexRelation;
372  BrinDesc *bdesc;
373  Oid heapOid;
374  Relation heapRel;
375  BrinOpaque *opaque;
376  BlockNumber nblocks;
377  BlockNumber heapBlk;
378  int totalpages = 0;
379  FmgrInfo *consistentFn;
380  MemoryContext oldcxt;
381  MemoryContext perRangeCxt;
382  BrinMemTuple *dtup;
383  BrinTuple *btup = NULL;
384  Size btupsz = 0;
385 
386  opaque = (BrinOpaque *) scan->opaque;
387  bdesc = opaque->bo_bdesc;
388  pgstat_count_index_scan(idxRel);
389 
390  /*
391  * We need to know the size of the table so that we know how long to
392  * iterate on the revmap.
393  */
394  heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
395  heapRel = table_open(heapOid, AccessShareLock);
396  nblocks = RelationGetNumberOfBlocks(heapRel);
397  table_close(heapRel, AccessShareLock);
398 
399  /*
400  * Make room for the consistent support procedures of indexed columns. We
401  * don't look them up here; we do that lazily the first time we see a scan
402  * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
403  */
404  consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
405 
406  /* allocate an initial in-memory tuple, out of the per-range memcxt */
407  dtup = brin_new_memtuple(bdesc);
408 
409  /*
410  * Setup and use a per-range memory context, which is reset every time we
411  * loop below. This avoids having to free the tuples within the loop.
412  */
414  "bringetbitmap cxt",
416  oldcxt = MemoryContextSwitchTo(perRangeCxt);
417 
418  /*
419  * Now scan the revmap. We start by querying for heap page 0,
420  * incrementing by the number of pages per range; this gives us a full
421  * view of the table.
422  */
423  for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
424  {
425  bool addrange;
426  bool gottuple = false;
427  BrinTuple *tup;
428  OffsetNumber off;
429  Size size;
430 
432 
434 
435  tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
436  &off, &size, BUFFER_LOCK_SHARE,
437  scan->xs_snapshot);
438  if (tup)
439  {
440  gottuple = true;
441  btup = brin_copy_tuple(tup, size, btup, &btupsz);
443  }
444 
445  /*
446  * For page ranges with no indexed tuple, we must return the whole
447  * range; otherwise, compare it to the scan keys.
448  */
449  if (!gottuple)
450  {
451  addrange = true;
452  }
453  else
454  {
455  dtup = brin_deform_tuple(bdesc, btup, dtup);
456  if (dtup->bt_placeholder)
457  {
458  /*
459  * Placeholder tuples are always returned, regardless of the
460  * values stored in them.
461  */
462  addrange = true;
463  }
464  else
465  {
466  int keyno;
467 
468  /*
469  * Compare scan keys with summary values stored for the range.
470  * If scan keys are matched, the page range must be added to
471  * the bitmap. We initially assume the range needs to be
472  * added; in particular this serves the case where there are
473  * no keys.
474  */
475  addrange = true;
476  for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
477  {
478  ScanKey key = &scan->keyData[keyno];
479  AttrNumber keyattno = key->sk_attno;
480  BrinValues *bval = &dtup->bt_columns[keyattno - 1];
481  Datum add;
482 
483  /*
484  * The collation of the scan key must match the collation
485  * used in the index column (but only if the search is not
486  * IS NULL/ IS NOT NULL). Otherwise we shouldn't be using
487  * this index ...
488  */
489  Assert((key->sk_flags & SK_ISNULL) ||
490  (key->sk_collation ==
491  TupleDescAttr(bdesc->bd_tupdesc,
492  keyattno - 1)->attcollation));
493 
494  /* First time this column? look up consistent function */
495  if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
496  {
497  FmgrInfo *tmp;
498 
499  tmp = index_getprocinfo(idxRel, keyattno,
501  fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
503  }
504 
505  /*
506  * Check whether the scan key is consistent with the page
507  * range values; if so, have the pages in the range added
508  * to the output bitmap.
509  *
510  * When there are multiple scan keys, failure to meet the
511  * criteria for a single one of them is enough to discard
512  * the range as a whole, so break out of the loop as soon
513  * as a false return value is obtained.
514  */
515  add = FunctionCall3Coll(&consistentFn[keyattno - 1],
516  key->sk_collation,
517  PointerGetDatum(bdesc),
518  PointerGetDatum(bval),
519  PointerGetDatum(key));
520  addrange = DatumGetBool(add);
521  if (!addrange)
522  break;
523  }
524  }
525  }
526 
527  /* add the pages in the range to the output bitmap, if needed */
528  if (addrange)
529  {
530  BlockNumber pageno;
531 
532  for (pageno = heapBlk;
533  pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
534  pageno++)
535  {
536  MemoryContextSwitchTo(oldcxt);
537  tbm_add_page(tbm, pageno);
538  totalpages++;
539  MemoryContextSwitchTo(perRangeCxt);
540  }
541  }
542  }
543 
544  MemoryContextSwitchTo(oldcxt);
545  MemoryContextDelete(perRangeCxt);
546 
547  if (buf != InvalidBuffer)
548  ReleaseBuffer(buf);
549 
550  /*
551  * XXX We have an approximation of the number of *pages* that our scan
552  * returns, but we don't have a precise idea of the number of heap tuples
553  * involved.
554  */
555  return totalpages * 10;
556 }
557 
558 /*
559  * Re-initialize state for a BRIN index scan
560  */
561 void
562 brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
563  ScanKey orderbys, int norderbys)
564 {
565  /*
566  * Other index AMs preprocess the scan keys at this point, or sometime
567  * early during the scan; this lets them optimize by removing redundant
568  * keys, or doing early returns when they are impossible to satisfy; see
569  * _bt_preprocess_keys for an example. Something like that could be added
570  * here someday, too.
571  */
572 
573  if (scankey && scan->numberOfKeys > 0)
574  memmove(scan->keyData, scankey,
575  scan->numberOfKeys * sizeof(ScanKeyData));
576 }
577 
578 /*
579  * Close down a BRIN index scan
580  */
581 void
583 {
584  BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
585 
587  brin_free_desc(opaque->bo_bdesc);
588  pfree(opaque);
589 }
590 
591 /*
592  * Per-heap-tuple callback for table_index_build_scan.
593  *
594  * Note we don't worry about the page range at the end of the table here; it is
595  * present in the build state struct after we're called the last time, but not
596  * inserted into the index. Caller must ensure to do so, if appropriate.
597  */
598 static void
600  HeapTuple htup,
601  Datum *values,
602  bool *isnull,
603  bool tupleIsAlive,
604  void *brstate)
605 {
606  BrinBuildState *state = (BrinBuildState *) brstate;
607  BlockNumber thisblock;
608  int i;
609 
610  thisblock = ItemPointerGetBlockNumber(&htup->t_self);
611 
612  /*
613  * If we're in a block that belongs to a future range, summarize what
614  * we've got and start afresh. Note the scan might have skipped many
615  * pages, if they were devoid of live tuples; make sure to insert index
616  * tuples for those too.
617  */
618  while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
619  {
620 
621  BRIN_elog((DEBUG2,
622  "brinbuildCallback: completed a range: %u--%u",
623  state->bs_currRangeStart,
624  state->bs_currRangeStart + state->bs_pagesPerRange));
625 
626  /* create the index tuple and insert it */
627  form_and_insert_tuple(state);
628 
629  /* set state to correspond to the next range */
630  state->bs_currRangeStart += state->bs_pagesPerRange;
631 
632  /* re-initialize state for it */
634  }
635 
636  /* Accumulate the current tuple into the running state */
637  for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++)
638  {
639  FmgrInfo *addValue;
640  BrinValues *col;
642 
643  col = &state->bs_dtuple->bt_columns[i];
644  addValue = index_getprocinfo(index, i + 1,
646 
647  /*
648  * Update dtuple state, if and as necessary.
649  */
650  FunctionCall4Coll(addValue,
651  attr->attcollation,
652  PointerGetDatum(state->bs_bdesc),
653  PointerGetDatum(col),
654  values[i], isnull[i]);
655  }
656 }
657 
658 /*
659  * brinbuild() -- build a new BRIN index.
660  */
663 {
664  IndexBuildResult *result;
665  double reltuples;
666  double idxtuples;
667  BrinRevmap *revmap;
669  Buffer meta;
670  BlockNumber pagesPerRange;
671 
672  /*
673  * We expect to be called exactly once for any index relation.
674  */
675  if (RelationGetNumberOfBlocks(index) != 0)
676  elog(ERROR, "index \"%s\" already contains data",
677  RelationGetRelationName(index));
678 
679  /*
680  * Critical section not required, because on error the creation of the
681  * whole relation will be rolled back.
682  */
683 
684  meta = ReadBuffer(index, P_NEW);
687 
690  MarkBufferDirty(meta);
691 
692  if (RelationNeedsWAL(index))
693  {
694  xl_brin_createidx xlrec;
695  XLogRecPtr recptr;
696  Page page;
697 
699  xlrec.pagesPerRange = BrinGetPagesPerRange(index);
700 
701  XLogBeginInsert();
702  XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
704 
705  recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
706 
707  page = BufferGetPage(meta);
708  PageSetLSN(page, recptr);
709  }
710 
711  UnlockReleaseBuffer(meta);
712 
713  /*
714  * Initialize our state, including the deformed tuple state.
715  */
716  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
717  state = initialize_brin_buildstate(index, revmap, pagesPerRange);
718 
719  /*
720  * Now scan the relation. No syncscan allowed here because we want the
721  * heap blocks in physical order.
722  */
723  reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
724  brinbuildCallback, (void *) state, NULL);
725 
726  /* process the final batch */
727  form_and_insert_tuple(state);
728 
729  /* release resources */
730  idxtuples = state->bs_numtuples;
731  brinRevmapTerminate(state->bs_rmAccess);
733 
734  /*
735  * Return statistics
736  */
737  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
738 
739  result->heap_tuples = reltuples;
740  result->index_tuples = idxtuples;
741 
742  return result;
743 }
744 
745 void
747 {
748  Buffer metabuf;
749 
750  /* An empty BRIN index has a metapage only. */
751  metabuf =
754 
755  /* Initialize and xlog metabuffer. */
759  MarkBufferDirty(metabuf);
760  log_newpage_buffer(metabuf, true);
762 
763  UnlockReleaseBuffer(metabuf);
764 }
765 
766 /*
767  * brinbulkdelete
768  * Since there are no per-heap-tuple index tuples in BRIN indexes,
769  * there's not a lot we can do here.
770  *
771  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
772  * tuple is deleted), meaning the need to re-run summarization on the affected
773  * range. Would need to add an extra flag in brintuples for that.
774  */
777  IndexBulkDeleteCallback callback, void *callback_state)
778 {
779  /* allocate stats if first time through, else re-use existing struct */
780  if (stats == NULL)
782 
783  return stats;
784 }
785 
786 /*
787  * This routine is in charge of "vacuuming" a BRIN index: we just summarize
788  * ranges that are currently unsummarized.
789  */
792 {
793  Relation heapRel;
794 
795  /* No-op in ANALYZE ONLY mode */
796  if (info->analyze_only)
797  return stats;
798 
799  if (!stats)
801  stats->num_pages = RelationGetNumberOfBlocks(info->index);
802  /* rest of stats is initialized by zeroing */
803 
804  heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
806 
807  brin_vacuum_scan(info->index, info->strategy);
808 
809  brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
810  &stats->num_index_tuples, &stats->num_index_tuples);
811 
812  table_close(heapRel, AccessShareLock);
813 
814  return stats;
815 }
816 
817 /*
818  * reloptions processor for BRIN indexes
819  */
820 bytea *
821 brinoptions(Datum reloptions, bool validate)
822 {
824  BrinOptions *rdopts;
825  int numoptions;
826  static const relopt_parse_elt tab[] = {
827  {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
828  {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
829  };
830 
831  options = parseRelOptions(reloptions, validate, RELOPT_KIND_BRIN,
832  &numoptions);
833 
834  /* if none set, we're done */
835  if (numoptions == 0)
836  return NULL;
837 
838  rdopts = allocateReloptStruct(sizeof(BrinOptions), options, numoptions);
839 
840  fillRelOptions((void *) rdopts, sizeof(BrinOptions), options, numoptions,
841  validate, tab, lengthof(tab));
842 
843  pfree(options);
844 
845  return (bytea *) rdopts;
846 }
847 
848 /*
849  * SQL-callable function to scan through an index and summarize all ranges
850  * that are not currently summarized.
851  */
852 Datum
854 {
855  Datum relation = PG_GETARG_DATUM(0);
856 
858  relation,
860 }
861 
862 /*
863  * SQL-callable function to summarize the indicated page range, if not already
864  * summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
865  * unsummarized ranges are summarized.
866  */
867 Datum
869 {
870  Oid indexoid = PG_GETARG_OID(0);
871  int64 heapBlk64 = PG_GETARG_INT64(1);
872  BlockNumber heapBlk;
873  Oid heapoid;
874  Relation indexRel;
875  Relation heapRel;
876  double numSummarized = 0;
877 
878  if (RecoveryInProgress())
879  ereport(ERROR,
880  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
881  errmsg("recovery is in progress"),
882  errhint("BRIN control functions cannot be executed during recovery.")));
883 
884  if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
885  {
886  char *blk = psprintf(INT64_FORMAT, heapBlk64);
887 
888  ereport(ERROR,
889  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
890  errmsg("block number out of range: %s", blk)));
891  }
892  heapBlk = (BlockNumber) heapBlk64;
893 
894  /*
895  * We must lock table before index to avoid deadlocks. However, if the
896  * passed indexoid isn't an index then IndexGetRelation() will fail.
897  * Rather than emitting a not-very-helpful error message, postpone
898  * complaining, expecting that the is-it-an-index test below will fail.
899  */
900  heapoid = IndexGetRelation(indexoid, true);
901  if (OidIsValid(heapoid))
902  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
903  else
904  heapRel = NULL;
905 
906  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
907 
908  /* Must be a BRIN index */
909  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
910  indexRel->rd_rel->relam != BRIN_AM_OID)
911  ereport(ERROR,
912  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
913  errmsg("\"%s\" is not a BRIN index",
914  RelationGetRelationName(indexRel))));
915 
916  /* User must own the index (comparable to privileges needed for VACUUM) */
917  if (!pg_class_ownercheck(indexoid, GetUserId()))
919  RelationGetRelationName(indexRel));
920 
921  /*
922  * Since we did the IndexGetRelation call above without any lock, it's
923  * barely possible that a race against an index drop/recreation could have
924  * netted us the wrong table. Recheck.
925  */
926  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
927  ereport(ERROR,
929  errmsg("could not open parent table of index %s",
930  RelationGetRelationName(indexRel))));
931 
932  /* OK, do it */
933  brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
934 
937 
938  PG_RETURN_INT32((int32) numSummarized);
939 }
940 
941 /*
942  * SQL-callable interface to mark a range as no longer summarized
943  */
944 Datum
946 {
947  Oid indexoid = PG_GETARG_OID(0);
948  int64 heapBlk64 = PG_GETARG_INT64(1);
949  BlockNumber heapBlk;
950  Oid heapoid;
951  Relation heapRel;
952  Relation indexRel;
953  bool done;
954 
955  if (RecoveryInProgress())
956  ereport(ERROR,
957  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
958  errmsg("recovery is in progress"),
959  errhint("BRIN control functions cannot be executed during recovery.")));
960 
961  if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
962  {
963  char *blk = psprintf(INT64_FORMAT, heapBlk64);
964 
965  ereport(ERROR,
966  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
967  errmsg("block number out of range: %s", blk)));
968  }
969  heapBlk = (BlockNumber) heapBlk64;
970 
971  /*
972  * We must lock table before index to avoid deadlocks. However, if the
973  * passed indexoid isn't an index then IndexGetRelation() will fail.
974  * Rather than emitting a not-very-helpful error message, postpone
975  * complaining, expecting that the is-it-an-index test below will fail.
976  */
977  heapoid = IndexGetRelation(indexoid, true);
978  if (OidIsValid(heapoid))
979  heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
980  else
981  heapRel = NULL;
982 
983  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
984 
985  /* Must be a BRIN index */
986  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
987  indexRel->rd_rel->relam != BRIN_AM_OID)
988  ereport(ERROR,
989  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
990  errmsg("\"%s\" is not a BRIN index",
991  RelationGetRelationName(indexRel))));
992 
993  /* User must own the index (comparable to privileges needed for VACUUM) */
994  if (!pg_class_ownercheck(indexoid, GetUserId()))
996  RelationGetRelationName(indexRel));
997 
998  /*
999  * Since we did the IndexGetRelation call above without any lock, it's
1000  * barely possible that a race against an index drop/recreation could have
1001  * netted us the wrong table. Recheck.
1002  */
1003  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1004  ereport(ERROR,
1006  errmsg("could not open parent table of index %s",
1007  RelationGetRelationName(indexRel))));
1008 
1009  /* the revmap does the hard work */
1010  do
1011  {
1012  done = brinRevmapDesummarizeRange(indexRel, heapBlk);
1013  }
1014  while (!done);
1015 
1018 
1019  PG_RETURN_VOID();
1020 }
1021 
1022 /*
1023  * Build a BrinDesc used to create or scan a BRIN index
1024  */
1025 BrinDesc *
1027 {
1028  BrinOpcInfo **opcinfo;
1029  BrinDesc *bdesc;
1030  TupleDesc tupdesc;
1031  int totalstored = 0;
1032  int keyno;
1033  long totalsize;
1034  MemoryContext cxt;
1035  MemoryContext oldcxt;
1036 
1038  "brin desc cxt",
1040  oldcxt = MemoryContextSwitchTo(cxt);
1041  tupdesc = RelationGetDescr(rel);
1042 
1043  /*
1044  * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1045  * the number of columns stored, since the number is opclass-defined.
1046  */
1047  opcinfo = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * tupdesc->natts);
1048  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1049  {
1050  FmgrInfo *opcInfoFn;
1051  Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1052 
1053  opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);
1054 
1055  opcinfo[keyno] = (BrinOpcInfo *)
1056  DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
1057  totalstored += opcinfo[keyno]->oi_nstored;
1058  }
1059 
1060  /* Allocate our result struct and fill it in */
1061  totalsize = offsetof(BrinDesc, bd_info) +
1062  sizeof(BrinOpcInfo *) * tupdesc->natts;
1063 
1064  bdesc = palloc(totalsize);
1065  bdesc->bd_context = cxt;
1066  bdesc->bd_index = rel;
1067  bdesc->bd_tupdesc = tupdesc;
1068  bdesc->bd_disktdesc = NULL; /* generated lazily */
1069  bdesc->bd_totalstored = totalstored;
1070 
1071  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1072  bdesc->bd_info[keyno] = opcinfo[keyno];
1073  pfree(opcinfo);
1074 
1075  MemoryContextSwitchTo(oldcxt);
1076 
1077  return bdesc;
1078 }
1079 
1080 void
1082 {
1083  /* make sure the tupdesc is still valid */
1084  Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1085  /* no need for retail pfree */
1087 }
1088 
1089 /*
1090  * Fetch index's statistical data into *stats
1091  */
1092 void
1094 {
1095  Buffer metabuffer;
1096  Page metapage;
1097  BrinMetaPageData *metadata;
1098 
1099  metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
1100  LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
1101  metapage = BufferGetPage(metabuffer);
1102  metadata = (BrinMetaPageData *) PageGetContents(metapage);
1103 
1104  stats->pagesPerRange = metadata->pagesPerRange;
1105  stats->revmapNumPages = metadata->lastRevmapPage - 1;
1106 
1107  UnlockReleaseBuffer(metabuffer);
1108 }
1109 
1110 /*
1111  * Initialize a BrinBuildState appropriate to create tuples on the given index.
1112  */
1113 static BrinBuildState *
1115  BlockNumber pagesPerRange)
1116 {
1118 
1119  state = palloc(sizeof(BrinBuildState));
1120 
1121  state->bs_irel = idxRel;
1122  state->bs_numtuples = 0;
1124  state->bs_pagesPerRange = pagesPerRange;
1125  state->bs_currRangeStart = 0;
1126  state->bs_rmAccess = revmap;
1127  state->bs_bdesc = brin_build_desc(idxRel);
1128  state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1129 
1131 
1132  return state;
1133 }
1134 
1135 /*
1136  * Release resources associated with a BrinBuildState.
1137  */
1138 static void
1140 {
1141  /*
1142  * Release the last index buffer used. We might as well ensure that
1143  * whatever free space remains in that page is available in FSM, too.
1144  */
1145  if (!BufferIsInvalid(state->bs_currentInsertBuf))
1146  {
1147  Page page;
1148  Size freespace;
1149  BlockNumber blk;
1150 
1151  page = BufferGetPage(state->bs_currentInsertBuf);
1152  freespace = PageGetFreeSpace(page);
1155  RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1156  FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1157  }
1158 
1159  brin_free_desc(state->bs_bdesc);
1160  pfree(state->bs_dtuple);
1161  pfree(state);
1162 }
1163 
1164 /*
1165  * On the given BRIN index, summarize the heap page range that corresponds
1166  * to the heap block number given.
1167  *
1168  * This routine can run in parallel with insertions into the heap. To avoid
1169  * missing those values from the summary tuple, we first insert a placeholder
1170  * index tuple into the index, then execute the heap scan; transactions
1171  * concurrent with the scan update the placeholder tuple. After the scan, we
1172  * union the placeholder tuple with the one computed by this routine. The
1173  * update of the index value happens in a loop, so that if somebody updates
1174  * the placeholder tuple after we read it, we detect the case and try again.
1175  * This ensures that the concurrently inserted tuples are not lost.
1176  *
1177  * A further corner case is this routine being asked to summarize the partial
1178  * range at the end of the table. heapNumBlocks is the (possibly outdated)
1179  * table size; if we notice that the requested range lies beyond that size,
1180  * we re-compute the table size after inserting the placeholder tuple, to
1181  * avoid missing pages that were appended recently.
1182  */
1183 static void
1185  BlockNumber heapBlk, BlockNumber heapNumBlks)
1186 {
1187  Buffer phbuf;
1188  BrinTuple *phtup;
1189  Size phsz;
1190  OffsetNumber offset;
1191  BlockNumber scanNumBlks;
1192 
1193  /*
1194  * Insert the placeholder tuple
1195  */
1196  phbuf = InvalidBuffer;
1197  phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
1198  offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
1199  state->bs_rmAccess, &phbuf,
1200  heapBlk, phtup, phsz);
1201 
1202  /*
1203  * Compute range end. We hold ShareUpdateExclusive lock on table, so it
1204  * cannot shrink concurrently (but it can grow).
1205  */
1206  Assert(heapBlk % state->bs_pagesPerRange == 0);
1207  if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1208  {
1209  /*
1210  * If we're asked to scan what we believe to be the final range on the
1211  * table (i.e. a range that might be partial) we need to recompute our
1212  * idea of what the latest page is after inserting the placeholder
1213  * tuple. Anyone that grows the table later will update the
1214  * placeholder tuple, so it doesn't matter that we won't scan these
1215  * pages ourselves. Careful: the table might have been extended
1216  * beyond the current range, so clamp our result.
1217  *
1218  * Fortunately, this should occur infrequently.
1219  */
1220  scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1221  state->bs_pagesPerRange);
1222  }
1223  else
1224  {
1225  /* Easy case: range is known to be complete */
1226  scanNumBlks = state->bs_pagesPerRange;
1227  }
1228 
1229  /*
1230  * Execute the partial heap scan covering the heap blocks in the specified
1231  * page range, summarizing the heap tuples in it. This scan stops just
1232  * short of brinbuildCallback creating the new index entry.
1233  *
1234  * Note that it is critical we use the "any visible" mode of
1235  * table_index_build_range_scan here: otherwise, we would miss tuples
1236  * inserted by transactions that are still in progress, among other corner
1237  * cases.
1238  */
1239  state->bs_currRangeStart = heapBlk;
1240  table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
1241  heapBlk, scanNumBlks,
1242  brinbuildCallback, (void *) state, NULL);
1243 
1244  /*
1245  * Now we update the values obtained by the scan with the placeholder
1246  * tuple. We do this in a loop which only terminates if we're able to
1247  * update the placeholder tuple successfully; if we are not, this means
1248  * somebody else modified the placeholder tuple after we read it.
1249  */
1250  for (;;)
1251  {
1252  BrinTuple *newtup;
1253  Size newsize;
1254  bool didupdate;
1255  bool samepage;
1256 
1258 
1259  /*
1260  * Update the summary tuple and try to update.
1261  */
1262  newtup = brin_form_tuple(state->bs_bdesc,
1263  heapBlk, state->bs_dtuple, &newsize);
1264  samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
1265  didupdate =
1266  brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
1267  state->bs_rmAccess, heapBlk, phbuf, offset,
1268  phtup, phsz, newtup, newsize, samepage);
1269  brin_free_tuple(phtup);
1270  brin_free_tuple(newtup);
1271 
1272  /* If the update succeeded, we're done. */
1273  if (didupdate)
1274  break;
1275 
1276  /*
1277  * If the update didn't work, it might be because somebody updated the
1278  * placeholder tuple concurrently. Extract the new version, union it
1279  * with the values we have from the scan, and start over. (There are
1280  * other reasons for the update to fail, but it's simple to treat them
1281  * the same.)
1282  */
1283  phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
1284  &offset, &phsz, BUFFER_LOCK_SHARE,
1285  NULL);
1286  /* the placeholder tuple must exist */
1287  if (phtup == NULL)
1288  elog(ERROR, "missing placeholder tuple");
1289  phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
1291 
1292  /* merge it into the tuple from the heap scan */
1293  union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
1294  }
1295 
1296  ReleaseBuffer(phbuf);
1297 }
1298 
1299 /*
1300  * Summarize page ranges that are not already summarized. If pageRange is
1301  * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
1302  * page range containing the given heap page number is scanned.
1303  * If include_partial is true, then the partial range at the end of the table
1304  * is summarized, otherwise not.
1305  *
1306  * For each new index tuple inserted, *numSummarized (if not NULL) is
1307  * incremented; for each existing tuple, *numExisting (if not NULL) is
1308  * incremented.
1309  */
1310 static void
1312  bool include_partial, double *numSummarized, double *numExisting)
1313 {
1314  BrinRevmap *revmap;
1315  BrinBuildState *state = NULL;
1316  IndexInfo *indexInfo = NULL;
1317  BlockNumber heapNumBlocks;
1318  BlockNumber pagesPerRange;
1319  Buffer buf;
1320  BlockNumber startBlk;
1321 
1322  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
1323 
1324  /* determine range of pages to process */
1325  heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1326  if (pageRange == BRIN_ALL_BLOCKRANGES)
1327  startBlk = 0;
1328  else
1329  {
1330  startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1331  heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
1332  }
1333  if (startBlk > heapNumBlocks)
1334  {
1335  /* Nothing to do if start point is beyond end of table */
1336  brinRevmapTerminate(revmap);
1337  return;
1338  }
1339 
1340  /*
1341  * Scan the revmap to find unsummarized items.
1342  */
1343  buf = InvalidBuffer;
1344  for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
1345  {
1346  BrinTuple *tup;
1347  OffsetNumber off;
1348 
1349  /*
1350  * Unless requested to summarize even a partial range, go away now if
1351  * we think the next range is partial. Caller would pass true when it
1352  * is typically run once bulk data loading is done
1353  * (brin_summarize_new_values), and false when it is typically the
1354  * result of arbitrarily-scheduled maintenance command (vacuuming).
1355  */
1356  if (!include_partial &&
1357  (startBlk + pagesPerRange > heapNumBlocks))
1358  break;
1359 
1361 
1362  tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
1363  BUFFER_LOCK_SHARE, NULL);
1364  if (tup == NULL)
1365  {
1366  /* no revmap entry for this heap range. Summarize it. */
1367  if (state == NULL)
1368  {
1369  /* first time through */
1370  Assert(!indexInfo);
1371  state = initialize_brin_buildstate(index, revmap,
1372  pagesPerRange);
1373  indexInfo = BuildIndexInfo(index);
1374  }
1375  summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
1376 
1377  /* and re-initialize state for the next range */
1379 
1380  if (numSummarized)
1381  *numSummarized += 1.0;
1382  }
1383  else
1384  {
1385  if (numExisting)
1386  *numExisting += 1.0;
1388  }
1389  }
1390 
1391  if (BufferIsValid(buf))
1392  ReleaseBuffer(buf);
1393 
1394  /* free resources */
1395  brinRevmapTerminate(revmap);
1396  if (state)
1397  {
1399  pfree(indexInfo);
1400  }
1401 }
1402 
1403 /*
1404  * Given a deformed tuple in the build state, convert it into the on-disk
1405  * format and insert it into the index, making the revmap point to it.
1406  */
1407 static void
1409 {
1410  BrinTuple *tup;
1411  Size size;
1412 
1413  tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1414  state->bs_dtuple, &size);
1415  brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1416  &state->bs_currentInsertBuf, state->bs_currRangeStart,
1417  tup, size);
1418  state->bs_numtuples++;
1419 
1420  pfree(tup);
1421 }
1422 
1423 /*
1424  * Given two deformed tuples, adjust the first one so that it's consistent
1425  * with the summary values in both.
1426  */
1427 static void
1429 {
1430  int keyno;
1431  BrinMemTuple *db;
1432  MemoryContext cxt;
1433  MemoryContext oldcxt;
1434 
1435  /* Use our own memory context to avoid retail pfree */
1437  "brin union",
1439  oldcxt = MemoryContextSwitchTo(cxt);
1440  db = brin_deform_tuple(bdesc, b, NULL);
1441  MemoryContextSwitchTo(oldcxt);
1442 
1443  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
1444  {
1445  FmgrInfo *unionFn;
1446  BrinValues *col_a = &a->bt_columns[keyno];
1447  BrinValues *col_b = &db->bt_columns[keyno];
1448 
1449  unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
1451  FunctionCall3Coll(unionFn,
1452  bdesc->bd_index->rd_indcollation[keyno],
1453  PointerGetDatum(bdesc),
1454  PointerGetDatum(col_a),
1455  PointerGetDatum(col_b));
1456  }
1457 
1458  MemoryContextDelete(cxt);
1459 }
1460 
1461 /*
1462  * brin_vacuum_scan
1463  * Do a complete scan of the index during VACUUM.
1464  *
1465  * This routine scans the complete index looking for uncatalogued index pages,
1466  * i.e. those that might have been lost due to a crash after index extension
1467  * and such.
1468  */
1469 static void
1471 {
1472  BlockNumber nblocks;
1473  BlockNumber blkno;
1474 
1475  /*
1476  * Scan the index in physical order, and clean up any possible mess in
1477  * each page.
1478  */
1479  nblocks = RelationGetNumberOfBlocks(idxrel);
1480  for (blkno = 0; blkno < nblocks; blkno++)
1481  {
1482  Buffer buf;
1483 
1485 
1486  buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
1487  RBM_NORMAL, strategy);
1488 
1489  brin_page_cleanup(idxrel, buf);
1490 
1491  ReleaseBuffer(buf);
1492  }
1493 
1494  /*
1495  * Update all upper pages in the index's FSM, as well. This ensures not
1496  * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
1497  * but also that any pre-existing damage or out-of-dateness is repaired.
1498  */
1499  FreeSpaceMapVacuum(idxrel);
1500 }
void brin_free_desc(BrinDesc *bdesc)
Definition: brin.c:1081
IndexBulkDeleteResult * brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: brin.c:776
ambeginscan_function ambeginscan
Definition: amapi.h:221
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber blkno)
Definition: autovacuum.c:3216
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:86
MemoryContext ii_Context
Definition: execnodes.h:177
Definition: fmgr.h:56
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition: index.c:3275
ambulkdelete_function ambulkdelete
Definition: amapi.h:213
#define BRIN_CURRENT_VERSION
Definition: brin_page.h:72
bool amcanmulticol
Definition: amapi.h:183
uint16 amsupport
Definition: amapi.h:173
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define BRIN_elog(args)
Definition: brin_internal.h:81
#define AllocSetContextCreate
Definition: memutils.h:169
#define BRIN_METAPAGE_BLKNO
Definition: brin_page.h:75
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
IndexBuildResult * brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: brin.c:662
int errhint(const char *fmt,...)
Definition: elog.c:974
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1009
bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: brin.c:144
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:72
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:794
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:321
#define SizeOfBrinCreateIdx
Definition: brin_xlog.h:55
amgettuple_function amgettuple
Definition: amapi.h:223
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define RelationGetDescr(relation)
Definition: rel.h:442
void brinRevmapTerminate(BrinRevmap *revmap)
Definition: brin_revmap.c:103
Oid GetUserId(void)
Definition: miscinit.c:380
bool amcanorderbyop
Definition: amapi.h:177
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
Definition: brin.c:1428
amproperty_function amproperty
Definition: amapi.h:218
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode, Snapshot snapshot)
Definition: brin_revmap.c:197
bool brinvalidate(Oid opclassoid)
Definition: brin_validate.c:38
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define PointerGetDatum(X)
Definition: postgres.h:556
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
static void addrange(struct cvec *cv, chr from, chr to)
Definition: regc_cvec.c:90
Buffer bs_currentInsertBuf
Definition: brin.c:49
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:201
static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks)
Definition: brin.c:1184
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:642
amparallelrescan_function amparallelrescan
Definition: amapi.h:232
#define Min(x, y)
Definition: c.h:904
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
bool amstorage
Definition: amapi.h:191
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define XLOG_BRIN_CREATE_INDEX
Definition: brin_xlog.h:31
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define BRIN_PROCNUM_OPCINFO
Definition: brin_internal.h:67
int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: brin.c:368
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:570
Relation index
Definition: genam.h:46
static void brinbuildCallback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
Definition: brin.c:599
bool ampredlocks
Definition: amapi.h:195
struct SnapshotData * xs_snapshot
Definition: relscan.h:104
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1197
uint32 BlockNumber
Definition: block.h:31
static void form_and_insert_tuple(BrinBuildState *state)
Definition: brin.c:1408
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3353
#define P_NEW
Definition: bufmgr.h:81
aminsert_function aminsert
Definition: amapi.h:212
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2230
#define lengthof(array)
Definition: c.h:662
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:88
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:83
unsigned int Oid
Definition: postgres_ext.h:31
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:55
bool RecoveryInProgress(void)
Definition: xlog.c:7898
BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc)
Definition: brin_tuple.c:357
Oid amkeytype
Definition: amapi.h:201
#define BrinGetPagesPerRange(relation)
Definition: brin.h:39
#define OidIsValid(objectId)
Definition: c.h:638
Relation bs_irel
Definition: brin.c:47
bool amoptionalkey
Definition: amapi.h:185
BlockNumber bs_currRangeStart
Definition: brin.c:51
void brinendscan(IndexScanDesc scan)
Definition: brin.c:582
amvalidate_function amvalidate
Definition: amapi.h:220
BlockNumber bo_pagesPerRange
Definition: brin.c:62
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:581
signed int int32
Definition: c.h:346
Relation indexRelation
Definition: relscan.h:103
bytea * brinoptions(Datum reloptions, bool validate)
Definition: brin.c:821
uint16 OffsetNumber
Definition: off.h:24
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:621
Definition: type.h:89
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:344
Datum brin_summarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:868
BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
Definition: brin_tuple.c:422
IndexUniqueCheck
Definition: genam.h:112
struct BrinOpaque BrinOpaque
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3353
#define BRIN_PROCNUM_ADDVALUE
Definition: brin_internal.h:68
BlockNumber bs_pagesPerRange
Definition: brin.c:50
void pfree(void *pointer)
Definition: mcxt.c:1031
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
amgetbitmap_function amgetbitmap
Definition: amapi.h:224
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:488
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3376
bool bt_placeholder
Definition: brin_tuple.h:38
Oid * rd_indcollation
Definition: rel.h:168
#define ERROR
Definition: elog.h:43
#define MaxBlockNumber
Definition: block.h:35
Relation bd_index
Definition: brin_internal.h:47
ambuild_function ambuild
Definition: amapi.h:210
amoptions_function amoptions
Definition: amapi.h:217
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1499
BlockNumber num_pages
Definition: genam.h:74
ItemPointerData t_self
Definition: htup.h:65
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
bool amcaninclude
Definition: amapi.h:199
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:191
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:610
Datum brin_desummarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:945
#define DEBUG2
Definition: elog.h:24
amcostestimate_function amcostestimate
Definition: amapi.h:216
uint16 oi_nstored
Definition: brin_internal.h:28
bool amcanunique
Definition: amapi.h:181
void * allocateReloptStruct(Size base, relopt_value *options, int numoptions)
Definition: reloptions.c:1268
int bd_totalstored
Definition: brin_internal.h:56
struct BrinBuildState BrinBuildState
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_tuple.h:46
#define BufferIsInvalid(buffer)
Definition: buf.h:31
static char * buf
Definition: pg_test_fsync.c:68
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:214
BrinRevmap * bo_rmAccess
Definition: brin.c:63
amendscan_function amendscan
Definition: amapi.h:225
#define memmove(d, s, c)
Definition: c.h:1238
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1532
bool amcanbackward
Definition: amapi.h:179
#define FirstOffsetNumber
Definition: off.h:27
#define REGBUF_STANDARD
Definition: xloginsert.h:35
static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
Definition: brin.c:1470
BrinRevmap * bs_rmAccess
Definition: brin.c:52
#define DatumGetBool(X)
Definition: postgres.h:393
#define RelationGetRelationName(relation)
Definition: rel.h:450
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:1373
TupleDesc bd_tupdesc
Definition: brin_internal.h:50
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
void brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: brin.c:562
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
static uint64 totalsize
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1699
#define SK_ISNULL
Definition: skey.h:115
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting)
Definition: brin.c:1311
#define ereport(elevel, rest)
Definition: elog.h:141
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:325
amrescan_function amrescan
Definition: amapi.h:222
bool amcanparallel
Definition: amapi.h:197
IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys)
Definition: brin.c:339
void * ii_AmCache
Definition: execnodes.h:176
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:349
static char ** options
void fillRelOptions(void *rdopts, Size basesize, relopt_value *options, int numoptions, bool validate, const relopt_parse_elt *elems, int numelems)
Definition: reloptions.c:1292
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
BlockNumber lastRevmapPage
Definition: brin_page.h:69
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:67
bool amsearchnulls
Definition: amapi.h:189
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
BrinDesc * bo_bdesc
Definition: brin.c:64
void * palloc0(Size size)
Definition: mcxt.c:955
#define PageGetContents(page)
Definition: bufpage.h:246
uintptr_t Datum
Definition: postgres.h:367
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3590
bool amclusterable
Definition: amapi.h:193
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:198
BrinOpcInfo * bd_info[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:59
bool amsearcharray
Definition: amapi.h:187
#define InvalidOid
Definition: postgres_ext.h:36
BlockNumber pagesPerRange
Definition: brin.h:33
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1172
BlockNumber pagesPerRange
Definition: brin_page.h:68
void brin_free_tuple(BrinTuple *tuple)
Definition: brin_tuple.c:308
#define PG_RETURN_VOID()
Definition: fmgr.h:339
#define makeNode(_type_)
Definition: nodes.h:572
BrinDesc * brin_build_desc(Relation rel)
Definition: brin.c:1026
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
int sk_flags
Definition: skey.h:66
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
Definition: regguts.h:298
#define BRIN_PROCNUM_CONSISTENT
Definition: brin_internal.h:69
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4755
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
Definition: brin_revmap.c:321
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:596
size_t Size
Definition: c.h:466
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6841
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, Snapshot snapshot)
Definition: brin_revmap.c:71
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
ammarkpos_function ammarkpos
Definition: amapi.h:226
bool amcanorder
Definition: amapi.h:175
ambuildphasename_function ambuildphasename
Definition: amapi.h:219
#define RelationNeedsWAL(relation)
Definition: rel.h:518
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:230
#define INT64_FORMAT
Definition: c.h:400
#define DatumGetPointer(X)
Definition: postgres.h:549
struct ScanKeyData * keyData
Definition: relscan.h:107
#define BRIN_ALL_BLOCKRANGES
Definition: brin.c:67
int bs_numtuples
Definition: brin.c:48
MemoryContext bd_context
Definition: brin_internal.h:44
#define BRIN_LAST_OPTIONAL_PROCNUM
Definition: brin_internal.h:74
static Datum values[MAXATTR]
Definition: bootstrap.c:167
uint16 amstrategies
Definition: amapi.h:171
#define BRIN_PROCNUM_UNION
Definition: brin_internal.h:70
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2613
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:442
#define BrinGetAutoSummarize(relation)
Definition: brin.h:43
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:784
TupleDesc bd_disktdesc
Definition: brin_internal.h:53
BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size *size)
Definition: brin_tuple.c:89
Oid sk_collation
Definition: skey.h:70
#define elog(elevel,...)
Definition: elog.h:226
ambuildempty_function ambuildempty
Definition: amapi.h:211
int i
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:634
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:87
IndexBulkDeleteResult * brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: brin.c:791
BlockNumber pagesPerRange
Definition: brin_xlog.h:52
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80
int tdrefcount
Definition: tupdesc.h:84
Definition: c.h:549
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
relopt_value * parseRelOptions(Datum options, bool validate, relopt_kind kind, int *numrelopts)
Definition: reloptions.c:1074
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
Datum brin_summarize_new_values(PG_FUNCTION_ARGS)
Definition: brin.c:853
BrinMemTuple * bs_dtuple
Definition: brin.c:54
static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange)
Definition: brin.c:1114
void brinbuildempty(Relation index)
Definition: brin.c:746
BrinDesc * bs_bdesc
Definition: brin.c:53
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_GETARG_INT64(n)
Definition: fmgr.h:277
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
double num_index_tuples
Definition: genam.h:77
int Buffer
Definition: buf.h:23
BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
Definition: brin_tuple.c:263
amcanreturn_function amcanreturn
Definition: amapi.h:215
int16 AttrNumber
Definition: attnum.h:21
#define RelationGetRelid(relation)
Definition: rel.h:416
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:126
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:368
float4 reltuples
Definition: pg_class.h:63
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:618
Datum brinhandler(PG_FUNCTION_ARGS)
Definition: brin.c:85
#define offsetof(type, field)
Definition: c.h:655
AttrNumber sk_attno
Definition: skey.h:67
Pointer Page
Definition: bufpage.h:78
double index_tuples
Definition: genam.h:33
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:84
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:231
double heap_tuples
Definition: genam.h:32
BrinMemTuple * brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
Definition: brin_tuple.c:384
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1093
static void terminate_brin_buildstate(BrinBuildState *state)
Definition: brin.c:1139
BlockNumber revmapNumPages
Definition: brin.h:34
amrestrpos_function amrestrpos
Definition: amapi.h:227