brin.c
1 /*
2  * brin.c
3  * Implementation of BRIN indexes for Postgres
4  *
5  * See src/backend/access/brin/README for details.
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/brin/brin.c
12  *
13  * TODO
14  * * ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
15  */
16 #include "postgres.h"
17 
18 #include "access/brin.h"
19 #include "access/brin_page.h"
20 #include "access/brin_pageops.h"
21 #include "access/brin_xlog.h"
22 #include "access/reloptions.h"
23 #include "access/relscan.h"
24 #include "access/xloginsert.h"
25 #include "catalog/index.h"
26 #include "catalog/pg_am.h"
27 #include "miscadmin.h"
28 #include "pgstat.h"
29 #include "postmaster/autovacuum.h"
30 #include "storage/bufmgr.h"
31 #include "storage/freespace.h"
32 #include "utils/builtins.h"
33 #include "utils/index_selfuncs.h"
34 #include "utils/memutils.h"
35 #include "utils/rel.h"
36 
37 
38 /*
39  * We use a BrinBuildState during initial construction of a BRIN index.
40  * The running state is kept in a BrinMemTuple.
41  */
42 typedef struct BrinBuildState
43 {
44  Relation bs_irel;
45  int bs_numtuples;
46  Buffer bs_currentInsertBuf;
47  BlockNumber bs_pagesPerRange;
48  BlockNumber bs_currRangeStart;
49  BrinRevmap *bs_rmAccess;
50  BrinDesc *bs_bdesc;
51  BrinMemTuple *bs_dtuple;
52 } BrinBuildState;
53 
54 /*
55  * Struct used as "opaque" during index scans
56  */
57 typedef struct BrinOpaque
58 {
59  BlockNumber bo_pagesPerRange;
60  BrinRevmap *bo_rmAccess;
61  BrinDesc *bo_bdesc;
62 } BrinOpaque;
63 
64 #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
65 
66 static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
67  BrinRevmap *revmap, BlockNumber pagesPerRange);
68 static void terminate_brin_buildstate(BrinBuildState *state);
69 static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
70  bool include_partial, double *numSummarized, double *numExisting);
71 static void form_and_insert_tuple(BrinBuildState *state);
72 static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
73  BrinTuple *b);
74 static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
75 
76 
77 /*
78  * BRIN handler function: return IndexAmRoutine with access method parameters
79  * and callbacks.
80  */
81 Datum
82 brinhandler(PG_FUNCTION_ARGS)
83 {
84  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
85 
86  amroutine->amstrategies = 0;
87  amroutine->amsupport = BRIN_LAST_OPTIONAL_PROCNUM;
88  amroutine->amcanorder = false;
89  amroutine->amcanorderbyop = false;
90  amroutine->amcanbackward = false;
91  amroutine->amcanunique = false;
92  amroutine->amcanmulticol = true;
93  amroutine->amoptionalkey = true;
94  amroutine->amsearcharray = false;
95  amroutine->amsearchnulls = true;
96  amroutine->amstorage = true;
97  amroutine->amclusterable = false;
98  amroutine->ampredlocks = false;
99  amroutine->amcanparallel = false;
100  amroutine->amcaninclude = false;
101  amroutine->amkeytype = InvalidOid;
102 
103  amroutine->ambuild = brinbuild;
104  amroutine->ambuildempty = brinbuildempty;
105  amroutine->aminsert = brininsert;
106  amroutine->ambulkdelete = brinbulkdelete;
107  amroutine->amvacuumcleanup = brinvacuumcleanup;
108  amroutine->amcanreturn = NULL;
109  amroutine->amcostestimate = brincostestimate;
110  amroutine->amoptions = brinoptions;
111  amroutine->amproperty = NULL;
112  amroutine->amvalidate = brinvalidate;
113  amroutine->ambeginscan = brinbeginscan;
114  amroutine->amrescan = brinrescan;
115  amroutine->amgettuple = NULL;
116  amroutine->amgetbitmap = bringetbitmap;
117  amroutine->amendscan = brinendscan;
118  amroutine->ammarkpos = NULL;
119  amroutine->amrestrpos = NULL;
120  amroutine->amestimateparallelscan = NULL;
121  amroutine->aminitparallelscan = NULL;
122  amroutine->amparallelrescan = NULL;
123 
124  PG_RETURN_POINTER(amroutine);
125 }
126 
127 /*
128  * A tuple in the heap is being inserted. To keep a brin index up to date,
129  * we need to obtain the relevant index tuple and compare its stored values
130  * with those of the new tuple. If the tuple values are not consistent with
131  * the summary tuple, we need to update the index tuple.
132  *
133  * If autosummarization is enabled, check if we need to summarize the previous
134  * page range.
135  *
136  * If the range is not currently summarized (i.e. the revmap returns NULL for
137  * it), there's nothing to do for this tuple.
138  */
139 bool
140 brininsert(Relation idxRel, Datum *values, bool *nulls,
141  ItemPointer heaptid, Relation heapRel,
142  IndexUniqueCheck checkUnique,
143  IndexInfo *indexInfo)
144 {
145  BlockNumber pagesPerRange;
146  BlockNumber origHeapBlk;
147  BlockNumber heapBlk;
148  BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
149  BrinRevmap *revmap;
150  Buffer buf = InvalidBuffer;
151  MemoryContext tupcxt = NULL;
152  MemoryContext oldcxt = CurrentMemoryContext;
153  bool autosummarize = BrinGetAutoSummarize(idxRel);
154 
155  revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
156 
157  /*
158  * origHeapBlk is the block number where the insertion occurred. heapBlk
159  * is the first block in the corresponding page range.
160  */
161  origHeapBlk = ItemPointerGetBlockNumber(heaptid);
162  heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
163 
164  for (;;)
165  {
166  bool need_insert = false;
167  OffsetNumber off;
168  BrinTuple *brtup;
169  BrinMemTuple *dtup;
170  int keyno;
171 
172  CHECK_FOR_INTERRUPTS();
173 
174  /*
175  * If auto-summarization is enabled and we just inserted the first
176  * tuple into the first block of a new non-first page range, request a
177  * summarization run of the previous range.
178  */
179  if (autosummarize &&
180  heapBlk > 0 &&
181  heapBlk == origHeapBlk &&
182  ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
183  {
184  BlockNumber lastPageRange = heapBlk - 1;
185  BrinTuple *lastPageTuple;
186 
187  lastPageTuple =
188  brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
189  NULL, BUFFER_LOCK_SHARE, NULL);
190  if (!lastPageTuple)
191  {
192  bool recorded;
193 
194  recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
195  RelationGetRelid(idxRel),
196  lastPageRange);
197  if (!recorded)
198  ereport(LOG,
199  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
200  errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
201  RelationGetRelationName(idxRel),
202  lastPageRange)));
203  }
204  else
205  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
206  }
207 
208  brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
209  NULL, BUFFER_LOCK_SHARE, NULL);
210 
211  /* if range is unsummarized, there's nothing to do */
212  if (!brtup)
213  break;
214 
215  /* First time through in this statement? */
216  if (bdesc == NULL)
217  {
218  MemoryContextSwitchTo(indexInfo->ii_Context);
219  bdesc = brin_build_desc(idxRel);
220  indexInfo->ii_AmCache = (void *) bdesc;
221  MemoryContextSwitchTo(oldcxt);
222  }
223  /* First time through in this brininsert call? */
224  if (tupcxt == NULL)
225  {
226  tupcxt = AllocSetContextCreate(CurrentMemoryContext,
227  "brininsert cxt",
228  ALLOCSET_DEFAULT_SIZES);
229  MemoryContextSwitchTo(tupcxt);
230  }
231 
232  dtup = brin_deform_tuple(bdesc, brtup, NULL);
233 
234  /*
235  * Compare the key values of the new tuple to the stored index values;
236  * our deformed tuple will get updated if the new tuple doesn't fit
237  * the original range (note this means we can't break out of the loop
238  * early). Make a note of whether this happens, so that we know to
239  * insert the modified tuple later.
240  */
241  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
242  {
243  Datum result;
244  BrinValues *bval;
245  FmgrInfo *addValue;
246 
247  bval = &dtup->bt_columns[keyno];
248  addValue = index_getprocinfo(idxRel, keyno + 1,
249  BRIN_PROCNUM_ADDVALUE);
250  result = FunctionCall4Coll(addValue,
251  idxRel->rd_indcollation[keyno],
252  PointerGetDatum(bdesc),
253  PointerGetDatum(bval),
254  values[keyno],
255  nulls[keyno]);
256  /* if that returned true, we need to insert the updated tuple */
257  need_insert |= DatumGetBool(result);
258  }
259 
260  if (!need_insert)
261  {
262  /*
263  * The tuple is consistent with the new values, so there's nothing
264  * to do.
265  */
266  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
267  }
268  else
269  {
270  Page page = BufferGetPage(buf);
271  ItemId lp = PageGetItemId(page, off);
272  Size origsz;
273  BrinTuple *origtup;
274  Size newsz;
275  BrinTuple *newtup;
276  bool samepage;
277 
278  /*
279  * Make a copy of the old tuple, so that we can compare it after
280  * re-acquiring the lock.
281  */
282  origsz = ItemIdGetLength(lp);
283  origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
284 
285  /*
286  * Before releasing the lock, check if we can attempt a same-page
287  * update. Another process could insert a tuple concurrently in
288  * the same page though, so downstream we must be prepared to cope
289  * if this turns out to not be possible after all.
290  */
291  newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
292  samepage = brin_can_do_samepage_update(buf, origsz, newsz);
293  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
294 
295  /*
296  * Try to update the tuple. If this doesn't work for whatever
297  * reason, we need to restart from the top; the revmap might be
298  * pointing at a different tuple for this block now, so we need to
299  * recompute to ensure both our new heap tuple and the other
300  * inserter's are covered by the combined tuple. It might be that
301  * we don't need to update at all.
302  */
303  if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
304  buf, off, origtup, origsz, newtup, newsz,
305  samepage))
306  {
307  /* no luck; start over */
308  MemoryContextResetAndDeleteChildren(tupcxt);
309  continue;
310  }
311  }
312 
313  /* success! */
314  break;
315  }
316 
317  brinRevmapTerminate(revmap);
318  if (BufferIsValid(buf))
319  ReleaseBuffer(buf);
320  MemoryContextSwitchTo(oldcxt);
321  if (tupcxt != NULL)
322  MemoryContextDelete(tupcxt);
323 
324  return false;
325 }
326 
327 /*
328  * Initialize state for a BRIN index scan.
329  *
330  * We read the metapage here to determine the pages-per-range number that this
331  * index was built with. Note that since this cannot be changed while we're
332  * holding lock on index, it's not necessary to recompute it during brinrescan.
333  */
334 IndexScanDesc
335 brinbeginscan(Relation r, int nkeys, int norderbys)
336 {
337  IndexScanDesc scan;
338  BrinOpaque *opaque;
339 
340  scan = RelationGetIndexScan(r, nkeys, norderbys);
341 
342  opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque));
343  opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
344  scan->xs_snapshot);
345  opaque->bo_bdesc = brin_build_desc(r);
346  scan->opaque = opaque;
347 
348  return scan;
349 }
350 
351 /*
352  * Execute the index scan.
353  *
354  * This works by reading index TIDs from the revmap, and obtaining the index
355  * tuples pointed to by them; the summary values in the index tuples are
356  * compared to the scan keys. We return into the TID bitmap all the pages in
357  * ranges corresponding to index tuples that match the scan keys.
358  *
359  * If a TID from the revmap is read as InvalidTID, we know that range is
360  * unsummarized. Pages in those ranges need to be returned regardless of scan
361  * keys.
362  */
363 int64
364 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
365 {
366  Relation idxRel = scan->indexRelation;
367  Buffer buf = InvalidBuffer;
368  BrinDesc *bdesc;
369  Oid heapOid;
370  Relation heapRel;
371  BrinOpaque *opaque;
372  BlockNumber nblocks;
373  BlockNumber heapBlk;
374  int totalpages = 0;
375  FmgrInfo *consistentFn;
376  MemoryContext oldcxt;
377  MemoryContext perRangeCxt;
378  BrinMemTuple *dtup;
379  BrinTuple *btup = NULL;
380  Size btupsz = 0;
381 
382  opaque = (BrinOpaque *) scan->opaque;
383  bdesc = opaque->bo_bdesc;
384  pgstat_count_index_scan(idxRel);
385 
386  /*
387  * We need to know the size of the table so that we know how long to
388  * iterate on the revmap.
389  */
390  heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
391  heapRel = heap_open(heapOid, AccessShareLock);
392  nblocks = RelationGetNumberOfBlocks(heapRel);
393  heap_close(heapRel, AccessShareLock);
394 
395  /*
396  * Make room for the consistent support procedures of indexed columns. We
397  * don't look them up here; we do that lazily the first time we see a scan
398  * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
399  */
400  consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
401 
402  /* allocate an initial in-memory tuple, out of the per-range memcxt */
403  dtup = brin_new_memtuple(bdesc);
404 
405  /*
406  * Setup and use a per-range memory context, which is reset every time we
407  * loop below. This avoids having to free the tuples within the loop.
408  */
409  perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
410  "bringetbitmap cxt",
411  ALLOCSET_DEFAULT_SIZES);
412  oldcxt = MemoryContextSwitchTo(perRangeCxt);
413 
414  /*
415  * Now scan the revmap. We start by querying for heap page 0,
416  * incrementing by the number of pages per range; this gives us a full
417  * view of the table.
418  */
419  for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
420  {
421  bool addrange;
422  bool gottuple = false;
423  BrinTuple *tup;
424  OffsetNumber off;
425  Size size;
426 
427  CHECK_FOR_INTERRUPTS();
428 
429  MemoryContextResetAndDeleteChildren(perRangeCxt);
430 
431  tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
432  &off, &size, BUFFER_LOCK_SHARE,
433  scan->xs_snapshot);
434  if (tup)
435  {
436  gottuple = true;
437  btup = brin_copy_tuple(tup, size, btup, &btupsz);
438  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
439  }
440 
441  /*
442  * For page ranges with no indexed tuple, we must return the whole
443  * range; otherwise, compare it to the scan keys.
444  */
445  if (!gottuple)
446  {
447  addrange = true;
448  }
449  else
450  {
451  dtup = brin_deform_tuple(bdesc, btup, dtup);
452  if (dtup->bt_placeholder)
453  {
454  /*
455  * Placeholder tuples are always returned, regardless of the
456  * values stored in them.
457  */
458  addrange = true;
459  }
460  else
461  {
462  int keyno;
463 
464  /*
465  * Compare scan keys with summary values stored for the range.
466  * If scan keys are matched, the page range must be added to
467  * the bitmap. We initially assume the range needs to be
468  * added; in particular this serves the case where there are
469  * no keys.
470  */
471  addrange = true;
472  for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
473  {
474  ScanKey key = &scan->keyData[keyno];
475  AttrNumber keyattno = key->sk_attno;
476  BrinValues *bval = &dtup->bt_columns[keyattno - 1];
477  Datum add;
478 
479  /*
480  * The collation of the scan key must match the collation
481  * used in the index column (but only if the search is not
482  * IS NULL/ IS NOT NULL). Otherwise we shouldn't be using
483  * this index ...
484  */
485  Assert((key->sk_flags & SK_ISNULL) ||
486  (key->sk_collation ==
487  TupleDescAttr(bdesc->bd_tupdesc,
488  keyattno - 1)->attcollation));
489 
490  /* First time this column? look up consistent function */
491  if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
492  {
493  FmgrInfo *tmp;
494 
495  tmp = index_getprocinfo(idxRel, keyattno,
496  BRIN_PROCNUM_CONSISTENT);
497  fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
498  CurrentMemoryContext);
499  }
500 
501  /*
502  * Check whether the scan key is consistent with the page
503  * range values; if so, have the pages in the range added
504  * to the output bitmap.
505  *
506  * When there are multiple scan keys, failure to meet the
507  * criteria for a single one of them is enough to discard
508  * the range as a whole, so break out of the loop as soon
509  * as a false return value is obtained.
510  */
511  add = FunctionCall3Coll(&consistentFn[keyattno - 1],
512  key->sk_collation,
513  PointerGetDatum(bdesc),
514  PointerGetDatum(bval),
515  PointerGetDatum(key));
516  addrange = DatumGetBool(add);
517  if (!addrange)
518  break;
519  }
520  }
521  }
522 
523  /* add the pages in the range to the output bitmap, if needed */
524  if (addrange)
525  {
526  BlockNumber pageno;
527 
528  for (pageno = heapBlk;
529  pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
530  pageno++)
531  {
532  MemoryContextSwitchTo(oldcxt);
533  tbm_add_page(tbm, pageno);
534  totalpages++;
535  MemoryContextSwitchTo(perRangeCxt);
536  }
537  }
538  }
539 
540  MemoryContextSwitchTo(oldcxt);
541  MemoryContextDelete(perRangeCxt);
542 
543  if (buf != InvalidBuffer)
544  ReleaseBuffer(buf);
545 
546  /*
547  * XXX We have an approximation of the number of *pages* that our scan
548  * returns, but we don't have a precise idea of the number of heap tuples
549  * involved.
550  */
551  return totalpages * 10;
552 }
553 
554 /*
555  * Re-initialize state for a BRIN index scan
556  */
557 void
558 brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
559  ScanKey orderbys, int norderbys)
560 {
561  /*
562  * Other index AMs preprocess the scan keys at this point, or sometime
563  * early during the scan; this lets them optimize by removing redundant
564  * keys, or doing early returns when they are impossible to satisfy; see
565  * _bt_preprocess_keys for an example. Something like that could be added
566  * here someday, too.
567  */
568 
569  if (scankey && scan->numberOfKeys > 0)
570  memmove(scan->keyData, scankey,
571  scan->numberOfKeys * sizeof(ScanKeyData));
572 }
573 
574 /*
575  * Close down a BRIN index scan
576  */
577 void
578 brinendscan(IndexScanDesc scan)
579 {
580  BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
581 
582  brinRevmapTerminate(opaque->bo_rmAccess);
583  brin_free_desc(opaque->bo_bdesc);
584  pfree(opaque);
585 }
586 
587 /*
588  * Per-heap-tuple callback for IndexBuildHeapScan.
589  *
590  * Note we don't worry about the page range at the end of the table here; it is
591  * present in the build state struct after we're called the last time, but not
592  * inserted into the index. Caller must ensure to do so, if appropriate.
593  */
594 static void
595 brinbuildCallback(Relation index,
596  HeapTuple htup,
597  Datum *values,
598  bool *isnull,
599  bool tupleIsAlive,
600  void *brstate)
601 {
602  BrinBuildState *state = (BrinBuildState *) brstate;
603  BlockNumber thisblock;
604  int i;
605 
606  thisblock = ItemPointerGetBlockNumber(&htup->t_self);
607 
608  /*
609  * If we're in a block that belongs to a future range, summarize what
610  * we've got and start afresh. Note the scan might have skipped many
611  * pages, if they were devoid of live tuples; make sure to insert index
612  * tuples for those too.
613  */
614  while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
615  {
616 
617  BRIN_elog((DEBUG2,
618  "brinbuildCallback: completed a range: %u--%u",
619  state->bs_currRangeStart,
620  state->bs_currRangeStart + state->bs_pagesPerRange));
621 
622  /* create the index tuple and insert it */
623  form_and_insert_tuple(state);
624 
625  /* set state to correspond to the next range */
626  state->bs_currRangeStart += state->bs_pagesPerRange;
627 
628  /* re-initialize state for it */
629  brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
630  }
631 
632  /* Accumulate the current tuple into the running state */
633  for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++)
634  {
635  FmgrInfo *addValue;
636  BrinValues *col;
637  Form_pg_attribute attr = TupleDescAttr(state->bs_bdesc->bd_tupdesc, i);
638 
639  col = &state->bs_dtuple->bt_columns[i];
640  addValue = index_getprocinfo(index, i + 1,
641  BRIN_PROCNUM_ADDVALUE);
642 
643  /*
644  * Update dtuple state, if and as necessary.
645  */
646  FunctionCall4Coll(addValue,
647  attr->attcollation,
648  PointerGetDatum(state->bs_bdesc),
649  PointerGetDatum(col),
650  values[i], isnull[i]);
651  }
652 }
653 
654 /*
655  * brinbuild() -- build a new BRIN index.
656  */
657 IndexBuildResult *
658 brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
659 {
660  IndexBuildResult *result;
661  double reltuples;
662  double idxtuples;
663  BrinRevmap *revmap;
664  BrinBuildState *state;
665  Buffer meta;
666  BlockNumber pagesPerRange;
667 
668  /*
669  * We expect to be called exactly once for any index relation.
670  */
671  if (RelationGetNumberOfBlocks(index) != 0)
672  elog(ERROR, "index \"%s\" already contains data",
673  RelationGetRelationName(index));
674 
675  /*
676  * Critical section not required, because on error the creation of the
677  * whole relation will be rolled back.
678  */
679 
680  meta = ReadBuffer(index, P_NEW);
681  Assert(BufferGetBlockNumber(meta) == BRIN_METAPAGE_BLKNO);
682  LockBuffer(meta, BUFFER_LOCK_EXCLUSIVE);
683 
684  brin_metapage_init(BufferGetPage(meta), BrinGetPagesPerRange(index),
685  BRIN_CURRENT_VERSION);
686  MarkBufferDirty(meta);
687 
688  if (RelationNeedsWAL(index))
689  {
690  xl_brin_createidx xlrec;
691  XLogRecPtr recptr;
692  Page page;
693 
694  xlrec.version = BRIN_CURRENT_VERSION;
695  xlrec.pagesPerRange = BrinGetPagesPerRange(index);
696 
697  XLogBeginInsert();
698  XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
699  XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT | REGBUF_STANDARD);
700 
701  recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
702 
703  page = BufferGetPage(meta);
704  PageSetLSN(page, recptr);
705  }
706 
707  UnlockReleaseBuffer(meta);
708 
709  /*
710  * Initialize our state, including the deformed tuple state.
711  */
712  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
713  state = initialize_brin_buildstate(index, revmap, pagesPerRange);
714 
715  /*
716  * Now scan the relation. No syncscan allowed here because we want the
717  * heap blocks in physical order.
718  */
719  reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
720  brinbuildCallback, (void *) state, NULL);
721 
722  /* process the final batch */
723  form_and_insert_tuple(state);
724 
725  /* release resources */
726  idxtuples = state->bs_numtuples;
727  brinRevmapTerminate(state->bs_rmAccess);
728  terminate_brin_buildstate(state);
729 
730  /*
731  * Return statistics
732  */
733  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
734 
735  result->heap_tuples = reltuples;
736  result->index_tuples = idxtuples;
737 
738  return result;
739 }
740 
741 void
742 brinbuildempty(Relation index)
743 {
744  Buffer metabuf;
745 
746  /* An empty BRIN index has a metapage only. */
747  metabuf =
748  ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
749  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
750 
751  /* Initialize and xlog metabuffer. */
752  START_CRIT_SECTION();
753  brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index),
754  BRIN_CURRENT_VERSION);
755  MarkBufferDirty(metabuf);
756  log_newpage_buffer(metabuf, true);
757  END_CRIT_SECTION();
758 
759  UnlockReleaseBuffer(metabuf);
760 }
761 
762 /*
763  * brinbulkdelete
764  * Since there are no per-heap-tuple index tuples in BRIN indexes,
765  * there's not a lot we can do here.
766  *
767  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
768  * tuple is deleted), meaning the need to re-run summarization on the affected
769  * range. Would need to add an extra flag in brintuples for that.
770  */
771 IndexBulkDeleteResult *
772 brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
773  IndexBulkDeleteCallback callback, void *callback_state)
774 {
775  /* allocate stats if first time through, else re-use existing struct */
776  if (stats == NULL)
777  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
778 
779  return stats;
780 }
781 
782 /*
783  * This routine is in charge of "vacuuming" a BRIN index: we just summarize
784  * ranges that are currently unsummarized.
785  */
786 IndexBulkDeleteResult *
787 brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
788 {
789  Relation heapRel;
790 
791  /* No-op in ANALYZE ONLY mode */
792  if (info->analyze_only)
793  return stats;
794 
795  if (!stats)
796  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
797  stats->num_pages = RelationGetNumberOfBlocks(info->index);
798  /* rest of stats is initialized by zeroing */
799 
800  heapRel = heap_open(IndexGetRelation(RelationGetRelid(info->index), false),
801  AccessShareLock);
802 
803  brin_vacuum_scan(info->index, info->strategy);
804 
805  brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
806  &stats->num_index_tuples, &stats->num_index_tuples);
807 
808  heap_close(heapRel, AccessShareLock);
809 
810  return stats;
811 }
812 
813 /*
814  * reloptions processor for BRIN indexes
815  */
816 bytea *
817 brinoptions(Datum reloptions, bool validate)
818 {
819  relopt_value *options;
820  BrinOptions *rdopts;
821  int numoptions;
822  static const relopt_parse_elt tab[] = {
823  {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
824  {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
825  };
826 
827  options = parseRelOptions(reloptions, validate, RELOPT_KIND_BRIN,
828  &numoptions);
829 
830  /* if none set, we're done */
831  if (numoptions == 0)
832  return NULL;
833 
834  rdopts = allocateReloptStruct(sizeof(BrinOptions), options, numoptions);
835 
836  fillRelOptions((void *) rdopts, sizeof(BrinOptions), options, numoptions,
837  validate, tab, lengthof(tab));
838 
839  pfree(options);
840 
841  return (bytea *) rdopts;
842 }
843 
844 /*
845  * SQL-callable function to scan through an index and summarize all ranges
846  * that are not currently summarized.
847  */
848 Datum
849 brin_summarize_new_values(PG_FUNCTION_ARGS)
850 {
851  Datum relation = PG_GETARG_DATUM(0);
852 
853  return DirectFunctionCall2(brin_summarize_range,
854  relation,
855  Int64GetDatum((int64) BRIN_ALL_BLOCKRANGES));
856 }
857 
858 /*
859  * SQL-callable function to summarize the indicated page range, if not already
860  * summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
861  * unsummarized ranges are summarized.
862  */
863 Datum
864 brin_summarize_range(PG_FUNCTION_ARGS)
865 {
866  Oid indexoid = PG_GETARG_OID(0);
867  int64 heapBlk64 = PG_GETARG_INT64(1);
868  BlockNumber heapBlk;
869  Oid heapoid;
870  Relation indexRel;
871  Relation heapRel;
872  double numSummarized = 0;
873 
874  if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
875  {
876  char *blk = psprintf(INT64_FORMAT, heapBlk64);
877 
878  ereport(ERROR,
879  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
880  errmsg("block number out of range: %s", blk)));
881  }
882  heapBlk = (BlockNumber) heapBlk64;
883 
884  /*
885  * We must lock table before index to avoid deadlocks. However, if the
886  * passed indexoid isn't an index then IndexGetRelation() will fail.
887  * Rather than emitting a not-very-helpful error message, postpone
888  * complaining, expecting that the is-it-an-index test below will fail.
889  */
890  heapoid = IndexGetRelation(indexoid, true);
891  if (OidIsValid(heapoid))
892  heapRel = heap_open(heapoid, ShareUpdateExclusiveLock);
893  else
894  heapRel = NULL;
895 
896  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
897 
898  /* Must be a BRIN index */
899  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
900  indexRel->rd_rel->relam != BRIN_AM_OID)
901  ereport(ERROR,
902  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
903  errmsg("\"%s\" is not a BRIN index",
904  RelationGetRelationName(indexRel))));
905 
906  /* User must own the index (comparable to privileges needed for VACUUM) */
907  if (!pg_class_ownercheck(indexoid, GetUserId()))
908  aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
909  RelationGetRelationName(indexRel));
910 
911  /*
912  * Since we did the IndexGetRelation call above without any lock, it's
913  * barely possible that a race against an index drop/recreation could have
914  * netted us the wrong table. Recheck.
915  */
916  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
917  ereport(ERROR,
918  (errcode(ERRCODE_UNDEFINED_TABLE),
919  errmsg("could not open parent table of index %s",
920  RelationGetRelationName(indexRel))));
921 
922  /* OK, do it */
923  brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
924 
925  relation_close(indexRel, ShareUpdateExclusiveLock);
926  relation_close(heapRel, ShareUpdateExclusiveLock);
927 
928  PG_RETURN_INT32((int32) numSummarized);
929 }
930 
931 /*
932  * SQL-callable interface to mark a range as no longer summarized
933  */
934 Datum
935 brin_desummarize_range(PG_FUNCTION_ARGS)
936 {
937  Oid indexoid = PG_GETARG_OID(0);
938  int64 heapBlk64 = PG_GETARG_INT64(1);
939  BlockNumber heapBlk;
940  Oid heapoid;
941  Relation heapRel;
942  Relation indexRel;
943  bool done;
944 
945  if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
946  {
947  char *blk = psprintf(INT64_FORMAT, heapBlk64);
948 
949  ereport(ERROR,
950  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
951  errmsg("block number out of range: %s", blk)));
952  }
953  heapBlk = (BlockNumber) heapBlk64;
954 
955  /*
956  * We must lock table before index to avoid deadlocks. However, if the
957  * passed indexoid isn't an index then IndexGetRelation() will fail.
958  * Rather than emitting a not-very-helpful error message, postpone
959  * complaining, expecting that the is-it-an-index test below will fail.
960  */
961  heapoid = IndexGetRelation(indexoid, true);
962  if (OidIsValid(heapoid))
963  heapRel = heap_open(heapoid, ShareUpdateExclusiveLock);
964  else
965  heapRel = NULL;
966 
967  indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
968 
969  /* Must be a BRIN index */
970  if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
971  indexRel->rd_rel->relam != BRIN_AM_OID)
972  ereport(ERROR,
973  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
974  errmsg("\"%s\" is not a BRIN index",
975  RelationGetRelationName(indexRel))));
976 
977  /* User must own the index (comparable to privileges needed for VACUUM) */
978  if (!pg_class_ownercheck(indexoid, GetUserId()))
979  aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
980  RelationGetRelationName(indexRel));
981 
982  /*
983  * Since we did the IndexGetRelation call above without any lock, it's
984  * barely possible that a race against an index drop/recreation could have
985  * netted us the wrong table. Recheck.
986  */
987  if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
988  ereport(ERROR,
989  (errcode(ERRCODE_UNDEFINED_TABLE),
990  errmsg("could not open parent table of index %s",
991  RelationGetRelationName(indexRel))));
992 
993  /* the revmap does the hard work */
994  do
995  {
996  done = brinRevmapDesummarizeRange(indexRel, heapBlk);
997  }
998  while (!done);
999 
1000  relation_close(indexRel, ShareUpdateExclusiveLock);
1001  relation_close(heapRel, ShareUpdateExclusiveLock);
1002 
1003  PG_RETURN_VOID();
1004 }
1005 
1006 /*
1007  * Build a BrinDesc used to create or scan a BRIN index
1008  */
1009 BrinDesc *
1010 brin_build_desc(Relation rel)
1011 {
1012  BrinOpcInfo **opcinfo;
1013  BrinDesc *bdesc;
1014  TupleDesc tupdesc;
1015  int totalstored = 0;
1016  int keyno;
1017  long totalsize;
1018  MemoryContext cxt;
1019  MemoryContext oldcxt;
1020 
1021  cxt = AllocSetContextCreate(CurrentMemoryContext,
1022  "brin desc cxt",
1023  ALLOCSET_SMALL_SIZES);
1024  oldcxt = MemoryContextSwitchTo(cxt);
1025  tupdesc = RelationGetDescr(rel);
1026 
1027  /*
1028  * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1029  * the number of columns stored, since the number is opclass-defined.
1030  */
1031  opcinfo = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * tupdesc->natts);
1032  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1033  {
1034  FmgrInfo *opcInfoFn;
1035  Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1036 
1037  opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);
1038 
1039  opcinfo[keyno] = (BrinOpcInfo *)
1040  DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
1041  totalstored += opcinfo[keyno]->oi_nstored;
1042  }
1043 
1044  /* Allocate our result struct and fill it in */
1045  totalsize = offsetof(BrinDesc, bd_info) +
1046  sizeof(BrinOpcInfo *) * tupdesc->natts;
1047 
1048  bdesc = palloc(totalsize);
1049  bdesc->bd_context = cxt;
1050  bdesc->bd_index = rel;
1051  bdesc->bd_tupdesc = tupdesc;
1052  bdesc->bd_disktdesc = NULL; /* generated lazily */
1053  bdesc->bd_totalstored = totalstored;
1054 
1055  for (keyno = 0; keyno < tupdesc->natts; keyno++)
1056  bdesc->bd_info[keyno] = opcinfo[keyno];
1057  pfree(opcinfo);
1058 
1059  MemoryContextSwitchTo(oldcxt);
1060 
1061  return bdesc;
1062 }
1063 
1064 void
1065 brin_free_desc(BrinDesc *bdesc)
1066 {
1067  /* make sure the tupdesc is still valid */
1068  Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1069  /* no need for retail pfree */
1070  MemoryContextDelete(bdesc->bd_context);
1071 }
1072 
1073 /*
1074  * Fetch index's statistical data into *stats
1075  */
1076 void
1077 brinGetStats(Relation index, BrinStatsData *stats)
1078 {
1079  Buffer metabuffer;
1080  Page metapage;
1081  BrinMetaPageData *metadata;
1082 
1083  metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
1084  LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
1085  metapage = BufferGetPage(metabuffer);
1086  metadata = (BrinMetaPageData *) PageGetContents(metapage);
1087 
1088  stats->pagesPerRange = metadata->pagesPerRange;
1089  stats->revmapNumPages = metadata->lastRevmapPage - 1;
1090 
1091  UnlockReleaseBuffer(metabuffer);
1092 }
1093 
1094 /*
1095  * Initialize a BrinBuildState appropriate to create tuples on the given index.
1096  */
1097 static BrinBuildState *
1098 initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
1099  BlockNumber pagesPerRange)
1100 {
1101  BrinBuildState *state;
1102 
1103  state = palloc(sizeof(BrinBuildState));
1104 
1105  state->bs_irel = idxRel;
1106  state->bs_numtuples = 0;
1107  state->bs_currentInsertBuf = InvalidBuffer;
1108  state->bs_pagesPerRange = pagesPerRange;
1109  state->bs_currRangeStart = 0;
1110  state->bs_rmAccess = revmap;
1111  state->bs_bdesc = brin_build_desc(idxRel);
1112  state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1113 
1114  brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1115 
1116  return state;
1117 }
1118 
1119 /*
1120  * Release resources associated with a BrinBuildState.
1121  */
1122 static void
1123 terminate_brin_buildstate(BrinBuildState *state)
1124 {
1125  /*
1126  * Release the last index buffer used. We might as well ensure that
1127  * whatever free space remains in that page is available in FSM, too.
1128  */
1129  if (!BufferIsInvalid(state->bs_currentInsertBuf))
1130  {
1131  Page page;
1132  Size freespace;
1133  BlockNumber blk;
1134 
1135  page = BufferGetPage(state->bs_currentInsertBuf);
1136  freespace = PageGetFreeSpace(page);
1137  blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
1138  ReleaseBuffer(state->bs_currentInsertBuf);
1139  RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1140  FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1141  }
1142 
1143  brin_free_desc(state->bs_bdesc);
1144  pfree(state->bs_dtuple);
1145  pfree(state);
1146 }
1147 
1148 /*
1149  * On the given BRIN index, summarize the heap page range that corresponds
1150  * to the heap block number given.
1151  *
1152  * This routine can run in parallel with insertions into the heap. To avoid
1153  * missing those values from the summary tuple, we first insert a placeholder
1154  * index tuple into the index, then execute the heap scan; transactions
1155  * concurrent with the scan update the placeholder tuple. After the scan, we
1156  * union the placeholder tuple with the one computed by this routine. The
1157  * update of the index value happens in a loop, so that if somebody updates
1158  * the placeholder tuple after we read it, we detect the case and try again.
1159  * This ensures that the concurrently inserted tuples are not lost.
1160  *
1161  * A further corner case is this routine being asked to summarize the partial
1162  * range at the end of the table. heapNumBlocks is the (possibly outdated)
1163  * table size; if we notice that the requested range lies beyond that size,
1164  * we re-compute the table size after inserting the placeholder tuple, to
1165  * avoid missing pages that were appended recently.
1166  */
1167 static void
1168 summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
1169  BlockNumber heapBlk, BlockNumber heapNumBlks)
1170 {
1171  Buffer phbuf;
1172  BrinTuple *phtup;
1173  Size phsz;
1174  OffsetNumber offset;
1175  BlockNumber scanNumBlks;
1176 
1177  /*
1178  * Insert the placeholder tuple
1179  */
1180  phbuf = InvalidBuffer;
1181  phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
1182  offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
1183  state->bs_rmAccess, &phbuf,
1184  heapBlk, phtup, phsz);
1185 
1186  /*
1187  * Compute range end. We hold ShareUpdateExclusive lock on table, so it
1188  * cannot shrink concurrently (but it can grow).
1189  */
1190  Assert(heapBlk % state->bs_pagesPerRange == 0);
1191  if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1192  {
1193  /*
1194  * If we're asked to scan what we believe to be the final range on the
1195  * table (i.e. a range that might be partial) we need to recompute our
1196  * idea of what the latest page is after inserting the placeholder
1197  * tuple. Anyone that grows the table later will update the
1198  * placeholder tuple, so it doesn't matter that we won't scan these
1199  * pages ourselves. Careful: the table might have been extended
1200  * beyond the current range, so clamp our result.
1201  *
1202  * Fortunately, this should occur infrequently.
1203  */
1204  scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1205  state->bs_pagesPerRange);
1206  }
1207  else
1208  {
1209  /* Easy case: range is known to be complete */
1210  scanNumBlks = state->bs_pagesPerRange;
1211  }
1212 
1213  /*
1214  * Execute the partial heap scan covering the heap blocks in the specified
1215  * page range, summarizing the heap tuples in it. This scan stops just
1216  * short of brinbuildCallback creating the new index entry.
1217  *
1218  * Note that it is critical we use the "any visible" mode of
1219  * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted
1220  * by transactions that are still in progress, among other corner cases.
1221  */
1222  state->bs_currRangeStart = heapBlk;
1223  IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
1224  heapBlk, scanNumBlks,
1225  brinbuildCallback, (void *) state, NULL);
1226 
1227  /*
1228  * Now we update the values obtained by the scan with the placeholder
1229  * tuple. We do this in a loop which only terminates if we're able to
1230  * update the placeholder tuple successfully; if we are not, this means
1231  * somebody else modified the placeholder tuple after we read it.
1232  */
1233  for (;;)
1234  {
1235  BrinTuple *newtup;
1236  Size newsize;
1237  bool didupdate;
1238  bool samepage;
1239 
1240  CHECK_FOR_INTERRUPTS();
1241 
1242  /*
1243  * Update the summary tuple and try to update.
1244  */
1245  newtup = brin_form_tuple(state->bs_bdesc,
1246  heapBlk, state->bs_dtuple, &newsize);
1247  samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
1248  didupdate =
1249  brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
1250  state->bs_rmAccess, heapBlk, phbuf, offset,
1251  phtup, phsz, newtup, newsize, samepage);
1252  brin_free_tuple(phtup);
1253  brin_free_tuple(newtup);
1254 
1255  /* If the update succeeded, we're done. */
1256  if (didupdate)
1257  break;
1258 
1259  /*
1260  * If the update didn't work, it might be because somebody updated the
1261  * placeholder tuple concurrently. Extract the new version, union it
1262  * with the values we have from the scan, and start over. (There are
1263  * other reasons for the update to fail, but it's simple to treat them
1264  * the same.)
1265  */
1266  phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
1267  &offset, &phsz, BUFFER_LOCK_SHARE,
1268  NULL);
1269  /* the placeholder tuple must exist */
1270  if (phtup == NULL)
1271  elog(ERROR, "missing placeholder tuple");
1272  phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
1273  LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
1274 
1275  /* merge it into the tuple from the heap scan */
1276  union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
1277  }
1278 
1279  ReleaseBuffer(phbuf);
1280 }
1281 
1282 /*
1283  * Summarize page ranges that are not already summarized. If pageRange is
1284  * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
1285  * page range containing the given heap page number is scanned.
1286  * If include_partial is true, then the partial range at the end of the table
1287  * is summarized, otherwise not.
1288  *
1289  * For each new index tuple inserted, *numSummarized (if not NULL) is
1290  * incremented; for each existing tuple, *numExisting (if not NULL) is
1291  * incremented.
1292  */
1293 static void
1294 brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
1295  bool include_partial, double *numSummarized, double *numExisting)
1296 {
1297  BrinRevmap *revmap;
1298  BrinBuildState *state = NULL;
1299  IndexInfo *indexInfo = NULL;
1300  BlockNumber heapNumBlocks;
1301  BlockNumber pagesPerRange;
1302  Buffer buf;
1303  BlockNumber startBlk;
1304 
1305  revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
1306 
1307  /* determine range of pages to process */
1308  heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1309  if (pageRange == BRIN_ALL_BLOCKRANGES)
1310  startBlk = 0;
1311  else
1312  {
1313  startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1314  heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
1315  }
1316  if (startBlk > heapNumBlocks)
1317  {
1318  /* Nothing to do if start point is beyond end of table */
1319  brinRevmapTerminate(revmap);
1320  return;
1321  }
1322 
1323  /*
1324  * Scan the revmap to find unsummarized items.
1325  */
1326  buf = InvalidBuffer;
1327  for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
1328  {
1329  BrinTuple *tup;
1330  OffsetNumber off;
1331 
1332  /*
1333  * Unless requested to summarize even a partial range, go away now if
1334  * we think the next range is partial. Caller would pass true when it
1335  * is typically run once bulk data loading is done
1336  * (brin_summarize_new_values), and false when it is typically the
1337  * result of arbitrarily-scheduled maintenance command (vacuuming).
1338  */
1339  if (!include_partial &&
1340  (startBlk + pagesPerRange > heapNumBlocks))
1341  break;
1342 
1343  CHECK_FOR_INTERRUPTS();
1344 
1345  tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
1346  BUFFER_LOCK_SHARE, NULL);
1347  if (tup == NULL)
1348  {
1349  /* no revmap entry for this heap range. Summarize it. */
1350  if (state == NULL)
1351  {
1352  /* first time through */
1353  Assert(!indexInfo);
1354  state = initialize_brin_buildstate(index, revmap,
1355  pagesPerRange);
1356  indexInfo = BuildIndexInfo(index);
1357  }
1358  summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
1359 
1360  /* and re-initialize state for the next range */
1361  brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1362 
1363  if (numSummarized)
1364  *numSummarized += 1.0;
1365  }
1366  else
1367  {
1368  if (numExisting)
1369  *numExisting += 1.0;
1370  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1371  }
1372  }
1373 
1374  if (BufferIsValid(buf))
1375  ReleaseBuffer(buf);
1376 
1377  /* free resources */
1378  brinRevmapTerminate(revmap);
1379  if (state)
1380  {
1381  terminate_brin_buildstate(state);
1382  pfree(indexInfo);
1383  }
1384 }
1385 
1386 /*
1387  * Given a deformed tuple in the build state, convert it into the on-disk
1388  * format and insert it into the index, making the revmap point to it.
1389  */
1390 static void
1391 form_and_insert_tuple(BrinBuildState *state)
1392 {
1393  BrinTuple *tup;
1394  Size size;
1395 
1396  tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1397  state->bs_dtuple, &size);
1398  brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1399  &state->bs_currentInsertBuf, state->bs_currRangeStart,
1400  tup, size);
1401  state->bs_numtuples++;
1402 
1403  pfree(tup);
1404 }
1405 
1406 /*
1407  * Given two deformed tuples, adjust the first one so that it's consistent
1408  * with the summary values in both.
1409  */
1410 static void
1411 union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
1412 {
1413  int keyno;
1414  BrinMemTuple *db;
1415  MemoryContext cxt;
1416  MemoryContext oldcxt;
1417 
1418  /* Use our own memory context to avoid retail pfree */
1419  cxt = AllocSetContextCreate(CurrentMemoryContext,
1420  "brin union",
1421  ALLOCSET_DEFAULT_SIZES);
1422  oldcxt = MemoryContextSwitchTo(cxt);
1423  db = brin_deform_tuple(bdesc, b, NULL);
1424  MemoryContextSwitchTo(oldcxt);
1425 
1426  for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
1427  {
1428  FmgrInfo *unionFn;
1429  BrinValues *col_a = &a->bt_columns[keyno];
1430  BrinValues *col_b = &db->bt_columns[keyno];
1431 
1432  unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
1433  BRIN_PROCNUM_UNION);
1434  FunctionCall3Coll(unionFn,
1435  bdesc->bd_index->rd_indcollation[keyno],
1436  PointerGetDatum(bdesc),
1437  PointerGetDatum(col_a),
1438  PointerGetDatum(col_b));
1439  }
1440 
1441  MemoryContextDelete(cxt);
1442 }
1443 
1444 /*
1445  * brin_vacuum_scan
1446  * Do a complete scan of the index during VACUUM.
1447  *
1448  * This routine scans the complete index looking for uncatalogued index pages,
1449  * i.e. those that might have been lost due to a crash after index extension
1450  * and such.
1451  */
1452 static void
1453 brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
1454 {
1455  BlockNumber nblocks;
1456  BlockNumber blkno;
1457 
1458  /*
1459  * Scan the index in physical order, and clean up any possible mess in
1460  * each page.
1461  */
1462  nblocks = RelationGetNumberOfBlocks(idxrel);
1463  for (blkno = 0; blkno < nblocks; blkno++)
1464  {
1465  Buffer buf;
1466 
1467  CHECK_FOR_INTERRUPTS();
1468 
1469  buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
1470  RBM_NORMAL, strategy);
1471 
1472  brin_page_cleanup(idxrel, buf);
1473 
1474  ReleaseBuffer(buf);
1475  }
1476 
1477  /*
1478  * Update all upper pages in the index's FSM, as well. This ensures not
1479  * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
1480  * but also that any pre-existing damage or out-of-dateness is repaired.
1481  */
1482  FreeSpaceMapVacuum(idxrel);
1483 }
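
Editor's illustration (not part of brin.c): the block-range arithmetic used throughout the file, e.g. "heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange" in brininsert and the pageno loop in bringetbitmap, can be sketched as a minimal standalone C program. The typedef and the range_start() helper below are hypothetical stand-ins, not PostgreSQL APIs; 128 is the default pages_per_range reloption.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t BlockNumber;   /* stand-in for PostgreSQL's BlockNumber */

/* Round a heap block down to the first block of its page range, mirroring
 * brininsert's "(origHeapBlk / pagesPerRange) * pagesPerRange". */
static BlockNumber
range_start(BlockNumber heapBlk, BlockNumber pagesPerRange)
{
    return (heapBlk / pagesPerRange) * pagesPerRange;
}

int
main(void)
{
    BlockNumber pagesPerRange = 128;    /* default pages_per_range */
    BlockNumber origHeapBlk = 1000;
    BlockNumber start = range_start(origHeapBlk, pagesPerRange);

    /* The summarized range covers start .. start + pagesPerRange - 1; these
     * are the pages bringetbitmap adds to the bitmap when the range matches
     * (or is unsummarized). */
    printf("heap block %u belongs to range [%u, %u]\n",
           (unsigned) origHeapBlk, (unsigned) start,
           (unsigned) (start + pagesPerRange - 1));
    return 0;
}

Running the sketch prints "heap block 1000 belongs to range [896, 1023]".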
Definition: amapi.h:223