PostgreSQL Source Code git master
Loading...
Searching...
No Matches
brin.c File Reference
#include "postgres.h"
#include "access/brin.h"
#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_xlog.h"
#include "access/relation.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/vacuum.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/condition_variable.h"
#include "storage/freespace.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/guc.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tuplesort.h"
#include "utils/wait_event.h"
Include dependency graph for brin.c:

Go to the source code of this file.

Data Structures

struct  BrinShared
 
struct  BrinLeader
 
struct  BrinBuildState
 
struct  BrinInsertState
 
struct  BrinOpaque
 

Macros

#define PARALLEL_KEY_BRIN_SHARED   UINT64CONST(0xB000000000000001)
 
#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xB000000000000002)
 
#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xB000000000000003)
 
#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xB000000000000004)
 
#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xB000000000000005)
 
#define ParallelTableScanFromBrinShared(shared)    (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BrinShared)))
 
#define BRIN_ALL_BLOCKRANGES   InvalidBlockNumber
 

Typedefs

typedef struct BrinShared BrinShared
 
typedef struct BrinLeader BrinLeader
 
typedef struct BrinBuildState BrinBuildState
 
typedef struct BrinInsertState BrinInsertState
 
typedef struct BrinOpaque BrinOpaque
 

Functions

static BrinBuildState * initialize_brin_buildstate (Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange, BlockNumber tablePages)
 
static BrinInsertState * initialize_brin_insertstate (Relation idxRel, IndexInfo *indexInfo)
 
static void terminate_brin_buildstate (BrinBuildState *state)
 
static void brinsummarize (Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting)
 
static void form_and_insert_tuple (BrinBuildState *state)
 
static void form_and_spill_tuple (BrinBuildState *state)
 
static void union_tuples (BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
 
static void brin_vacuum_scan (Relation idxrel, BufferAccessStrategy strategy)
 
static bool add_values_to_range (Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, const Datum *values, const bool *nulls)
 
static bool check_null_keys (BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
 
static void brin_fill_empty_ranges (BrinBuildState *state, BlockNumber prevRange, BlockNumber nextRange)
 
static void _brin_begin_parallel (BrinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
 
static void _brin_end_parallel (BrinLeader *brinleader, BrinBuildState *state)
 
static Size _brin_parallel_estimate_shared (Relation heap, Snapshot snapshot)
 
static double _brin_parallel_heapscan (BrinBuildState *state)
 
static double _brin_parallel_merge (BrinBuildState *state)
 
static void _brin_leader_participate_as_worker (BrinBuildState *buildstate, Relation heap, Relation index)
 
static void _brin_parallel_scan_and_build (BrinBuildState *state, BrinShared *brinshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
 
Datum brinhandler (PG_FUNCTION_ARGS)
 
bool brininsert (Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
void brininsertcleanup (Relation index, IndexInfo *indexInfo)
 
IndexScanDesc brinbeginscan (Relation r, int nkeys, int norderbys)
 
int64 bringetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
void brinrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void brinendscan (IndexScanDesc scan)
 
static void brinbuildCallback (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
 
static void brinbuildCallbackParallel (Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
 
IndexBuildResult * brinbuild (Relation heap, Relation index, IndexInfo *indexInfo)
 
void brinbuildempty (Relation index)
 
IndexBulkDeleteResult * brinbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * brinvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bytea * brinoptions (Datum reloptions, bool validate)
 
Datum brin_summarize_new_values (PG_FUNCTION_ARGS)
 
Datum brin_summarize_range (PG_FUNCTION_ARGS)
 
Datum brin_desummarize_range (PG_FUNCTION_ARGS)
 
BrinDesc * brin_build_desc (Relation rel)
 
void brin_free_desc (BrinDesc *bdesc)
 
void brinGetStats (Relation index, BrinStatsData *stats)
 
static void summarize_range (IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks)
 
void _brin_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 
static void brin_build_empty_tuple (BrinBuildState *state, BlockNumber blkno)
 

Macro Definition Documentation

◆ BRIN_ALL_BLOCKRANGES

#define BRIN_ALL_BLOCKRANGES   InvalidBlockNumber

Definition at line 213 of file brin.c.

◆ PARALLEL_KEY_BRIN_SHARED

#define PARALLEL_KEY_BRIN_SHARED   UINT64CONST(0xB000000000000001)

Definition at line 51 of file brin.c.

◆ PARALLEL_KEY_BUFFER_USAGE

#define PARALLEL_KEY_BUFFER_USAGE   UINT64CONST(0xB000000000000005)

Definition at line 55 of file brin.c.

◆ PARALLEL_KEY_QUERY_TEXT

#define PARALLEL_KEY_QUERY_TEXT   UINT64CONST(0xB000000000000003)

Definition at line 53 of file brin.c.

◆ PARALLEL_KEY_TUPLESORT

#define PARALLEL_KEY_TUPLESORT   UINT64CONST(0xB000000000000002)

Definition at line 52 of file brin.c.

◆ PARALLEL_KEY_WAL_USAGE

#define PARALLEL_KEY_WAL_USAGE   UINT64CONST(0xB000000000000004)

Definition at line 54 of file brin.c.

◆ ParallelTableScanFromBrinShared

#define ParallelTableScanFromBrinShared (   shared)     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BrinShared)))

Definition at line 120 of file brin.c.

126{
127 /* parallel context itself */
128 ParallelContext *pcxt;
129
130 /*
131 * nparticipanttuplesorts is the exact number of worker processes
132 * successfully launched, plus one leader process if it participates as a
133 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
134 * participating as a worker).
135 */
136 int nparticipanttuplesorts;
137
138 /*
139 * Leader process convenience pointers to shared state (leader avoids TOC
140 * lookups).
141 *
142 * brinshared is the shared state for entire build. sharedsort is the
143 * shared, tuplesort-managed state passed to each process tuplesort.
144 * snapshot is the snapshot used by the scan iff an MVCC snapshot is
145 * required.
146 */
147 BrinShared *brinshared;
148 Sharedsort *sharedsort;
149 Snapshot snapshot;
150 WalUsage *walusage;
151 BufferUsage *bufferusage;
152} BrinLeader;
153
154/*
155 * We use a BrinBuildState during initial construction of a BRIN index.
156 * The running state is kept in a BrinMemTuple.
157 */
158typedef struct BrinBuildState
159{
161 double bs_numtuples;
162 double bs_reltuples;
170
174
175 /*
176 * bs_leader is only present when a parallel index build is performed, and
177 * only in the leader process. (Actually, only the leader process has a
178 * BrinBuildState.)
179 */
181 int bs_worker_id;
182
183 /*
184 * The sortstate is used by workers (including the leader). It has to be
185 * part of the build state, because that's the only thing passed to the
186 * build callback etc.
187 */
190
191/*
192 * We use a BrinInsertState to capture running state spanning multiple
193 * brininsert invocations, within the same command.
194 */
195typedef struct BrinInsertState
196{
201
202/*
203 * Struct used as "opaque" during index scans
204 */
205typedef struct BrinOpaque
206{
210} BrinOpaque;
211
212#define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
213
216 BlockNumber pagesPerRange,
221 bool include_partial, double *numSummarized, double *numExisting);
225 BrinTuple *b);
228 BrinMemTuple *dtup, const Datum *values, const bool *nulls);
229static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);
232
233/* parallel index builds */
235 bool isconcurrent, int request);
241 Relation heap, Relation index);
243 BrinShared *brinshared,
244 Sharedsort *sharedsort,
245 Relation heap, Relation index,
246 int sortmem, bool progress);
247
248/*
249 * BRIN handler function: return IndexAmRoutine with access method parameters
250 * and callbacks.
251 */
252Datum
254{
255 static const IndexAmRoutine amroutine = {
257 .amstrategies = 0,
258 .amsupport = BRIN_LAST_OPTIONAL_PROCNUM,
259 .amoptsprocnum = BRIN_PROCNUM_OPTIONS,
260 .amcanorder = false,
261 .amcanorderbyop = false,
262 .amcanhash = false,
263 .amconsistentequality = false,
264 .amconsistentordering = false,
265 .amcanbackward = false,
266 .amcanunique = false,
267 .amcanmulticol = true,
268 .amoptionalkey = true,
269 .amsearcharray = false,
270 .amsearchnulls = true,
271 .amstorage = true,
272 .amclusterable = false,
273 .ampredlocks = false,
274 .amcanparallel = false,
275 .amcanbuildparallel = true,
276 .amcaninclude = false,
277 .amusemaintenanceworkmem = false,
278 .amsummarizing = true,
279 .amparallelvacuumoptions =
281 .amkeytype = InvalidOid,
282
283 .ambuild = brinbuild,
284 .ambuildempty = brinbuildempty,
285 .aminsert = brininsert,
286 .aminsertcleanup = brininsertcleanup,
287 .ambulkdelete = brinbulkdelete,
288 .amvacuumcleanup = brinvacuumcleanup,
289 .amcanreturn = NULL,
290 .amcostestimate = brincostestimate,
291 .amgettreeheight = NULL,
292 .amoptions = brinoptions,
293 .amproperty = NULL,
294 .ambuildphasename = NULL,
295 .amvalidate = brinvalidate,
296 .amadjustmembers = NULL,
297 .ambeginscan = brinbeginscan,
298 .amrescan = brinrescan,
299 .amgettuple = NULL,
300 .amgetbitmap = bringetbitmap,
301 .amendscan = brinendscan,
302 .ammarkpos = NULL,
303 .amrestrpos = NULL,
304 .amestimateparallelscan = NULL,
305 .aminitparallelscan = NULL,
306 .amparallelrescan = NULL,
307 .amtranslatestrategy = NULL,
308 .amtranslatecmptype = NULL,
309 };
310
312}
313
314/*
315 * Initialize a BrinInsertState to maintain state to be used across multiple
316 * tuple inserts, within the same command.
317 */
318static BrinInsertState *
320{
321 BrinInsertState *bistate;
323
326 bistate->bis_desc = brin_build_desc(idxRel);
328 &bistate->bis_pages_per_range);
329 indexInfo->ii_AmCache = bistate;
331
332 return bistate;
333}
334
335/*
336 * A tuple in the heap is being inserted. To keep a brin index up to date,
337 * we need to obtain the relevant index tuple and compare its stored values
338 * with those of the new tuple. If the tuple values are not consistent with
339 * the summary tuple, we need to update the index tuple.
340 *
341 * If autosummarization is enabled, check if we need to summarize the previous
342 * page range.
343 *
344 * If the range is not currently summarized (i.e. the revmap returns NULL for
345 * it), there's nothing to do for this tuple.
346 */
347bool
348brininsert(Relation idxRel, Datum *values, bool *nulls,
351 bool indexUnchanged,
352 IndexInfo *indexInfo)
353{
354 BlockNumber pagesPerRange;
356 BlockNumber heapBlk;
357 BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
363 bool autosummarize = BrinGetAutoSummarize(idxRel);
364
365 /*
366 * If first time through in this statement, initialize the insert state
367 * that we keep for all the inserts in the command.
368 */
369 if (!bistate)
370 bistate = initialize_brin_insertstate(idxRel, indexInfo);
371
372 revmap = bistate->bis_rmAccess;
373 bdesc = bistate->bis_desc;
374 pagesPerRange = bistate->bis_pages_per_range;
375
376 /*
377 * origHeapBlk is the block number where the insertion occurred. heapBlk
378 * is the first block in the corresponding page range.
379 */
381 heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
382
383 for (;;)
384 {
385 bool need_insert = false;
386 OffsetNumber off;
389
391
392 /*
393 * If auto-summarization is enabled and we just inserted the first
394 * tuple into the first block of a new non-first page range, request a
395 * summarization run of the previous range.
396 */
397 if (autosummarize &&
398 heapBlk > 0 &&
399 heapBlk == origHeapBlk &&
401 {
402 BlockNumber lastPageRange = heapBlk - 1;
404
408 if (!lastPageTuple)
409 {
410 bool recorded;
411
415 if (!recorded)
416 ereport(LOG,
418 errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
420 lastPageRange)));
421 }
422 else
424 }
425
426 brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
428
429 /* if range is unsummarized, there's nothing to do */
430 if (!brtup)
431 break;
432
433 /* First time through in this brininsert call? */
434 if (tupcxt == NULL)
435 {
437 "brininsert cxt",
440 }
441
443
445
446 if (!need_insert)
447 {
448 /*
449 * The tuple is consistent with the new values, so there's nothing
450 * to do.
451 */
453 }
454 else
455 {
456 Page page = BufferGetPage(buf);
457 ItemId lp = PageGetItemId(page, off);
458 Size origsz;
460 Size newsz;
462 bool samepage;
463
464 /*
465 * Make a copy of the old tuple, so that we can compare it after
466 * re-acquiring the lock.
467 */
470
471 /*
472 * Before releasing the lock, check if we can attempt a same-page
473 * update. Another process could insert a tuple concurrently in
474 * the same page though, so downstream we must be prepared to cope
475 * if this turns out to not be possible after all.
476 */
477 newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
480
481 /*
482 * Try to update the tuple. If this doesn't work for whatever
483 * reason, we need to restart from the top; the revmap might be
484 * pointing at a different tuple for this block now, so we need to
485 * recompute to ensure both our new heap tuple and the other
486 * inserter's are covered by the combined tuple. It might be that
487 * we don't need to update at all.
488 */
489 if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
490 buf, off, origtup, origsz, newtup, newsz,
491 samepage))
492 {
493 /* no luck; start over */
495 continue;
496 }
497 }
498
499 /* success! */
500 break;
501 }
502
503 if (BufferIsValid(buf))
506 if (tupcxt != NULL)
508
509 return false;
510}
511
512/*
513 * Callback to clean up the BrinInsertState once all tuple inserts are done.
514 */
515void
517{
518 BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
519
520 /* bail out if cache not initialized */
521 if (bistate == NULL)
522 return;
523
524 /* do this first to avoid dangling pointer if we fail partway through */
525 indexInfo->ii_AmCache = NULL;
526
527 /*
528 * Clean up the revmap. Note that the brinDesc has already been cleaned up
529 * as part of its own memory context.
530 */
531 brinRevmapTerminate(bistate->bis_rmAccess);
532 pfree(bistate);
533}
534
535/*
536 * Initialize state for a BRIN index scan.
537 *
538 * We read the metapage here to determine the pages-per-range number that this
539 * index was built with. Note that since this cannot be changed while we're
540 * holding lock on index, it's not necessary to recompute it during brinrescan.
541 */
543brinbeginscan(Relation r, int nkeys, int norderbys)
544{
545 IndexScanDesc scan;
546 BrinOpaque *opaque;
547
548 scan = RelationGetIndexScan(r, nkeys, norderbys);
549
550 opaque = palloc_object(BrinOpaque);
552 opaque->bo_bdesc = brin_build_desc(r);
553 scan->opaque = opaque;
554
555 return scan;
556}
557
558/*
559 * Execute the index scan.
560 *
561 * This works by reading index TIDs from the revmap, and obtaining the index
562 * tuples pointed to by them; the summary values in the index tuples are
563 * compared to the scan keys. We return into the TID bitmap all the pages in
564 * ranges corresponding to index tuples that match the scan keys.
565 *
566 * If a TID from the revmap is read as InvalidTID, we know that range is
567 * unsummarized. Pages in those ranges need to be returned regardless of scan
568 * keys.
569 */
570int64
572{
576 Oid heapOid;
577 Relation heapRel;
578 BrinOpaque *opaque;
579 BlockNumber nblocks;
580 int64 totalpages = 0;
581 FmgrInfo *consistentFn;
586 Size btupsz = 0;
587 ScanKey **keys,
588 **nullkeys;
589 int *nkeys,
590 *nnullkeys;
591 char *ptr;
592 Size len;
593 char *tmp PG_USED_FOR_ASSERTS_ONLY;
594
595 opaque = (BrinOpaque *) scan->opaque;
596 bdesc = opaque->bo_bdesc;
598 if (scan->instrument)
599 scan->instrument->nsearches++;
600
601 /*
602 * We need to know the size of the table so that we know how long to
603 * iterate on the revmap.
604 */
605 heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
606 heapRel = table_open(heapOid, AccessShareLock);
607 nblocks = RelationGetNumberOfBlocks(heapRel);
609
610 /*
611 * Make room for the consistent support procedures of indexed columns. We
612 * don't look them up here; we do that lazily the first time we see a scan
613 * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
614 */
615 consistentFn = palloc0_array(FmgrInfo, bdesc->bd_tupdesc->natts);
616
617 /*
618 * Make room for per-attribute lists of scan keys that we'll pass to the
619 * consistent support procedure. We don't know which attributes have scan
620 * keys, so we allocate space for all attributes. That may use more memory
621 * but it's probably cheaper than determining which attributes are used.
622 *
623 * We keep null and regular keys separate, so that we can pass just the
624 * regular keys to the consistent function easily.
625 *
626 * To reduce the allocation overhead, we allocate one big chunk and then
627 * carve it into smaller arrays ourselves. All the pieces have exactly the
628 * same lifetime, so that's OK.
629 *
630 * XXX The widest index can have 32 attributes, so the amount of wasted
631 * memory is negligible. We could invent a more compact approach (with
632 * just space for used attributes) but that would make the matching more
633 * complex so it's not a good trade-off.
634 */
635 len =
636 MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* regular keys */
637 MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
638 MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
639 MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* NULL keys */
640 MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
641 MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
642
643 ptr = palloc(len);
644 tmp = ptr;
645
646 keys = (ScanKey **) ptr;
647 ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
648
649 nullkeys = (ScanKey **) ptr;
650 ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
651
652 nkeys = (int *) ptr;
653 ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
654
655 nnullkeys = (int *) ptr;
656 ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
657
658 for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
659 {
660 keys[i] = (ScanKey *) ptr;
661 ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
662
663 nullkeys[i] = (ScanKey *) ptr;
664 ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
665 }
666
667 Assert(tmp + len == ptr);
668
669 /* zero the number of keys */
670 memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
671 memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
672
673 /* Preprocess the scan keys - split them into per-attribute arrays. */
674 for (int keyno = 0; keyno < scan->numberOfKeys; keyno++)
675 {
676 ScanKey key = &scan->keyData[keyno];
677 AttrNumber keyattno = key->sk_attno;
678
679 /*
680 * The collation of the scan key must match the collation used in the
681 * index column (but only if the search is not IS NULL/ IS NOT NULL).
682 * Otherwise we shouldn't be using this index ...
683 */
684 Assert((key->sk_flags & SK_ISNULL) ||
685 (key->sk_collation ==
686 TupleDescAttr(bdesc->bd_tupdesc,
687 keyattno - 1)->attcollation));
688
689 /*
690 * First time we see this index attribute, so init as needed.
691 *
692 * This is a bit of an overkill - we don't know how many scan keys are
693 * there for this attribute, so we simply allocate the largest number
694 * possible (as if all keys were for this attribute). This may waste a
695 * bit of memory, but we only expect small number of scan keys in
696 * general, so this should be negligible, and repeated repalloc calls
697 * are not free either.
698 */
699 if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
700 {
701 FmgrInfo *tmp;
702
703 /* First time we see this attribute, so no key/null keys. */
704 Assert(nkeys[keyattno - 1] == 0);
705 Assert(nnullkeys[keyattno - 1] == 0);
706
709 fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
711 }
712
713 /* Add key to the proper per-attribute array. */
714 if (key->sk_flags & SK_ISNULL)
715 {
716 nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
717 nnullkeys[keyattno - 1]++;
718 }
719 else
720 {
721 keys[keyattno - 1][nkeys[keyattno - 1]] = key;
722 nkeys[keyattno - 1]++;
723 }
724 }
725
726 /* allocate an initial in-memory tuple, out of the per-range memcxt */
728
729 /*
730 * Setup and use a per-range memory context, which is reset every time we
731 * loop below. This avoids having to free the tuples within the loop.
732 */
734 "bringetbitmap cxt",
737
738 /*
739 * Now scan the revmap. We start by querying for heap page 0,
740 * incrementing by the number of pages per range; this gives us a full
741 * view of the table. We make use of uint64 for heapBlk as a BlockNumber
742 * could wrap for tables with close to 2^32 pages.
743 */
744 for (uint64 heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
745 {
746 bool addrange;
747 bool gottuple = false;
748 BrinTuple *tup;
749 OffsetNumber off;
750 Size size;
751
753
755
756 tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, (BlockNumber) heapBlk, &buf,
757 &off, &size, BUFFER_LOCK_SHARE);
758 if (tup)
759 {
760 gottuple = true;
761 btup = brin_copy_tuple(tup, size, btup, &btupsz);
763 }
764
765 /*
766 * For page ranges with no indexed tuple, we must return the whole
767 * range; otherwise, compare it to the scan keys.
768 */
769 if (!gottuple)
770 {
771 addrange = true;
772 }
773 else
774 {
776 if (dtup->bt_placeholder)
777 {
778 /*
779 * Placeholder tuples are always returned, regardless of the
780 * values stored in them.
781 */
782 addrange = true;
783 }
784 else
785 {
786 int attno;
787
788 /*
789 * Compare scan keys with summary values stored for the range.
790 * If scan keys are matched, the page range must be added to
791 * the bitmap. We initially assume the range needs to be
792 * added; in particular this serves the case where there are
793 * no keys.
794 */
795 addrange = true;
796 for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
797 {
798 BrinValues *bval;
799 Datum add;
800 Oid collation;
801
802 /*
803 * skip attributes without any scan keys (both regular and
804 * IS [NOT] NULL)
805 */
806 if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
807 continue;
808
809 bval = &dtup->bt_columns[attno - 1];
810
811 /*
812 * If the BRIN tuple indicates that this range is empty,
813 * we can skip it: there's nothing to match. We don't
814 * need to examine the next columns.
815 */
816 if (dtup->bt_empty_range)
817 {
818 addrange = false;
819 break;
820 }
821
822 /*
823 * First check if there are any IS [NOT] NULL scan keys,
824 * and if we're violating them. In that case we can
825 * terminate early, without invoking the support function.
826 *
827 * As there may be more keys, we can only determine
828 * mismatch within this loop.
829 */
830 if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
831 !check_null_keys(bval, nullkeys[attno - 1],
832 nnullkeys[attno - 1]))
833 {
834 /*
835 * If any of the IS [NOT] NULL keys failed, the page
836 * range as a whole can't pass. So terminate the loop.
837 */
838 addrange = false;
839 break;
840 }
841
842 /*
843 * So either there are no IS [NOT] NULL keys, or all
844 * passed. If there are no regular scan keys, we're done -
845 * the page range matches. If there are regular keys, but
846 * the page range is marked as 'all nulls' it can't
847 * possibly pass (we're assuming the operators are
848 * strict).
849 */
850
851 /* No regular scan keys - page range as a whole passes. */
852 if (!nkeys[attno - 1])
853 continue;
854
855 Assert((nkeys[attno - 1] > 0) &&
856 (nkeys[attno - 1] <= scan->numberOfKeys));
857
858 /* If it is all nulls, it cannot possibly be consistent. */
859 if (bval->bv_allnulls)
860 {
861 addrange = false;
862 break;
863 }
864
865 /*
866 * Collation from the first key (has to be the same for
867 * all keys for the same attribute).
868 */
869 collation = keys[attno - 1][0]->sk_collation;
870
871 /*
872 * Check whether the scan key is consistent with the page
873 * range values; if so, have the pages in the range added
874 * to the output bitmap.
875 *
876 * The opclass may or may not support processing of
877 * multiple scan keys. We can determine that based on the
878 * number of arguments - functions with extra parameter
879 * (number of scan keys) do support this, otherwise we
880 * have to simply pass the scan keys one by one.
881 */
882 if (consistentFn[attno - 1].fn_nargs >= 4)
883 {
884 /* Check all keys at once */
885 add = FunctionCall4Coll(&consistentFn[attno - 1],
886 collation,
888 PointerGetDatum(bval),
889 PointerGetDatum(keys[attno - 1]),
890 Int32GetDatum(nkeys[attno - 1]));
892 }
893 else
894 {
895 /*
896 * Check keys one by one
897 *
898 * When there are multiple scan keys, failure to meet
899 * the criteria for a single one of them is enough to
900 * discard the range as a whole, so break out of the
901 * loop as soon as a false return value is obtained.
902 */
903 int keyno;
904
905 for (keyno = 0; keyno < nkeys[attno - 1]; keyno++)
906 {
907 add = FunctionCall3Coll(&consistentFn[attno - 1],
908 keys[attno - 1][keyno]->sk_collation,
910 PointerGetDatum(bval),
911 PointerGetDatum(keys[attno - 1][keyno]));
913 if (!addrange)
914 break;
915 }
916 }
917
918 /*
919 * If we found a scan key eliminating the range, no need
920 * to check additional ones.
921 */
922 if (!addrange)
923 break;
924 }
925 }
926 }
927
928 /* add the pages in the range to the output bitmap, if needed */
929 if (addrange)
930 {
931 uint64 pageno;
932
933 for (pageno = heapBlk;
934 pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
935 pageno++)
936 {
938 tbm_add_page(tbm, pageno);
939 totalpages++;
941 }
942 }
943 }
944
947
948 if (buf != InvalidBuffer)
950
951 /*
952 * XXX We have an approximation of the number of *pages* that our scan
953 * returns, but we don't have a precise idea of the number of heap tuples
954 * involved.
955 */
956 return totalpages * 10;
957}
958
959/*
960 * Re-initialize state for a BRIN index scan
961 */
962void
964 ScanKey orderbys, int norderbys)
965{
966 /*
967 * Other index AMs preprocess the scan keys at this point, or sometime
968 * early during the scan; this lets them optimize by removing redundant
969 * keys, or doing early returns when they are impossible to satisfy; see
970 * _bt_preprocess_keys for an example. Something like that could be added
971 * here someday, too.
972 */
973
974 if (scankey && scan->numberOfKeys > 0)
975 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
976}
977
978/*
979 * Close down a BRIN index scan
980 */
981void
983{
984 BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
985
986 brinRevmapTerminate(opaque->bo_rmAccess);
987 brin_free_desc(opaque->bo_bdesc);
988 pfree(opaque);
989}
990
991/*
992 * Per-heap-tuple callback for table_index_build_scan.
993 *
994 * Note we don't worry about the page range at the end of the table here; it is
995 * present in the build state struct after we're called the last time, but not
996 * inserted into the index. Caller must ensure to do so, if appropriate.
997 */
998static void
1000 ItemPointer tid,
1001 Datum *values,
1002 bool *isnull,
1003 bool tupleIsAlive,
1004 void *brstate)
1005{
1008
1010
1011 /*
1012 * If we're in a block that belongs to a future range, summarize what
1013 * we've got and start afresh. Note the scan might have skipped many
1014 * pages, if they were devoid of live tuples; make sure to insert index
1015 * tuples for those too.
1016 */
1017 while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
1018 {
1019
1021 "brinbuildCallback: completed a range: %u--%u",
1022 state->bs_currRangeStart,
1023 state->bs_currRangeStart + state->bs_pagesPerRange));
1024
1025 /* create the index tuple and insert it */
1027
1028 /* set state to correspond to the next range */
1029 state->bs_currRangeStart += state->bs_pagesPerRange;
1030
1031 /* re-initialize state for it */
1032 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1033 }
1034
1035 /* Accumulate the current tuple into the running state */
1036 (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
1037 values, isnull);
1038}
1039
1040/*
1041 * Per-heap-tuple callback for table_index_build_scan with parallelism.
1042 *
1043 * A version of the callback used by parallel index builds. The main difference
1044 * is that instead of writing the BRIN tuples into the index, we write them
1045 * into a shared tuplesort, and leave the insertion up to the leader (which may
1046 * reorder them a bit etc.). The callback also does not generate empty ranges,
1047 * those will be added by the leader when merging results from workers.
1048 */
1049static void
1051 ItemPointer tid,
1052 Datum *values,
1053 bool *isnull,
1054 bool tupleIsAlive,
1055 void *brstate)
1056{
1059
1061
1062 /*
1063 * If we're in a block that belongs to a different range, summarize what
1064 * we've got and start afresh. Note the scan might have skipped many
1065 * pages, if they were devoid of live tuples; we do not create empty BRIN
1066 * ranges here - the leader is responsible for filling them in.
1067 *
1068 * Unlike serial builds, parallel index builds allow synchronized seqscans
1069 * (because that's what parallel scans do). This means the block may wrap
1070 * around to the beginning of the relation, so the condition needs to
1071 * check for both future and past ranges.
1072 */
1073 if ((thisblock < state->bs_currRangeStart) ||
1074 (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1))
1075 {
1076
1078 "brinbuildCallbackParallel: completed a range: %u--%u",
1079 state->bs_currRangeStart,
1080 state->bs_currRangeStart + state->bs_pagesPerRange));
1081
1082 /* create the index tuple and write it into the tuplesort */
1084
1085 /*
1086 * Set state to correspond to the next range (for this block).
1087 *
1088 * This skips ranges that are either empty (and so we don't get any
1089 * tuples to summarize), or processed by other workers. We can't
1090 * differentiate those cases here easily, so we leave it up to the
1091 * leader to fill empty ranges where needed.
1092 */
1093 state->bs_currRangeStart
1094 = state->bs_pagesPerRange * (thisblock / state->bs_pagesPerRange);
1095
1096 /* re-initialize state for it */
1097 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1098 }
1099
1100 /* Accumulate the current tuple into the running state */
1101 (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
1102 values, isnull);
1103}
1104
1105/*
1106 * brinbuild() -- build a new BRIN index.
1107 */
/*
 * NOTE(review): this extract dropped lines here — the IndexBuildResult *
 * return type, declarations such as revmap/state/coordinate/recptr/xlrec,
 * and several statements (metapage init, XLOG record assembly, tuplesort
 * begin call, final-batch processing). Compare against canonical brin.c
 * before editing.
 */
1109brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
1110{
1111 IndexBuildResult *result;
1112 double reltuples;
1113 double idxtuples;
1116 Buffer meta;
1117 BlockNumber pagesPerRange;
1118
1119 /*
1120 * We expect to be called exactly once for any index relation.
1121 */
1123 elog(ERROR, "index \"%s\" already contains data",
1125
1126 /*
1127 * Critical section not required, because on error the creation of the
1128 * whole relation will be rolled back.
1129 */
1130
1134
1137 MarkBufferDirty(meta);
1138
/* WAL-log the metapage initialization (block presumably guarded by a
 * RelationNeedsWAL test on the dropped line above — confirm upstream). */
1140 {
1143 Page page;
1144
1146 xlrec.pagesPerRange = BrinGetPagesPerRange(index);
1147
1151
1153
1154 page = BufferGetPage(meta);
1155 PageSetLSN(page, recptr);
1156 }
1157
1158 UnlockReleaseBuffer(meta);
1159
1160 /*
1161 * Initialize our state, including the deformed tuple state.
1162 */
1163 revmap = brinRevmapInitialize(index, &pagesPerRange);
1164 state = initialize_brin_buildstate(index, revmap, pagesPerRange,
1166
1167 /*
1168 * Attempt to launch parallel worker scan when required
1169 *
1170 * XXX plan_create_index_workers makes the number of workers dependent on
1171 * maintenance_work_mem, requiring 32MB for each worker. That makes sense
1172 * for btree, but not for BRIN, which can do with much less memory. So
1173 * maybe make that somehow less strict, optionally?
1174 */
1175 if (indexInfo->ii_ParallelWorkers > 0)
1176 _brin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
1177 indexInfo->ii_ParallelWorkers);
1178
1179 /*
1180 * If parallel build requested and at least one worker process was
1181 * successfully launched, set up coordination state, wait for workers to
1182 * complete. Then read all tuples from the shared tuplesort and insert
1183 * them into the index.
1184 *
1185 * In serial mode, simply scan the table and build the index one index
1186 * tuple at a time.
1187 */
1188 if (state->bs_leader)
1189 {
1191
1193 coordinate->isWorker = false;
1194 coordinate->nParticipants =
1195 state->bs_leader->nparticipanttuplesorts;
1196 coordinate->sharedsort = state->bs_leader->sharedsort;
1197
1198 /*
1199 * Begin leader tuplesort.
1200 *
1201 * In cases where parallelism is involved, the leader receives the
1202 * same share of maintenance_work_mem as a serial sort (it is
1203 * generally treated in the same way as a serial sort once we return).
1204 * Parallel worker Tuplesortstates will have received only a fraction
1205 * of maintenance_work_mem, though.
1206 *
1207 * We rely on the lifetime of the Leader Tuplesortstate almost not
1208 * overlapping with any worker Tuplesortstate's lifetime. There may
1209 * be some small overlap, but that's okay because we rely on leader
1210 * Tuplesortstate only allocating a small, fixed amount of memory
1211 * here. When its tuplesort_performsort() is called (by our caller),
1212 * and significant amounts of memory are likely to be used, all
1213 * workers must have already freed almost all memory held by their
1214 * Tuplesortstates (they are about to go away completely, too). The
1215 * overall effect is that maintenance_work_mem always represents an
1216 * absolute high watermark on the amount of memory used by a CREATE
1217 * INDEX operation, regardless of the use of parallelism or any other
1218 * factor.
1219 */
1220 state->bs_sortstate =
1223
1224 /* scan the relation and merge per-worker results */
1225 reltuples = _brin_parallel_merge(state);
1226
1227 _brin_end_parallel(state->bs_leader, state);
1228 }
1229 else /* no parallel index build */
1230 {
1231 /*
1232 * Now scan the relation. No syncscan allowed here because we want
1233 * the heap blocks in physical order (we want to produce the ranges
1234 * starting from block 0, and the callback also relies on this to not
1235 * generate summary for the same range twice).
1236 */
1237 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
1239
1240 /*
1241 * process the final batch
1242 *
1243 * XXX Note this does not update state->bs_currRangeStart, i.e. it
1244 * stays set to the last range added to the index. This is OK, because
1245 * that's what brin_fill_empty_ranges expects.
1246 */
1248
1249 /*
1250 * Backfill the final ranges with empty data.
1251 *
1252 * This saves us from doing what amounts to full table scans when the
1253 * index with a predicate like WHERE (nonnull_column IS NULL), or
1254 * other very selective predicates.
1255 */
1257 state->bs_currRangeStart,
1258 state->bs_maxRangeStart);
1259 }
1260
1261 /* release resources */
1262 idxtuples = state->bs_numtuples;
1263 brinRevmapTerminate(state->bs_rmAccess);
1265
1266 /*
1267 * Return statistics
1268 */
1270
1271 result->heap_tuples = reltuples;
1272 result->index_tuples = idxtuples;
1273
1274 return result;
1275}
1276
1277void
/*
 * NOTE(review): the function-name line and nearly all statements were lost
 * in extraction (only the comments survive). Presumably this is
 * brinbuildempty(Relation index) building the init-fork metapage — confirm
 * against canonical brin.c.
 */
1279{
1281
1282 /* An empty BRIN index has a metapage only. */
1285
1286 /* Initialize and xlog metabuffer. */
1293
1295}
1296
1297/*
1298 * brinbulkdelete
1299 * Since there are no per-heap-tuple index tuples in BRIN indexes,
1300 * there's not a lot we can do here.
1301 *
1302 * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
1303 * tuple is deleted), meaning the need to re-run summarization on the affected
1304 * range. Would need to add an extra flag in brintuples for that.
1305 */
/*
 * NOTE(review): the first line(s) of the signature and the stats allocation
 * statement were dropped in extraction — confirm against canonical brin.c.
 */
1308 IndexBulkDeleteCallback callback, void *callback_state)
1309{
1310 /* allocate stats if first time through, else re-use existing struct */
1311 if (stats == NULL)
1313
1314 return stats;
1315}
1316
1317/*
1318 * This routine is in charge of "vacuuming" a BRIN index: we just summarize
1319 * ranges that are currently unsummarized.
1320 */
/*
 * NOTE(review): the return-type and signature lines were dropped in
 * extraction, as were parts of the stats allocation and table_open call —
 * confirm against canonical brin.c.
 */
1323{
1324 Relation heapRel;
1325
1326 /* No-op in ANALYZE ONLY mode */
1327 if (info->analyze_only)
1328 return stats;
1329
1330 if (!stats)
1333 /* rest of stats is initialized by zeroing */
1334
1335 heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
1337
1338 brin_vacuum_scan(info->index, info->strategy);
1339
/*
 * Both the "summarized" and "existing" counters feed the same field, so
 * num_index_tuples ends up counting all summarized ranges — presumably
 * intentional; confirm upstream before changing.
 */
1340 brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
1341 &stats->num_index_tuples, &stats->num_index_tuples);
1342
1343 table_close(heapRel, AccessShareLock);
1344
1345 return stats;
1346}
1347
1348/*
1349 * reloptions processor for BRIN indexes
1350 */
1351bytea *
1352brinoptions(Datum reloptions, bool validate)
1353{
1354 static const relopt_parse_elt tab[] = {
1355 {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
1356 {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
1357 };
1358
/* NOTE(review): one argument line of this call (presumably the relopt kind)
 * was dropped in extraction — confirm against canonical brin.c. */
1359 return (bytea *) build_reloptions(reloptions, validate,
1361 sizeof(BrinOptions),
1362 tab, lengthof(tab));
1363}
1364
1365/*
1366 * SQL-callable function to scan through an index and summarize all ranges
1367 * that are not currently summarized.
1368 */
/*
 * NOTE(review): the function-name line and the body's delegation call were
 * dropped in extraction; what remains suggests it forwards to
 * brin_summarize_range() with BRIN_ALL_BLOCKRANGES — confirm upstream.
 */
1369Datum
1371{
1372 Datum relation = PG_GETARG_DATUM(0);
1373
1375 relation,
1377}
1378
1379/*
1380 * SQL-callable function to summarize the indicated page range, if not already
1381 * summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
1382 * unsummarized ranges are summarized.
1383 */
/*
 * NOTE(review): extraction dropped the function-name line, the heapBlk64
 * declaration, several ereport errcode arguments, the table_open call and
 * the final index_close/PG_RETURN statements — confirm against canonical
 * brin.c before editing.
 */
1384Datum
1386{
1387 Oid indexoid = PG_GETARG_OID(0);
1389 BlockNumber heapBlk;
1390 Oid heapoid;
1391 Relation indexRel;
1392 Relation heapRel;
1393 Oid save_userid;
1394 int save_sec_context;
1395 int save_nestlevel;
1396 double numSummarized = 0;
1397
1398 if (RecoveryInProgress())
1399 ereport(ERROR,
1401 errmsg("recovery is in progress"),
1402 errhint("BRIN control functions cannot be executed during recovery.")));
1403
1405 ereport(ERROR,
1407 errmsg("block number out of range: %" PRId64, heapBlk64)));
1408 heapBlk = (BlockNumber) heapBlk64;
1409
1410 /*
1411 * We must lock table before index to avoid deadlocks. However, if the
1412 * passed indexoid isn't an index then IndexGetRelation() will fail.
1413 * Rather than emitting a not-very-helpful error message, postpone
1414 * complaining, expecting that the is-it-an-index test below will fail.
1415 */
1416 heapoid = IndexGetRelation(indexoid, true);
1417 if (OidIsValid(heapoid))
1418 {
1420
1421 /*
1422 * Autovacuum calls us. For its benefit, switch to the table owner's
1423 * userid, so that any index functions are run as that user. Also
1424 * lock down security-restricted operations and arrange to make GUC
1425 * variable changes local to this command. This is harmless, albeit
1426 * unnecessary, when called from SQL, because we fail shortly if the
1427 * user does not own the index.
1428 */
1429 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1430 SetUserIdAndSecContext(heapRel->rd_rel->relowner,
1431 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1432 save_nestlevel = NewGUCNestLevel();
1434 }
1435 else
1436 {
1437 heapRel = NULL;
1438 /* Set these just to suppress "uninitialized variable" warnings */
1439 save_userid = InvalidOid;
1440 save_sec_context = -1;
1441 save_nestlevel = -1;
1442 }
1443
1444 indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1445
1446 /* Must be a BRIN index */
1447 if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1448 indexRel->rd_rel->relam != BRIN_AM_OID)
1449 ereport(ERROR,
1451 errmsg("\"%s\" is not a BRIN index",
1452 RelationGetRelationName(indexRel))));
1453
1454 /* User must own the index (comparable to privileges needed for VACUUM) */
1455 if (heapRel != NULL && !object_ownercheck(RelationRelationId, indexoid, save_userid))
1457 RelationGetRelationName(indexRel));
1458
1459 /*
1460 * Since we did the IndexGetRelation call above without any lock, it's
1461 * barely possible that a race against an index drop/recreation could have
1462 * netted us the wrong table. Recheck.
1463 */
1464 if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1465 ereport(ERROR,
1467 errmsg("could not open parent table of index \"%s\"",
1468 RelationGetRelationName(indexRel))));
1469
1470 /* see gin_clean_pending_list() */
1471 if (indexRel->rd_index->indisvalid)
1472 brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
1473 else
1476 errmsg("index \"%s\" is not valid",
1477 RelationGetRelationName(indexRel))));
1478
1479 /* Roll back any GUC changes executed by index functions */
1480 AtEOXact_GUC(false, save_nestlevel);
1481
1482 /* Restore userid and security context */
1483 SetUserIdAndSecContext(save_userid, save_sec_context);
1484
1487
1489}
1490
1491/*
1492 * SQL-callable interface to mark a range as no longer summarized
1493 */
/*
 * NOTE(review): extraction dropped the function-name line, the heapBlk64
 * declaration, the ownership check's first line, and the closing
 * relation_close/index_close/PG_RETURN statements — confirm against
 * canonical brin.c before editing.
 */
1494Datum
1496{
1497 Oid indexoid = PG_GETARG_OID(0);
1499 BlockNumber heapBlk;
1500 Oid heapoid;
1501 Relation heapRel;
1502 Relation indexRel;
1503 bool done;
1504
1505 if (RecoveryInProgress())
1506 ereport(ERROR,
1508 errmsg("recovery is in progress"),
1509 errhint("BRIN control functions cannot be executed during recovery.")));
1510
1511 if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
1512 ereport(ERROR,
1514 errmsg("block number out of range: %" PRId64,
1515 heapBlk64)));
1516 heapBlk = (BlockNumber) heapBlk64;
1517
1518 /*
1519 * We must lock table before index to avoid deadlocks. However, if the
1520 * passed indexoid isn't an index then IndexGetRelation() will fail.
1521 * Rather than emitting a not-very-helpful error message, postpone
1522 * complaining, expecting that the is-it-an-index test below will fail.
1523 *
1524 * Unlike brin_summarize_range(), autovacuum never calls this. Hence, we
1525 * don't switch userid.
1526 */
1527 heapoid = IndexGetRelation(indexoid, true);
1528 if (OidIsValid(heapoid))
1530 else
1531 heapRel = NULL;
1532
1533 indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1534
1535 /* Must be a BRIN index */
1536 if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1537 indexRel->rd_rel->relam != BRIN_AM_OID)
1538 ereport(ERROR,
1540 errmsg("\"%s\" is not a BRIN index",
1541 RelationGetRelationName(indexRel))));
1542
1543 /* User must own the index (comparable to privileges needed for VACUUM) */
1546 RelationGetRelationName(indexRel));
1547
1548 /*
1549 * Since we did the IndexGetRelation call above without any lock, it's
1550 * barely possible that a race against an index drop/recreation could have
1551 * netted us the wrong table. Recheck.
1552 */
1553 if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1554 ereport(ERROR,
1556 errmsg("could not open parent table of index \"%s\"",
1557 RelationGetRelationName(indexRel))));
1558
1559 /* see gin_clean_pending_list() */
1560 if (indexRel->rd_index->indisvalid)
1561 {
1562 /* the revmap does the hard work */
1563 do
1564 {
1565 done = brinRevmapDesummarizeRange(indexRel, heapBlk);
1566 }
1567 while (!done);
1568 }
1569 else
1572 errmsg("index \"%s\" is not valid",
1573 RelationGetRelationName(indexRel))));
1574
1577
1579}
1580
1581/*
1582 * Build a BrinDesc used to create or scan a BRIN index
1583 */
/*
 * NOTE(review): extraction dropped the function-name line, the opcinfo
 * declaration, the memory context creation/switch statements, and the
 * index_getprocinfo/OidFunctionCall lines inside the loop — confirm against
 * canonical brin.c before editing.
 */
1584BrinDesc *
1586{
1588 BrinDesc *bdesc;
1589 TupleDesc tupdesc;
1590 int totalstored = 0;
1591 int keyno;
1592 long totalsize;
1593 MemoryContext cxt;
1595
1597 "brin desc cxt",
1600 tupdesc = RelationGetDescr(rel);
1601
1602 /*
1603 * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1604 * the number of columns stored, since the number is opclass-defined.
1605 */
1606 opcinfo = palloc_array(BrinOpcInfo *, tupdesc->natts);
1607 for (keyno = 0; keyno < tupdesc->natts; keyno++)
1608 {
1610 Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1611
1613
1614 opcinfo[keyno] = (BrinOpcInfo *)
1616 totalstored += opcinfo[keyno]->oi_nstored;
1617 }
1618
1619 /* Allocate our result struct and fill it in */
1620 totalsize = offsetof(BrinDesc, bd_info) +
1621 sizeof(BrinOpcInfo *) * tupdesc->natts;
1622
1623 bdesc = palloc(totalsize);
1624 bdesc->bd_context = cxt;
1625 bdesc->bd_index = rel;
1626 bdesc->bd_tupdesc = tupdesc;
1627 bdesc->bd_disktdesc = NULL; /* generated lazily */
1628 bdesc->bd_totalstored = totalstored;
1629
1630 for (keyno = 0; keyno < tupdesc->natts; keyno++)
1631 bdesc->bd_info[keyno] = opcinfo[keyno];
1632 pfree(opcinfo);
1633
1635
1636 return bdesc;
1637}
1638
1639void
/* NOTE(review): function-name line dropped in extraction — presumably
 * brin_free_desc(BrinDesc *bdesc); releases everything at once by deleting
 * the descriptor's private memory context. */
1641{
1642 /* make sure the tupdesc is still valid */
1643 Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1644 /* no need for retail pfree */
1645 MemoryContextDelete(bdesc->bd_context);
1646}
1647
1648/*
1649 * Fetch index's statistical data into *stats
1650 */
/*
 * NOTE(review): extraction dropped the function-name line and the metapage
 * buffer read/lock/release statements around the field copies — confirm
 * against canonical brin.c.
 */
1651void
1653{
1655 Page metapage;
1656 BrinMetaPageData *metadata;
1657
1662
1663 stats->pagesPerRange = metadata->pagesPerRange;
1664 stats->revmapNumPages = metadata->lastRevmapPage - 1;
1665
1667}
1668
1669/*
1670 * Initialize a BrinBuildState appropriate to create tuples on the given index.
1671 */
/*
 * NOTE(review): extraction dropped the signature's first line and the
 * declarations/allocation of "state" and "lastRange" — confirm against
 * canonical brin.c before editing.
 */
1672static BrinBuildState *
1674 BlockNumber pagesPerRange, BlockNumber tablePages)
1675{
1678
1680
1681 state->bs_irel = idxRel;
1682 state->bs_numtuples = 0;
1683 state->bs_reltuples = 0;
1684 state->bs_currentInsertBuf = InvalidBuffer;
1685 state->bs_pagesPerRange = pagesPerRange;
1686 state->bs_currRangeStart = 0;
1687 state->bs_rmAccess = revmap;
1688 state->bs_bdesc = brin_build_desc(idxRel);
1689 state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1690 state->bs_leader = NULL;
1691 state->bs_worker_id = 0;
1692 state->bs_sortstate = NULL;
1693
1694 /* Remember the memory context to use for an empty tuple, if needed. */
1695 state->bs_context = CurrentMemoryContext;
1696 state->bs_emptyTuple = NULL;
1697 state->bs_emptyTupleLen = 0;
1698
1699 /*
1700 * Calculate the start of the last page range. Page numbers are 0-based,
1701 * so to calculate the index we need to subtract one. The integer division
1702 * gives us the index of the page range.
1703 */
1704 if (tablePages > 0)
1705 lastRange = ((tablePages - 1) / pagesPerRange) * pagesPerRange;
1706
1707 /* Now calculate the start of the next range. */
1708 state->bs_maxRangeStart = lastRange + state->bs_pagesPerRange;
1709
1710 return state;
1711}
1712
1713/*
1714 * Release resources associated with a BrinBuildState.
1715 */
/*
 * NOTE(review): the function-name line and the "blk" declaration were
 * dropped in extraction — confirm against canonical brin.c.
 */
1716static void
1718{
1719 /*
1720 * Release the last index buffer used. We might as well ensure that
1721 * whatever free space remains in that page is available in FSM, too.
1722 */
1723 if (!BufferIsInvalid(state->bs_currentInsertBuf))
1724 {
1725 Page page;
1726 Size freespace;
1728
1729 page = BufferGetPage(state->bs_currentInsertBuf);
1730 freespace = PageGetFreeSpace(page);
1731 blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
1732 ReleaseBuffer(state->bs_currentInsertBuf);
1733 RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1734 FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1735 }
1736
1737 brin_free_desc(state->bs_bdesc);
1738 pfree(state->bs_dtuple);
1739 pfree(state);
1740}
1741
1742/*
1743 * On the given BRIN index, summarize the heap page range that corresponds
1744 * to the heap block number given.
1745 *
1746 * This routine can run in parallel with insertions into the heap. To avoid
1747 * missing those values from the summary tuple, we first insert a placeholder
1748 * index tuple into the index, then execute the heap scan; transactions
1749 * concurrent with the scan update the placeholder tuple. After the scan, we
1750 * union the placeholder tuple with the one computed by this routine. The
1751 * update of the index value happens in a loop, so that if somebody updates
1752 * the placeholder tuple after we read it, we detect the case and try again.
1753 * This ensures that the concurrently inserted tuples are not lost.
1754 *
1755 * A further corner case is this routine being asked to summarize the partial
1756 * range at the end of the table. heapNumBlocks is the (possibly outdated)
1757 * table size; if we notice that the requested range lies beyond that size,
1758 * we re-compute the table size after inserting the placeholder tuple, to
1759 * avoid missing pages that were appended recently.
1760 */
/*
 * NOTE(review): extraction dropped the signature lines and several
 * declarations (phtup, scanNumBlks, newtup) plus some call arguments —
 * confirm against canonical brin.c before editing.
 */
1761static void
1764{
1765 Buffer phbuf;
1767 Size phsz;
1768 OffsetNumber offset;
1770
1771 /*
1772 * Insert the placeholder tuple
1773 */
1775 phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
1776 offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
1777 state->bs_rmAccess, &phbuf,
1778 heapBlk, phtup, phsz);
1779
1780 /*
1781 * Compute range end. We hold ShareUpdateExclusive lock on table, so it
1782 * cannot shrink concurrently (but it can grow).
1783 */
1784 Assert(heapBlk % state->bs_pagesPerRange == 0);
1785 if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1786 {
1787 /*
1788 * If we're asked to scan what we believe to be the final range on the
1789 * table (i.e. a range that might be partial) we need to recompute our
1790 * idea of what the latest page is after inserting the placeholder
1791 * tuple. Anyone that grows the table later will update the
1792 * placeholder tuple, so it doesn't matter that we won't scan these
1793 * pages ourselves. Careful: the table might have been extended
1794 * beyond the current range, so clamp our result.
1795 *
1796 * Fortunately, this should occur infrequently.
1797 */
1798 scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1799 state->bs_pagesPerRange);
1800 }
1801 else
1802 {
1803 /* Easy case: range is known to be complete */
1804 scanNumBlks = state->bs_pagesPerRange;
1805 }
1806
1807 /*
1808 * Execute the partial heap scan covering the heap blocks in the specified
1809 * page range, summarizing the heap tuples in it. This scan stops just
1810 * short of brinbuildCallback creating the new index entry.
1811 *
1812 * Note that it is critical we use the "any visible" mode of
1813 * table_index_build_range_scan here: otherwise, we would miss tuples
1814 * inserted by transactions that are still in progress, among other corner
1815 * cases.
1816 */
1817 state->bs_currRangeStart = heapBlk;
1818 table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
1819 heapBlk, scanNumBlks,
1821
1822 /*
1823 * Now we update the values obtained by the scan with the placeholder
1824 * tuple. We do this in a loop which only terminates if we're able to
1825 * update the placeholder tuple successfully; if we are not, this means
1826 * somebody else modified the placeholder tuple after we read it.
1827 */
1828 for (;;)
1829 {
1831 Size newsize;
1832 bool didupdate;
1833 bool samepage;
1834
1836
1837 /*
1838 * Update the summary tuple and try to update.
1839 */
1840 newtup = brin_form_tuple(state->bs_bdesc,
1841 heapBlk, state->bs_dtuple, &newsize);
1843 didupdate =
1844 brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
1845 state->bs_rmAccess, heapBlk, phbuf, offset,
1849
1850 /* If the update succeeded, we're done. */
1851 if (didupdate)
1852 break;
1853
1854 /*
1855 * If the update didn't work, it might be because somebody updated the
1856 * placeholder tuple concurrently. Extract the new version, union it
1857 * with the values we have from the scan, and start over. (There are
1858 * other reasons for the update to fail, but it's simple to treat them
1859 * the same.)
1860 */
1861 phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
1862 &offset, &phsz, BUFFER_LOCK_SHARE);
1863 /* the placeholder tuple must exist */
1864 if (phtup == NULL)
1865 elog(ERROR, "missing placeholder tuple");
1868
1869 /* merge it into the tuple from the heap scan */
1870 union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
1871 }
1872
1874}
1875
1876/*
1877 * Summarize page ranges that are not already summarized. If pageRange is
1878 * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
1879 * page range containing the given heap page number is scanned.
1880 * If include_partial is true, then the partial range at the end of the table
1881 * is summarized, otherwise not.
1882 *
1883 * For each new index tuple inserted, *numSummarized (if not NULL) is
1884 * incremented; for each existing tuple, *numExisting (if not NULL) is
1885 * incremented.
1886 */
/*
 * NOTE(review): extraction dropped the first signature line, several
 * declarations (state, revmap, heapNumBlocks, startBlk), the pageRange
 * comparison line before "startBlk = 0;", and the revmap lookup call inside
 * the loop — confirm against canonical brin.c before editing.
 */
1887static void
1889 bool include_partial, double *numSummarized, double *numExisting)
1890{
1893 IndexInfo *indexInfo = NULL;
1895 BlockNumber pagesPerRange;
1896 Buffer buf;
1898
1899 revmap = brinRevmapInitialize(index, &pagesPerRange);
1900
1901 /* determine range of pages to process */
1904 startBlk = 0;
1905 else
1906 {
1907 startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1908 heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
1909 }
1910 if (startBlk > heapNumBlocks)
1911 {
1912 /* Nothing to do if start point is beyond end of table */
1914 return;
1915 }
1916
1917 /*
1918 * Scan the revmap to find unsummarized items.
1919 */
1921 for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
1922 {
1923 BrinTuple *tup;
1924 OffsetNumber off;
1925
1926 /*
1927 * Unless requested to summarize even a partial range, go away now if
1928 * we think the next range is partial. Caller would pass true when it
1929 * is typically run once bulk data loading is done
1930 * (brin_summarize_new_values), and false when it is typically the
1931 * result of arbitrarily-scheduled maintenance command (vacuuming).
1932 */
1933 if (!include_partial &&
1934 (startBlk + pagesPerRange > heapNumBlocks))
1935 break;
1936
1938
1941 if (tup == NULL)
1942 {
1943 /* no revmap entry for this heap range. Summarize it. */
1944 if (state == NULL)
1945 {
1946 /* first time through */
1947 Assert(!indexInfo);
1949 pagesPerRange,
1951 indexInfo = BuildIndexInfo(index);
1952 }
1953 summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
1954
1955 /* and re-initialize state for the next range */
1956 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1957
1958 if (numSummarized)
1959 *numSummarized += 1.0;
1960 }
1961 else
1962 {
1963 if (numExisting)
1964 *numExisting += 1.0;
1966 }
1967 }
1968
1969 if (BufferIsValid(buf))
1971
1972 /* free resources */
1974 if (state)
1975 {
1977 pfree(indexInfo);
1978 }
1979}
1980
1981/*
1982 * Given a deformed tuple in the build state, convert it into the on-disk
1983 * format and insert it into the index, making the revmap point to it.
1984 */
/* NOTE(review): function-name line dropped in extraction — presumably
 * form_and_insert_tuple(BrinBuildState *state); confirm upstream. */
1985static void
1987{
1988 BrinTuple *tup;
1989 Size size;
1990
1991 tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1992 state->bs_dtuple, &size);
1993 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1994 &state->bs_currentInsertBuf, state->bs_currRangeStart,
1995 tup, size);
1996 state->bs_numtuples++;
1997
1998 pfree(tup);
1999}
2000
2001/*
2002 * Given a deformed tuple in the build state, convert it into the on-disk
2003 * format and write it to a (shared) tuplesort (the leader will insert it
2004 * into the index later).
2005 */
/* NOTE(review): function-name line dropped in extraction — presumably
 * form_and_spill_tuple(BrinBuildState *state); confirm upstream. */
2006static void
2008{
2009 BrinTuple *tup;
2010 Size size;
2011
2012 /* don't insert empty tuples in parallel build */
2013 if (state->bs_dtuple->bt_empty_range)
2014 return;
2015
2016 tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
2017 state->bs_dtuple, &size);
2018
2019 /* write the BRIN tuple to the tuplesort */
2020 tuplesort_putbrintuple(state->bs_sortstate, tup, size);
2021
2022 state->bs_numtuples++;
2023
2024 pfree(tup);
2025}
2026
2027/*
2028 * Given two deformed tuples, adjust the first one so that it's consistent
2029 * with the summary values in both.
2030 */
/*
 * NOTE(review): extraction dropped the function-name line, the "a"/"b"
 * parameter list, the memory context creation/switch statements, and the
 * per-key union-function arguments — confirm against canonical brin.c
 * before editing.
 */
2031static void
2033{
2034 int keyno;
2035 BrinMemTuple *db;
2036 MemoryContext cxt;
2038
2039 /* Use our own memory context to avoid retail pfree */
2041 "brin union",
2044 db = brin_deform_tuple(bdesc, b, NULL);
2046
2047 /*
2048 * Check if the ranges are empty.
2049 *
2050 * If at least one of them is empty, we don't need to call per-key union
2051 * functions at all. If "b" is empty, we just use "a" as the result (it
2052 * might be empty too, but that's fine). If "a" is empty but "b" is not,
2053 * we use "b" as the result (but we have to copy the data into "a" first).
2054 *
2055 * Only when both ranges are non-empty, we actually do the per-key merge.
2056 */
2057
2058 /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */
2059 if (db->bt_empty_range)
2060 {
2061 /* skip the per-key merge */
2063 return;
2064 }
2065
2066 /*
2067 * Now we know "b" is not empty. If "a" is empty, then "b" is the result.
2068 * But we need to copy the data from "b" to "a" first, because that's how
2069 * we pass result out.
2070 *
2071 * We have to copy all the global/per-key flags etc. too.
2072 */
2073 if (a->bt_empty_range)
2074 {
2075 for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
2076 {
2077 int i;
2078 BrinValues *col_a = &a->bt_columns[keyno];
2079 BrinValues *col_b = &db->bt_columns[keyno];
2080 BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
2081
2082 col_a->bv_allnulls = col_b->bv_allnulls;
2083 col_a->bv_hasnulls = col_b->bv_hasnulls;
2084
2085 /* If "b" has no data, we're done. */
2086 if (col_b->bv_allnulls)
2087 continue;
2088
2089 for (i = 0; i < opcinfo->oi_nstored; i++)
2090 col_a->bv_values[i] =
2091 datumCopy(col_b->bv_values[i],
2092 opcinfo->oi_typcache[i]->typbyval,
2093 opcinfo->oi_typcache[i]->typlen);
2094 }
2095
2096 /* "a" started empty, but "b" was not empty, so remember that */
2097 a->bt_empty_range = false;
2098
2099 /* skip the per-key merge */
2101 return;
2102 }
2103
2104 /* Now we know neither range is empty. */
2105 for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
2106 {
2107 FmgrInfo *unionFn;
2108 BrinValues *col_a = &a->bt_columns[keyno];
2109 BrinValues *col_b = &db->bt_columns[keyno];
2110 BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
2111
2112 if (opcinfo->oi_regular_nulls)
2113 {
2114 /* Does the "b" summary represent any NULL values? */
2115 bool b_has_nulls = (col_b->bv_hasnulls || col_b->bv_allnulls);
2116
2117 /* Adjust "hasnulls". */
2118 if (!col_a->bv_allnulls && b_has_nulls)
2119 col_a->bv_hasnulls = true;
2120
2121 /* If there are no values in B, there's nothing left to do. */
2122 if (col_b->bv_allnulls)
2123 continue;
2124
2125 /*
2126 * Adjust "allnulls". If A doesn't have values, just copy the
2127 * values from B into A, and we're done. We cannot run the
2128 * operators in this case, because values in A might contain
2129 * garbage. Note we already established that B contains values.
2130 *
2131 * Also adjust "hasnulls" in order not to forget the summary
2132 * represents NULL values. This is not redundant with the earlier
2133 * update, because that only happens when allnulls=false.
2134 */
2135 if (col_a->bv_allnulls)
2136 {
2137 int i;
2138
2139 col_a->bv_allnulls = false;
2140 col_a->bv_hasnulls = true;
2141
2142 for (i = 0; i < opcinfo->oi_nstored; i++)
2143 col_a->bv_values[i] =
2144 datumCopy(col_b->bv_values[i],
2145 opcinfo->oi_typcache[i]->typbyval,
2146 opcinfo->oi_typcache[i]->typlen);
2147
2148 continue;
2149 }
2150 }
2151
2152 unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
2154 FunctionCall3Coll(unionFn,
2155 bdesc->bd_index->rd_indcollation[keyno],
2159 }
2160
2162}
2163
2164/*
2165 * brin_vacuum_scan
2166 * Do a complete scan of the index during VACUUM.
2167 *
2168 * This routine scans the complete index looking for uncataloged index pages,
2169 * i.e. those that might have been lost due to a crash after index extension
2170 * and such.
2171 */
/*
 * NOTE(review): extraction dropped the signature line, the
 * BlockRangeReadStreamPrivate declaration, parts of the
 * read_stream_begin_relation() call, the per-buffer cleanup/release
 * statements in the loop body, and the final FreeSpaceMapVacuum() call —
 * confirm against canonical brin.c before editing.
 */
2172static void
2174{
2176 ReadStream *stream;
2177 Buffer buf;
2178
2179 p.current_blocknum = 0;
2181
2182 /*
2183 * It is safe to use batchmode as block_range_read_stream_cb takes no
2184 * locks.
2185 */
2189 strategy,
2190 idxrel,
2193 &p,
2194 0);
2195
2196 /*
2197 * Scan the index in physical order, and clean up any possible mess in
2198 * each page.
2199 */
2200 while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
2201 {
2203
2205
2207 }
2208
2209 read_stream_end(stream);
2210
2211 /*
2212 * Update all upper pages in the index's FSM, as well. This ensures not
2213 * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
2214 * but also that any pre-existing damage or out-of-dateness is repaired.
2215 */
2217}
2218
2005static bool
2312
2313static bool
2314check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
2315{
2316 int keyno;
2317
2318 /*
2319 * First check if there are any IS [NOT] NULL scan keys, and if we're
2320 * violating them.
2321 */
2322 for (keyno = 0; keyno < nnullkeys; keyno++)
2323 {
2324 ScanKey key = nullkeys[keyno];
2325
2326 Assert(key->sk_attno == bval->bv_attno);
2327
2328 /* Handle only IS NULL/IS NOT NULL tests */
2329 if (!(key->sk_flags & SK_ISNULL))
2330 continue;
2331
2332 if (key->sk_flags & SK_SEARCHNULL)
2333 {
2334 /* IS NULL scan key, but range has no NULLs */
2335 if (!bval->bv_allnulls && !bval->bv_hasnulls)
2336 return false;
2337 }
2338 else if (key->sk_flags & SK_SEARCHNOTNULL)
2339 {
2340 /*
2341 * For IS NOT NULL, we can only skip ranges that are known to have
2342 * only nulls.
2343 */
2344 if (bval->bv_allnulls)
2345 return false;
2346 }
2347 else
2348 {
2349 /*
2350 * Neither IS NULL nor IS NOT NULL was used; assume all indexable
2351 * operators are strict and thus return false with NULL value in
2352 * the scan key.
2353 */
2354 return false;
2355 }
2356 }
2357
2358 return true;
2359}
2360
2361/*
2362 * Create parallel context, and launch workers for leader.
2363 *
2364 * buildstate argument should be initialized (with the exception of the
2365 * tuplesort states, which may later be created based on shared
2366 * state initially set up here).
2367 *
2368 * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
2369 *
2370 * request is the target number of parallel worker processes to launch.
2371 *
2372 * Sets buildstate's BrinLeader, which caller must use to shut down parallel
2373 * mode by passing it to _brin_end_parallel() at the very end of its index
2374 * build. If not even a single worker process can be launched, this is
2375 * never set, and caller should proceed with a serial index build.
2376 */
2377static void
2379 bool isconcurrent, int request)
2380{
2381 ParallelContext *pcxt;
2382 int scantuplesortstates;
2383 Snapshot snapshot;
2385 Size estsort;
2386 BrinShared *brinshared;
2387 Sharedsort *sharedsort;
2389 WalUsage *walusage;
2390 BufferUsage *bufferusage;
2391 bool leaderparticipates = true;
2392 int querylen;
2393
2394#ifdef DISABLE_LEADER_PARTICIPATION
2395 leaderparticipates = false;
2396#endif
2397
2398 /*
2399 * Enter parallel mode, and create context for parallel build of brin
2400 * index
2401 */
2403 Assert(request > 0);
2404 pcxt = CreateParallelContext("postgres", "_brin_parallel_build_main",
2405 request);
2406
2407 scantuplesortstates = leaderparticipates ? request + 1 : request;
2408
2409 /*
2410 * Prepare for scan of the base relation. In a normal index build, we use
2411 * SnapshotAny because we must retrieve all tuples and do our own time
2412 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2413 * concurrent build, we take a regular MVCC snapshot and index whatever's
2414 * live according to that.
2415 */
2416 if (!isconcurrent)
2417 snapshot = SnapshotAny;
2418 else
2420
2421 /*
2422 * Estimate size for our own PARALLEL_KEY_BRIN_SHARED workspace.
2423 */
2426 estsort = tuplesort_estimate_shared(scantuplesortstates);
2428
2430
2431 /*
2432 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
2433 * and PARALLEL_KEY_BUFFER_USAGE.
2434 *
2435 * If there are no extensions loaded that care, we could skip this. We
2436 * have no way of knowing whether anyone's looking at pgWalUsage or
2437 * pgBufferUsage, so do it unconditionally.
2438 */
2440 mul_size(sizeof(WalUsage), pcxt->nworkers));
2443 mul_size(sizeof(BufferUsage), pcxt->nworkers));
2445
2446 /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
2448 {
2452 }
2453 else
2454 querylen = 0; /* keep compiler quiet */
2455
2456 /* Everyone's had a chance to ask for space, so now create the DSM */
2458
2459 /* If no DSM segment was available, back out (do serial build) */
2460 if (pcxt->seg == NULL)
2461 {
2462 if (IsMVCCSnapshot(snapshot))
2463 UnregisterSnapshot(snapshot);
2466 return;
2467 }
2468
2469 /* Store shared build state, for which we reserved space */
2470 brinshared = (BrinShared *) shm_toc_allocate(pcxt->toc, estbrinshared);
2471 /* Initialize immutable state */
2472 brinshared->heaprelid = RelationGetRelid(heap);
2473 brinshared->indexrelid = RelationGetRelid(index);
2474 brinshared->isconcurrent = isconcurrent;
2475 brinshared->scantuplesortstates = scantuplesortstates;
2476 brinshared->pagesPerRange = buildstate->bs_pagesPerRange;
2477 brinshared->queryid = pgstat_get_my_query_id();
2479 SpinLockInit(&brinshared->mutex);
2480
2481 /* Initialize mutable state */
2482 brinshared->nparticipantsdone = 0;
2483 brinshared->reltuples = 0.0;
2484 brinshared->indtuples = 0.0;
2485
2488 snapshot);
2489
2490 /*
2491 * Store shared tuplesort-private state, for which we reserved space.
2492 * Then, initialize opaque state using tuplesort routine.
2493 */
2494 sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
2495 tuplesort_initialize_shared(sharedsort, scantuplesortstates,
2496 pcxt->seg);
2497
2498 /*
2499 * Store shared tuplesort-private state, for which we reserved space.
2500 * Then, initialize opaque state using tuplesort routine.
2501 */
2502 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BRIN_SHARED, brinshared);
2503 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
2504
2505 /* Store query string for workers */
2507 {
2508 char *sharedquery;
2509
2510 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
2513 }
2514
2515 /*
2516 * Allocate space for each worker's WalUsage and BufferUsage; no need to
2517 * initialize.
2518 */
2519 walusage = shm_toc_allocate(pcxt->toc,
2520 mul_size(sizeof(WalUsage), pcxt->nworkers));
2521 shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
2522 bufferusage = shm_toc_allocate(pcxt->toc,
2523 mul_size(sizeof(BufferUsage), pcxt->nworkers));
2524 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
2525
2526 /* Launch workers, saving status for leader/caller */
2528 brinleader->pcxt = pcxt;
2529 brinleader->nparticipanttuplesorts = pcxt->nworkers_launched;
2531 brinleader->nparticipanttuplesorts++;
2532 brinleader->brinshared = brinshared;
2533 brinleader->sharedsort = sharedsort;
2534 brinleader->snapshot = snapshot;
2535 brinleader->walusage = walusage;
2536 brinleader->bufferusage = bufferusage;
2537
2538 /* If no workers were successfully launched, back out (do serial build) */
2539 if (pcxt->nworkers_launched == 0)
2540 {
2542 return;
2543 }
2544
2545 /* Save leader state now that it's clear build will be parallel */
2546 buildstate->bs_leader = brinleader;
2547
2548 /* Join heap scan ourselves */
2551
2552 /*
2553 * Caller needs to wait for all launched workers when we return. Make
2554 * sure that the failure-to-start case will not hang forever.
2555 */
2557}
2558
2559/*
2560 * Shut down workers, destroy parallel context, and end parallel mode.
2561 */
2562static void
2564{
2565 int i;
2566
2567 /* Shutdown worker processes */
2569
2570 /*
2571 * Next, accumulate WAL usage. (This must wait for the workers to finish,
2572 * or we might get incomplete data.)
2573 */
2574 for (i = 0; i < brinleader->pcxt->nworkers_launched; i++)
2575 InstrAccumParallelQuery(&brinleader->bufferusage[i], &brinleader->walusage[i]);
2576
2577 /* Free last reference to MVCC snapshot, if one was used */
2578 if (IsMVCCSnapshot(brinleader->snapshot))
2579 UnregisterSnapshot(brinleader->snapshot);
2582}
2583
2584/*
2585 * Within leader, wait for end of heap scan.
2586 *
2587 * When called, parallel heap scan started by _brin_begin_parallel() will
2588 * already be underway within worker processes (when leader participates
2589 * as a worker, we should end up here just as workers are finishing).
2590 *
2591 * Returns the total number of heap tuples scanned.
2592 */
2593static double
2595{
2596 BrinShared *brinshared = state->bs_leader->brinshared;
2597 int nparticipanttuplesorts;
2598
2599 nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
2600 for (;;)
2601 {
2602 SpinLockAcquire(&brinshared->mutex);
2603 if (brinshared->nparticipantsdone == nparticipanttuplesorts)
2604 {
2605 /* copy the data into leader state */
2606 state->bs_reltuples = brinshared->reltuples;
2607 state->bs_numtuples = brinshared->indtuples;
2608
2609 SpinLockRelease(&brinshared->mutex);
2610 break;
2611 }
2612 SpinLockRelease(&brinshared->mutex);
2613
2616 }
2617
2619
2620 return state->bs_reltuples;
2621}
2622
2623/*
2624 * Within leader, wait for end of heap scan and merge per-worker results.
2625 *
2626 * After waiting for all workers to finish, merge the per-worker results into
2627 * the complete index. The results from each worker are sorted by block number
2628 * (start of the page range). While combining the per-worker results we merge
2629 * summaries for the same page range, and also fill-in empty summaries for
2630 * ranges without any tuples.
2631 *
2632 * Returns the total number of heap tuples scanned.
2633 */
2634static double
2636{
2637 BrinTuple *btup;
2639 Size tuplen;
2640 BlockNumber prevblkno = InvalidBlockNumber;
2642 oldCxt;
2643 double reltuples;
2644
2645 /* wait for workers to scan table and produce partial results */
2646 reltuples = _brin_parallel_heapscan(state);
2647
2648 /* do the actual sort in the leader */
2649 tuplesort_performsort(state->bs_sortstate);
2650
2651 /*
2652 * Initialize BrinMemTuple we'll use to union summaries from workers (in
2653 * case they happened to produce parts of the same page range).
2654 */
2655 memtuple = brin_new_memtuple(state->bs_bdesc);
2656
2657 /*
2658 * Create a memory context we'll reset to combine results for a single
2659 * page range (received from the workers). We don't expect huge number of
2660 * overlaps under regular circumstances, because for large tables the
2661 * chunk size is likely larger than the BRIN page range), but it can
2662 * happen, and the union functions may do all kinds of stuff. So we better
2663 * reset the context once in a while.
2664 */
2666 "brin union",
2669
2670 /*
2671 * Read the BRIN tuples from the shared tuplesort, sorted by block number.
2672 * That probably gives us an index that is cheaper to scan, thanks to
2673 * mostly getting data from the same index page as before.
2674 */
2675 while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
2676 {
2677 /* Ranges should be multiples of pages_per_range for the index. */
2678 Assert(btup->bt_blkno % state->bs_leader->brinshared->pagesPerRange == 0);
2679
2680 /*
2681 * Do we need to union summaries for the same page range?
2682 *
2683 * If this is the first brin tuple we read, then just deform it into
2684 * the memtuple, and continue with the next one from tuplesort. We
2685 * however may need to insert empty summaries into the index.
2686 *
2687 * If it's the same block as the last we saw, we simply union the brin
2688 * tuple into it, and we're done - we don't even need to insert empty
2689 * ranges, because that was done earlier when we saw the first brin
2690 * tuple (for this range).
2691 *
2692 * Finally, if it's not the first brin tuple, and it's not the same
2693 * page range, we need to do the insert and then deform the tuple into
2694 * the memtuple. Then we'll insert empty ranges before the new brin
2695 * tuple, if needed.
2696 */
2697 if (prevblkno == InvalidBlockNumber)
2698 {
2699 /* First brin tuples, just deform into memtuple. */
2701
2702 /* continue to insert empty pages before thisblock */
2703 }
2704 else if (memtuple->bt_blkno == btup->bt_blkno)
2705 {
2706 /*
2707 * Not the first brin tuple, but same page range as the previous
2708 * one, so we can merge it into the memtuple.
2709 */
2710 union_tuples(state->bs_bdesc, memtuple, btup);
2711 continue;
2712 }
2713 else
2714 {
2715 BrinTuple *tmp;
2716 Size len;
2717
2718 /*
2719 * We got brin tuple for a different page range, so form a brin
2720 * tuple from the memtuple, insert it, and re-init the memtuple
2721 * from the new brin tuple.
2722 */
2723 tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
2724 memtuple, &len);
2725
2726 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
2727 &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
2728
2729 /*
2730 * Reset the per-output-range context. This frees all the memory
2731 * possibly allocated by the union functions, and also the BRIN
2732 * tuple we just formed and inserted.
2733 */
2735
2737
2738 /* continue to insert empty pages before thisblock */
2739 }
2740
2741 /* Fill empty ranges for all ranges missing in the tuplesort. */
2742 brin_fill_empty_ranges(state, prevblkno, btup->bt_blkno);
2743
2744 prevblkno = btup->bt_blkno;
2745 }
2746
2747 tuplesort_end(state->bs_sortstate);
2748
2749 /* Fill the BRIN tuple for the last page range with data. */
2750 if (prevblkno != InvalidBlockNumber)
2751 {
2752 BrinTuple *tmp;
2753 Size len;
2754
2755 tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
2756 memtuple, &len);
2757
2758 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
2759 &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
2760
2761 pfree(tmp);
2762 }
2763
2764 /* Fill empty ranges at the end, for all ranges missing in the tuplesort. */
2765 brin_fill_empty_ranges(state, prevblkno, state->bs_maxRangeStart);
2766
2767 /*
2768 * Switch back to the original memory context, and destroy the one we
2769 * created to isolate the union_tuple calls.
2770 */
2773
2774 return reltuples;
2775}
2776
2777/*
2778 * Returns size of shared memory required to store state for a parallel
2779 * brin index build based on the snapshot its parallel scan will use.
2780 */
2781static Size
2783{
2784 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
2785 return add_size(BUFFERALIGN(sizeof(BrinShared)),
2786 table_parallelscan_estimate(heap, snapshot));
2787}
2788
2789/*
2790 * Within leader, participate as a parallel worker.
2791 */
2792static void
2794{
2795 BrinLeader *brinleader = buildstate->bs_leader;
2796 int sortmem;
2797
2798 /*
2799 * Might as well use reliable figure when doling out maintenance_work_mem
2800 * (when requested number of workers were not launched, this will be
2801 * somewhat higher than it is for other workers).
2802 */
2804
2805 /* Perform work common to all participants */
2807 brinleader->sharedsort, heap, index, sortmem, true);
2808}
2809
2810/*
2811 * Perform a worker's portion of a parallel sort.
2812 *
2813 * This generates a tuplesort for the worker portion of the table.
2814 *
2815 * sortmem is the amount of working memory to use within each worker,
2816 * expressed in KBs.
2817 *
2818 * When this returns, workers are done, and need only release resources.
2819 */
2820static void
2822 BrinShared *brinshared, Sharedsort *sharedsort,
2823 Relation heap, Relation index,
2824 int sortmem, bool progress)
2825{
2827 TableScanDesc scan;
2828 double reltuples;
2829 IndexInfo *indexInfo;
2830
2831 /* Initialize local tuplesort coordination state */
2833 coordinate->isWorker = true;
2834 coordinate->nParticipants = -1;
2835 coordinate->sharedsort = sharedsort;
2836
2837 /* Begin "partial" tuplesort */
2840
2841 /* Join parallel scan */
2842 indexInfo = BuildIndexInfo(index);
2843 indexInfo->ii_Concurrent = brinshared->isconcurrent;
2844
2845 scan = table_beginscan_parallel(heap,
2847
2848 reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
2850
2851 /* insert the last item */
2853
2854 /* sort the BRIN ranges built by this worker */
2855 tuplesort_performsort(state->bs_sortstate);
2856
2857 state->bs_reltuples += reltuples;
2858
2859 /*
2860 * Done. Record ambuild statistics.
2861 */
2862 SpinLockAcquire(&brinshared->mutex);
2863 brinshared->nparticipantsdone++;
2864 brinshared->reltuples += state->bs_reltuples;
2865 brinshared->indtuples += state->bs_numtuples;
2866 SpinLockRelease(&brinshared->mutex);
2867
2868 /* Notify leader */
2870
2871 tuplesort_end(state->bs_sortstate);
2872}
2873
2874/*
2875 * Perform work within a launched parallel process.
2876 */
2877void
2879{
2880 char *sharedquery;
2881 BrinShared *brinshared;
2882 Sharedsort *sharedsort;
2884 Relation heapRel;
2885 Relation indexRel;
2888 WalUsage *walusage;
2889 BufferUsage *bufferusage;
2890 int sortmem;
2891
2892 /*
2893 * The only possible status flag that can be set to the parallel worker is
2894 * PROC_IN_SAFE_IC.
2895 */
2896 Assert((MyProc->statusFlags == 0) ||
2898
2899 /* Set debug_query_string for individual workers first */
2902
2903 /* Report the query string from leader */
2905
2906 /* Look up brin shared state */
2907 brinshared = shm_toc_lookup(toc, PARALLEL_KEY_BRIN_SHARED, false);
2908
2909 /* Open relations using lock modes known to be obtained by index.c */
2910 if (!brinshared->isconcurrent)
2911 {
2914 }
2915 else
2916 {
2919 }
2920
2921 /* Track query ID */
2922 pgstat_report_query_id(brinshared->queryid, false);
2923
2924 /* Open relations within worker */
2925 heapRel = table_open(brinshared->heaprelid, heapLockmode);
2926 indexRel = index_open(brinshared->indexrelid, indexLockmode);
2927
2929 brinshared->pagesPerRange,
2931
2932 /* Look up shared state private to tuplesort.c */
2933 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2934 tuplesort_attach_shared(sharedsort, seg);
2935
2936 /* Prepare to track buffer usage during parallel execution */
2938
2939 /*
2940 * Might as well use reliable figure when doling out maintenance_work_mem
2941 * (when requested number of workers were not launched, this will be
2942 * somewhat higher than it is for other workers).
2943 */
2945
2946 _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
2947 heapRel, indexRel, sortmem, false);
2948
2949 /* Report WAL/buffer usage during parallel execution */
2950 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2951 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2953 &walusage[ParallelWorkerNumber]);
2954
2955 index_close(indexRel, indexLockmode);
2956 table_close(heapRel, heapLockmode);
2957}
2958
2959/*
2960 * brin_build_empty_tuple
2961 * Maybe initialize a BRIN tuple representing empty range.
2962 *
2963 * Returns a BRIN tuple representing an empty page range starting at the
2964 * specified block number. The empty tuple is initialized only once, when it's
2965 * needed for the first time, stored in the memory context bs_context to ensure
2966 * proper life span, and reused on following calls. All empty tuples are
2967 * exactly the same except for the bt_blkno field, which is set to the value
2968 * in blkno parameter.
2969 */
2970static void
2972{
2973 /* First time an empty tuple is requested? If yes, initialize it. */
2974 if (state->bs_emptyTuple == NULL)
2975 {
2978
2979 /* Allocate the tuple in context for the whole index build. */
2980 oldcxt = MemoryContextSwitchTo(state->bs_context);
2981
2982 state->bs_emptyTuple = brin_form_tuple(state->bs_bdesc, blkno, dtuple,
2983 &state->bs_emptyTupleLen);
2984
2986 }
2987 else
2988 {
2989 /* If we already have an empty tuple, just update the block. */
2990 state->bs_emptyTuple->bt_blkno = blkno;
2991 }
2992}
2993
2994/*
2995 * brin_fill_empty_ranges
2996 * Add BRIN index tuples representing empty page ranges.
2997 *
2998 * prevRange/nextRange determine for which page ranges to add empty summaries.
2999 * Both boundaries are exclusive, i.e. only ranges starting at blkno for which
3000 * (prevRange < blkno < nextRange) will be added to the index.
3001 *
3002 * If prevRange is InvalidBlockNumber, this means there was no previous page
3003 * range (i.e. the first empty range to add is for blkno=0).
3004 *
3005 * The empty tuple is built only once, and then reused for all future calls.
3006 */
3007static void
3010{
3011 BlockNumber blkno;
3012
3013 /*
3014 * If we already summarized some ranges, we need to start with the next
3015 * one. Otherwise start from the first range of the table.
3016 */
3017 blkno = (prevRange == InvalidBlockNumber) ? 0 : (prevRange + state->bs_pagesPerRange);
3018
3019 /* Generate empty ranges until we hit the next non-empty range. */
3020 while (blkno < nextRange)
3021 {
3022 /* Did we already build the empty tuple? If not, do it now. */
3024
3025 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
3026 &state->bs_currentInsertBuf,
3027 blkno, state->bs_emptyTuple, state->bs_emptyTupleLen);
3028
3029 /* try next page range */
3030 blkno += state->bs_pagesPerRange;
3031 }
3032}
@ ACLCHECK_NOT_OWNER
Definition acl.h:186
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition aclchk.c:2672
bool object_ownercheck(Oid classid, Oid objectid, Oid roleid)
Definition aclchk.c:4133
int16 AttrNumber
Definition attnum.h:21
static bool validate(Port *port, const char *auth)
Definition auth-oauth.c:638
bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber blkno)
@ AVW_BRINSummarizeRange
Definition autovacuum.h:25
int ParallelWorkerNumber
Definition parallel.c:117
void InitializeParallelDSM(ParallelContext *pcxt)
Definition parallel.c:213
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition parallel.c:805
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition parallel.c:583
void DestroyParallelContext(ParallelContext *pcxt)
Definition parallel.c:959
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition parallel.c:175
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition parallel.c:702
void pgstat_report_query_id(int64 query_id, bool force)
int64 pgstat_get_my_query_id(void)
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
#define MaxBlockNumber
Definition block.h:35
static Datum values[MAXATTR]
Definition bootstrap.c:188
#define PARALLEL_KEY_BUFFER_USAGE
Definition brin.c:55
void brininsertcleanup(Relation index, IndexInfo *indexInfo)
Definition brin.c:517
static double _brin_parallel_merge(BrinBuildState *state)
Definition brin.c:2636
static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
Definition brin.c:2174
Datum brin_desummarize_range(PG_FUNCTION_ARGS)
Definition brin.c:1496
void brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition brin.c:964
static void terminate_brin_buildstate(BrinBuildState *state)
Definition brin.c:1718
#define PARALLEL_KEY_BRIN_SHARED
Definition brin.c:51
Datum brin_summarize_range(PG_FUNCTION_ARGS)
Definition brin.c:1386
IndexBulkDeleteResult * brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition brin.c:1308
static void form_and_spill_tuple(BrinBuildState *state)
Definition brin.c:2008
#define BRIN_ALL_BLOCKRANGES
Definition brin.c:213
Datum brin_summarize_new_values(PG_FUNCTION_ARGS)
Definition brin.c:1371
IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys)
Definition brin.c:544
bytea * brinoptions(Datum reloptions, bool validate)
Definition brin.c:1353
int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition brin.c:572
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting)
Definition brin.c:1889
static void form_and_insert_tuple(BrinBuildState *state)
Definition brin.c:1987
void brinbuildempty(Relation index)
Definition brin.c:1279
void brin_free_desc(BrinDesc *bdesc)
Definition brin.c:1641
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
Definition brin.c:2033
static void _brin_parallel_scan_and_build(BrinBuildState *state, BrinShared *brinshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
Definition brin.c:2822
static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange, BlockNumber tablePages)
Definition brin.c:1674
static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
Definition brin.c:2379
void brinGetStats(Relation index, BrinStatsData *stats)
Definition brin.c:1653
static void _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Relation index)
Definition brin.c:2794
static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, const Datum *values, const bool *nulls)
Definition brin.c:2221
static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
Definition brin.c:2564
static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition brin.c:2783
static void brin_fill_empty_ranges(BrinBuildState *state, BlockNumber prevRange, BlockNumber nextRange)
Definition brin.c:3009
IndexBuildResult * brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition brin.c:1110
IndexBulkDeleteResult * brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition brin.c:1323
static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks)
Definition brin.c:1763
#define ParallelTableScanFromBrinShared(shared)
Definition brin.c:120
#define PARALLEL_KEY_TUPLESORT
Definition brin.c:52
static void brinbuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
Definition brin.c:1051
bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition brin.c:349
#define PARALLEL_KEY_QUERY_TEXT
Definition brin.c:53
Datum brinhandler(PG_FUNCTION_ARGS)
Definition brin.c:254
BrinDesc * brin_build_desc(Relation rel)
Definition brin.c:1586
void _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition brin.c:2879
static void brin_build_empty_tuple(BrinBuildState *state, BlockNumber blkno)
Definition brin.c:2972
#define PARALLEL_KEY_WAL_USAGE
Definition brin.c:54
static double _brin_parallel_heapscan(BrinBuildState *state)
Definition brin.c:2595
static BrinInsertState * initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo)
Definition brin.c:320
static void brinbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate)
Definition brin.c:1000
void brinendscan(IndexScanDesc scan)
Definition brin.c:983
static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
Definition brin.c:2315
#define BrinGetPagesPerRange(relation)
Definition brin.h:41
#define BrinGetAutoSummarize(relation)
Definition brin.h:47
#define BRIN_LAST_OPTIONAL_PROCNUM
#define BRIN_PROCNUM_UNION
#define BRIN_PROCNUM_OPTIONS
#define BRIN_PROCNUM_OPCINFO
#define BRIN_PROCNUM_CONSISTENT
#define BRIN_elog(args)
#define BRIN_PROCNUM_ADDVALUE
#define BRIN_CURRENT_VERSION
Definition brin_page.h:72
#define BRIN_METAPAGE_BLKNO
Definition brin_page.h:75
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
void brin_page_cleanup(Relation idxrel, Buffer buf)
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, const BrinTuple *tup, Size itemsz)
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
void brinRevmapTerminate(BrinRevmap *revmap)
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
Definition brin_revmap.c:70
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode)
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition brin_tuple.c:446
BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size *size)
Definition brin_tuple.c:100
BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc)
Definition brin_tuple.c:482
void brin_free_tuple(BrinTuple *tuple)
Definition brin_tuple.c:433
BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
Definition brin_tuple.c:388
BrinMemTuple * brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
Definition brin_tuple.c:511
BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
Definition brin_tuple.c:553
bool brinvalidate(Oid opclassoid)
#define SizeOfBrinCreateIdx
Definition brin_xlog.h:55
#define XLOG_BRIN_CREATE_INDEX
Definition brin_xlog.h:31
int Buffer
Definition buf.h:23
#define BufferIsInvalid(buffer)
Definition buf.h:31
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:974
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5505
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5522
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3063
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:874
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
@ EB_SKIP_EXTENSION_LOCK
Definition bufmgr.h:75
@ EB_LOCK_FIRST
Definition bufmgr.h:87
#define BMR_REL(p_rel)
Definition bufmgr.h:114
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
Size PageGetFreeSpace(const PageData *page)
Definition bufpage.c:906
static char * PageGetContents(Page page)
Definition bufpage.h:283
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:417
PageData * Page
Definition bufpage.h:81
#define Min(x, y)
Definition c.h:1093
#define MAXALIGN(LEN)
Definition c.h:898
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define BUFFERALIGN(LEN)
Definition c.h:900
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
#define lengthof(array)
Definition c.h:875
#define OidIsValid(objectId)
Definition c.h:860
size_t Size
Definition c.h:691
bool ConditionVariableCancelSleep(void)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void ConditionVariableSignal(ConditionVariable *cv)
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition datum.c:132
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errhint(const char *fmt,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition fmgr.c:1198
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition fmgr.c:1173
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition fmgr.c:582
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define DirectFunctionCall2(func, arg1, arg2)
Definition fmgr.h:686
#define PG_GETARG_DATUM(n)
Definition fmgr.h:268
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define FunctionCall1(flinfo, arg1)
Definition fmgr.h:702
#define PG_RETURN_INT32(x)
Definition fmgr.h:355
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition freespace.c:377
void FreeSpaceMapVacuum(Relation rel)
Definition freespace.c:358
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:194
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition genam.c:80
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition genam.h:95
IndexUniqueCheck
Definition genam.h:124
int maintenance_work_mem
Definition globals.c:133
int NewGUCNestLevel(void)
Definition guc.c:2142
void RestrictSearchPath(void)
Definition guc.c:2153
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition guc.c:2169
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition index.c:3584
IndexInfo * BuildIndexInfo(Relation index)
Definition index.c:2429
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition indexam.c:917
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:133
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:219
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition instrument.c:209
void InstrStartParallelQuery(void)
Definition instrument.c:201
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
int LOCKMODE
Definition lockdefs.h:26
#define AccessExclusiveLock
Definition lockdefs.h:43
#define AccessShareLock
Definition lockdefs.h:36
#define ShareUpdateExclusiveLock
Definition lockdefs.h:39
#define ShareLock
Definition lockdefs.h:40
#define RowExclusiveLock
Definition lockdefs.h:38
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define ALLOCSET_SMALL_SIZES
Definition memutils.h:170
#define SECURITY_RESTRICTED_OPERATION
Definition miscadmin.h:319
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition miscinit.c:613
Oid GetUserId(void)
Definition miscinit.c:470
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition miscinit.c:620
static char * errmsg
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
@ OBJECT_INDEX
FormData_pg_attribute * Form_pg_attribute
const void size_t len
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int progress
Definition pgbench.c:262
#define ERRCODE_UNDEFINED_TABLE
Definition pgbench.c:79
#define pgstat_count_index_scan(rel)
Definition pgstat.h:708
const char * debug_query_string
Definition postgres.c:91
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
#define InvalidOid
unsigned int Oid
static int fb(int x)
#define PROC_IN_SAFE_IC
Definition proc.h:60
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
static void addrange(struct cvec *cv, chr from, chr to)
Definition regc_cvec.c:90
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
@ RELOPT_KIND_BRIN
Definition reloptions.h:53
@ RELOPT_TYPE_INT
Definition reloptions.h:33
@ RELOPT_TYPE_BOOL
Definition reloptions.h:31
@ MAIN_FORKNUM
Definition relpath.h:58
@ INIT_FORKNUM
Definition relpath.h:61
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition selfuncs.c:8988
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
#define SK_SEARCHNOTNULL
Definition skey.h:122
#define SK_SEARCHNULL
Definition skey.h:121
#define SK_ISNULL
Definition skey.h:115
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:824
#define SnapshotAny
Definition snapmgr.h:33
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
PGPROC * MyProc
Definition proc.c:68
BlockNumber bs_maxRangeStart
Definition brin.c:167
Size bs_emptyTupleLen
Definition brin.c:173
MemoryContext bs_context
Definition brin.c:174
BrinMemTuple * bs_dtuple
Definition brin.c:170
Relation bs_irel
Definition brin.c:161
BlockNumber bs_pagesPerRange
Definition brin.c:165
double bs_numtuples
Definition brin.c:162
Buffer bs_currentInsertBuf
Definition brin.c:164
BrinRevmap * bs_rmAccess
Definition brin.c:168
Tuplesortstate * bs_sortstate
Definition brin.c:189
BrinLeader * bs_leader
Definition brin.c:181
int bs_worker_id
Definition brin.c:182
BlockNumber bs_currRangeStart
Definition brin.c:166
double bs_reltuples
Definition brin.c:163
BrinDesc * bs_bdesc
Definition brin.c:169
BrinTuple * bs_emptyTuple
Definition brin.c:172
BrinDesc * bis_desc
Definition brin.c:199
BrinRevmap * bis_rmAccess
Definition brin.c:198
BlockNumber bis_pages_per_range
Definition brin.c:200
int nparticipanttuplesorts
Definition brin.c:137
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]
Definition brin_tuple.h:55
bool bt_empty_range
Definition brin_tuple.h:47
BlockNumber lastRevmapPage
Definition brin_page.h:69
BlockNumber pagesPerRange
Definition brin_page.h:68
BlockNumber bo_pagesPerRange
Definition brin.c:208
BrinDesc * bo_bdesc
Definition brin.c:210
BrinRevmap * bo_rmAccess
Definition brin.c:209
slock_t mutex
Definition brin.c:91
int scantuplesortstates
Definition brin.c:72
int nparticipantsdone
Definition brin.c:103
Oid heaprelid
Definition brin.c:68
BlockNumber pagesPerRange
Definition brin.c:71
ConditionVariable workersdonecv
Definition brin.c:83
Oid indexrelid
Definition brin.c:69
bool isconcurrent
Definition brin.c:70
double indtuples
Definition brin.c:105
int64 queryid
Definition brin.c:75
double reltuples
Definition brin.c:104
BlockNumber revmapNumPages
Definition brin.h:36
BlockNumber pagesPerRange
Definition brin.h:35
BlockNumber bt_blkno
Definition brin_tuple.h:66
bool bv_hasnulls
Definition brin_tuple.h:32
AttrNumber bv_attno
Definition brin_tuple.h:31
bool bv_allnulls
Definition brin_tuple.h:33
NodeTag type
Definition amapi.h:234
double heap_tuples
Definition genam.h:40
double index_tuples
Definition genam.h:41
BlockNumber num_pages
Definition genam.h:85
double num_index_tuples
Definition genam.h:87
void * ii_AmCache
Definition execnodes.h:234
int ii_ParallelWorkers
Definition execnodes.h:229
bool ii_Concurrent
Definition execnodes.h:221
MemoryContext ii_Context
Definition execnodes.h:237
struct ScanKeyData * keyData
Definition relscan.h:142
Relation indexRelation
Definition relscan.h:138
Relation index
Definition genam.h:54
bool analyze_only
Definition genam.h:56
BufferAccessStrategy strategy
Definition genam.h:61
uint8 statusFlags
Definition proc.h:207
dsm_segment * seg
Definition parallel.h:44
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
int nworkers_launched
Definition parallel.h:39
Form_pg_index rd_index
Definition rel.h:192
Form_pg_class rd_rel
Definition rel.h:111
Definition type.h:96
Definition c.h:778
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition tableam.c:166
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition tableam.c:131
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition tableam.c:146
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition tableam.h:1798
static double table_index_build_scan(Relation table_rel, Relation index_rel, IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition tableam.h:1765
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition tidbitmap.c:432
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:178
void tuplesort_performsort(Tuplesortstate *state)
Definition tuplesort.c:1259
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition tuplesort.c:3210
Size tuplesort_estimate_shared(int nWorkers)
Definition tuplesort.c:3189
void tuplesort_end(Tuplesortstate *state)
Definition tuplesort.c:847
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition tuplesort.c:3233
#define TUPLESORT_NONE
Definition tuplesort.h:67
Tuplesortstate * tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate, int sortopt)
BrinTuple * tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
void tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition vacuum.h:63
void ExitParallelMode(void)
Definition xact.c:1066
void EnterParallelMode(void)
Definition xact.c:1053
bool RecoveryInProgress(void)
Definition xlog.c:6444
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

Typedef Documentation

◆ BrinBuildState

◆ BrinInsertState

◆ BrinLeader

◆ BrinOpaque

◆ BrinShared

Function Documentation

◆ _brin_begin_parallel()

static void _brin_begin_parallel ( BrinBuildState buildstate,
Relation  heap,
Relation  index,
bool  isconcurrent,
int  request 
)
static

Definition at line 2379 of file brin.c.

2381{
2382 ParallelContext *pcxt;
2383 int scantuplesortstates;
2384 Snapshot snapshot;
2386 Size estsort;
2387 BrinShared *brinshared;
2388 Sharedsort *sharedsort;
2390 WalUsage *walusage;
2391 BufferUsage *bufferusage;
2392 bool leaderparticipates = true;
2393 int querylen;
2394
2395#ifdef DISABLE_LEADER_PARTICIPATION
2396 leaderparticipates = false;
2397#endif
2398
2399 /*
2400 * Enter parallel mode, and create context for parallel build of brin
2401 * index
2402 */
2404 Assert(request > 0);
2405 pcxt = CreateParallelContext("postgres", "_brin_parallel_build_main",
2406 request);
2407
2408 scantuplesortstates = leaderparticipates ? request + 1 : request;
2409
2410 /*
2411 * Prepare for scan of the base relation. In a normal index build, we use
2412 * SnapshotAny because we must retrieve all tuples and do our own time
2413 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2414 * concurrent build, we take a regular MVCC snapshot and index whatever's
2415 * live according to that.
2416 */
2417 if (!isconcurrent)
2418 snapshot = SnapshotAny;
2419 else
2421
2422 /*
2423 * Estimate size for our own PARALLEL_KEY_BRIN_SHARED workspace.
2424 */
2427 estsort = tuplesort_estimate_shared(scantuplesortstates);
2429
2431
2432 /*
2433 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
2434 * and PARALLEL_KEY_BUFFER_USAGE.
2435 *
2436 * If there are no extensions loaded that care, we could skip this. We
2437 * have no way of knowing whether anyone's looking at pgWalUsage or
2438 * pgBufferUsage, so do it unconditionally.
2439 */
2441 mul_size(sizeof(WalUsage), pcxt->nworkers));
2444 mul_size(sizeof(BufferUsage), pcxt->nworkers));
2446
2447 /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
2449 {
2453 }
2454 else
2455 querylen = 0; /* keep compiler quiet */
2456
2457 /* Everyone's had a chance to ask for space, so now create the DSM */
2459
2460 /* If no DSM segment was available, back out (do serial build) */
2461 if (pcxt->seg == NULL)
2462 {
2463 if (IsMVCCSnapshot(snapshot))
2464 UnregisterSnapshot(snapshot);
2467 return;
2468 }
2469
2470 /* Store shared build state, for which we reserved space */
2471 brinshared = (BrinShared *) shm_toc_allocate(pcxt->toc, estbrinshared);
2472 /* Initialize immutable state */
2473 brinshared->heaprelid = RelationGetRelid(heap);
2474 brinshared->indexrelid = RelationGetRelid(index);
2475 brinshared->isconcurrent = isconcurrent;
2476 brinshared->scantuplesortstates = scantuplesortstates;
2477 brinshared->pagesPerRange = buildstate->bs_pagesPerRange;
2478 brinshared->queryid = pgstat_get_my_query_id();
2480 SpinLockInit(&brinshared->mutex);
2481
2482 /* Initialize mutable state */
2483 brinshared->nparticipantsdone = 0;
2484 brinshared->reltuples = 0.0;
2485 brinshared->indtuples = 0.0;
2486
2489 snapshot);
2490
2491 /*
2492 * Store shared tuplesort-private state, for which we reserved space.
2493 * Then, initialize opaque state using tuplesort routine.
2494 */
2495 sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
2496 tuplesort_initialize_shared(sharedsort, scantuplesortstates,
2497 pcxt->seg);
2498
 2499 /*
 2500 * Insert the shared build state and the tuplesort-private state into
 2501 * the shared memory TOC, so workers can look them up by key.
 2502 */
2503 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BRIN_SHARED, brinshared);
2504 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
2505
2506 /* Store query string for workers */
2508 {
2509 char *sharedquery;
2510
2511 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
2514 }
2515
2516 /*
2517 * Allocate space for each worker's WalUsage and BufferUsage; no need to
2518 * initialize.
2519 */
2520 walusage = shm_toc_allocate(pcxt->toc,
2521 mul_size(sizeof(WalUsage), pcxt->nworkers));
2522 shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
2523 bufferusage = shm_toc_allocate(pcxt->toc,
2524 mul_size(sizeof(BufferUsage), pcxt->nworkers));
2525 shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
2526
2527 /* Launch workers, saving status for leader/caller */
2529 brinleader->pcxt = pcxt;
2530 brinleader->nparticipanttuplesorts = pcxt->nworkers_launched;
2532 brinleader->nparticipanttuplesorts++;
2533 brinleader->brinshared = brinshared;
2534 brinleader->sharedsort = sharedsort;
2535 brinleader->snapshot = snapshot;
2536 brinleader->walusage = walusage;
2537 brinleader->bufferusage = bufferusage;
2538
2539 /* If no workers were successfully launched, back out (do serial build) */
2540 if (pcxt->nworkers_launched == 0)
2541 {
2543 return;
2544 }
2545
2546 /* Save leader state now that it's clear build will be parallel */
2547 buildstate->bs_leader = brinleader;
2548
2549 /* Join heap scan ourselves */
2552
2553 /*
2554 * Caller needs to wait for all launched workers when we return. Make
2555 * sure that the failure-to-start case will not hang forever.
2556 */
2558}

References _brin_end_parallel(), _brin_leader_participate_as_worker(), _brin_parallel_estimate_shared(), Assert, ConditionVariableInit(), CreateParallelContext(), debug_query_string, DestroyParallelContext(), EnterParallelMode(), ParallelContext::estimator, ExitParallelMode(), fb(), GetTransactionSnapshot(), BrinShared::heaprelid, BrinShared::indexrelid, BrinShared::indtuples, InitializeParallelDSM(), BrinShared::isconcurrent, IsMVCCSnapshot, LaunchParallelWorkers(), mul_size(), BrinShared::mutex, BrinShared::nparticipantsdone, ParallelContext::nworkers, ParallelContext::nworkers_launched, BrinShared::pagesPerRange, palloc0_object, PARALLEL_KEY_BRIN_SHARED, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_WAL_USAGE, ParallelTableScanFromBrinShared, pgstat_get_my_query_id(), BrinShared::queryid, RegisterSnapshot(), RelationGetRelid, BrinShared::reltuples, BrinShared::scantuplesortstates, ParallelContext::seg, shm_toc_allocate(), shm_toc_estimate_chunk, shm_toc_estimate_keys, shm_toc_insert(), SnapshotAny, SpinLockInit(), table_parallelscan_initialize(), ParallelContext::toc, tuplesort_estimate_shared(), tuplesort_initialize_shared(), UnregisterSnapshot(), WaitForParallelWorkersToAttach(), and BrinShared::workersdonecv.

Referenced by brinbuild().

◆ _brin_end_parallel()

static void _brin_end_parallel ( BrinLeader brinleader,
BrinBuildState state 
)
static

Definition at line 2564 of file brin.c.

2565{
2566 int i;
2567
2568 /* Shutdown worker processes */
2570
2571 /*
2572 * Next, accumulate WAL usage. (This must wait for the workers to finish,
2573 * or we might get incomplete data.)
2574 */
2575 for (i = 0; i < brinleader->pcxt->nworkers_launched; i++)
2576 InstrAccumParallelQuery(&brinleader->bufferusage[i], &brinleader->walusage[i]);
2577
2578 /* Free last reference to MVCC snapshot, if one was used */
2579 if (IsMVCCSnapshot(brinleader->snapshot))
2580 UnregisterSnapshot(brinleader->snapshot);
2583}

References DestroyParallelContext(), ExitParallelMode(), fb(), i, InstrAccumParallelQuery(), IsMVCCSnapshot, UnregisterSnapshot(), and WaitForParallelWorkersToFinish().

Referenced by _brin_begin_parallel(), and brinbuild().

◆ _brin_leader_participate_as_worker()

static void _brin_leader_participate_as_worker ( BrinBuildState buildstate,
Relation  heap,
Relation  index 
)
static

Definition at line 2794 of file brin.c.

2795{
2796 BrinLeader *brinleader = buildstate->bs_leader;
2797 int sortmem;
2798
2799 /*
2800 * Might as well use reliable figure when doling out maintenance_work_mem
2801 * (when requested number of workers were not launched, this will be
2802 * somewhat higher than it is for other workers).
2803 */
2805
2806 /* Perform work common to all participants */
2808 brinleader->sharedsort, heap, index, sortmem, true);
2809}

References _brin_parallel_scan_and_build(), fb(), maintenance_work_mem, and BrinLeader::nparticipanttuplesorts.

Referenced by _brin_begin_parallel().

◆ _brin_parallel_build_main()

void _brin_parallel_build_main ( dsm_segment seg,
shm_toc toc 
)

Definition at line 2879 of file brin.c.

2880{
2881 char *sharedquery;
2882 BrinShared *brinshared;
2883 Sharedsort *sharedsort;
2885 Relation heapRel;
2886 Relation indexRel;
2889 WalUsage *walusage;
2890 BufferUsage *bufferusage;
2891 int sortmem;
2892
2893 /*
2894 * The only possible status flag that can be set to the parallel worker is
2895 * PROC_IN_SAFE_IC.
2896 */
2897 Assert((MyProc->statusFlags == 0) ||
2899
2900 /* Set debug_query_string for individual workers first */
2903
2904 /* Report the query string from leader */
2906
2907 /* Look up brin shared state */
2908 brinshared = shm_toc_lookup(toc, PARALLEL_KEY_BRIN_SHARED, false);
2909
2910 /* Open relations using lock modes known to be obtained by index.c */
2911 if (!brinshared->isconcurrent)
2912 {
2915 }
2916 else
2917 {
2920 }
2921
2922 /* Track query ID */
2923 pgstat_report_query_id(brinshared->queryid, false);
2924
2925 /* Open relations within worker */
2926 heapRel = table_open(brinshared->heaprelid, heapLockmode);
2927 indexRel = index_open(brinshared->indexrelid, indexLockmode);
2928
2930 brinshared->pagesPerRange,
2932
2933 /* Look up shared state private to tuplesort.c */
2934 sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2935 tuplesort_attach_shared(sharedsort, seg);
2936
2937 /* Prepare to track buffer usage during parallel execution */
2939
2940 /*
2941 * Might as well use reliable figure when doling out maintenance_work_mem
2942 * (when requested number of workers were not launched, this will be
2943 * somewhat higher than it is for other workers).
2944 */
2946
2947 _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
2948 heapRel, indexRel, sortmem, false);
2949
2950 /* Report WAL/buffer usage during parallel execution */
2951 bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2952 walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2954 &walusage[ParallelWorkerNumber]);
2955
2956 index_close(indexRel, indexLockmode);
2957 table_close(heapRel, heapLockmode);
2958}

References _brin_parallel_scan_and_build(), AccessExclusiveLock, Assert, debug_query_string, fb(), BrinShared::heaprelid, index_close(), index_open(), BrinShared::indexrelid, initialize_brin_buildstate(), InstrEndParallelQuery(), InstrStartParallelQuery(), InvalidBlockNumber, BrinShared::isconcurrent, maintenance_work_mem, MyProc, BrinShared::pagesPerRange, PARALLEL_KEY_BRIN_SHARED, PARALLEL_KEY_BUFFER_USAGE, PARALLEL_KEY_QUERY_TEXT, PARALLEL_KEY_TUPLESORT, PARALLEL_KEY_WAL_USAGE, ParallelWorkerNumber, pgstat_report_activity(), pgstat_report_query_id(), PROC_IN_SAFE_IC, BrinShared::queryid, RowExclusiveLock, BrinShared::scantuplesortstates, ShareLock, ShareUpdateExclusiveLock, shm_toc_lookup(), STATE_RUNNING, PGPROC::statusFlags, table_close(), table_open(), and tuplesort_attach_shared().

◆ _brin_parallel_estimate_shared()

static Size _brin_parallel_estimate_shared ( Relation  heap,
Snapshot  snapshot 
)
static

Definition at line 2783 of file brin.c.

2784{
2785 /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
2786 return add_size(BUFFERALIGN(sizeof(BrinShared)),
2787 table_parallelscan_estimate(heap, snapshot));
2788}

References add_size(), BUFFERALIGN, and table_parallelscan_estimate().

Referenced by _brin_begin_parallel().

◆ _brin_parallel_heapscan()

static double _brin_parallel_heapscan ( BrinBuildState state)
static

Definition at line 2595 of file brin.c.

2596{
2597 BrinShared *brinshared = state->bs_leader->brinshared;
2598 int nparticipanttuplesorts;
2599
2600 nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
2601 for (;;)
2602 {
2603 SpinLockAcquire(&brinshared->mutex);
2604 if (brinshared->nparticipantsdone == nparticipanttuplesorts)
2605 {
2606 /* copy the data into leader state */
2607 state->bs_reltuples = brinshared->reltuples;
2608 state->bs_numtuples = brinshared->indtuples;
2609
2610 SpinLockRelease(&brinshared->mutex);
2611 break;
2612 }
2613 SpinLockRelease(&brinshared->mutex);
2614
2617 }
2618
2620
2621 return state->bs_reltuples;
2622}

References ConditionVariableCancelSleep(), ConditionVariableSleep(), fb(), BrinShared::indtuples, BrinShared::mutex, BrinShared::nparticipantsdone, BrinShared::reltuples, SpinLockAcquire(), SpinLockRelease(), and BrinShared::workersdonecv.

Referenced by _brin_parallel_merge().

◆ _brin_parallel_merge()

static double _brin_parallel_merge ( BrinBuildState state)
static

Definition at line 2636 of file brin.c.

2637{
2638 BrinTuple *btup;
2640 Size tuplen;
2641 BlockNumber prevblkno = InvalidBlockNumber;
2643 oldCxt;
2644 double reltuples;
2645
2646 /* wait for workers to scan table and produce partial results */
2647 reltuples = _brin_parallel_heapscan(state);
2648
2649 /* do the actual sort in the leader */
2650 tuplesort_performsort(state->bs_sortstate);
2651
2652 /*
2653 * Initialize BrinMemTuple we'll use to union summaries from workers (in
2654 * case they happened to produce parts of the same page range).
2655 */
2656 memtuple = brin_new_memtuple(state->bs_bdesc);
2657
2658 /*
2659 * Create a memory context we'll reset to combine results for a single
2660 * page range (received from the workers). We don't expect huge number of
2661 * overlaps under regular circumstances, because for large tables the
 2662 * chunk size is likely larger than the BRIN page range, but it can
2663 * happen, and the union functions may do all kinds of stuff. So we better
2664 * reset the context once in a while.
2665 */
2667 "brin union",
2670
2671 /*
2672 * Read the BRIN tuples from the shared tuplesort, sorted by block number.
2673 * That probably gives us an index that is cheaper to scan, thanks to
2674 * mostly getting data from the same index page as before.
2675 */
2676 while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
2677 {
2678 /* Ranges should be multiples of pages_per_range for the index. */
2679 Assert(btup->bt_blkno % state->bs_leader->brinshared->pagesPerRange == 0);
2680
2681 /*
2682 * Do we need to union summaries for the same page range?
2683 *
2684 * If this is the first brin tuple we read, then just deform it into
2685 * the memtuple, and continue with the next one from tuplesort. We
2686 * however may need to insert empty summaries into the index.
2687 *
2688 * If it's the same block as the last we saw, we simply union the brin
2689 * tuple into it, and we're done - we don't even need to insert empty
2690 * ranges, because that was done earlier when we saw the first brin
2691 * tuple (for this range).
2692 *
2693 * Finally, if it's not the first brin tuple, and it's not the same
2694 * page range, we need to do the insert and then deform the tuple into
2695 * the memtuple. Then we'll insert empty ranges before the new brin
2696 * tuple, if needed.
2697 */
2698 if (prevblkno == InvalidBlockNumber)
2699 {
 2700 /* First brin tuple, just deform into memtuple. */
2702
2703 /* continue to insert empty pages before thisblock */
2704 }
2705 else if (memtuple->bt_blkno == btup->bt_blkno)
2706 {
2707 /*
2708 * Not the first brin tuple, but same page range as the previous
2709 * one, so we can merge it into the memtuple.
2710 */
2711 union_tuples(state->bs_bdesc, memtuple, btup);
2712 continue;
2713 }
2714 else
2715 {
2716 BrinTuple *tmp;
2717 Size len;
2718
2719 /*
2720 * We got brin tuple for a different page range, so form a brin
2721 * tuple from the memtuple, insert it, and re-init the memtuple
2722 * from the new brin tuple.
2723 */
2724 tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
2725 memtuple, &len);
2726
2727 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
2728 &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
2729
2730 /*
2731 * Reset the per-output-range context. This frees all the memory
2732 * possibly allocated by the union functions, and also the BRIN
2733 * tuple we just formed and inserted.
2734 */
2736
2738
2739 /* continue to insert empty pages before thisblock */
2740 }
2741
2742 /* Fill empty ranges for all ranges missing in the tuplesort. */
2743 brin_fill_empty_ranges(state, prevblkno, btup->bt_blkno);
2744
2745 prevblkno = btup->bt_blkno;
2746 }
2747
2748 tuplesort_end(state->bs_sortstate);
2749
2750 /* Fill the BRIN tuple for the last page range with data. */
2751 if (prevblkno != InvalidBlockNumber)
2752 {
2753 BrinTuple *tmp;
2754 Size len;
2755
2756 tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
2757 memtuple, &len);
2758
2759 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
2760 &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
2761
2762 pfree(tmp);
2763 }
2764
2765 /* Fill empty ranges at the end, for all ranges missing in the tuplesort. */
2766 brin_fill_empty_ranges(state, prevblkno, state->bs_maxRangeStart);
2767
2768 /*
2769 * Switch back to the original memory context, and destroy the one we
2770 * created to isolate the union_tuple calls.
2771 */
2774
2775 return reltuples;
2776}

References _brin_parallel_heapscan(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, brin_deform_tuple(), brin_doinsert(), brin_fill_empty_ranges(), brin_form_tuple(), brin_new_memtuple(), BrinTuple::bt_blkno, CurrentMemoryContext, fb(), InvalidBlockNumber, len, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), pfree(), tuplesort_end(), tuplesort_getbrintuple(), tuplesort_performsort(), and union_tuples().

Referenced by brinbuild().

◆ _brin_parallel_scan_and_build()

static void _brin_parallel_scan_and_build ( BrinBuildState state,
BrinShared brinshared,
Sharedsort sharedsort,
Relation  heap,
Relation  index,
int  sortmem,
bool  progress 
)
static

Definition at line 2822 of file brin.c.

2826{
2828 TableScanDesc scan;
2829 double reltuples;
2830 IndexInfo *indexInfo;
2831
2832 /* Initialize local tuplesort coordination state */
2834 coordinate->isWorker = true;
2835 coordinate->nParticipants = -1;
2836 coordinate->sharedsort = sharedsort;
2837
2838 /* Begin "partial" tuplesort */
2841
2842 /* Join parallel scan */
2843 indexInfo = BuildIndexInfo(index);
2844 indexInfo->ii_Concurrent = brinshared->isconcurrent;
2845
2846 scan = table_beginscan_parallel(heap,
2848
2849 reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
2851
2852 /* insert the last item */
2854
2855 /* sort the BRIN ranges built by this worker */
2856 tuplesort_performsort(state->bs_sortstate);
2857
2858 state->bs_reltuples += reltuples;
2859
2860 /*
2861 * Done. Record ambuild statistics.
2862 */
2863 SpinLockAcquire(&brinshared->mutex);
2864 brinshared->nparticipantsdone++;
2865 brinshared->reltuples += state->bs_reltuples;
2866 brinshared->indtuples += state->bs_numtuples;
2867 SpinLockRelease(&brinshared->mutex);
2868
2869 /* Notify leader */
2871
2872 tuplesort_end(state->bs_sortstate);
2873}

References brinbuildCallbackParallel(), BuildIndexInfo(), ConditionVariableSignal(), fb(), form_and_spill_tuple(), IndexInfo::ii_Concurrent, BrinShared::indtuples, BrinShared::isconcurrent, BrinShared::mutex, BrinShared::nparticipantsdone, palloc0_object, ParallelTableScanFromBrinShared, BrinShared::reltuples, SpinLockAcquire(), SpinLockRelease(), table_beginscan_parallel(), table_index_build_scan(), tuplesort_begin_index_brin(), tuplesort_end(), TUPLESORT_NONE, tuplesort_performsort(), and BrinShared::workersdonecv.

Referenced by _brin_leader_participate_as_worker(), and _brin_parallel_build_main().

◆ add_values_to_range()

static bool add_values_to_range ( Relation  idxRel,
BrinDesc bdesc,
BrinMemTuple dtup,
const Datum values,
const bool nulls 
)
static

Definition at line 2221 of file brin.c.

2223{
2224 int keyno;
2225
2226 /* If the range starts empty, we're certainly going to modify it. */
2227 bool modified = dtup->bt_empty_range;
2228
2229 /*
2230 * Compare the key values of the new tuple to the stored index values; our
2231 * deformed tuple will get updated if the new tuple doesn't fit the
2232 * original range (note this means we can't break out of the loop early).
2233 * Make a note of whether this happens, so that we know to insert the
2234 * modified tuple later.
2235 */
2236 for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
2237 {
2238 Datum result;
2239 BrinValues *bval;
2241 bool has_nulls;
2242
2243 bval = &dtup->bt_columns[keyno];
2244
2245 /*
2246 * Does the range have actual NULL values? Either of the flags can be
2247 * set, but we ignore the state before adding the first row.
2248 *
2249 * We have to remember this, because we'll modify the flags and we
2250 * need to know if the range started as empty.
2251 */
2252 has_nulls = ((!dtup->bt_empty_range) &&
2253 (bval->bv_hasnulls || bval->bv_allnulls));
2254
2255 /*
2256 * If the value we're adding is NULL, handle it locally. Otherwise
2257 * call the BRIN_PROCNUM_ADDVALUE procedure.
2258 */
2259 if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno])
2260 {
2261 /*
2262 * If the new value is null, we record that we saw it if it's the
2263 * first one; otherwise, there's nothing to do.
2264 */
2265 if (!bval->bv_hasnulls)
2266 {
2267 bval->bv_hasnulls = true;
2268 modified = true;
2269 }
2270
2271 continue;
2272 }
2273
2274 addValue = index_getprocinfo(idxRel, keyno + 1,
2276 result = FunctionCall4Coll(addValue,
2277 idxRel->rd_indcollation[keyno],
2279 PointerGetDatum(bval),
2280 values[keyno],
2281 BoolGetDatum(nulls[keyno]));
2282 /* if that returned true, we need to insert the updated tuple */
2283 modified |= DatumGetBool(result);
2284
2285 /*
2286 * If the range had actual NULL values (i.e. did not start empty),
2287 * make sure we don't forget about the NULL values. Either the
2288 * allnulls flag is still set to true, or (if the opclass cleared it)
2289 * we need to set hasnulls=true.
2290 *
2291 * XXX This can only happen when the opclass modified the tuple, so
2292 * the modified flag should be set.
2293 */
2294 if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls))
2295 {
2297 bval->bv_hasnulls = true;
2298 }
2299 }
2300
2301 /*
2302 * After updating summaries for all the keys, mark it as not empty.
2303 *
2304 * If we're actually changing the flag value (i.e. tuple started as
2305 * empty), we should have modified the tuple. So we should not see an
2306 * empty range that was not modified.
2307 */
2308 Assert(!dtup->bt_empty_range || modified);
2309 dtup->bt_empty_range = false;
2310
2311 return modified;
2312}

References Assert, BoolGetDatum(), BRIN_PROCNUM_ADDVALUE, BrinValues::bv_allnulls, BrinValues::bv_hasnulls, DatumGetBool(), fb(), FunctionCall4Coll(), index_getprocinfo(), PointerGetDatum(), and values.

Referenced by brinbuildCallback(), brinbuildCallbackParallel(), and brininsert().

◆ brin_build_desc()

BrinDesc * brin_build_desc ( Relation  rel)

Definition at line 1586 of file brin.c.

1587{
1589 BrinDesc *bdesc;
1590 TupleDesc tupdesc;
1591 int totalstored = 0;
1592 int keyno;
1593 long totalsize;
1594 MemoryContext cxt;
1596
1598 "brin desc cxt",
1601 tupdesc = RelationGetDescr(rel);
1602
1603 /*
1604 * Obtain BrinOpcInfo for each indexed column. While at it, accumulate
1605 * the number of columns stored, since the number is opclass-defined.
1606 */
1607 opcinfo = palloc_array(BrinOpcInfo *, tupdesc->natts);
1608 for (keyno = 0; keyno < tupdesc->natts; keyno++)
1609 {
1611 Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
1612
1614
1615 opcinfo[keyno] = (BrinOpcInfo *)
1617 totalstored += opcinfo[keyno]->oi_nstored;
1618 }
1619
1620 /* Allocate our result struct and fill it in */
1621 totalsize = offsetof(BrinDesc, bd_info) +
1622 sizeof(BrinOpcInfo *) * tupdesc->natts;
1623
1624 bdesc = palloc(totalsize);
1625 bdesc->bd_context = cxt;
1626 bdesc->bd_index = rel;
1627 bdesc->bd_tupdesc = tupdesc;
1628 bdesc->bd_disktdesc = NULL; /* generated lazily */
1629 bdesc->bd_totalstored = totalstored;
1630
1631 for (keyno = 0; keyno < tupdesc->natts; keyno++)
1632 bdesc->bd_info[keyno] = opcinfo[keyno];
1633 pfree(opcinfo);
1634
1636
1637 return bdesc;
1638}

References ALLOCSET_SMALL_SIZES, AllocSetContextCreate, BRIN_PROCNUM_OPCINFO, CurrentMemoryContext, DatumGetPointer(), fb(), FunctionCall1, index_getprocinfo(), MemoryContextSwitchTo(), TupleDescData::natts, ObjectIdGetDatum(), palloc(), palloc_array, pfree(), RelationGetDescr, and TupleDescAttr().

Referenced by brin_page_items(), brinbeginscan(), initialize_brin_buildstate(), and initialize_brin_insertstate().

◆ brin_build_empty_tuple()

static void brin_build_empty_tuple ( BrinBuildState state,
BlockNumber  blkno 
)
static

Definition at line 2972 of file brin.c.

2973{
2974 /* First time an empty tuple is requested? If yes, initialize it. */
2975 if (state->bs_emptyTuple == NULL)
2976 {
2979
2980 /* Allocate the tuple in context for the whole index build. */
2981 oldcxt = MemoryContextSwitchTo(state->bs_context);
2982
2983 state->bs_emptyTuple = brin_form_tuple(state->bs_bdesc, blkno, dtuple,
2984 &state->bs_emptyTupleLen);
2985
2987 }
2988 else
2989 {
2990 /* If we already have an empty tuple, just update the block. */
2991 state->bs_emptyTuple->bt_blkno = blkno;
2992 }
2993}

References brin_form_tuple(), brin_new_memtuple(), fb(), and MemoryContextSwitchTo().

Referenced by brin_fill_empty_ranges().

◆ brin_desummarize_range()

Datum brin_desummarize_range ( PG_FUNCTION_ARGS  )

Definition at line 1496 of file brin.c.

1497{
1498 Oid indexoid = PG_GETARG_OID(0);
1500 BlockNumber heapBlk;
1501 Oid heapoid;
1502 Relation heapRel;
1503 Relation indexRel;
1504 bool done;
1505
1506 if (RecoveryInProgress())
1507 ereport(ERROR,
1509 errmsg("recovery is in progress"),
1510 errhint("BRIN control functions cannot be executed during recovery.")));
1511
1512 if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
1513 ereport(ERROR,
1515 errmsg("block number out of range: %" PRId64,
1516 heapBlk64)));
1517 heapBlk = (BlockNumber) heapBlk64;
1518
1519 /*
1520 * We must lock table before index to avoid deadlocks. However, if the
1521 * passed indexoid isn't an index then IndexGetRelation() will fail.
1522 * Rather than emitting a not-very-helpful error message, postpone
1523 * complaining, expecting that the is-it-an-index test below will fail.
1524 *
1525 * Unlike brin_summarize_range(), autovacuum never calls this. Hence, we
1526 * don't switch userid.
1527 */
1528 heapoid = IndexGetRelation(indexoid, true);
1529 if (OidIsValid(heapoid))
1531 else
1532 heapRel = NULL;
1533
1534 indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1535
1536 /* Must be a BRIN index */
1537 if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1538 indexRel->rd_rel->relam != BRIN_AM_OID)
1539 ereport(ERROR,
1541 errmsg("\"%s\" is not a BRIN index",
1542 RelationGetRelationName(indexRel))));
1543
1544 /* User must own the index (comparable to privileges needed for VACUUM) */
1547 RelationGetRelationName(indexRel));
1548
1549 /*
1550 * Since we did the IndexGetRelation call above without any lock, it's
1551 * barely possible that a race against an index drop/recreation could have
1552 * netted us the wrong table. Recheck.
1553 */
1554 if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1555 ereport(ERROR,
1557 errmsg("could not open parent table of index \"%s\"",
1558 RelationGetRelationName(indexRel))));
1559
1560 /* see gin_clean_pending_list() */
1561 if (indexRel->rd_index->indisvalid)
1562 {
1563 /* the revmap does the hard work */
1564 do
1565 {
1566 done = brinRevmapDesummarizeRange(indexRel, heapBlk);
1567 }
1568 while (!done);
1569 }
1570 else
1573 errmsg("index \"%s\" is not valid",
1574 RelationGetRelationName(indexRel))));
1575
1578
1580}

References aclcheck_error(), ACLCHECK_NOT_OWNER, brinRevmapDesummarizeRange(), DEBUG1, ereport, errcode(), ERRCODE_UNDEFINED_TABLE, errhint(), errmsg, ERROR, fb(), GetUserId(), index_close(), index_open(), IndexGetRelation(), MaxBlockNumber, OBJECT_INDEX, object_ownercheck(), OidIsValid, PG_GETARG_INT64, PG_GETARG_OID, PG_RETURN_VOID, RelationData::rd_index, RelationData::rd_rel, RecoveryInProgress(), RelationGetRelationName, ShareUpdateExclusiveLock, table_close(), and table_open().

◆ brin_fill_empty_ranges()

static void brin_fill_empty_ranges ( BrinBuildState state,
BlockNumber  prevRange,
BlockNumber  nextRange 
)
static

Definition at line 3009 of file brin.c.

3011{
3012 BlockNumber blkno;
3013
3014 /*
3015 * If we already summarized some ranges, we need to start with the next
3016 * one. Otherwise start from the first range of the table.
3017 */
3018 blkno = (prevRange == InvalidBlockNumber) ? 0 : (prevRange + state->bs_pagesPerRange);
3019
3020 /* Generate empty ranges until we hit the next non-empty range. */
3021 while (blkno < nextRange)
3022 {
3023 /* Did we already build the empty tuple? If not, do it now. */
3025
3026 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
3027 &state->bs_currentInsertBuf,
3028 blkno, state->bs_emptyTuple, state->bs_emptyTupleLen);
3029
3030 /* try next page range */
3031 blkno += state->bs_pagesPerRange;
3032 }
3033}

References brin_build_empty_tuple(), brin_doinsert(), fb(), and InvalidBlockNumber.

Referenced by _brin_parallel_merge(), and brinbuild().

◆ brin_free_desc()

void brin_free_desc ( BrinDesc bdesc)

Definition at line 1641 of file brin.c.

1642{
1643 /* make sure the tupdesc is still valid */
1644 Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
1645 /* no need for retail pfree */
1646 MemoryContextDelete(bdesc->bd_context);
1647}

References Assert, fb(), and MemoryContextDelete().

Referenced by brin_page_items(), brinendscan(), and terminate_brin_buildstate().

◆ brin_summarize_new_values()

Datum brin_summarize_new_values ( PG_FUNCTION_ARGS  )

Definition at line 1371 of file brin.c.

1372{
1373 Datum relation = PG_GETARG_DATUM(0);
1374
1376 relation,
1378}

References BRIN_ALL_BLOCKRANGES, brin_summarize_range(), DirectFunctionCall2, Int64GetDatum(), and PG_GETARG_DATUM.

◆ brin_summarize_range()

Datum brin_summarize_range ( PG_FUNCTION_ARGS  )

Definition at line 1386 of file brin.c.

1387{
1388 Oid indexoid = PG_GETARG_OID(0);
1390 BlockNumber heapBlk;
1391 Oid heapoid;
1392 Relation indexRel;
1393 Relation heapRel;
1394 Oid save_userid;
1395 int save_sec_context;
1396 int save_nestlevel;
1397 double numSummarized = 0;
1398
1399 if (RecoveryInProgress())
1400 ereport(ERROR,
1402 errmsg("recovery is in progress"),
1403 errhint("BRIN control functions cannot be executed during recovery.")));
1404
1406 ereport(ERROR,
1408 errmsg("block number out of range: %" PRId64, heapBlk64)));
1409 heapBlk = (BlockNumber) heapBlk64;
1410
1411 /*
1412 * We must lock table before index to avoid deadlocks. However, if the
1413 * passed indexoid isn't an index then IndexGetRelation() will fail.
1414 * Rather than emitting a not-very-helpful error message, postpone
1415 * complaining, expecting that the is-it-an-index test below will fail.
1416 */
1417 heapoid = IndexGetRelation(indexoid, true);
1418 if (OidIsValid(heapoid))
1419 {
1421
1422 /*
1423 * Autovacuum calls us. For its benefit, switch to the table owner's
1424 * userid, so that any index functions are run as that user. Also
1425 * lock down security-restricted operations and arrange to make GUC
1426 * variable changes local to this command. This is harmless, albeit
1427 * unnecessary, when called from SQL, because we fail shortly if the
1428 * user does not own the index.
1429 */
1430 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1431 SetUserIdAndSecContext(heapRel->rd_rel->relowner,
1432 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1433 save_nestlevel = NewGUCNestLevel();
1435 }
1436 else
1437 {
1438 heapRel = NULL;
1439 /* Set these just to suppress "uninitialized variable" warnings */
1440 save_userid = InvalidOid;
1441 save_sec_context = -1;
1442 save_nestlevel = -1;
1443 }
1444
1445 indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
1446
1447 /* Must be a BRIN index */
1448 if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
1449 indexRel->rd_rel->relam != BRIN_AM_OID)
1450 ereport(ERROR,
1452 errmsg("\"%s\" is not a BRIN index",
1453 RelationGetRelationName(indexRel))));
1454
1455 /* User must own the index (comparable to privileges needed for VACUUM) */
1456 if (heapRel != NULL && !object_ownercheck(RelationRelationId, indexoid, save_userid))
1458 RelationGetRelationName(indexRel));
1459
1460 /*
1461 * Since we did the IndexGetRelation call above without any lock, it's
1462 * barely possible that a race against an index drop/recreation could have
1463 * netted us the wrong table. Recheck.
1464 */
1465 if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
1466 ereport(ERROR,
1468 errmsg("could not open parent table of index \"%s\"",
1469 RelationGetRelationName(indexRel))));
1470
1471 /* see gin_clean_pending_list() */
1472 if (indexRel->rd_index->indisvalid)
1473 brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
1474 else
1477 errmsg("index \"%s\" is not valid",
1478 RelationGetRelationName(indexRel))));
1479
1480 /* Roll back any GUC changes executed by index functions */
1481 AtEOXact_GUC(false, save_nestlevel);
1482
1483 /* Restore userid and security context */
1484 SetUserIdAndSecContext(save_userid, save_sec_context);
1485
1488
1490}

References aclcheck_error(), ACLCHECK_NOT_OWNER, AtEOXact_GUC(), BRIN_ALL_BLOCKRANGES, brinsummarize(), DEBUG1, ereport, errcode(), ERRCODE_UNDEFINED_TABLE, errhint(), errmsg, ERROR, fb(), GetUserIdAndSecContext(), index_close(), index_open(), IndexGetRelation(), InvalidOid, NewGUCNestLevel(), OBJECT_INDEX, object_ownercheck(), OidIsValid, PG_GETARG_INT64, PG_GETARG_OID, PG_RETURN_INT32, RelationData::rd_index, RelationData::rd_rel, RecoveryInProgress(), RelationGetRelationName, RestrictSearchPath(), SECURITY_RESTRICTED_OPERATION, SetUserIdAndSecContext(), ShareUpdateExclusiveLock, table_close(), and table_open().

Referenced by brin_summarize_new_values(), and perform_work_item().

◆ brin_vacuum_scan()

static void brin_vacuum_scan ( Relation  idxrel,
BufferAccessStrategy  strategy 
)
static

Definition at line 2174 of file brin.c.

2175{
2177 ReadStream *stream;
2178 Buffer buf;
2179
2180 p.current_blocknum = 0;
2182
2183 /*
2184 * It is safe to use batchmode as block_range_read_stream_cb takes no
2185 * locks.
2186 */
2190 strategy,
2191 idxrel,
2194 &p,
2195 0);
2196
2197 /*
2198 * Scan the index in physical order, and clean up any possible mess in
2199 * each page.
2200 */
2201 while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
2202 {
2204
2206
2208 }
2209
2210 read_stream_end(stream);
2211
2212 /*
2213 * Update all upper pages in the index's FSM, as well. This ensures not
2214 * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
2215 * but also that any pre-existing damage or out-of-dateness is repaired.
2216 */
2218}

References block_range_read_stream_cb(), brin_page_cleanup(), buf, CHECK_FOR_INTERRUPTS, BlockRangeReadStreamPrivate::current_blocknum, fb(), FreeSpaceMapVacuum(), InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, MAIN_FORKNUM, read_stream_begin_relation(), read_stream_end(), READ_STREAM_FULL, READ_STREAM_MAINTENANCE, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, RelationGetNumberOfBlocks, and ReleaseBuffer().

Referenced by brinvacuumcleanup().

◆ brinbeginscan()

IndexScanDesc brinbeginscan ( Relation  r,
int  nkeys,
int  norderbys 
)

Definition at line 544 of file brin.c.

545{
546 IndexScanDesc scan;
547 BrinOpaque *opaque;
548
549 scan = RelationGetIndexScan(r, nkeys, norderbys);
550
551 opaque = palloc_object(BrinOpaque);
553 opaque->bo_bdesc = brin_build_desc(r);
554 scan->opaque = opaque;
555
556 return scan;
557}

References BrinOpaque::bo_bdesc, BrinOpaque::bo_pagesPerRange, BrinOpaque::bo_rmAccess, brin_build_desc(), brinRevmapInitialize(), IndexScanDescData::opaque, palloc_object, and RelationGetIndexScan().

Referenced by brinhandler().

◆ brinbuild()

IndexBuildResult * brinbuild ( Relation  heap,
Relation  index,
IndexInfo indexInfo 
)

Definition at line 1110 of file brin.c.

1111{
1112 IndexBuildResult *result;
1113 double reltuples;
1114 double idxtuples;
1117 Buffer meta;
1118 BlockNumber pagesPerRange;
1119
1120 /*
1121 * We expect to be called exactly once for any index relation.
1122 */
1124 elog(ERROR, "index \"%s\" already contains data",
1126
1127 /*
1128 * Critical section not required, because on error the creation of the
1129 * whole relation will be rolled back.
1130 */
1131
1135
1138 MarkBufferDirty(meta);
1139
1141 {
1144 Page page;
1145
1147 xlrec.pagesPerRange = BrinGetPagesPerRange(index);
1148
1152
1154
1155 page = BufferGetPage(meta);
1156 PageSetLSN(page, recptr);
1157 }
1158
1159 UnlockReleaseBuffer(meta);
1160
1161 /*
1162 * Initialize our state, including the deformed tuple state.
1163 */
1164 revmap = brinRevmapInitialize(index, &pagesPerRange);
1165 state = initialize_brin_buildstate(index, revmap, pagesPerRange,
1167
1168 /*
1169 * Attempt to launch parallel worker scan when required
1170 *
1171 * XXX plan_create_index_workers makes the number of workers dependent on
1172 * maintenance_work_mem, requiring 32MB for each worker. That makes sense
1173 * for btree, but not for BRIN, which can do with much less memory. So
1174 * maybe make that somehow less strict, optionally?
1175 */
1176 if (indexInfo->ii_ParallelWorkers > 0)
1177 _brin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
1178 indexInfo->ii_ParallelWorkers);
1179
1180 /*
1181 * If parallel build requested and at least one worker process was
1182 * successfully launched, set up coordination state, wait for workers to
1183 * complete. Then read all tuples from the shared tuplesort and insert
1184 * them into the index.
1185 *
1186 * In serial mode, simply scan the table and build the index one index
1187 * tuple at a time.
1188 */
1189 if (state->bs_leader)
1190 {
1192
1194 coordinate->isWorker = false;
1195 coordinate->nParticipants =
1196 state->bs_leader->nparticipanttuplesorts;
1197 coordinate->sharedsort = state->bs_leader->sharedsort;
1198
1199 /*
1200 * Begin leader tuplesort.
1201 *
1202 * In cases where parallelism is involved, the leader receives the
1203 * same share of maintenance_work_mem as a serial sort (it is
1204 * generally treated in the same way as a serial sort once we return).
1205 * Parallel worker Tuplesortstates will have received only a fraction
1206 * of maintenance_work_mem, though.
1207 *
1208 * We rely on the lifetime of the Leader Tuplesortstate almost not
1209 * overlapping with any worker Tuplesortstate's lifetime. There may
1210 * be some small overlap, but that's okay because we rely on leader
1211 * Tuplesortstate only allocating a small, fixed amount of memory
1212 * here. When its tuplesort_performsort() is called (by our caller),
1213 * and significant amounts of memory are likely to be used, all
1214 * workers must have already freed almost all memory held by their
1215 * Tuplesortstates (they are about to go away completely, too). The
1216 * overall effect is that maintenance_work_mem always represents an
1217 * absolute high watermark on the amount of memory used by a CREATE
1218 * INDEX operation, regardless of the use of parallelism or any other
1219 * factor.
1220 */
1221 state->bs_sortstate =
1224
1225 /* scan the relation and merge per-worker results */
1226 reltuples = _brin_parallel_merge(state);
1227
1228 _brin_end_parallel(state->bs_leader, state);
1229 }
1230 else /* no parallel index build */
1231 {
1232 /*
1233 * Now scan the relation. No syncscan allowed here because we want
1234 * the heap blocks in physical order (we want to produce the ranges
1235 * starting from block 0, and the callback also relies on this to not
1236 * generate summary for the same range twice).
1237 */
1238 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
1240
1241 /*
1242 * process the final batch
1243 *
1244 * XXX Note this does not update state->bs_currRangeStart, i.e. it
1245 * stays set to the last range added to the index. This is OK, because
1246 * that's what brin_fill_empty_ranges expects.
1247 */
1249
1250 /*
1251 * Backfill the final ranges with empty data.
1252 *
1253 * This saves us from doing what amounts to full table scans when the
1254 * index with a predicate like WHERE (nonnull_column IS NULL), or
1255 * other very selective predicates.
1256 */
1258 state->bs_currRangeStart,
1259 state->bs_maxRangeStart);
1260 }
1261
1262 /* release resources */
1263 idxtuples = state->bs_numtuples;
1264 brinRevmapTerminate(state->bs_rmAccess);
1266
1267 /*
1268 * Return statistics
1269 */
1271
1272 result->heap_tuples = reltuples;
1273 result->index_tuples = idxtuples;
1274
1275 return result;
1276}

References _brin_begin_parallel(), _brin_end_parallel(), _brin_parallel_merge(), Assert, BMR_REL, BRIN_CURRENT_VERSION, brin_fill_empty_ranges(), BRIN_METAPAGE_BLKNO, brin_metapage_init(), brinbuildCallback(), BrinGetPagesPerRange, brinRevmapInitialize(), brinRevmapTerminate(), BufferGetBlockNumber(), BufferGetPage(), EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, elog, ERROR, ExtendBufferedRel(), fb(), form_and_insert_tuple(), IndexBuildResult::heap_tuples, IndexInfo::ii_Concurrent, IndexInfo::ii_ParallelWorkers, IndexBuildResult::index_tuples, initialize_brin_buildstate(), MAIN_FORKNUM, maintenance_work_mem, MarkBufferDirty(), PageSetLSN(), palloc0_object, palloc_object, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, SizeOfBrinCreateIdx, table_index_build_scan(), terminate_brin_buildstate(), tuplesort_begin_index_brin(), TUPLESORT_NONE, UnlockReleaseBuffer(), xl_brin_createidx::version, XLOG_BRIN_CREATE_INDEX, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by brinhandler().

◆ brinbuildCallback()

static void brinbuildCallback ( Relation  index,
ItemPointer  tid,
Datum values,
bool isnull,
bool  tupleIsAlive,
void brstate 
)
static

Definition at line 1000 of file brin.c.

1006{
1009
1011
1012 /*
1013 * If we're in a block that belongs to a future range, summarize what
1014 * we've got and start afresh. Note the scan might have skipped many
1015 * pages, if they were devoid of live tuples; make sure to insert index
1016 * tuples for those too.
1017 */
1018 while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
1019 {
1020
1022 "brinbuildCallback: completed a range: %u--%u",
1023 state->bs_currRangeStart,
1024 state->bs_currRangeStart + state->bs_pagesPerRange));
1025
1026 /* create the index tuple and insert it */
1028
1029 /* set state to correspond to the next range */
1030 state->bs_currRangeStart += state->bs_pagesPerRange;
1031
1032 /* re-initialize state for it */
1033 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1034 }
1035
1036 /* Accumulate the current tuple into the running state */
1037 (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
1038 values, isnull);
1039}

References add_values_to_range(), BRIN_elog, brin_memtuple_initialize(), DEBUG2, fb(), form_and_insert_tuple(), ItemPointerGetBlockNumber(), and values.

Referenced by brinbuild(), and summarize_range().

◆ brinbuildCallbackParallel()

static void brinbuildCallbackParallel ( Relation  index,
ItemPointer  tid,
Datum values,
bool isnull,
bool  tupleIsAlive,
void brstate 
)
static

Definition at line 1051 of file brin.c.

1057{
1060
1062
1063 /*
1064 * If we're in a block that belongs to a different range, summarize what
1065 * we've got and start afresh. Note the scan might have skipped many
1066 * pages, if they were devoid of live tuples; we do not create empty BRIN
1067 * ranges here - the leader is responsible for filling them in.
1068 *
1069 * Unlike serial builds, parallel index builds allow synchronized seqscans
1070 * (because that's what parallel scans do). This means the block may wrap
1071 * around to the beginning of the relation, so the condition needs to
1072 * check for both future and past ranges.
1073 */
1074 if ((thisblock < state->bs_currRangeStart) ||
1075 (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1))
1076 {
1077
1079 "brinbuildCallbackParallel: completed a range: %u--%u",
1080 state->bs_currRangeStart,
1081 state->bs_currRangeStart + state->bs_pagesPerRange));
1082
1083 /* create the index tuple and write it into the tuplesort */
1085
1086 /*
1087 * Set state to correspond to the next range (for this block).
1088 *
1089 * This skips ranges that are either empty (and so we don't get any
1090 * tuples to summarize), or processed by other workers. We can't
1091 * differentiate those cases here easily, so we leave it up to the
1092 * leader to fill empty ranges where needed.
1093 */
1094 state->bs_currRangeStart
1095 = state->bs_pagesPerRange * (thisblock / state->bs_pagesPerRange);
1096
1097 /* re-initialize state for it */
1098 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1099 }
1100
1101 /* Accumulate the current tuple into the running state */
1102 (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
1103 values, isnull);
1104}

References add_values_to_range(), BRIN_elog, brin_memtuple_initialize(), DEBUG2, fb(), form_and_spill_tuple(), ItemPointerGetBlockNumber(), and values.

Referenced by _brin_parallel_scan_and_build().

◆ brinbuildempty()

◆ brinbulkdelete()

IndexBulkDeleteResult * brinbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void callback_state 
)

Definition at line 1308 of file brin.c.

1310{
1311 /* allocate stats if first time through, else re-use existing struct */
1312 if (stats == NULL)
1314
1315 return stats;
1316}

References fb(), and palloc0_object.

Referenced by brinhandler().

◆ brinendscan()

void brinendscan ( IndexScanDesc  scan)

Definition at line 983 of file brin.c.

984{
985 BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
986
987 brinRevmapTerminate(opaque->bo_rmAccess);
988 brin_free_desc(opaque->bo_bdesc);
989 pfree(opaque);
990}

References BrinOpaque::bo_bdesc, BrinOpaque::bo_rmAccess, brin_free_desc(), brinRevmapTerminate(), IndexScanDescData::opaque, and pfree().

Referenced by brinhandler().

◆ bringetbitmap()

int64 bringetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 572 of file brin.c.

573{
577 Oid heapOid;
578 Relation heapRel;
579 BrinOpaque *opaque;
580 BlockNumber nblocks;
581 int64 totalpages = 0;
582 FmgrInfo *consistentFn;
587 Size btupsz = 0;
588 ScanKey **keys,
589 **nullkeys;
590 int *nkeys,
591 *nnullkeys;
592 char *ptr;
593 Size len;
594 char *tmp PG_USED_FOR_ASSERTS_ONLY;
595
596 opaque = (BrinOpaque *) scan->opaque;
597 bdesc = opaque->bo_bdesc;
599 if (scan->instrument)
600 scan->instrument->nsearches++;
601
602 /*
603 * We need to know the size of the table so that we know how long to
604 * iterate on the revmap.
605 */
606 heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
607 heapRel = table_open(heapOid, AccessShareLock);
608 nblocks = RelationGetNumberOfBlocks(heapRel);
610
611 /*
612 * Make room for the consistent support procedures of indexed columns. We
613 * don't look them up here; we do that lazily the first time we see a scan
614 * key reference each of them. We rely on zeroing fn_oid to InvalidOid.
615 */
616 consistentFn = palloc0_array(FmgrInfo, bdesc->bd_tupdesc->natts);
617
618 /*
619 * Make room for per-attribute lists of scan keys that we'll pass to the
620 * consistent support procedure. We don't know which attributes have scan
621 * keys, so we allocate space for all attributes. That may use more memory
622 * but it's probably cheaper than determining which attributes are used.
623 *
624 * We keep null and regular keys separate, so that we can pass just the
625 * regular keys to the consistent function easily.
626 *
627 * To reduce the allocation overhead, we allocate one big chunk and then
628 * carve it into smaller arrays ourselves. All the pieces have exactly the
629 * same lifetime, so that's OK.
630 *
631 * XXX The widest index can have 32 attributes, so the amount of wasted
632 * memory is negligible. We could invent a more compact approach (with
633 * just space for used attributes) but that would make the matching more
634 * complex so it's not a good trade-off.
635 */
636 len =
637 MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* regular keys */
638 MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
639 MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
640 MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) + /* NULL keys */
641 MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
642 MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
643
644 ptr = palloc(len);
645 tmp = ptr;
646
647 keys = (ScanKey **) ptr;
648 ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
649
650 nullkeys = (ScanKey **) ptr;
651 ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
652
653 nkeys = (int *) ptr;
654 ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
655
656 nnullkeys = (int *) ptr;
657 ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
658
659 for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
660 {
661 keys[i] = (ScanKey *) ptr;
662 ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
663
664 nullkeys[i] = (ScanKey *) ptr;
665 ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
666 }
667
668 Assert(tmp + len == ptr);
669
670 /* zero the number of keys */
671 memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
672 memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
673
674 /* Preprocess the scan keys - split them into per-attribute arrays. */
675 for (int keyno = 0; keyno < scan->numberOfKeys; keyno++)
676 {
677 ScanKey key = &scan->keyData[keyno];
678 AttrNumber keyattno = key->sk_attno;
679
680 /*
681 * The collation of the scan key must match the collation used in the
682 * index column (but only if the search is not IS NULL/ IS NOT NULL).
683 * Otherwise we shouldn't be using this index ...
684 */
685 Assert((key->sk_flags & SK_ISNULL) ||
686 (key->sk_collation ==
687 TupleDescAttr(bdesc->bd_tupdesc,
688 keyattno - 1)->attcollation));
689
690 /*
691 * First time we see this index attribute, so init as needed.
692 *
693 * This is a bit of an overkill - we don't know how many scan keys are
694 * there for this attribute, so we simply allocate the largest number
695 * possible (as if all keys were for this attribute). This may waste a
696 * bit of memory, but we only expect small number of scan keys in
697 * general, so this should be negligible, and repeated repalloc calls
698 * are not free either.
699 */
700 if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
701 {
702 FmgrInfo *tmp;
703
704 /* First time we see this attribute, so no key/null keys. */
705 Assert(nkeys[keyattno - 1] == 0);
706 Assert(nnullkeys[keyattno - 1] == 0);
707
710 fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
712 }
713
714 /* Add key to the proper per-attribute array. */
715 if (key->sk_flags & SK_ISNULL)
716 {
717 nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
718 nnullkeys[keyattno - 1]++;
719 }
720 else
721 {
722 keys[keyattno - 1][nkeys[keyattno - 1]] = key;
723 nkeys[keyattno - 1]++;
724 }
725 }
726
727 /* allocate an initial in-memory tuple, out of the per-range memcxt */
729
730 /*
731 * Setup and use a per-range memory context, which is reset every time we
732 * loop below. This avoids having to free the tuples within the loop.
733 */
735 "bringetbitmap cxt",
738
739 /*
740 * Now scan the revmap. We start by querying for heap page 0,
741 * incrementing by the number of pages per range; this gives us a full
742 * view of the table. We make use of uint64 for heapBlk as a BlockNumber
743 * could wrap for tables with close to 2^32 pages.
744 */
745 for (uint64 heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
746 {
747 bool addrange;
748 bool gottuple = false;
749 BrinTuple *tup;
750 OffsetNumber off;
751 Size size;
752
754
756
757 tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, (BlockNumber) heapBlk, &buf,
758 &off, &size, BUFFER_LOCK_SHARE);
759 if (tup)
760 {
761 gottuple = true;
762 btup = brin_copy_tuple(tup, size, btup, &btupsz);
764 }
765
766 /*
767 * For page ranges with no indexed tuple, we must return the whole
768 * range; otherwise, compare it to the scan keys.
769 */
770 if (!gottuple)
771 {
772 addrange = true;
773 }
774 else
775 {
777 if (dtup->bt_placeholder)
778 {
779 /*
780 * Placeholder tuples are always returned, regardless of the
781 * values stored in them.
782 */
783 addrange = true;
784 }
785 else
786 {
787 int attno;
788
789 /*
790 * Compare scan keys with summary values stored for the range.
791 * If scan keys are matched, the page range must be added to
792 * the bitmap. We initially assume the range needs to be
793 * added; in particular this serves the case where there are
794 * no keys.
795 */
796 addrange = true;
797 for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
798 {
799 BrinValues *bval;
800 Datum add;
801 Oid collation;
802
803 /*
804 * skip attributes without any scan keys (both regular and
805 * IS [NOT] NULL)
806 */
807 if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
808 continue;
809
810 bval = &dtup->bt_columns[attno - 1];
811
812 /*
813 * If the BRIN tuple indicates that this range is empty,
814 * we can skip it: there's nothing to match. We don't
815 * need to examine the next columns.
816 */
817 if (dtup->bt_empty_range)
818 {
819 addrange = false;
820 break;
821 }
822
823 /*
824 * First check if there are any IS [NOT] NULL scan keys,
825 * and if we're violating them. In that case we can
826 * terminate early, without invoking the support function.
827 *
828 * As there may be more keys, we can only determine
829 * mismatch within this loop.
830 */
831 if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
832 !check_null_keys(bval, nullkeys[attno - 1],
833 nnullkeys[attno - 1]))
834 {
835 /*
836 * If any of the IS [NOT] NULL keys failed, the page
837 * range as a whole can't pass. So terminate the loop.
838 */
839 addrange = false;
840 break;
841 }
842
843 /*
844 * So either there are no IS [NOT] NULL keys, or all
845 * passed. If there are no regular scan keys, we're done -
846 * the page range matches. If there are regular keys, but
847 * the page range is marked as 'all nulls' it can't
848 * possibly pass (we're assuming the operators are
849 * strict).
850 */
851
852 /* No regular scan keys - page range as a whole passes. */
853 if (!nkeys[attno - 1])
854 continue;
855
856 Assert((nkeys[attno - 1] > 0) &&
857 (nkeys[attno - 1] <= scan->numberOfKeys));
858
859 /* If it is all nulls, it cannot possibly be consistent. */
860 if (bval->bv_allnulls)
861 {
862 addrange = false;
863 break;
864 }
865
866 /*
867 * Collation from the first key (has to be the same for
868 * all keys for the same attribute).
869 */
870 collation = keys[attno - 1][0]->sk_collation;
871
872 /*
873 * Check whether the scan key is consistent with the page
874 * range values; if so, have the pages in the range added
875 * to the output bitmap.
876 *
877 * The opclass may or may not support processing of
878 * multiple scan keys. We can determine that based on the
879 * number of arguments - functions with extra parameter
880 * (number of scan keys) do support this, otherwise we
881 * have to simply pass the scan keys one by one.
882 */
883 if (consistentFn[attno - 1].fn_nargs >= 4)
884 {
885 /* Check all keys at once */
886 add = FunctionCall4Coll(&consistentFn[attno - 1],
887 collation,
889 PointerGetDatum(bval),
890 PointerGetDatum(keys[attno - 1]),
891 Int32GetDatum(nkeys[attno - 1]));
893 }
894 else
895 {
896 /*
897 * Check keys one by one
898 *
899 * When there are multiple scan keys, failure to meet
900 * the criteria for a single one of them is enough to
901 * discard the range as a whole, so break out of the
902 * loop as soon as a false return value is obtained.
903 */
904 int keyno;
905
906 for (keyno = 0; keyno < nkeys[attno - 1]; keyno++)
907 {
908 add = FunctionCall3Coll(&consistentFn[attno - 1],
909 keys[attno - 1][keyno]->sk_collation,
911 PointerGetDatum(bval),
912 PointerGetDatum(keys[attno - 1][keyno]));
914 if (!addrange)
915 break;
916 }
917 }
918
919 /*
920 * If we found a scan key eliminating the range, no need
921 * to check additional ones.
922 */
923 if (!addrange)
924 break;
925 }
926 }
927 }
928
929 /* add the pages in the range to the output bitmap, if needed */
930 if (addrange)
931 {
932 uint64 pageno;
933
934 for (pageno = heapBlk;
935 pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
936 pageno++)
937 {
939 tbm_add_page(tbm, pageno);
940 totalpages++;
942 }
943 }
944 }
945
948
949 if (buf != InvalidBuffer)
951
952 /*
953 * XXX We have an approximation of the number of *pages* that our scan
954 * returns, but we don't have a precise idea of the number of heap tuples
955 * involved.
956 */
957 return totalpages * 10;
958}
struct IndexScanInstrumentation * instrument
Definition relscan.h:160

References AccessShareLock, addrange(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, BrinOpaque::bo_bdesc, BrinOpaque::bo_pagesPerRange, BrinOpaque::bo_rmAccess, brin_copy_tuple(), brin_deform_tuple(), brin_new_memtuple(), BRIN_PROCNUM_CONSISTENT, brinGetTupleForHeapBlock(), buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BrinValues::bv_allnulls, CHECK_FOR_INTERRUPTS, check_null_keys(), CurrentMemoryContext, DatumGetBool(), fb(), fmgr_info_copy(), FunctionCall3Coll(), FunctionCall4Coll(), i, index_getprocinfo(), IndexGetRelation(), IndexScanDescData::indexRelation, IndexScanDescData::instrument, Int32GetDatum(), InvalidBuffer, InvalidOid, IndexScanDescData::keyData, len, LockBuffer(), MAXALIGN, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), Min, IndexScanInstrumentation::nsearches, IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, palloc(), palloc0_array, PG_USED_FOR_ASSERTS_ONLY, pgstat_count_index_scan, PointerGetDatum(), RelationGetNumberOfBlocks, RelationGetRelid, ReleaseBuffer(), ScanKeyData::sk_collation, SK_ISNULL, table_close(), table_open(), tbm_add_page(), and TupleDescAttr().

Referenced by brinhandler().

◆ brinGetStats()

◆ brinhandler()

Datum brinhandler ( PG_FUNCTION_ARGS  )

Definition at line 254 of file brin.c.

255{
256 static const IndexAmRoutine amroutine = {
258 .amstrategies = 0,
259 .amsupport = BRIN_LAST_OPTIONAL_PROCNUM,
260 .amoptsprocnum = BRIN_PROCNUM_OPTIONS,
261 .amcanorder = false,
262 .amcanorderbyop = false,
263 .amcanhash = false,
264 .amconsistentequality = false,
265 .amconsistentordering = false,
266 .amcanbackward = false,
267 .amcanunique = false,
268 .amcanmulticol = true,
269 .amoptionalkey = true,
270 .amsearcharray = false,
271 .amsearchnulls = true,
272 .amstorage = true,
273 .amclusterable = false,
274 .ampredlocks = false,
275 .amcanparallel = false,
276 .amcanbuildparallel = true,
277 .amcaninclude = false,
278 .amusemaintenanceworkmem = false,
279 .amsummarizing = true,
280 .amparallelvacuumoptions =
282 .amkeytype = InvalidOid,
283
284 .ambuild = brinbuild,
285 .ambuildempty = brinbuildempty,
286 .aminsert = brininsert,
287 .aminsertcleanup = brininsertcleanup,
288 .ambulkdelete = brinbulkdelete,
289 .amvacuumcleanup = brinvacuumcleanup,
290 .amcanreturn = NULL,
291 .amcostestimate = brincostestimate,
292 .amgettreeheight = NULL,
293 .amoptions = brinoptions,
294 .amproperty = NULL,
295 .ambuildphasename = NULL,
296 .amvalidate = brinvalidate,
297 .amadjustmembers = NULL,
298 .ambeginscan = brinbeginscan,
299 .amrescan = brinrescan,
300 .amgettuple = NULL,
301 .amgetbitmap = bringetbitmap,
302 .amendscan = brinendscan,
303 .ammarkpos = NULL,
304 .amrestrpos = NULL,
305 .amestimateparallelscan = NULL,
306 .aminitparallelscan = NULL,
307 .amparallelrescan = NULL,
308 .amtranslatestrategy = NULL,
309 .amtranslatecmptype = NULL,
310 };
311
313}

References BRIN_LAST_OPTIONAL_PROCNUM, BRIN_PROCNUM_OPTIONS, brinbeginscan(), brinbuild(), brinbuildempty(), brinbulkdelete(), brincostestimate(), brinendscan(), bringetbitmap(), brininsert(), brininsertcleanup(), brinoptions(), brinrescan(), brinvacuumcleanup(), brinvalidate(), fb(), InvalidOid, PG_RETURN_POINTER, IndexAmRoutine::type, and VACUUM_OPTION_PARALLEL_CLEANUP.

◆ brininsert()

bool brininsert ( Relation  idxRel,
Datum values,
bool nulls,
ItemPointer  heaptid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 349 of file brin.c.

354{
355 BlockNumber pagesPerRange;
357 BlockNumber heapBlk;
358 BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
364 bool autosummarize = BrinGetAutoSummarize(idxRel);
365
366 /*
367 * If first time through in this statement, initialize the insert state
368 * that we keep for all the inserts in the command.
369 */
370 if (!bistate)
371 bistate = initialize_brin_insertstate(idxRel, indexInfo);
372
373 revmap = bistate->bis_rmAccess;
374 bdesc = bistate->bis_desc;
375 pagesPerRange = bistate->bis_pages_per_range;
376
377 /*
378 * origHeapBlk is the block number where the insertion occurred. heapBlk
379 * is the first block in the corresponding page range.
380 */
382 heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
383
384 for (;;)
385 {
386 bool need_insert = false;
387 OffsetNumber off;
390
392
393 /*
394 * If auto-summarization is enabled and we just inserted the first
395 * tuple into the first block of a new non-first page range, request a
396 * summarization run of the previous range.
397 */
398 if (autosummarize &&
399 heapBlk > 0 &&
400 heapBlk == origHeapBlk &&
402 {
403 BlockNumber lastPageRange = heapBlk - 1;
405
409 if (!lastPageTuple)
410 {
411 bool recorded;
412
416 if (!recorded)
417 ereport(LOG,
419 errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
421 lastPageRange)));
422 }
423 else
425 }
426
427 brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
429
430 /* if range is unsummarized, there's nothing to do */
431 if (!brtup)
432 break;
433
434 /* First time through in this brininsert call? */
435 if (tupcxt == NULL)
436 {
438 "brininsert cxt",
441 }
442
444
446
447 if (!need_insert)
448 {
449 /*
450 * The tuple is consistent with the new values, so there's nothing
451 * to do.
452 */
454 }
455 else
456 {
457 Page page = BufferGetPage(buf);
458 ItemId lp = PageGetItemId(page, off);
459 Size origsz;
461 Size newsz;
463 bool samepage;
464
465 /*
466 * Make a copy of the old tuple, so that we can compare it after
467 * re-acquiring the lock.
468 */
471
472 /*
473 * Before releasing the lock, check if we can attempt a same-page
474 * update. Another process could insert a tuple concurrently in
475 * the same page though, so downstream we must be prepared to cope
476 * if this turns out to not be possible after all.
477 */
478 newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
481
482 /*
483 * Try to update the tuple. If this doesn't work for whatever
484 * reason, we need to restart from the top; the revmap might be
485 * pointing at a different tuple for this block now, so we need to
486 * recompute to ensure both our new heap tuple and the other
487 * inserter's are covered by the combined tuple. It might be that
488 * we don't need to update at all.
489 */
490 if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
491 buf, off, origtup, origsz, newtup, newsz,
492 samepage))
493 {
494 /* no luck; start over */
496 continue;
497 }
498 }
499
500 /* success! */
501 break;
502 }
503
504 if (BufferIsValid(buf))
507 if (tupcxt != NULL)
509
510 return false;
511}

References add_values_to_range(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, AutoVacuumRequestWork(), AVW_BRINSummarizeRange, BrinInsertState::bis_desc, BrinInsertState::bis_pages_per_range, BrinInsertState::bis_rmAccess, brin_can_do_samepage_update(), brin_copy_tuple(), brin_deform_tuple(), brin_doupdate(), brin_form_tuple(), BrinGetAutoSummarize, brinGetTupleForHeapBlock(), buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), CHECK_FOR_INTERRUPTS, CurrentMemoryContext, ereport, errcode(), errmsg, fb(), FirstOffsetNumber, IndexInfo::ii_AmCache, initialize_brin_insertstate(), InvalidBuffer, ItemIdGetLength, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LOG, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), PageGetItemId(), RelationGetRelationName, RelationGetRelid, ReleaseBuffer(), and values.

Referenced by brinhandler().

◆ brininsertcleanup()

void brininsertcleanup ( Relation  index,
IndexInfo indexInfo 
)

Definition at line 517 of file brin.c.

518{
519 BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
520
521 /* bail out if cache not initialized */
522 if (bistate == NULL)
523 return;
524
525 /* do this first to avoid dangling pointer if we fail partway through */
526 indexInfo->ii_AmCache = NULL;
527
528 /*
529 * Clean up the revmap. Note that the brinDesc has already been cleaned up
530 * as part of its own memory context.
531 */
532 brinRevmapTerminate(bistate->bis_rmAccess);
533 pfree(bistate);
534}

References BrinInsertState::bis_rmAccess, brinRevmapTerminate(), fb(), IndexInfo::ii_AmCache, and pfree().

Referenced by brinhandler().

◆ brinoptions()

bytea * brinoptions ( Datum  reloptions,
bool  validate 
)

Definition at line 1353 of file brin.c.

1354{
1355 static const relopt_parse_elt tab[] = {
1356 {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
1357 {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
1358 };
1359
1360 return (bytea *) build_reloptions(reloptions, validate,
1362 sizeof(BrinOptions),
1363 tab, lengthof(tab));
1364}

References build_reloptions(), fb(), lengthof, RELOPT_KIND_BRIN, RELOPT_TYPE_BOOL, RELOPT_TYPE_INT, and validate().

Referenced by brinhandler().

◆ brinrescan()

void brinrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 964 of file brin.c.

966{
967 /*
968 * Other index AMs preprocess the scan keys at this point, or sometime
969 * early during the scan; this lets them optimize by removing redundant
970 * keys, or doing early returns when they are impossible to satisfy; see
971 * _bt_preprocess_keys for an example. Something like that could be added
972 * here someday, too.
973 */
974
975 if (scankey && scan->numberOfKeys > 0)
976 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
977}

References fb(), IndexScanDescData::keyData, and IndexScanDescData::numberOfKeys.

Referenced by brinhandler().

◆ brinsummarize()

static void brinsummarize ( Relation  index,
Relation  heapRel,
BlockNumber  pageRange,
bool  include_partial,
double numSummarized,
double numExisting 
)
static

Definition at line 1889 of file brin.c.

1891{
1894 IndexInfo *indexInfo = NULL;
1896 BlockNumber pagesPerRange;
1897 Buffer buf;
1899
1900 revmap = brinRevmapInitialize(index, &pagesPerRange);
1901
1902 /* determine range of pages to process */
1905 startBlk = 0;
1906 else
1907 {
1908 startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1909 heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
1910 }
1911 if (startBlk > heapNumBlocks)
1912 {
1913 /* Nothing to do if start point is beyond end of table */
1915 return;
1916 }
1917
1918 /*
1919 * Scan the revmap to find unsummarized items.
1920 */
1922 for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
1923 {
1924 BrinTuple *tup;
1925 OffsetNumber off;
1926
1927 /*
1928 * Unless requested to summarize even a partial range, go away now if
1929 * we think the next range is partial. Caller would pass true when it
1930 * is typically run once bulk data loading is done
1931 * (brin_summarize_new_values), and false when it is typically the
 1932 * result of an arbitrarily-scheduled maintenance command (vacuuming).
1933 */
1934 if (!include_partial &&
1935 (startBlk + pagesPerRange > heapNumBlocks))
1936 break;
1937
1939
1942 if (tup == NULL)
1943 {
1944 /* no revmap entry for this heap range. Summarize it. */
1945 if (state == NULL)
1946 {
1947 /* first time through */
1948 Assert(!indexInfo);
1950 pagesPerRange,
1952 indexInfo = BuildIndexInfo(index);
1953 }
1954 summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
1955
1956 /* and re-initialize state for the next range */
1957 brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
1958
1959 if (numSummarized)
1960 *numSummarized += 1.0;
1961 }
1962 else
1963 {
1964 if (numExisting)
1965 *numExisting += 1.0;
1967 }
1968 }
1969
1970 if (BufferIsValid(buf))
1972
1973 /* free resources */
1975 if (state)
1976 {
1978 pfree(indexInfo);
1979 }
1980}

References Assert, BRIN_ALL_BLOCKRANGES, brin_memtuple_initialize(), brinGetTupleForHeapBlock(), brinRevmapInitialize(), brinRevmapTerminate(), buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferIsValid(), BuildIndexInfo(), CHECK_FOR_INTERRUPTS, fb(), initialize_brin_buildstate(), InvalidBlockNumber, InvalidBuffer, LockBuffer(), Min, pfree(), RelationGetNumberOfBlocks, ReleaseBuffer(), summarize_range(), and terminate_brin_buildstate().

Referenced by brin_summarize_range(), and brinvacuumcleanup().

◆ brinvacuumcleanup()

IndexBulkDeleteResult * brinvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 1323 of file brin.c.

1324{
1325 Relation heapRel;
1326
1327 /* No-op in ANALYZE ONLY mode */
1328 if (info->analyze_only)
1329 return stats;
1330
1331 if (!stats)
1334 /* rest of stats is initialized by zeroing */
1335
1336 heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
1338
1339 brin_vacuum_scan(info->index, info->strategy);
1340
1341 brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
1342 &stats->num_index_tuples, &stats->num_index_tuples);
1343
1344 table_close(heapRel, AccessShareLock);
1345
1346 return stats;
1347}

References AccessShareLock, IndexVacuumInfo::analyze_only, BRIN_ALL_BLOCKRANGES, brin_vacuum_scan(), brinsummarize(), IndexVacuumInfo::index, IndexGetRelation(), IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, palloc0_object, RelationGetNumberOfBlocks, RelationGetRelid, IndexVacuumInfo::strategy, table_close(), and table_open().

Referenced by brinhandler().

◆ check_null_keys()

static bool check_null_keys ( BrinValues bval,
ScanKey nullkeys,
int  nnullkeys 
)
static

Definition at line 2315 of file brin.c.

2316{
2317 int keyno;
2318
2319 /*
2320 * First check if there are any IS [NOT] NULL scan keys, and if we're
2321 * violating them.
2322 */
2323 for (keyno = 0; keyno < nnullkeys; keyno++)
2324 {
2325 ScanKey key = nullkeys[keyno];
2326
2327 Assert(key->sk_attno == bval->bv_attno);
2328
2329 /* Handle only IS NULL/IS NOT NULL tests */
2330 if (!(key->sk_flags & SK_ISNULL))
2331 continue;
2332
2333 if (key->sk_flags & SK_SEARCHNULL)
2334 {
2335 /* IS NULL scan key, but range has no NULLs */
2336 if (!bval->bv_allnulls && !bval->bv_hasnulls)
2337 return false;
2338 }
2339 else if (key->sk_flags & SK_SEARCHNOTNULL)
2340 {
2341 /*
2342 * For IS NOT NULL, we can only skip ranges that are known to have
2343 * only nulls.
2344 */
2345 if (bval->bv_allnulls)
2346 return false;
2347 }
2348 else
2349 {
2350 /*
2351 * Neither IS NULL nor IS NOT NULL was used; assume all indexable
2352 * operators are strict and thus return false with NULL value in
2353 * the scan key.
2354 */
2355 return false;
2356 }
2357 }
2358
2359 return true;
2360}

References Assert, BrinValues::bv_allnulls, BrinValues::bv_attno, BrinValues::bv_hasnulls, fb(), SK_ISNULL, SK_SEARCHNOTNULL, and SK_SEARCHNULL.

Referenced by bringetbitmap().

◆ form_and_insert_tuple()

static void form_and_insert_tuple ( BrinBuildState state)
static

Definition at line 1987 of file brin.c.

1988{
1989 BrinTuple *tup;
1990 Size size;
1991
1992 tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
1993 state->bs_dtuple, &size);
1994 brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
1995 &state->bs_currentInsertBuf, state->bs_currRangeStart,
1996 tup, size);
1997 state->bs_numtuples++;
1998
1999 pfree(tup);
2000}

References brin_doinsert(), brin_form_tuple(), fb(), and pfree().

Referenced by brinbuild(), and brinbuildCallback().

◆ form_and_spill_tuple()

static void form_and_spill_tuple ( BrinBuildState state)
static

Definition at line 2008 of file brin.c.

2009{
2010 BrinTuple *tup;
2011 Size size;
2012
2013 /* don't insert empty tuples in parallel build */
2014 if (state->bs_dtuple->bt_empty_range)
2015 return;
2016
2017 tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
2018 state->bs_dtuple, &size);
2019
2020 /* write the BRIN tuple to the tuplesort */
2021 tuplesort_putbrintuple(state->bs_sortstate, tup, size);
2022
2023 state->bs_numtuples++;
2024
2025 pfree(tup);
2026}

References brin_form_tuple(), fb(), pfree(), and tuplesort_putbrintuple().

Referenced by _brin_parallel_scan_and_build(), and brinbuildCallbackParallel().

◆ initialize_brin_buildstate()

static BrinBuildState * initialize_brin_buildstate ( Relation  idxRel,
BrinRevmap revmap,
BlockNumber  pagesPerRange,
BlockNumber  tablePages 
)
static

Definition at line 1674 of file brin.c.

1676{
1679
1681
1682 state->bs_irel = idxRel;
1683 state->bs_numtuples = 0;
1684 state->bs_reltuples = 0;
1685 state->bs_currentInsertBuf = InvalidBuffer;
1686 state->bs_pagesPerRange = pagesPerRange;
1687 state->bs_currRangeStart = 0;
1688 state->bs_rmAccess = revmap;
1689 state->bs_bdesc = brin_build_desc(idxRel);
1690 state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
1691 state->bs_leader = NULL;
1692 state->bs_worker_id = 0;
1693 state->bs_sortstate = NULL;
1694
1695 /* Remember the memory context to use for an empty tuple, if needed. */
1696 state->bs_context = CurrentMemoryContext;
1697 state->bs_emptyTuple = NULL;
1698 state->bs_emptyTupleLen = 0;
1699
1700 /*
1701 * Calculate the start of the last page range. Page numbers are 0-based,
1702 * so to calculate the index we need to subtract one. The integer division
1703 * gives us the index of the page range.
1704 */
1705 if (tablePages > 0)
1706 lastRange = ((tablePages - 1) / pagesPerRange) * pagesPerRange;
1707
1708 /* Now calculate the start of the next range. */
1709 state->bs_maxRangeStart = lastRange + state->bs_pagesPerRange;
1710
1711 return state;
1712}

References brin_build_desc(), brin_new_memtuple(), CurrentMemoryContext, fb(), InvalidBuffer, and palloc_object.

Referenced by _brin_parallel_build_main(), brinbuild(), and brinsummarize().

◆ initialize_brin_insertstate()

static BrinInsertState * initialize_brin_insertstate ( Relation  idxRel,
IndexInfo indexInfo 
)
static

◆ summarize_range()

static void summarize_range ( IndexInfo indexInfo,
BrinBuildState state,
Relation  heapRel,
BlockNumber  heapBlk,
BlockNumber  heapNumBlks 
)
static

Definition at line 1763 of file brin.c.

1765{
1766 Buffer phbuf;
1768 Size phsz;
1769 OffsetNumber offset;
1771
1772 /*
1773 * Insert the placeholder tuple
1774 */
1776 phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
1777 offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
1778 state->bs_rmAccess, &phbuf,
1779 heapBlk, phtup, phsz);
1780
1781 /*
1782 * Compute range end. We hold ShareUpdateExclusive lock on table, so it
1783 * cannot shrink concurrently (but it can grow).
1784 */
1785 Assert(heapBlk % state->bs_pagesPerRange == 0);
1786 if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1787 {
1788 /*
1789 * If we're asked to scan what we believe to be the final range on the
1790 * table (i.e. a range that might be partial) we need to recompute our
1791 * idea of what the latest page is after inserting the placeholder
1792 * tuple. Anyone that grows the table later will update the
1793 * placeholder tuple, so it doesn't matter that we won't scan these
1794 * pages ourselves. Careful: the table might have been extended
1795 * beyond the current range, so clamp our result.
1796 *
1797 * Fortunately, this should occur infrequently.
1798 */
1799 scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1800 state->bs_pagesPerRange);
1801 }
1802 else
1803 {
1804 /* Easy case: range is known to be complete */
1805 scanNumBlks = state->bs_pagesPerRange;
1806 }
1807
1808 /*
1809 * Execute the partial heap scan covering the heap blocks in the specified
1810 * page range, summarizing the heap tuples in it. This scan stops just
1811 * short of brinbuildCallback creating the new index entry.
1812 *
1813 * Note that it is critical we use the "any visible" mode of
1814 * table_index_build_range_scan here: otherwise, we would miss tuples
1815 * inserted by transactions that are still in progress, among other corner
1816 * cases.
1817 */
1818 state->bs_currRangeStart = heapBlk;
1819 table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
1820 heapBlk, scanNumBlks,
1822
1823 /*
1824 * Now we update the values obtained by the scan with the placeholder
1825 * tuple. We do this in a loop which only terminates if we're able to
1826 * update the placeholder tuple successfully; if we are not, this means
1827 * somebody else modified the placeholder tuple after we read it.
1828 */
1829 for (;;)
1830 {
1832 Size newsize;
1833 bool didupdate;
1834 bool samepage;
1835
1837
1838 /*
1839 * Update the summary tuple and try to update.
1840 */
1841 newtup = brin_form_tuple(state->bs_bdesc,
1842 heapBlk, state->bs_dtuple, &newsize);
1844 didupdate =
1845 brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
1846 state->bs_rmAccess, heapBlk, phbuf, offset,
1850
1851 /* If the update succeeded, we're done. */
1852 if (didupdate)
1853 break;
1854
1855 /*
1856 * If the update didn't work, it might be because somebody updated the
1857 * placeholder tuple concurrently. Extract the new version, union it
1858 * with the values we have from the scan, and start over. (There are
1859 * other reasons for the update to fail, but it's simple to treat them
1860 * the same.)
1861 */
1862 phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
1863 &offset, &phsz, BUFFER_LOCK_SHARE);
1864 /* the placeholder tuple must exist */
1865 if (phtup == NULL)
1866 elog(ERROR, "missing placeholder tuple");
1869
1870 /* merge it into the tuple from the heap scan */
1871 union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
1872 }
1873
1875}

References Assert, brin_can_do_samepage_update(), brin_copy_tuple(), brin_doinsert(), brin_doupdate(), brin_form_placeholder_tuple(), brin_form_tuple(), brin_free_tuple(), brinbuildCallback(), brinGetTupleForHeapBlock(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, CHECK_FOR_INTERRUPTS, elog, ERROR, fb(), InvalidBuffer, LockBuffer(), Min, RelationGetNumberOfBlocks, ReleaseBuffer(), table_index_build_range_scan(), and union_tuples().

Referenced by brinsummarize().

◆ terminate_brin_buildstate()

static void terminate_brin_buildstate ( BrinBuildState * state)
static

Definition at line 1718 of file brin.c.

1719{
1720 /*
1721 * Release the last index buffer used. We might as well ensure that
1722 * whatever free space remains in that page is available in FSM, too.
1723 */
1724 if (!BufferIsInvalid(state->bs_currentInsertBuf))
1725 {
1726 Page page;
1727 Size freespace;
1729
1730 page = BufferGetPage(state->bs_currentInsertBuf);
1731 freespace = PageGetFreeSpace(page);
1732 blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
1733 ReleaseBuffer(state->bs_currentInsertBuf);
1734 RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
1735 FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
1736 }
1737
1738 brin_free_desc(state->bs_bdesc);
1739 pfree(state->bs_dtuple);
1740 pfree(state);
1741}

References brin_free_desc(), BufferGetBlockNumber(), BufferGetPage(), BufferIsInvalid, fb(), FreeSpaceMapVacuumRange(), PageGetFreeSpace(), pfree(), RecordPageWithFreeSpace(), and ReleaseBuffer().

Referenced by brinbuild(), and brinsummarize().

◆ union_tuples()

static void union_tuples ( BrinDesc * bdesc,
BrinMemTuple * a,
BrinTuple * b 
)
static

Definition at line 2033 of file brin.c.

2034{
2035 int keyno;
2036 BrinMemTuple *db;
2037 MemoryContext cxt;
2039
2040 /* Use our own memory context to avoid retail pfree */
2042 "brin union",
2045 db = brin_deform_tuple(bdesc, b, NULL);
2047
2048 /*
2049 * Check if the ranges are empty.
2050 *
2051 * If at least one of them is empty, we don't need to call per-key union
2052 * functions at all. If "b" is empty, we just use "a" as the result (it
2053 * might be empty fine, but that's fine). If "a" is empty but "b" is not,
2054 * we use "b" as the result (but we have to copy the data into "a" first).
2055 *
2056 * Only when both ranges are non-empty, we actually do the per-key merge.
2057 */
2058
2059 /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */
2060 if (db->bt_empty_range)
2061 {
2062 /* skip the per-key merge */
2064 return;
2065 }
2066
2067 /*
2068 * Now we know "b" is not empty. If "a" is empty, then "b" is the result.
2069 * But we need to copy the data from "b" to "a" first, because that's how
2070 * we pass result out.
2071 *
2072 * We have to copy all the global/per-key flags etc. too.
2073 */
2074 if (a->bt_empty_range)
2075 {
2076 for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
2077 {
2078 int i;
2079 BrinValues *col_a = &a->bt_columns[keyno];
2080 BrinValues *col_b = &db->bt_columns[keyno];
2081 BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
2082
2083 col_a->bv_allnulls = col_b->bv_allnulls;
2084 col_a->bv_hasnulls = col_b->bv_hasnulls;
2085
2086 /* If "b" has no data, we're done. */
2087 if (col_b->bv_allnulls)
2088 continue;
2089
2090 for (i = 0; i < opcinfo->oi_nstored; i++)
2091 col_a->bv_values[i] =
2092 datumCopy(col_b->bv_values[i],
2093 opcinfo->oi_typcache[i]->typbyval,
2094 opcinfo->oi_typcache[i]->typlen);
2095 }
2096
2097 /* "a" started empty, but "b" was not empty, so remember that */
2098 a->bt_empty_range = false;
2099
2100 /* skip the per-key merge */
2102 return;
2103 }
2104
2105 /* Now we know neither range is empty. */
2106 for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
2107 {
2108 FmgrInfo *unionFn;
2109 BrinValues *col_a = &a->bt_columns[keyno];
2110 BrinValues *col_b = &db->bt_columns[keyno];
2111 BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
2112
2113 if (opcinfo->oi_regular_nulls)
2114 {
2115 /* Does the "b" summary represent any NULL values? */
2116 bool b_has_nulls = (col_b->bv_hasnulls || col_b->bv_allnulls);
2117
2118 /* Adjust "hasnulls". */
2119 if (!col_a->bv_allnulls && b_has_nulls)
2120 col_a->bv_hasnulls = true;
2121
2122 /* If there are no values in B, there's nothing left to do. */
2123 if (col_b->bv_allnulls)
2124 continue;
2125
2126 /*
2127 * Adjust "allnulls". If A doesn't have values, just copy the
2128 * values from B into A, and we're done. We cannot run the
2129 * operators in this case, because values in A might contain
2130 * garbage. Note we already established that B contains values.
2131 *
2132 * Also adjust "hasnulls" in order not to forget the summary
2133 * represents NULL values. This is not redundant with the earlier
2134 * update, because that only happens when allnulls=false.
2135 */
2136 if (col_a->bv_allnulls)
2137 {
2138 int i;
2139
2140 col_a->bv_allnulls = false;
2141 col_a->bv_hasnulls = true;
2142
2143 for (i = 0; i < opcinfo->oi_nstored; i++)
2144 col_a->bv_values[i] =
2145 datumCopy(col_b->bv_values[i],
2146 opcinfo->oi_typcache[i]->typbyval,
2147 opcinfo->oi_typcache[i]->typlen);
2148
2149 continue;
2150 }
2151 }
2152
2153 unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
2155 FunctionCall3Coll(unionFn,
2156 bdesc->bd_index->rd_indcollation[keyno],
2160 }
2161
2163}

References a, ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, b, brin_deform_tuple(), BRIN_PROCNUM_UNION, BrinMemTuple::bt_columns, BrinMemTuple::bt_empty_range, CurrentMemoryContext, datumCopy(), fb(), FunctionCall3Coll(), i, index_getprocinfo(), MemoryContextDelete(), MemoryContextSwitchTo(), and PointerGetDatum().

Referenced by _brin_parallel_merge(), and summarize_range().