/* gininsert_8c_source.html */

/*-------------------------------------------------------------------------

 *

 * gininsert.c

 *    insert routines for the postgres inverted index access method.

 *

 *

 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group

 * Portions Copyright (c) 1994, Regents of the University of California

 *

 * IDENTIFICATION

 *          src/backend/access/gin/gininsert.c

 *-------------------------------------------------------------------------

 */


#include "postgres.h"


#include "access/gin_private.h"

#include "access/gin_tuple.h"

#include "access/parallel.h"

#include "access/table.h"

#include "access/tableam.h"

#include "access/xloginsert.h"

#include "catalog/index.h"

#include "catalog/pg_collation.h"

#include "commands/progress.h"

#include "miscadmin.h"

#include "nodes/execnodes.h"

#include "pgstat.h"

#include "storage/bufmgr.h"

#include "storage/predicate.h"

#include "tcop/tcopprot.h"

#include "utils/datum.h"

#include "utils/memutils.h"

#include "utils/rel.h"

#include "utils/builtins.h"


/*
 * Magic numbers for parallel state sharing.
 *
 * Each constant is a distinct 64-bit key naming one piece of state that the
 * leader shares with parallel workers during a parallel GIN build.
 * NOTE(review): these are presumably used as shm_toc keys when the leader
 * publishes each chunk and the workers look it up -- confirm against
 * _gin_begin_parallel and the worker entry point.
 */
#define PARALLEL_KEY_GIN_SHARED         UINT64CONST(0xB000000000000001)
#define PARALLEL_KEY_TUPLESORT          UINT64CONST(0xB000000000000002)
#define PARALLEL_KEY_QUERY_TEXT         UINT64CONST(0xB000000000000003)
#define PARALLEL_KEY_WAL_USAGE          UINT64CONST(0xB000000000000004)
#define PARALLEL_KEY_BUFFER_USAGE       UINT64CONST(0xB000000000000005)


/*
 * Status for index builds performed in parallel.  This is allocated in a
 * dynamic shared memory segment.
 *
 * The struct is only a fixed-size header: the parallel table scan descriptor
 * is stored after it (see the comment at the end of the struct).
 */
typedef struct GinBuildShared
{
    /*
     * These fields are not modified during the build.  They primarily exist
     * for the benefit of worker processes that need to create state
     * corresponding to that used by the leader.
     *
     * NOTE(review): heaprelid/indexrelid are presumably the OIDs of the heap
     * and index relations being built, isconcurrent presumably mirrors the
     * "isconcurrent" argument of _gin_begin_parallel (CREATE INDEX
     * CONCURRENTLY), and scantuplesortstates presumably holds the number of
     * participant tuplesorts computed there -- confirm where the struct is
     * filled in.
     */
    Oid         heaprelid;
    Oid         indexrelid;
    bool        isconcurrent;
    int         scantuplesortstates;

    /*
     * workersdonecv is used to monitor the progress of workers.  All parallel
     * participants must indicate that they are done before leader can use
     * results built by the workers (and before leader can write the data into
     * the index).
     */
    ConditionVariable workersdonecv;

    /*
     * mutex protects all following fields
     *
     * These fields contain status information of interest to GIN index builds
     * that must work just the same when an index is built in parallel.
     */
    slock_t     mutex;

    /*
     * Mutable state that is maintained by workers, and reported back to
     * leader at end of the scans.
     *
     * nparticipantsdone is number of worker processes finished.
     *
     * reltuples is the total number of input heap tuples.
     *
     * indtuples is the total number of tuples that made it into the index.
     */
    int         nparticipantsdone;
    double      reltuples;
    double      indtuples;

    /*
     * ParallelTableScanDescData data follows. Can't directly embed here, as
     * implementations of the parallel table scan desc interface might need
     * stronger alignment.
     *
     * Use ParallelTableScanFromGinBuildShared() to locate it.
     */
} GinBuildShared;


/*
 * Return pointer to a GinBuildShared's parallel table scan.
 *
 * The scan descriptor is stored after the GinBuildShared header, at the next
 * BUFFERALIGN boundary past sizeof(GinBuildShared).
 *
 * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
 * MAXALIGN.
 *
 * The whole expansion is parenthesized so that the result can be used as the
 * operand of a postfix operator (for example "->") without that operator
 * binding to the un-cast inner expression.
 */
#define ParallelTableScanFromGinBuildShared(shared) \
    ((ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared))))


/*
 * Status for leader in parallel index build.
 *
 * A pointer to this struct is kept in GinBuildState.bs_leader; ginbuild()
 * treats a non-NULL bs_leader as "a parallel build is in progress".
 */
typedef struct GinLeader
{
    /* parallel context itself */
    ParallelContext *pcxt;

    /*
     * nparticipanttuplesorts is the exact number of worker processes
     * successfully launched, plus one leader process if it participates as a
     * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
     * participating as a worker).
     *
     * ginbuild() copies this into the leader's SortCoordinate as
     * nParticipants.
     */
    int         nparticipanttuplesorts;

    /*
     * Leader process convenience pointers to shared state (leader avoids TOC
     * lookups).
     *
     * GinBuildShared is the shared state for entire build.  sharedsort is the
     * shared, tuplesort-managed state passed to each process tuplesort.
     * snapshot is the snapshot used by the scan iff an MVCC snapshot is
     * required.
     *
     * NOTE(review): walusage and bufferusage presumably point at the shared
     * areas reserved for PARALLEL_KEY_WAL_USAGE and
     * PARALLEL_KEY_BUFFER_USAGE -- confirm in _gin_begin_parallel.
     */
    GinBuildShared *ginshared;
    Sharedsort *sharedsort;
    Snapshot    snapshot;
    WalUsage   *walusage;
    BufferUsage *bufferusage;
} GinLeader;


/*
 * Working state of a GIN index build.
 *
 * ginbuild() keeps one of these on its stack and passes its address as the
 * opaque "state" argument of the table-scan build callbacks.
 */
typedef struct
{
    /* GIN state for the index, set up by initGinState() in ginbuild() */
    GinState    ginstate;

    /*
     * Running count of extracted entries: ginHeapTupleBulkInsert() adds the
     * number of entries produced for each indexed value, and ginbuild()
     * reports the total as index_tuples.
     */
    double      indtuples;

    /* build counters, handed to ginEntryInsert() and ginUpdateStats() */
    GinStatsData buildStats;

    /* holds data not yet dumped out to the index (or to the tuplesort) */
    MemoryContext tmpCtx;

    /* used while calling ginExtractEntries(); reset after each call */
    MemoryContext funcCtx;

    /* in-memory accumulator of entries, flushed when it grows too large */
    BuildAccumulator accum;

    /*
     * Last TID processed by ginBuildCallbackParallel(); a smaller incoming
     * TID is taken to mean that the scan wrapped around.
     */
    ItemPointerData tid;

    /*
     * Accumulator memory limit used by ginBuildCallbackParallel(): the value
     * is multiplied by 1024 and compared with accum.allocatedMemory.
     */
    int         work_mem;

    /*
     * bs_leader is only present when a parallel index build is performed, and
     * only in the leader process.
     *
     * NOTE(review): bs_worker_id is not set or read in the part of the file
     * reviewed here; presumably it identifies the participant -- confirm.
     */
    GinLeader  *bs_leader;
    int         bs_worker_id;

    /* used to pass information from workers to leader */
    double      bs_numtuples;
    double      bs_reltuples;

    /*
     * The sortstate is used by workers (including the leader). It has to be
     * part of the build state, because that's the only thing passed to the
     * build callback etc.
     */
    Tuplesortstate *bs_sortstate;

    /*
     * The sortstate used only within a single worker for the first merge pass
     * happening there. In principle it doesn't need to be part of the build
     * state and we could pass it around directly, but it's more convenient
     * this way. And it's part of the build state, after all.
     *
     * ginFlushBuildState() writes the accumulated entries into this sort.
     */
    Tuplesortstate *bs_worker_sort;
} GinBuildState;


/*
 * parallel index builds
 *
 * Forward declarations of file-local helpers.  ginbuild() uses
 * _gin_begin_parallel() to try to start a parallel build,
 * _gin_parallel_merge() to obtain the heap tuple count once the per-worker
 * results are merged, and _gin_end_parallel() to shut parallel mode down.
 */
static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
                                bool isconcurrent, int request);
static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
static double _gin_parallel_heapscan(GinBuildState *state);
static double _gin_parallel_merge(GinBuildState *state);
static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
                                              Relation heap, Relation index);
static void _gin_parallel_scan_and_build(GinBuildState *state,
                                         GinBuildShared *ginshared,
                                         Sharedsort *sharedsort,
                                         Relation heap, Relation index,
                                         int sortmem, bool progress);

/* accessors for the contents of a GinTuple */
static ItemPointer _gin_parse_tuple_items(GinTuple *a);
static Datum _gin_parse_tuple_key(GinTuple *a);

/*
 * Builds a GinTuple for one key and its TID list; *len receives the tuple
 * length (ginFlushBuildState() passes both on to the tuplesort).
 */
static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
                                  Datum key, int16 typlen, bool typbyval,
                                  ItemPointerData *items, uint32 nitems,
                                  Size *len);


/*
 * Add an array of item pointers to the posting list of an existing leaf
 * tuple, and return a newly built replacement tuple.
 *
 * The old tuple must be in posting-list format (not a posting tree).  Its
 * TIDs are merged with items[], which must be in sorted order with no
 * duplicates.  If the merged list can be compressed and formed into a tuple
 * (the maximum tuple size is defined in GinFormTuple()), the result is a
 * posting-list tuple.  Otherwise a posting tree is created from the old
 * tuple's TIDs, the new TIDs are inserted into it, and the result is a tuple
 * pointing to that tree.
 *
 * buildStats and buffer are passed through to the posting-tree routines.
 */
static IndexTuple
addItemPointersToLeafTuple(GinState *ginstate,
                           IndexTuple old,
                           ItemPointerData *items, uint32 nitem,
                           GinStatsData *buildStats, Buffer buffer)
{
    GinNullCategory category;
    OffsetNumber attnum;
    Datum       key;
    ItemPointerData *existing;
    ItemPointerData *merged;
    int         nexisting;
    int         nmerged;
    GinPostingList *packed;
    IndexTuple  result;
    BlockNumber treeRoot;

    Assert(!GinIsPostingTree(old));

    attnum = gintuple_get_attrnum(ginstate, old);
    key = gintuple_get_key(ginstate, old, &category);

    /* Combine the TIDs already in the tuple with the ones being added */
    existing = ginReadTuple(ginstate, attnum, old, &nexisting);
    merged = ginMergeItemPointers(items, nitem,
                                  existing, nexisting,
                                  &nmerged);

    /*
     * Compress the merged list and try to build a tuple with room for it.
     * The merged array is no longer needed once it has been compressed.
     */
    packed = ginCompressPostingList(merged, nmerged, GinMaxItemSize, NULL);
    pfree(merged);

    if (packed != NULL)
    {
        result = GinFormTuple(ginstate, attnum, key, category,
                              (char *) packed,
                              SizeOfGinPostingList(packed),
                              nmerged,
                              false);
        pfree(packed);

        if (result != NULL)
        {
            /* everything fit into a posting-list tuple */
            pfree(existing);
            return result;
        }
    }

    /*
     * The posting list would be too big, so convert to a posting tree.
     *
     * Seed the tree with the old tuple's posting list: it is surely small
     * enough to fit on one posting-tree page, and should already be in order
     * with no duplicates.
     */
    treeRoot = createPostingTree(ginstate->index,
                                 existing, nexisting,
                                 buildStats, buffer);

    /* Then add the TIDs we were asked to insert */
    ginInsertItemPointers(ginstate->index, treeRoot,
                          items, nitem,
                          buildStats);

    /* Finally build a posting-tree-only tuple that links to the new root */
    result = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
    GinSetPostingTree(result, treeRoot);

    pfree(existing);
    return result;
}


/*
 * Build a brand-new leaf tuple for the given key.
 *
 * The tuple is in posting-list format if the compressed items list can be
 * formed into a tuple, and in posting-tree format otherwise.  items[] must
 * be in sorted order with no duplicates.
 *
 * The logic parallels addItemPointersToLeafTuple, except that there is no
 * pre-existing tuple to merge with.
 */
static IndexTuple
buildFreshLeafTuple(GinState *ginstate,
                    OffsetNumber attnum, Datum key, GinNullCategory category,
                    ItemPointerData *items, uint32 nitem,
                    GinStatsData *buildStats, Buffer buffer)
{
    GinPostingList *packed;
    IndexTuple  tuple;
    BlockNumber treeRoot;

    /* First choice: a posting-list tuple holding all the items */
    packed = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
    if (packed != NULL)
    {
        tuple = GinFormTuple(ginstate, attnum, key, category,
                             (char *) packed,
                             SizeOfGinPostingList(packed),
                             nitem, false);
        pfree(packed);

        if (tuple != NULL)
            return tuple;
    }

    /*
     * The posting list would be too big, so use a posting tree instead.
     * Form the posting-tree-only tuple before creating the tree, so as to
     * fail quickly if the key is too big.
     */
    tuple = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);

    /* Create the posting tree from the TIDs and link it into the tuple */
    treeRoot = createPostingTree(ginstate->index, items, nitem,
                                 buildStats, buffer);
    GinSetPostingTree(tuple, treeRoot);

    return tuple;
}


/*
 * Insert one or more heap TIDs for the given key value.
 *
 * If the key already has an entry, the entry is enlarged: TIDs go straight
 * into its posting tree if it has one, otherwise its leaf tuple is replaced
 * by an enlarged one.  If the key has no entry, a new leaf tuple is added.
 *
 * buildStats is non-null during an index build; in that case the counters
 * it contains are incremented as needed.
 */
void
ginEntryInsert(GinState *ginstate,
               OffsetNumber attnum, Datum key, GinNullCategory category,
               ItemPointerData *items, uint32 nitem,
               GinStatsData *buildStats)
{
    GinBtreeData btree;
    GinBtreeEntryInsertData insertdata;
    GinBtreeStack *stack;
    IndexTuple  leaftup;
    Page        page;
    bool        found;

    ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
    btree.isBuild = (buildStats != NULL);

    stack = ginFindLeafPage(&btree, false, false);
    page = BufferGetPage(stack->buffer);

    found = btree.findItem(&btree, stack);

    if (found)
    {
        /* the key already has an entry on this page */
        leaftup = (IndexTuple) PageGetItem(page,
                                           PageGetItemId(page, stack->off));

        if (GinIsPostingTree(leaftup))
        {
            /*
             * The entry points to a posting tree.  Remember its root, let go
             * of the whole entry-tree stack, and insert the TIDs into the
             * posting tree; the leaf tuple itself is left alone.
             */
            BlockNumber treeRoot = GinGetPostingTree(leaftup);

            LockBuffer(stack->buffer, GIN_UNLOCK);
            freeGinBtreeStack(stack);

            ginInsertItemPointers(ginstate->index, treeRoot,
                                  items, nitem,
                                  buildStats);
            return;
        }
    }

    /* From here on the leaf page itself is going to be modified */
    CheckForSerializableConflictIn(ginstate->index, NULL,
                                   BufferGetBlockNumber(stack->buffer));

    if (found)
    {
        /* replace the existing leaf entry with an enlarged one */
        leaftup = addItemPointersToLeafTuple(ginstate, leaftup,
                                             items, nitem,
                                             buildStats, stack->buffer);
    }
    else
    {
        /* no match, so construct a new leaf entry */
        leaftup = buildFreshLeafTuple(ginstate, attnum, key, category,
                                      items, nitem,
                                      buildStats, stack->buffer);

        /*
         * nEntries counts leaf tuples, so it is bumped only when a new one
         * is made.
         */
        if (buildStats != NULL)
            buildStats->nEntries++;
    }

    /* Insert the new or modified leaf tuple; an existing one is replaced */
    insertdata.isDelete = found;
    insertdata.entry = leaftup;
    ginInsertValue(&btree, stack, &insertdata, buildStats);
    pfree(leaftup);
}


/*
 * Extract the index entries of one indexable value and add them to the
 * build state's BuildAccumulator.
 *
 * Entry extraction runs in buildstate->funcCtx, which is reset before
 * returning.  buildstate->indtuples is advanced by the number of entries
 * extracted.
 *
 * This function is used only during initial index creation.
 */
static void
ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
                       Datum value, bool isNull,
                       ItemPointer heapptr)
{
    BuildAccumulator *accum = &buildstate->accum;
    MemoryContext callerCtx;
    GinNullCategory *cats;
    Datum      *keys;
    int32       nkeys;

    /* Run the extraction inside the per-call context */
    callerCtx = MemoryContextSwitchTo(buildstate->funcCtx);
    keys = ginExtractEntries(accum->ginstate, attnum,
                             value, isNull,
                             &nkeys, &cats);
    MemoryContextSwitchTo(callerCtx);

    /* Hand the extracted entries to the accumulator and count them */
    ginInsertBAEntries(accum, heapptr, attnum,
                       keys, cats, nkeys);
    buildstate->indtuples += nkeys;

    /* Release everything the extraction allocated */
    MemoryContextReset(buildstate->funcCtx);
}


/*
 * ginBuildCallback
 *      Per-tuple callback for the serial index build.
 *
 * Accumulates the entries of every indexed column of the tuple in the
 * BuildAccumulator.  Once the accumulator has reached maintenance_work_mem,
 * all accumulated entries are inserted into the index and the accumulator
 * is reinitialized.
 *
 * "state" is the GinBuildState of the build.  The index and tupleIsAlive
 * arguments are not used.
 */
static void
ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
                 bool *isnull, bool tupleIsAlive, void *state)
{
    GinBuildState *buildstate = (GinBuildState *) state;
    BuildAccumulator *accum = &buildstate->accum;
    MemoryContext callerCtx;
    int         natts;
    int         col;

    callerCtx = MemoryContextSwitchTo(buildstate->tmpCtx);

    natts = buildstate->ginstate.origTupdesc->natts;
    for (col = 0; col < natts; col++)
        ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (col + 1),
                               values[col], isnull[col], tid);

    /* If we've maxed out our available memory, dump everything to the index */
    if (accum->allocatedMemory >= maintenance_work_mem * (Size) 1024)
    {
        ItemPointerData *tids;
        GinNullCategory category;
        OffsetNumber attnum;
        Datum       key;
        uint32      ntids;

        ginBeginBAScan(accum);
        for (;;)
        {
            tids = ginGetBAEntry(accum, &attnum, &key, &category, &ntids);
            if (tids == NULL)
                break;

            /* there could be many entries, so be willing to abort here */
            CHECK_FOR_INTERRUPTS();

            ginEntryInsert(&buildstate->ginstate, attnum, key, category,
                           tids, ntids, &buildstate->buildStats);
        }

        /* Drop the accumulated data and start over with a fresh accumulator */
        MemoryContextReset(buildstate->tmpCtx);
        ginInitBA(accum);
    }

    MemoryContextSwitchTo(callerCtx);
}


/*
 * ginFlushBuildState
 *      Move everything held by the BuildAccumulator into the tuplesort.
 *
 * Each accumulated (key, TID list) pair is turned into a GinTuple and put
 * into buildstate->bs_worker_sort.  Afterwards the temporary context is
 * reset and the accumulator is reinitialized.
 */
static void
ginFlushBuildState(GinBuildState *buildstate, Relation index)
{
    BuildAccumulator *accum = &buildstate->accum;
    TupleDesc   tdesc = RelationGetDescr(index);
    ItemPointerData *tids;
    GinNullCategory category;
    OffsetNumber attnum;
    Datum       key;
    uint32      ntids;

    ginBeginBAScan(accum);
    for (;;)
    {
        Form_pg_attribute attr;
        GinTuple   *gtup;
        Size        gtuplen;

        tids = ginGetBAEntry(accum, &attnum, &key, &category, &ntids);
        if (tids == NULL)
            break;

        /* type information about the key comes from the index column */
        attr = TupleDescAttr(tdesc, (attnum - 1));

        /* there could be many entries, so be willing to abort here */
        CHECK_FOR_INTERRUPTS();

        gtup = _gin_build_tuple(attnum, category,
                                key, attr->attlen, attr->attbyval,
                                tids, ntids, &gtuplen);

        tuplesort_putgintuple(buildstate->bs_worker_sort, gtup, gtuplen);

        /* our copy of the tuple is freed once it has been put into the sort */
        pfree(gtup);
    }

    MemoryContextReset(buildstate->tmpCtx);
    ginInitBA(accum);
}


/*
 * ginBuildCallbackParallel
 *      Per-tuple callback for the parallel index build.
 *
 * Works like the serial callback ginBuildCallback, except that accumulated
 * entries are not inserted into the index: each worker writes them into a
 * (local) tuplesort instead.
 *
 * The worker later sorts and combines these entries and writes them into a
 * shared tuplesort for the leader; see _gin_parallel_scan_and_build for the
 * whole process.
 */
static void
ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
                         bool *isnull, bool tupleIsAlive, void *state)
{
    GinBuildState *buildstate = (GinBuildState *) state;
    MemoryContext callerCtx;
    int         natts;
    int         col;

    callerCtx = MemoryContextSwitchTo(buildstate->tmpCtx);

    /*
     * Flush the accumulated entries and start anew if the scan wrapped
     * around.
     *
     * A parallel scan is not guaranteed to start at the beginning of the
     * relation: serial builds disable sync scans and always start from block
     * 0, but parallel scans require allow_sync=true, so the scan may start
     * half-way through.
     *
     * Posting lists are built on the assumption that TIDs are monotonic and
     * never go back, which a wraparound would violate.  Flushing everything
     * at that point keeps each batch monotonic; we will later see two
     * separate entries with non-overlapping TID lists, which the merge sort
     * can combine.
     *
     * The wraparound is detected by remembering the last TID seen by this
     * worker (for any key): if the next TID is lower, the scan must have
     * wrapped around.
     */
    if (ItemPointerCompare(tid, &buildstate->tid) < 0)
        ginFlushBuildState(buildstate, index);

    /* remember the TID we're about to process */
    buildstate->tid = *tid;

    natts = buildstate->ginstate.origTupdesc->natts;
    for (col = 0; col < natts; col++)
        ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (col + 1),
                               values[col], isnull[col], tid);

    /*
     * Dump everything to the tuplesort once the available memory is used up.
     * The limit is half the per-worker fraction of maintenance_work_mem; the
     * other half is used for the tuplesort.
     */
    if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
        ginFlushBuildState(buildstate, index);

    MemoryContextSwitchTo(callerCtx);
}


/*
 * ginbuild
 *      Build a new GIN index over the given heap relation.
 *
 * The index must not contain any blocks yet.  The meta page and an empty
 * leaf root page are created first.  The index is then populated either by
 * a parallel build (when workers were requested and a leader state could be
 * set up) or by a serial heap scan.  Finally the metapage statistics are
 * updated and, if the index needs WAL, all of its pages are logged.
 *
 * Returns a palloc'd IndexBuildResult carrying the number of heap tuples
 * scanned and the number of index entries extracted.
 */
IndexBuildResult *
ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
    GinBuildState buildstate;
    GinBuildState *state = &buildstate;
    IndexBuildResult *result;
    Buffer      metabuf;
    Buffer      rootbuf;
    double      reltuples;

    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    initGinState(&state->ginstate, index);
    state->indtuples = 0;
    memset(&state->buildStats, 0, sizeof(GinStatsData));

    /* Initialize fields for parallel build too. */
    state->bs_numtuples = 0;
    state->bs_reltuples = 0;
    state->bs_leader = NULL;
    memset(&state->tid, 0, sizeof(ItemPointerData));

    /* allocate the meta page first, then the root page */
    metabuf = GinNewBuffer(index);
    rootbuf = GinNewBuffer(index);

    /* initialize both pages; the root starts out as an empty leaf page */
    START_CRIT_SECTION();
    GinInitMetabuffer(metabuf);
    MarkBufferDirty(metabuf);
    GinInitBuffer(rootbuf, GIN_LEAF);
    MarkBufferDirty(rootbuf);

    UnlockReleaseBuffer(metabuf);
    UnlockReleaseBuffer(rootbuf);
    END_CRIT_SECTION();

    /* count the root as first entry page */
    state->buildStats.nEntryPages++;

    /*
     * Temporary memory context for data that has been accumulated but not
     * yet dumped out to the index.
     */
    state->tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
                                          "Gin build temporary context",
                                          ALLOCSET_DEFAULT_SIZES);

    /*
     * Temporary memory context for calling ginExtractEntries(); it can be
     * reset after each tuple.
     */
    state->funcCtx = AllocSetContextCreate(CurrentMemoryContext,
                                           "Gin build temporary context for user-defined function",
                                           ALLOCSET_DEFAULT_SIZES);

    state->accum.ginstate = &state->ginstate;
    ginInitBA(&state->accum);

    /* Report table scan phase started */
    pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
                                 PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);

    /*
     * Attempt to launch parallel worker scan when required
     *
     * XXX plan_create_index_workers makes the number of workers dependent on
     * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
     * reasonable too, because we sort the data just like btree. It does
     * ignore the memory used to accumulate data in memory (set by work_mem),
     * but there is no way to communicate that to plan_create_index_workers.
     */
    if (indexInfo->ii_ParallelWorkers > 0)
        _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
                            indexInfo->ii_ParallelWorkers);

    /*
     * _gin_begin_parallel() sets bs_leader only if at least one worker
     * process could be launched.  Without a leader state, build serially:
     * scan the table and build the index one index tuple at a time.
     * Otherwise set up coordination state, wait for the workers to complete,
     * then read all tuples from the shared tuplesort and insert them into
     * the index.
     */
    if (state->bs_leader == NULL)
    {
        ItemPointerData *tids;
        GinNullCategory category;
        OffsetNumber attnum;
        MemoryContext callerCtx;
        Datum       key;
        uint32      ntids;

        /*
         * Do the heap scan.  We disallow sync scan here because
         * dataPlaceToPage prefers to receive tuples in TID order.
         */
        reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
                                           ginBuildCallback, state, NULL);

        /* dump remaining entries to the index */
        callerCtx = MemoryContextSwitchTo(state->tmpCtx);
        ginBeginBAScan(&state->accum);
        for (;;)
        {
            tids = ginGetBAEntry(&state->accum,
                                 &attnum, &key, &category, &ntids);
            if (tids == NULL)
                break;

            /* there could be many entries, so be willing to abort here */
            CHECK_FOR_INTERRUPTS();

            ginEntryInsert(&state->ginstate, attnum, key, category,
                           tids, ntids, &state->buildStats);
        }
        MemoryContextSwitchTo(callerCtx);
    }
    else
    {
        GinLeader  *leader = state->bs_leader;
        SortCoordinate coordinate;

        coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
        coordinate->isWorker = false;
        coordinate->nParticipants = leader->nparticipanttuplesorts;
        coordinate->sharedsort = leader->sharedsort;

        /*
         * Begin leader tuplesort.
         *
         * The leader's sort is given the full maintenance_work_mem, the same
         * share a serial sort would get, whereas parallel worker
         * Tuplesortstates will have received only a fraction of it.
         *
         * This relies on the lifetime of the leader Tuplesortstate almost
         * not overlapping with any worker Tuplesortstate's lifetime.  Some
         * small overlap is okay, because the leader Tuplesortstate only
         * allocates a small, fixed amount of memory here; by the time it
         * uses significant amounts of memory, all workers must have already
         * freed almost all memory held by their Tuplesortstates.  The
         * overall effect is that maintenance_work_mem always represents an
         * absolute high watermark on the amount of memory used by a CREATE
         * INDEX operation, regardless of the use of parallelism or any other
         * factor.
         */
        state->bs_sortstate =
            tuplesort_begin_index_gin(heap, index,
                                      maintenance_work_mem, coordinate,
                                      TUPLESORT_NONE);

        /* scan the relation in parallel and merge per-worker results */
        reltuples = _gin_parallel_merge(state);

        _gin_end_parallel(leader, state);
    }

    MemoryContextDelete(state->funcCtx);
    MemoryContextDelete(state->tmpCtx);

    /* Update metapage stats */
    state->buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
    ginUpdateStats(index, &state->buildStats, true);

    /*
     * We didn't write WAL records as we built the index, so if WAL-logging is
     * required, write all pages to the WAL now.
     */
    if (RelationNeedsWAL(index))
        log_newpage_range(index, MAIN_FORKNUM,
                          0, RelationGetNumberOfBlocks(index),
                          true);

    /* Return statistics */
    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
    result->heap_tuples = reltuples;
    result->index_tuples = state->indtuples;

    return result;
}


/*
 *  ginbuildempty() -- build an empty gin index in the initialization fork
 *
 * The init fork is extended by two pages, which are initialized as the meta
 * page and an empty leaf root page; both are WAL-logged.
 */
void
ginbuildempty(Relation index)
{
    Buffer      metabuf;
    Buffer      rootbuf;

    /* An empty GIN index has two pages: the meta page, then the root. */
    metabuf = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
                                EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
    rootbuf = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
                                EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);

    /* Initialize and xlog both pages inside one critical section. */
    START_CRIT_SECTION();

    GinInitMetabuffer(metabuf);
    MarkBufferDirty(metabuf);
    log_newpage_buffer(metabuf, true);

    GinInitBuffer(rootbuf, GIN_LEAF);
    MarkBufferDirty(rootbuf);
    log_newpage_buffer(rootbuf, false);

    END_CRIT_SECTION();

    /* Unlock and release the buffers. */
    UnlockReleaseBuffer(metabuf);
    UnlockReleaseBuffer(rootbuf);
}


/*
 * Insert the index entries of a single indexable item during "normal"
 * (non-fast-update) insertion.
 *
 * Every entry extracted from the value is inserted into the index on its
 * own, with the one heap TID "item" and without build statistics.
 */
static void
ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
                   Datum value, bool isNull,
                   ItemPointer item)
{
    GinNullCategory *cats;
    Datum      *keys;
    int32       nkeys;
    int32       pos = 0;

    keys = ginExtractEntries(ginstate, attnum, value, isNull,
                             &nkeys, &cats);

    while (pos < nkeys)
    {
        ginEntryInsert(ginstate, attnum, keys[pos], cats[pos],
                       item, 1, NULL);
        pos++;
    }
}


/*
 * gininsert
 *      Insert the index entries for one heap tuple.
 *
 * The GinState is built on first use and cached in indexInfo->ii_AmCache,
 * allocated in indexInfo->ii_Context.  All other work is done in a
 * temporary memory context that is deleted before returning.
 *
 * If the index uses fast update, the entries of all columns are collected
 * and passed to ginHeapTupleFastInsert(); otherwise each column's entries
 * are inserted directly via ginHeapTupleInsert().
 *
 * heapRel, checkUnique and indexUnchanged are not used.  Always returns
 * false.
 */
bool
gininsert(Relation index, Datum *values, bool *isnull,
          ItemPointer ht_ctid, Relation heapRel,
          IndexUniqueCheck checkUnique,
          bool indexUnchanged,
          IndexInfo *indexInfo)
{
    GinState   *ginstate = (GinState *) indexInfo->ii_AmCache;
    MemoryContext callerCtx;
    MemoryContext scratchCtx;
    int         natts;
    int         col;

    /* Initialize GinState cache if first call in this statement */
    if (ginstate == NULL)
    {
        MemoryContext savedCtx = MemoryContextSwitchTo(indexInfo->ii_Context);

        ginstate = (GinState *) palloc(sizeof(GinState));
        initGinState(ginstate, index);
        indexInfo->ii_AmCache = ginstate;

        MemoryContextSwitchTo(savedCtx);
    }

    scratchCtx = AllocSetContextCreate(CurrentMemoryContext,
                                       "Gin insert temporary context",
                                       ALLOCSET_DEFAULT_SIZES);
    callerCtx = MemoryContextSwitchTo(scratchCtx);

    natts = ginstate->origTupdesc->natts;

    if (!GinGetUseFastUpdate(index))
    {
        /* insert each column's entries directly */
        for (col = 0; col < natts; col++)
            ginHeapTupleInsert(ginstate, (OffsetNumber) (col + 1),
                               values[col], isnull[col],
                               ht_ctid);
    }
    else
    {
        /* collect the entries of all columns, then insert them in one go */
        GinTupleCollector collector;

        memset(&collector, 0, sizeof(GinTupleCollector));

        for (col = 0; col < natts; col++)
            ginHeapTupleFastCollect(ginstate, &collector,
                                    (OffsetNumber) (col + 1),
                                    values[col], isnull[col],
                                    ht_ctid);

        ginHeapTupleFastInsert(ginstate, &collector);
    }

    MemoryContextSwitchTo(callerCtx);
    MemoryContextDelete(scratchCtx);

    return false;
}


/*

 * Create parallel context, and launch workers for leader.

 *

 * buildstate argument should be initialized (with the exception of the

 * tuplesort states, which may later be created based on shared

 * state initially set up here).

 *

 * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.

 *

 * request is the target number of parallel worker processes to launch.

 *

 * Sets buildstate's GinLeader, which caller must use to shut down parallel

 * mode by passing it to _gin_end_parallel() at the very end of its index

 * build.  If not even a single worker process can be launched, this is

 * never set, and caller should proceed with a serial index build.

 */

static void

_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,

                    bool isconcurrent, int request)

{

    ParallelContext *pcxt;

    int         scantuplesortstates;

    Snapshot    snapshot;

    Size        estginshared;

    Size        estsort;

    GinBuildShared *ginshared;

    Sharedsort *sharedsort;

    GinLeader  *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));

    WalUsage   *walusage;

    BufferUsage *bufferusage;

    bool        leaderparticipates = true;

    int         querylen;


    /*
     * Compile-time switch: when defined, the leader does not join the heap
     * scan itself (no extra tuplesort participant, no call to
     * _gin_leader_participate_as_worker below).
     */

#ifdef DISABLE_LEADER_PARTICIPATION

    leaderparticipates = false;

#endif


    /*

     * Enter parallel mode, and create context for parallel build of gin index

     */

    EnterParallelMode();

    Assert(request > 0);

    pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",

                                 request);


    /* one tuplesort participant per requested worker, plus the leader */

    scantuplesortstates = leaderparticipates ? request + 1 : request;


    /*

     * Prepare for scan of the base relation.  In a normal index build, we use

     * SnapshotAny because we must retrieve all tuples and do our own time

     * qual checks (because we have to index RECENTLY_DEAD tuples).  In a

     * concurrent build, we take a regular MVCC snapshot and index whatever's

     * live according to that.

     */

    if (!isconcurrent)

        snapshot = SnapshotAny;

    else

        snapshot = RegisterSnapshot(GetTransactionSnapshot());


    /*

     * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.

     */

    estginshared = _gin_parallel_estimate_shared(heap, snapshot);

    shm_toc_estimate_chunk(&pcxt->estimator, estginshared);

    estsort = tuplesort_estimate_shared(scantuplesortstates);

    shm_toc_estimate_chunk(&pcxt->estimator, estsort);


    /* two keys: PARALLEL_KEY_GIN_SHARED and PARALLEL_KEY_TUPLESORT */

    shm_toc_estimate_keys(&pcxt->estimator, 2);


    /*

     * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE

     * and PARALLEL_KEY_BUFFER_USAGE.

     *

     * If there are no extensions loaded that care, we could skip this.  We

     * have no way of knowing whether anyone's looking at pgWalUsage or

     * pgBufferUsage, so do it unconditionally.

     */

    shm_toc_estimate_chunk(&pcxt->estimator,

                           mul_size(sizeof(WalUsage), pcxt->nworkers));

    shm_toc_estimate_keys(&pcxt->estimator, 1);

    shm_toc_estimate_chunk(&pcxt->estimator,

                           mul_size(sizeof(BufferUsage), pcxt->nworkers));

    shm_toc_estimate_keys(&pcxt->estimator, 1);


    /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */

    if (debug_query_string)

    {

        querylen = strlen(debug_query_string);

        shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);

        shm_toc_estimate_keys(&pcxt->estimator, 1);

    }

    else

        querylen = 0;           /* keep compiler quiet */


    /* Everyone's had a chance to ask for space, so now create the DSM */

    InitializeParallelDSM(pcxt);


    /* If no DSM segment was available, back out (do serial build) */

    if (pcxt->seg == NULL)

    {

        /*
         * The snapshot was registered above only in the concurrent case;
         * SnapshotAny was never registered, so it must not be unregistered.
         *
         * NOTE(review): ginleader is not pfree'd on this path; presumably it
         * is reclaimed with the enclosing memory context -- confirm.
         */

        if (IsMVCCSnapshot(snapshot))

            UnregisterSnapshot(snapshot);

        DestroyParallelContext(pcxt);

        ExitParallelMode();

        return;

    }


    /* Store shared build state, for which we reserved space */

    ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);

    /* Initialize immutable state */

    ginshared->heaprelid = RelationGetRelid(heap);

    ginshared->indexrelid = RelationGetRelid(index);

    ginshared->isconcurrent = isconcurrent;

    ginshared->scantuplesortstates = scantuplesortstates;


    ConditionVariableInit(&ginshared->workersdonecv);

    SpinLockInit(&ginshared->mutex);


    /* Initialize mutable state */

    ginshared->nparticipantsdone = 0;

    ginshared->reltuples = 0.0;

    ginshared->indtuples = 0.0;


    table_parallelscan_initialize(heap,

                                  ParallelTableScanFromGinBuildShared(ginshared),

                                  snapshot);


    /*

     * Store shared tuplesort-private state, for which we reserved space.

     * Then, initialize opaque state using tuplesort routine.

     */

    sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);

    tuplesort_initialize_shared(sharedsort, scantuplesortstates,

                                pcxt->seg);


    shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);

    shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);


    /* Store query string for workers */

    if (debug_query_string)

    {

        char       *sharedquery;


        /* querylen was computed during estimation; copy includes the NUL */

        sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);

        memcpy(sharedquery, debug_query_string, querylen + 1);

        shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);

    }


    /*

     * Allocate space for each worker's WalUsage and BufferUsage; no need to

     * initialize.

     */

    walusage = shm_toc_allocate(pcxt->toc,

                                mul_size(sizeof(WalUsage), pcxt->nworkers));

    shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);

    bufferusage = shm_toc_allocate(pcxt->toc,

                                   mul_size(sizeof(BufferUsage), pcxt->nworkers));

    shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);


    /* Launch workers, saving status for leader/caller */

    LaunchParallelWorkers(pcxt);

    ginleader->pcxt = pcxt;

    /* count only workers actually launched, which may be fewer than requested */

    ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;

    if (leaderparticipates)

        ginleader->nparticipanttuplesorts++;

    ginleader->ginshared = ginshared;

    ginleader->sharedsort = sharedsort;

    ginleader->snapshot = snapshot;

    ginleader->walusage = walusage;

    ginleader->bufferusage = bufferusage;


    /* If no workers were successfully launched, back out (do serial build) */

    if (pcxt->nworkers_launched == 0)

    {

        /* bs_leader has not been set yet, so the caller sees no leader state */

        _gin_end_parallel(ginleader, NULL);

        return;

    }


    /* Save leader state now that it's clear build will be parallel */

    buildstate->bs_leader = ginleader;


    /* Join heap scan ourselves */

    if (leaderparticipates)

        _gin_leader_participate_as_worker(buildstate, heap, index);


    /*

     * Caller needs to wait for all launched workers when we return.  Make

     * sure that the failure-to-start case will not hang forever.

     */

    WaitForParallelWorkersToAttach(pcxt);

}


/*

 * Shut down workers, destroy parallel context, and end parallel mode.

 */

static void
_gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
{
    ParallelContext *pcxt = ginleader->pcxt;

    /* Block until every launched worker process has finished. */
    WaitForParallelWorkersToFinish(pcxt);

    /*
     * Fold each worker's buffer and WAL usage counters into the leader's.
     * This has to happen after the wait above, otherwise the per-worker
     * slots could still be incomplete.
     */
    for (int worker = 0; worker < pcxt->nworkers_launched; worker++)
    {
        BufferUsage *bufusage = &ginleader->bufferusage[worker];
        WalUsage   *wal = &ginleader->walusage[worker];

        InstrAccumParallelQuery(bufusage, wal);
    }

    /* Drop the last reference to the MVCC snapshot, if the build used one. */
    if (IsMVCCSnapshot(ginleader->snapshot))
    {
        UnregisterSnapshot(ginleader->snapshot);
    }

    /* Tear down the parallel context and leave parallel mode. */
    DestroyParallelContext(pcxt);
    ExitParallelMode();
}


/*

 * Within leader, wait for end of heap scan.

 *

 * When called, parallel heap scan started by _gin_begin_parallel() will

 * already be underway within worker processes (when leader participates

 * as a worker, we should end up here just as workers are finishing).

 *

 * Returns the total number of heap tuples scanned.

 */

static double
_gin_parallel_heapscan(GinBuildState *state)
{
    GinLeader  *leader = state->bs_leader;
    GinBuildShared *shared = leader->ginshared;
    int         expected = leader->nparticipanttuplesorts;

    for (;;)
    {
        bool        alldone;

        /* The shared counters are only read while holding the spinlock. */
        SpinLockAcquire(&shared->mutex);
        alldone = (shared->nparticipantsdone == expected);
        if (alldone)
        {
            /* every participant has reported; copy totals to leader state */
            state->bs_reltuples = shared->reltuples;
            state->bs_numtuples = shared->indtuples;
        }
        SpinLockRelease(&shared->mutex);

        if (alldone)
            break;

        /* Not everyone is done yet; sleep until signalled and re-check. */
        ConditionVariableSleep(&shared->workersdonecv,
                               WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
    }

    ConditionVariableCancelSleep();

    return state->bs_reltuples;
}


/*

 * Buffer used to accumulate TIDs from multiple GinTuples for the same key

 * (we read these from the tuplesort, sorted by the key).

 *

 * This is similar to BuildAccumulator in that it's used to collect TIDs

 * in memory before inserting them into the index, but it's much simpler

 * as it only deals with a single index key at a time.

 *

 * When adding TIDs to the buffer, we make sure to keep them sorted, both

 * during the initial table scan (and detecting when the scan wraps around),

 * and during merging (where we do mergesort).

 */

typedef struct GinBuffer

{

    OffsetNumber attnum;        /* index attribute of the buffered key */

    GinNullCategory category;   /* key category (GIN_CAT_NORM_KEY or not) */

    Datum       key;            /* 0 if no key (and keylen == 0) */

    Size        keylen;         /* number of bytes (not typlen) */


    /* type info */

    int16       typlen;

    bool        typbyval;


    /* Number of TIDs to collect before attempt to write some out. */

    int         maxitems;


    /* array of TID values */

    int         nitems;         /* number of TIDs currently in items[] */

    int         nfrozen;        /* leading TIDs in items[] excluded from
                                 * further merging (see GinBufferStoreTuple) */

    SortSupport ssup;           /* for sorting/comparing keys */

    ItemPointerData *items;     /* the TIDs; NULL until first allocated */

} GinBuffer;


/*

 * Check that TID array contains valid values, and that it's sorted (if we

 * expect it to be).

 */

static void
AssertCheckItemPointers(GinBuffer *buffer)
{
#ifdef USE_ASSERT_CHECKING
    /* callers must only pass a buffer that actually holds TIDs */
    Assert(buffer->items != NULL);
    Assert(buffer->nitems > 0);

    for (int pos = 0; pos < buffer->nitems; pos++)
    {
        ItemPointerData *cur = &buffer->items[pos];

        Assert(ItemPointerIsValid(cur));

        /* every TID after the first must be strictly greater than its predecessor */
        if (pos > 0)
        {
            Assert(ItemPointerCompare(&buffer->items[pos - 1], cur) < 0);
        }
    }
#endif
}


/*

 * GinBuffer checks

 *

 * Make sure the nitems/items fields are consistent (either the array is empty

 * or not empty, the fields need to agree). If there are items, check ordering.

 */

static void
AssertCheckGinBuffer(GinBuffer *buffer)
{
#ifdef USE_ASSERT_CHECKING
    /* a non-zero item count requires the array to be allocated */
    Assert((buffer->nitems <= 0) || (buffer->items != NULL));

    /*
     * Only a non-empty buffer gets the per-TID validation, because that
     * check asserts the buffer holds at least one item.
     */
    if (buffer->nitems != 0)
        AssertCheckItemPointers(buffer);
#endif
}


/*

 * GinBufferInit

 *      Initialize buffer to store tuples for a GIN index.

 *

 * Initialize the buffer used to accumulate TID for a single key at a time

 * (we process the data sorted), so we know when we received all data for

 * a given key.

 *

 * Initializes sort support procedures for all index attributes.

 */

static GinBuffer *
GinBufferInit(Relation index)
{
    GinBuffer  *result = palloc0(sizeof(GinBuffer));
    TupleDesc   tupdesc = RelationGetDescr(index);
    int         nkeyatts;

    /*
     * Cap the TID array at 64kB worth of item pointers, so that we do not
     * end up with too many TIDs.  Perhaps this should be derived from
     * maintenance_work_mem or something like that instead?
     */
    result->maxitems = (64 * 1024L) / sizeof(ItemPointerData);

    nkeyatts = IndexRelationGetNumberOfKeyAttributes(index);

    /* one zeroed SortSupportData per key attribute */
    result->ssup = palloc0(sizeof(SortSupportData) * nkeyatts);

    /*
     * For each key attribute, find the comparison function and set up the
     * sort support entry used to compare keys.
     */
    for (int attidx = 0; attidx < nkeyatts; attidx++)
    {
        SortSupport ssup = &result->ssup[attidx];
        Form_pg_attribute attr = TupleDescAttr(tupdesc, attidx);
        Oid         cmpproc;

        ssup->ssup_cxt = CurrentMemoryContext;
        ssup->ssup_collation = index->rd_indcollation[attidx];

        /* fall back to the default collation if the index has none */
        if (!OidIsValid(ssup->ssup_collation))
            ssup->ssup_collation = DEFAULT_COLLATION_OID;

        ssup->ssup_nulls_first = false;
        ssup->ssup_attno = attidx + 1;
        ssup->abbreviate = false;

        Assert(ssup->ssup_attno != 0);

        /*
         * Prefer the opclass compare proc.  When the opclass does not
         * specify one, use the key type's default btree comparator.
         */
        cmpproc = index_getprocid(index, attidx + 1, GIN_COMPARE_PROC);
        if (!OidIsValid(cmpproc))
        {
            TypeCacheEntry *tce = lookup_type_cache(attr->atttypid,
                                                    TYPECACHE_CMP_PROC_FINFO);

            if (!OidIsValid(tce->cmp_proc_finfo.fn_oid))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_FUNCTION),
                         errmsg("could not identify a comparison function for type %s",
                                format_type_be(attr->atttypid))));

            cmpproc = tce->cmp_proc_finfo.fn_oid;
        }

        PrepareSortSupportComparisonShim(cmpproc, ssup);
    }

    return result;
}


/* Report whether the buffer's TID array currently holds no entries. */
static bool
GinBufferIsEmpty(GinBuffer *buffer)
{
    bool        empty = (buffer->nitems == 0);

    return empty;
}


/*

 * GinBufferKeyEquals

 *      Can the buffer store TIDs for the provided GIN tuple (same key)?

 *

 * Compare if the tuple matches the already accumulated data in the GIN

 * buffer. Compare scalar fields first, before the actual key.

 *

 * Returns true if the key matches, and the TID belongs to the buffer, or

 * false if the key does not match.

 */

static bool
GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
{
    Datum       candidate;
    SortSupport ssup;

    AssertCheckGinBuffer(buffer);

    /* a different attribute can never be the same key */
    if (buffer->attnum != tup->attrnum)
        return false;

    /* within one attribute the type info has to agree */
    Assert(tup->typbyval == buffer->typbyval);
    Assert(tup->typlen == buffer->typlen);

    if (buffer->category != tup->category)
        return false;

    /*
     * Categories match.  For anything but a normal key that already means
     * equality; only normal keys need their values compared.
     */
    if (buffer->category != GIN_CAT_NORM_KEY)
        return true;

    /*
     * Extract the tuple's key: by-value types are read as a Datum from the
     * start of the data array, others are referenced through a pointer to it.
     */
    if (buffer->typbyval)
        candidate = *(Datum *) tup->data;
    else
        candidate = PointerGetDatum(tup->data);

    ssup = &buffer->ssup[buffer->attnum - 1];

    return ApplySortComparator(buffer->key, false,
                               candidate, false,
                               ssup) == 0;
}


/*

 * GinBufferShouldTrim

 *      Should we trim the list of item pointers?

 *

 * By trimming we understand writing out and removing the tuple IDs that

 * we know can't change by future merges. We can deduce the TID up to which

 * this is guaranteed from the "first" TID in each GIN tuple, which provides

 * a "horizon" (for a given key) thanks to the sort.

 *

 * We don't want to do this too often - compressing longer TID lists is more

 * efficient. But we also don't want to accumulate too many TIDs, for two

 * reasons. First, it consumes memory and we might exceed maintenance_work_mem

 * (or whatever limit applies), even if that's unlikely because TIDs are very

 * small so we can fit a lot of them. Second, and more importantly, long TID

 * lists are an issue if the scan wraps around, because a key may get a very

 * wide list (with min/max TID for that key), forcing "full" mergesorts for

 * every list merged into it (instead of the efficient append).

 *

 * So we look at two things when deciding if to trim - if the resulting list

 * (after adding TIDs from the new tuple) would be too long, and if there is

 * enough TIDs to trim (with values less than "first" TID from the new tuple),

 * we do the trim. By enough we mean at least 1024 TIDs (mostly an arbitrary

 * number).

 */

static bool
GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
{
    /*
     * Trim only when both conditions hold: there are at least 1024 frozen
     * TIDs to write out (a somewhat arbitrary threshold), and adding the new
     * tuple's TIDs would reach the buffer's item limit.
     */
    if (buffer->nfrozen >= 1024 &&
        (buffer->nitems + tup->nitems) >= buffer->maxitems)
        return true;

    return false;
}


/*

 * GinBufferStoreTuple

 *      Add data (especially TID list) from a GIN tuple to the buffer.

 *

 * The buffer is expected to be empty (in which case it's initialized), or

 * having the same key. The TID values from the tuple are combined with the

 * stored values using a merge sort.

 *

 * The tuples (for the same key) are expected to be sorted by first TID. But

 * this does not guarantee the lists do not overlap, especially in the leader,

 * because the workers process interleaving data. There should be no overlaps

 * in a single worker - it could happen when the parallel scan wraps around,

 * but we detect that and flush the data (see ginBuildCallbackParallel).

 *

 * By sorting the GinTuple not only by key, but also by the first TID, we make

 * it less likely the lists will overlap during merge. We merge them using

 * mergesort, but it's cheaper to just append one list to the other.

 *

 * How often can the lists overlap? There should be no overlaps in workers,

 * and in the leader we can see overlaps between lists built by different

 * workers. But the workers merge the items as much as possible, so there

 * should not be too many.

 */

static void
GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
{
    ItemPointerData *items;
    Datum       key;

    AssertCheckGinBuffer(buffer);

    key = _gin_parse_tuple_key(tup);
    items = _gin_parse_tuple_items(tup);

    /* if the buffer is empty, set the fields (and copy the key) */
    if (GinBufferIsEmpty(buffer))
    {
        buffer->category = tup->category;
        buffer->keylen = tup->keylen;
        buffer->attnum = tup->attrnum;

        buffer->typlen = tup->typlen;
        buffer->typbyval = tup->typbyval;

        if (tup->category == GIN_CAT_NORM_KEY)
            buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
        else
            buffer->key = (Datum) 0;
    }

    /*
     * Try freeze TIDs at the beginning of the list, i.e. exclude them from
     * the mergesort. We can do that with TIDs before the first TID in the new
     * tuple we're about to add into the buffer.
     *
     * We do this incrementally when adding data into the in-memory buffer,
     * and not later (e.g. when hitting a memory limit), because it allows us
     * to skip the frozen data during the mergesort, making it cheaper.
     */

    /*
     * Fast path: if the last TID in the current list does not exceed the
     * first TID of the new tuple, the lists do not overlap and the whole
     * current list is frozen. This is the case when merging non-overlapping
     * lists, e.g. in each parallel worker.
     *
     * The comparison must be "<= 0", not "== 0": the TIDs for a key are
     * strictly ordered, so an equality-only test would never fire and every
     * call would fall through to the linear walk below. The result is the
     * same as that walk would produce, just without scanning the list.
     */
    if ((buffer->nitems > 0) &&
        (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
                            GinTupleGetFirst(tup)) <= 0))
        buffer->nfrozen = buffer->nitems;

    /*
     * Now find the last TID we know to be frozen, i.e. the last TID right
     * before the new GIN tuple.
     *
     * Start with the first not-yet-frozen tuple, and walk until we find the
     * first TID that's higher. If we already know the whole list is frozen
     * (i.e. nfrozen == nitems), this does nothing.
     *
     * XXX This might do a binary search for sufficiently long lists, but it
     * does not seem worth the complexity. Overlapping lists should be rare,
     * TID comparisons are cheap, and we should quickly freeze most of the
     * list.
     */
    for (int i = buffer->nfrozen; i < buffer->nitems; i++)
    {
        /* Is the TID after the first TID of the new tuple? Can't freeze. */
        if (ItemPointerCompare(&buffer->items[i],
                               GinTupleGetFirst(tup)) > 0)
            break;

        buffer->nfrozen++;
    }

    /* add the new TIDs into the buffer, combine using merge-sort */
    {
        int         nnew;
        ItemPointer new;

        /*
         * Resize the array - we do this first, because we'll dereference the
         * first unfrozen TID, which would fail if the array is NULL. We'll
         * still pass 0 as number of elements in that array though.
         */
        if (buffer->items == NULL)
            buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
        else
            buffer->items = repalloc(buffer->items,
                                     (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));

        /* merge only the unfrozen tail with the new TIDs */
        new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
                                   (buffer->nitems - buffer->nfrozen),  /* num of unfrozen */
                                   items, tup->nitems, &nnew);

        /* the merge must not have dropped any TID */
        Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));

        /* write the merged result back right after the frozen prefix */
        memcpy(&buffer->items[buffer->nfrozen], new,
               nnew * sizeof(ItemPointerData));

        pfree(new);

        buffer->nitems += tup->nitems;

        AssertCheckItemPointers(buffer);
    }

    /* free the decompressed TID list */
    pfree(items);
}


/*

 * GinBufferReset

 *      Reset the buffer into a state as if it contains no data.

 */

static void
GinBufferReset(GinBuffer *buffer)
{
    bool        keyisbyref;

    Assert(!GinBufferIsEmpty(buffer));

    /* only a normal key of a by-reference type has memory to release */
    keyisbyref = (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval;
    if (keyisbyref)
        pfree(DatumGetPointer(buffer->key));

    /*
     * Zeroing the key is not strictly needed, but an accidental later use is
     * then more likely to show up as a NULL dereference.
     */
    buffer->key = (Datum) 0;

    /* clear the key description */
    buffer->attnum = 0;
    buffer->category = 0;
    buffer->keylen = 0;
    buffer->typlen = 0;
    buffer->typbyval = false;

    /* forget the TIDs; the items array itself is kept */
    buffer->nitems = 0;
    buffer->nfrozen = 0;
}


/*

 * GinBufferTrim

 *      Discard the "frozen" part of the TID list (which should have been

 *      written to disk/index before this call).

 */

static void
GinBufferTrim(GinBuffer *buffer)
{
    int         nkeep;

    Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));

    /* number of unfrozen TIDs that stay in the buffer */
    nkeep = buffer->nitems - buffer->nfrozen;

    /* slide the unfrozen tail to the front (the ranges may overlap) */
    memmove(buffer->items, buffer->items + buffer->nfrozen,
            sizeof(ItemPointerData) * nkeep);

    buffer->nitems = nkeep;
    buffer->nfrozen = 0;
}


/*

 * GinBufferFree

 *      Release memory associated with the GinBuffer (including TID array).

 */

static void
GinBufferFree(GinBuffer *buffer)
{
    /* the TID array exists only once something has been stored */
    if (buffer->items != NULL)
        pfree(buffer->items);

    /*
     * A key copy is owned only by a non-empty buffer holding a normal key of
     * a by-reference type; by-value keys need no release.
     */
    if (!GinBufferIsEmpty(buffer))
    {
        if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
            pfree(DatumGetPointer(buffer->key));
    }

    pfree(buffer);
}


/*

 * GinBufferCanAddKey

 *      Check if a given GIN tuple can be added to the current buffer.

 *

 * Returns true if the buffer is either empty or for the same index key.

 */

static bool
GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
{
    /*
     * An empty buffer takes data for any key; a non-empty one only takes
     * data for the key it already holds.
     */
    return GinBufferIsEmpty(buffer) || GinBufferKeyEquals(buffer, tup);
}


/*

 * Within leader, wait for end of heap scan and merge per-worker results.

 *

 * After waiting for all workers to finish, merge the per-worker results into

 * the complete index. The GIN tuples produced by the workers are read from

 * the shared tuplesort in key order. While combining the per-worker results

 * we accumulate the TID lists for the same key in a buffer, and insert them

 * into the index once the key changes.

 *

 * Returns the total number of heap tuples scanned.

 */

static double

_gin_parallel_merge(GinBuildState *state)

{

    GinTuple   *tup;

    Size        tuplen;

    double      reltuples = 0;

    GinBuffer  *buffer;


    /* GIN tuples from workers, merged by leader */

    double      numtuples = 0;


    /* wait for workers to scan table and produce partial results */

    reltuples = _gin_parallel_heapscan(state);


    /* Execute the sort */

    pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,

                                 PROGRESS_GIN_PHASE_PERFORMSORT_2);


    /* do the actual sort in the leader */

    tuplesort_performsort(state->bs_sortstate);


    /*

     * Initialize buffer to combine entries for the same key.

     *

     * The leader is allowed to use the whole maintenance_work_mem buffer to

     * combine data. The parallel workers already completed.

     */

    buffer = GinBufferInit(state->ginstate.index);


    /*

     * Set the progress target for the next phase.  Reset the block number

     * values set by table_index_build_scan

     */

    {

        const int   progress_index[] = {

            PROGRESS_CREATEIDX_SUBPHASE,

            PROGRESS_CREATEIDX_TUPLES_TOTAL,

            PROGRESS_SCAN_BLOCKS_TOTAL,

            PROGRESS_SCAN_BLOCKS_DONE

        };

        const int64 progress_vals[] = {

            PROGRESS_GIN_PHASE_MERGE_2,

            state->bs_numtuples,

            0, 0

        };


        pgstat_progress_update_multi_param(4, progress_index, progress_vals);

    }


    /*

     * Read the GIN tuples from the shared tuplesort, sorted by category and

     * key. That probably gives us order matching how data is organized in the

     * index.

     *

     * We don't insert the GIN tuples right away, but instead accumulate as

     * many TIDs for the same key as possible, and then insert that at once.

     * This way we don't need to decompress/recompress the posting lists, etc.

     */

    while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)

    {

        MemoryContext oldCtx;


        CHECK_FOR_INTERRUPTS();


        /*

         * If the buffer can accept the new GIN tuple, just store it there and

         * we're done. If it's a different key (or maybe too much data) flush

         * the current contents into the index first.

         */

        if (!GinBufferCanAddKey(buffer, tup))

        {

            /*

             * Buffer is not empty and it's storing a different key - flush

             * the data into the index, and start a new entry for current

             * GinTuple.

             */

            AssertCheckItemPointers(buffer);


            /* do the insert in tmpCtx, which is reset right afterwards */

            oldCtx = MemoryContextSwitchTo(state->tmpCtx);


            ginEntryInsert(&state->ginstate,

                           buffer->attnum, buffer->key, buffer->category,

                           buffer->items, buffer->nitems, &state->buildStats);


            MemoryContextSwitchTo(oldCtx);

            MemoryContextReset(state->tmpCtx);


            /* discard the existing data */

            GinBufferReset(buffer);

        }


        /*

         * We're about to add a GIN tuple to the buffer - check the memory

         * limit first, and maybe write out some of the data into the index

         * first, if needed (and possible). We only flush the part of the TID

         * list that we know won't change, and only if there's enough data for

         * compression to work well.

         */

        if (GinBufferShouldTrim(buffer, tup))

        {

            Assert(buffer->nfrozen > 0);


            /*

             * The buffer holds the same key as the current GinTuple, but

             * adding the tuple would exceed the item limit - write out only

             * the frozen part of the TID list (the first nfrozen items), and

             * keep the rest in the buffer.

             */

            AssertCheckItemPointers(buffer);


            oldCtx = MemoryContextSwitchTo(state->tmpCtx);


            ginEntryInsert(&state->ginstate,

                           buffer->attnum, buffer->key, buffer->category,

                           buffer->items, buffer->nfrozen, &state->buildStats);


            MemoryContextSwitchTo(oldCtx);

            MemoryContextReset(state->tmpCtx);


            /* remove the frozen TIDs we've just written out */

            GinBufferTrim(buffer);

        }


        /*

         * Remember data for the current tuple (either remember the new key,

         * or append it to the existing data).

         */

        GinBufferStoreTuple(buffer, tup);


        /* Report progress */

        pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,

                                     ++numtuples);

    }


    /* flush data remaining in the buffer (for the last key) */

    if (!GinBufferIsEmpty(buffer))

    {

        AssertCheckItemPointers(buffer);


        /*
         * NOTE(review): unlike the flushes inside the loop, this insert does
         * not switch to state->tmpCtx -- confirm that is intentional.
         */

        ginEntryInsert(&state->ginstate,

                       buffer->attnum, buffer->key, buffer->category,

                       buffer->items, buffer->nitems, &state->buildStats);


        /* discard the existing data */

        GinBufferReset(buffer);


        /*
         * Report progress
         *
         * NOTE(review): the counter is incremented once more here although no
         * further tuple was read from the sort -- confirm the reported value
         * is intended.
         */

        pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,

                                     ++numtuples);

    }


    /* release all the memory */

    GinBufferFree(buffer);


    tuplesort_end(state->bs_sortstate);


    return reltuples;

}


/*

 * Returns size of shared memory required to store state for a parallel

 * gin index build based on the snapshot its parallel scan will use.

 */

static Size
_gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
{
    /* fixed-size part; see shm_toc_allocate for why BUFFERALIGN is used */
    Size        fixedsz = BUFFERALIGN(sizeof(GinBuildShared));

    /* space for the parallel table scan state under this snapshot */
    Size        scansz = table_parallelscan_estimate(heap, snapshot);

    return add_size(fixedsz, scansz);
}


/*

 * Within leader, participate as a parallel worker.

 */

static void
_gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
{
    GinLeader  *leader = buildstate->bs_leader;
    int         nparticipants = leader->nparticipanttuplesorts;
    int         leadermem;

    /*
     * Split maintenance_work_mem by the number of participants that actually
     * take part.  If fewer workers were launched than requested, the
     * leader's share ends up somewhat larger than what the workers use.
     */
    leadermem = maintenance_work_mem / nparticipants;

    /* From here on the leader does the same work as any other participant. */
    _gin_parallel_scan_and_build(buildstate,
                                 leader->ginshared,
                                 leader->sharedsort,
                                 heap, index,
                                 leadermem, true);
}


/*
 * _gin_process_worker_data
 *      First phase of the key merging, happening in the worker.
 *
 * Depending on the number of distinct keys, the TID lists produced by the
 * callback may be very short (due to frequent evictions in the callback).
 * But combining many tiny lists is expensive, so we try to do as much as
 * possible in the workers and only then pass the results to the leader.
 *
 * We sort 'worker_sort', read the tuples back ordered by key, and merge
 * them into larger lists using a GinBuffer.  A key normally produces one
 * (sorted) list per worker; when GinBufferShouldTrim() asks for it, the
 * frozen prefix of the list is written out early as a separate tuple.
 * Either way the leader ends up doing a limited number of mergesorts.
 *
 * Parameters:
 *  state       - build state; merged tuples are written into
 *                state->bs_sortstate, and state->bs_numtuples is reset and
 *                then incremented for every tuple written on a key change
 *                and for the final flush.
 *  worker_sort - tuplesort holding the raw per-worker tuples.  It is
 *                sorted, drained and ended here, so it must not be used by
 *                the caller afterwards.
 *  progress    - if true, report the sort and merge subphases through
 *                pgstat_progress_update_param().
 */
static void
_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
                         bool progress)
{
    GinTuple   *tup;
    Size        tuplen;

    GinBuffer  *buffer;

    /*
     * Initialize buffer to combine entries for the same key.
     *
     * The workers are limited to the same amount of memory as during the sort
     * in ginBuildCallbackParallel. But this probably should be the 32MB used
     * during planning, just like there.
     */
    buffer = GinBufferInit(state->ginstate.index);

    /* sort the raw per-worker data */
    if (progress)
        pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
                                     PROGRESS_GIN_PHASE_PERFORMSORT_1);

    /*
     * Sort the tuplesort we were handed.  This must be the same sort that is
     * read and ended below, so use the parameter rather than reaching into
     * the build state.
     */
    tuplesort_performsort(worker_sort);

    /* reset the number of GIN tuples produced by this worker */
    state->bs_numtuples = 0;

    if (progress)
        pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
                                     PROGRESS_GIN_PHASE_MERGE_1);

    /*
     * Read the GIN tuples from the worker's tuplesort, sorted by the key, and
     * merge them into larger chunks for the leader to combine.
     */
    while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
    {
        CHECK_FOR_INTERRUPTS();

        /*
         * If the buffer can accept the new GIN tuple, just store it there and
         * we're done. If it's a different key (or maybe too much data) flush
         * the current contents into the shared tuplesort first.
         */
        if (!GinBufferCanAddKey(buffer, tup))
        {
            GinTuple   *ntup;
            Size        ntuplen;

            /*
             * Buffer is not empty and it's storing a different key - write
             * the whole TID list out as one tuple, and start a new entry for
             * the current GinTuple.
             */
            AssertCheckItemPointers(buffer);

            ntup = _gin_build_tuple(buffer->attnum, buffer->category,
                                    buffer->key, buffer->typlen, buffer->typbyval,
                                    buffer->items, buffer->nitems, &ntuplen);

            tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
            state->bs_numtuples++;

            pfree(ntup);

            /* discard the existing data */
            GinBufferReset(buffer);
        }

        /*
         * We're about to add a GIN tuple to the buffer - check the memory
         * limit first, and maybe write out some of the data first, if needed
         * (and possible). We only flush the part of the TID list that we
         * know won't change, and only if there's enough data for compression
         * to work well.
         */
        if (GinBufferShouldTrim(buffer, tup))
        {
            GinTuple   *ntup;
            Size        ntuplen;

            Assert(buffer->nfrozen > 0);

            /*
             * The key stays in the buffer; only the first nfrozen TIDs are
             * written out as a tuple of their own, and GinBufferTrim() then
             * drops them from the buffer.
             *
             * NOTE(review): bs_numtuples is not incremented for these partial
             * tuples, unlike the other two flush sites - confirm that this is
             * intended.
             */
            AssertCheckItemPointers(buffer);

            ntup = _gin_build_tuple(buffer->attnum, buffer->category,
                                    buffer->key, buffer->typlen, buffer->typbyval,
                                    buffer->items, buffer->nfrozen, &ntuplen);

            tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);

            pfree(ntup);

            /* truncate the data we've just written out */
            GinBufferTrim(buffer);
        }

        /*
         * Remember data for the current tuple (either remember the new key,
         * or append it to the existing data).
         */
        GinBufferStoreTuple(buffer, tup);
    }

    /* flush data remaining in the buffer (for the last key) */
    if (!GinBufferIsEmpty(buffer))
    {
        GinTuple   *ntup;
        Size        ntuplen;

        AssertCheckItemPointers(buffer);

        ntup = _gin_build_tuple(buffer->attnum, buffer->category,
                                buffer->key, buffer->typlen, buffer->typbyval,
                                buffer->items, buffer->nitems, &ntuplen);

        tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
        state->bs_numtuples++;

        pfree(ntup);

        /* discard the existing data */
        GinBufferReset(buffer);
    }

    /* release all the memory */
    GinBufferFree(buffer);

    tuplesort_end(worker_sort);
}


/*
 * _gin_parallel_scan_and_build
 *      Perform one participant's portion of a parallel GIN index build.
 *
 * sortmem is the amount of working memory this participant may use,
 * expressed in KBs.  Half of it goes to each of the two tuplesorts set up
 * here.  'progress' controls whether progress is reported (the table scan
 * and the in-worker merge both receive it).
 *
 * When this returns, the participant is done and need only release
 * resources.
 *
 * Data is processed in two phases before it reaches the shared tuplesort
 * read by the leader:
 *
 * 1) The participant reads its portion of the table, accumulates entries in
 * memory, and flushes them into a private tuplesort (e.g. because of using
 * too much memory).
 *
 * 2) The private tuplesort gets sorted (by key and TID), the participant
 * reads the data again, and combines the entries as much as possible.  This
 * has to happen eventually, and this way it's done in workers in parallel.
 *
 * The combined entries are then written into the shared tuplesort, so that
 * the leader can process them.
 *
 * How well this works (compared to just writing entries into the shared
 * tuplesort) depends on the data set.  For large tables with many distinct
 * keys this helps a lot.  With many distinct keys it's likely the buffers
 * have to be flushed often, generating many entries with the same key and
 * short TID lists.  These entries need to be sorted and merged at some
 * point, before writing them to the index.  The merging is quite expensive,
 * it can easily be ~50% of a serial build, and doing as much of it in the
 * workers means it's parallelized.  The leader still has to merge results
 * from the workers, but it's much more efficient to merge few large entries
 * than many tiny ones.
 *
 * This also reduces the amount of data passed to the leader through the
 * shared tuplesort.  OTOH the private sort needs extra space, possibly up to
 * 2x of the data if no entries can be merged.  But this is very unlikely,
 * and the only consequence is inefficiency, so we ignore it.
 */
static void
_gin_parallel_scan_and_build(GinBuildState *state,
                             GinBuildShared *ginshared, Sharedsort *sharedsort,
                             Relation heap, Relation index,
                             int sortmem, bool progress)
{
    SortCoordinate coord;
    TableScanDesc heapscan;
    IndexInfo  *info;
    double      ntuples;

    /* Coordination state tying our partial sort to the shared one */
    coord = palloc0(sizeof(SortCoordinateData));
    coord->isWorker = true;
    coord->nParticipants = -1;
    coord->sharedsort = sharedsort;

    /* memory budget for the accumulated entries; both sorts below use it */
    state->work_mem = (sortmem / 2);

    /* "Partial" tuplesort, feeding the shared sort read by the leader */
    state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
                                                    state->work_mem,
                                                    coord,
                                                    TUPLESORT_NONE);

    /* Private sort for the raw data produced by the build callback */
    state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
                                                      state->work_mem,
                                                      NULL,
                                                      TUPLESORT_NONE);

    /* Join the parallel scan described in the shared state */
    info = BuildIndexInfo(index);
    info->ii_Concurrent = ginshared->isconcurrent;

    heapscan = table_beginscan_parallel(heap,
                                        ParallelTableScanFromGinBuildShared(ginshared));

    ntuples = table_index_build_scan(heap, index, info, true, progress,
                                     ginBuildCallbackParallel, state, heapscan);

    /* push out whatever is still accumulated in memory */
    ginFlushBuildState(state, index);

    /*
     * First phase of in-worker processing: sort what the callback produced,
     * combine it into much larger chunks, and place those into the shared
     * tuplesort for the leader to process.
     */
    _gin_process_worker_data(state, state->bs_worker_sort, progress);

    /* sort the GIN tuples built by this participant */
    tuplesort_performsort(state->bs_sortstate);

    state->bs_reltuples += ntuples;

    /* Done.  Publish our ambuild statistics under the shared mutex. */
    SpinLockAcquire(&ginshared->mutex);
    ginshared->nparticipantsdone++;
    ginshared->reltuples += state->bs_reltuples;
    ginshared->indtuples += state->bs_numtuples;
    SpinLockRelease(&ginshared->mutex);

    /* Let the leader know another participant has finished */
    ConditionVariableSignal(&ginshared->workersdonecv);

    tuplesort_end(state->bs_sortstate);
}


/*
 * _gin_parallel_build_main
 *      Perform work within a launched parallel process.
 *
 * Looks up the shared build state and the shared tuplesort state in 'toc',
 * opens the heap and index with the lock modes matching the (non-)concurrent
 * build, sets up a local GinBuildState, runs the common scan-and-build
 * routine without progress reporting, and finally reports buffer/WAL usage
 * into this worker's slots before closing the relations.
 */
void
_gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
{
    char       *sharedquery;
    GinBuildShared *ginshared;
    Sharedsort *sharedsort;
    GinBuildState buildstate;
    Relation    heapRel;
    Relation    indexRel;
    LOCKMODE    heapLockmode;
    LOCKMODE    indexLockmode;
    WalUsage   *walusage;
    BufferUsage *bufferusage;
    int         sortmem;

    /*
     * The only possible status flag that can be set to the parallel worker is
     * PROC_IN_SAFE_IC.
     */
    Assert((MyProc->statusFlags == 0) ||
           (MyProc->statusFlags == PROC_IN_SAFE_IC));

    /* Set debug_query_string for individual workers first */
    sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
    debug_query_string = sharedquery;

    /* Report the query string from leader */
    pgstat_report_activity(STATE_RUNNING, debug_query_string);

    /* Look up gin shared state */
    ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);

    /* Open relations using lock modes known to be obtained by index.c */
    if (!ginshared->isconcurrent)
    {
        heapLockmode = ShareLock;
        indexLockmode = AccessExclusiveLock;
    }
    else
    {
        heapLockmode = ShareUpdateExclusiveLock;
        indexLockmode = RowExclusiveLock;
    }

    /* Open relations within worker */
    heapRel = table_open(ginshared->heaprelid, heapLockmode);
    indexRel = index_open(ginshared->indexrelid, indexLockmode);

    /* initialize the GIN build state */
    initGinState(&buildstate.ginstate, indexRel);
    buildstate.indtuples = 0;
    memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
    memset(&buildstate.tid, 0, sizeof(ItemPointerData));

    /*
     * buildstate lives on the stack, so the parallel-build fields start out
     * with indeterminate contents.  _gin_parallel_scan_and_build()
     * accumulates into bs_reltuples with "+=" and then adds it to the shared
     * total, so it has to start from zero.  bs_numtuples and bs_leader are
     * cleared as well so that no field read later holds garbage (a worker
     * has no leader state).
     */
    buildstate.bs_numtuples = 0;
    buildstate.bs_reltuples = 0;
    buildstate.bs_leader = NULL;

    /*
     * create a temporary memory context that is used to hold data not yet
     * dumped out to the index
     */
    buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
                                              "Gin build temporary context",
                                              ALLOCSET_DEFAULT_SIZES);

    /*
     * create a temporary memory context that is used for calling
     * ginExtractEntries(), and can be reset after each tuple
     */
    buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
                                               "Gin build temporary context for user-defined function",
                                               ALLOCSET_DEFAULT_SIZES);

    buildstate.accum.ginstate = &buildstate.ginstate;
    ginInitBA(&buildstate.accum);

    /* Look up shared state private to tuplesort.c */
    sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    tuplesort_attach_shared(sharedsort, seg);

    /* Prepare to track buffer usage during parallel execution */
    InstrStartParallelQuery();

    /*
     * Might as well use reliable figure when doling out maintenance_work_mem
     * (when requested number of workers were not launched, this will be
     * somewhat higher than it is for other workers).
     */
    sortmem = maintenance_work_mem / ginshared->scantuplesortstates;

    /* workers do not report progress, hence the final "false" */
    _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
                                 heapRel, indexRel, sortmem, false);

    /* Report WAL/buffer usage during parallel execution */
    bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
    InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
                          &walusage[ParallelWorkerNumber]);

    index_close(indexRel, indexLockmode);
    table_close(heapRel, heapLockmode);
}


/*

 * Used to keep track of compressed TID lists when building a GIN tuple.

 *

 * _gin_build_tuple allocates one of these for every segment returned by

 * ginCompressPostingList, chains them on a dlist in the order they were

 * produced, and frees both the segment and the list element once the

 * segment has been copied into the GIN tuple.

 */

typedef struct

{

    dlist_node  node;           /* linked list pointers */

    GinPostingList *seg;        /* one compressed chunk of the TID list */

} GinSegmentInfo;


/*
 * _gin_build_tuple
 *      Serialize the state for an index key into a tuple for tuplesort.
 *
 * The tuple starts with a number of scalar fields (mostly matching the
 * build state), followed by a data array holding the key and then, at the
 * next SHORTALIGN boundary, the TID list.
 *
 * Key: only GIN_CAT_NORM_KEY entries store a key.  By-value types store the
 * whole Datum regardless of typlen, so that we don't have to care about
 * things like endianness.  By-reference types store the actual bytes:
 * typlen bytes for fixed-length types, the full varlena (header included)
 * for typlen -1, and the string with its terminating \0 for typlen -2.  Any
 * other typlen raises an error.
 *
 * TIDs: the list is highly compressible, so it is stored compressed with
 * ginCompressPostingList.  The list may be pretty long, so it is compressed
 * into multiple segments which are then copied into the tuple back to back.
 *
 * The total length of the tuple is returned in *len (and stored in the
 * tuple itself).  The result is palloc'd.
 */
static GinTuple *
_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
                 Datum key, int16 typlen, bool typbyval,
                 ItemPointerData *items, uint32 nitems,
                 Size *len)
{
    GinTuple   *result;
    char       *dst;
    Size        total;
    Size        tidoff;
    int         keysz;

    dlist_head  seglist;
    dlist_mutable_iter it;
    int         ndone;
    int         taken;
    Size        tidbytes = 0;

    /* Work out how many bytes the key occupies in the data array. */
    if (category != GIN_CAT_NORM_KEY)
        keysz = 0;
    else if (typbyval)
        keysz = sizeof(Datum);
    else
    {
        switch (typlen)
        {
            case -1:
                keysz = VARSIZE_ANY(key);
                break;
            case -2:
                keysz = strlen(DatumGetPointer(key)) + 1;
                break;
            default:
                if (typlen > 0)
                    keysz = typlen;
                else
                    elog(ERROR, "unexpected typlen value (%d)", typlen);
                break;
        }
    }

    /*
     * Compress the item pointers chunk by chunk.  Each call consumes 'taken'
     * TIDs starting at items[ndone]; the resulting segments are queued in
     * order so they can be copied once the total size is known.
     */
    dlist_init(&seglist);

    for (ndone = 0; ndone < nitems; ndone += taken)
    {
        GinSegmentInfo *info = palloc(sizeof(GinSegmentInfo));

        info->seg = ginCompressPostingList(&items[ndone],
                                           (nitems - ndone),
                                           UINT16_MAX,
                                           &taken);

        tidbytes += SizeOfGinPostingList(info->seg);

        dlist_push_tail(&seglist, &info->node);
    }

    /*
     * The compressed segments start right after the key, aligned so that
     * they can be accessed in place (they require only SHORTALIGN).
     */
    tidoff = SHORTALIGN(offsetof(GinTuple, data) + keysz);
    total = tidoff + tidbytes;

    *len = total;

    /*
     * palloc0 is needed - writetup_index_gin will write the whole tuple to
     * disk, so the padding bytes have to be defined (otherwise valgrind
     * would report this).
     */
    result = palloc0(total);

    result->tuplen = total;
    result->attrnum = attrnum;
    result->category = category;
    result->keylen = keysz;
    result->nitems = nitems;

    /* key type info */
    result->typlen = typlen;
    result->typbyval = typbyval;

    /*
     * The key goes at the very beginning of the data array: the Datum image
     * for by-value types, the referenced bytes otherwise.  Either way it is
     * exactly keysz bytes.
     */
    if (category == GIN_CAT_NORM_KEY)
    {
        if (typbyval)
            memcpy(result->data, &key, keysz);
        else
            memcpy(result->data, DatumGetPointer(key), keysz);
    }

    /* append the compressed segments, freeing each one after the copy */
    dst = (char *) result + tidoff;

    dlist_foreach_modify(it, &seglist)
    {
        GinSegmentInfo *info = dlist_container(GinSegmentInfo, node, it.cur);
        Size        segsz = SizeOfGinPostingList(info->seg);

        memcpy(dst, info->seg, segsz);
        dst += segsz;

        dlist_delete(&info->node);

        pfree(info->seg);
        pfree(info);
    }

    return result;
}


/*
 * _gin_parse_tuple_key
 *      Return a Datum representing the key stored in the tuple.
 *
 * Most tuple fields can be read directly; the key (and the TID list) need a
 * bit more care.
 *
 * Tuples whose category is not GIN_CAT_NORM_KEY carry no key, and a zero
 * Datum is returned for them.  For by-value types the Datum is copied out
 * of the data array; for by-reference types the result points at the
 * beginning of the data array (which is where _gin_build_tuple copied the
 * key), so it is only valid as long as the tuple is.
 */
static Datum
_gin_parse_tuple_key(GinTuple *a)
{
    if (a->category == GIN_CAT_NORM_KEY)
    {
        Datum       value;

        /* by-reference: hand out a pointer into the tuple itself */
        if (!a->typbyval)
            return PointerGetDatum(a->data);

        /* by-value: the Datum image was stored in the data array */
        memcpy(&value, a->data, a->keylen);
        return value;
    }

    /* no key stored for this category */
    return (Datum) 0;
}


/*
 * _gin_parse_tuple_items
 *      Return a palloc'd array with the decompressed TIDs of the tuple.
 *
 * The compressed segments start at the first SHORTALIGN boundary after the
 * key and run to the end of the tuple.
 */
static ItemPointer
_gin_parse_tuple_items(GinTuple *a)
{
    /* offset of the first compressed segment within the tuple */
    Size        tidoff = SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    char       *segs = (char *) a + tidoff;
    int         seglen = a->tuplen - tidoff;
    int         ntids;
    ItemPointer tids;

    tids = ginPostingListDecodeAllSegments((GinPostingList *) segs, seglen, &ntids);

    /* the segments must decode to exactly the advertised number of TIDs */
    Assert(ntids == a->nitems);

    return tids;
}


/*
 * _gin_compare_tuples
 *      Compare GIN tuples, used by tuplesort during parallel index build.
 *
 * Ordering is by attrnum, then category, then (for GIN_CAT_NORM_KEY only)
 * the key value, compared with the sort support entry for the attribute.
 *
 * Tuples that are equal on all of that are ordered by the first TID of
 * their TID lists, which means the tuples are merged in an order in which
 * they are most likely to be simply concatenated.  (This "first" TID also
 * allows determining a point up to which the list is fully determined and
 * can be written into the index to enforce a memory limit etc.)
 */
int
_gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
{
    /* scalar fields first */
    if (a->attrnum != b->attrnum)
        return (a->attrnum < b->attrnum) ? -1 : 1;

    if (a->category != b->category)
        return (a->category < b->category) ? -1 : 1;

    /* only normal keys have a value worth comparing */
    if (a->category == GIN_CAT_NORM_KEY)
    {
        Datum       lhs = _gin_parse_tuple_key(a);
        Datum       rhs = _gin_parse_tuple_key(b);
        int         cmp;

        cmp = ApplySortComparator(lhs, false,
                                  rhs, false,
                                  &ssup[a->attrnum - 1]);

        if (cmp != 0)
            return cmp;
    }

    /* same key (or no key at all): break the tie on the first TID */
    return ItemPointerCompare(GinTupleGetFirst(a),
                              GinTupleGetFirst(b));
}

ParallelWorkerNumber
int ParallelWorkerNumber
Definition: parallel.c:115

InitializeParallelDSM
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:211

WaitForParallelWorkersToFinish
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:796

LaunchParallelWorkers
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:573

DestroyParallelContext
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:950

CreateParallelContext
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:173

WaitForParallelWorkersToAttach
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:693

pgstat_progress_update_param
void pgstat_progress_update_param(int index, int64 val)
Definition: backend_progress.c:48

pgstat_progress_update_multi_param
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: backend_progress.c:121

pgstat_report_activity
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: backend_status.c:572

STATE_RUNNING
@ STATE_RUNNING
Definition: backend_status.h:29

BlockNumber
uint32 BlockNumber
Definition: block.h:31

values
static Datum values[MAXATTR]
Definition: bootstrap.c:151

Buffer
int Buffer
Definition: buf.h:23

BufferGetBlockNumber
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4229

ExtendBufferedRel
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:858

UnlockReleaseBuffer
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5388

MarkBufferDirty
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2952

LockBuffer
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5605

bufmgr.h

RelationGetNumberOfBlocks
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:283

BufferGetPage
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:417

EB_SKIP_EXTENSION_LOCK
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:75

EB_LOCK_FIRST
@ EB_LOCK_FIRST
Definition: bufmgr.h:87

BMR_REL
#define BMR_REL(p_rel)
Definition: bufmgr.h:108

PageGetItem
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354

PageGetItemId
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244

Page
PageData * Page
Definition: bufpage.h:82

builtins.h

BUFFERALIGN
#define BUFFERALIGN(LEN)
Definition: c.h:784

int64
int64_t int64
Definition: c.h:499

int16
int16_t int16
Definition: c.h:497

SHORTALIGN
#define SHORTALIGN(LEN)
Definition: c.h:778

int32
int32_t int32
Definition: c.h:498

uint32
uint32_t uint32
Definition: c.h:502

OidIsValid
#define OidIsValid(objectId)
Definition: c.h:746

Size
size_t Size
Definition: c.h:576

ConditionVariableCancelSleep
bool ConditionVariableCancelSleep(void)
Definition: condition_variable.c:230

ConditionVariableInit
void ConditionVariableInit(ConditionVariable *cv)
Definition: condition_variable.c:35

ConditionVariableSleep
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
Definition: condition_variable.c:96

ConditionVariableSignal
void ConditionVariableSignal(ConditionVariable *cv)
Definition: condition_variable.c:259

datumCopy
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132

datum.h

errcode
int errcode(int sqlerrcode)
Definition: elog.c:854

errmsg
int errmsg(const char *fmt,...)
Definition: elog.c:1071

ERROR
#define ERROR
Definition: elog.h:39

elog
#define elog(elevel,...)
Definition: elog.h:225

ereport
#define ereport(elevel,...)
Definition: elog.h:149

execnodes.h

format_type_be
char * format_type_be(Oid type_oid)
Definition: format_type.c:343

IndexUniqueCheck
IndexUniqueCheck
Definition: genam.h:139

GIN_COMPARE_PROC
#define GIN_COMPARE_PROC
Definition: gin.h:24

PROGRESS_GIN_PHASE_PERFORMSORT_2
#define PROGRESS_GIN_PHASE_PERFORMSORT_2
Definition: gin.h:49

PROGRESS_GIN_PHASE_MERGE_1
#define PROGRESS_GIN_PHASE_MERGE_1
Definition: gin.h:48

PROGRESS_GIN_PHASE_PERFORMSORT_1
#define PROGRESS_GIN_PHASE_PERFORMSORT_1
Definition: gin.h:47

PROGRESS_GIN_PHASE_MERGE_2
#define PROGRESS_GIN_PHASE_MERGE_2
Definition: gin.h:50

PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN
#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN
Definition: gin.h:46

gin_private.h

GIN_UNLOCK
#define GIN_UNLOCK
Definition: gin_private.h:49

GinGetUseFastUpdate
#define GinGetUseFastUpdate(relation)
Definition: gin_private.h:34

gin_tuple.h

GinTupleGetFirst
static ItemPointer GinTupleGetFirst(GinTuple *tup)
Definition: gin_tuple.h:33

GinIsPostingTree
#define GinIsPostingTree(itup)
Definition: ginblock.h:231

GIN_CAT_NORM_KEY
#define GIN_CAT_NORM_KEY
Definition: ginblock.h:208

SizeOfGinPostingList
#define SizeOfGinPostingList(plist)
Definition: ginblock.h:342

GIN_LEAF
#define GIN_LEAF
Definition: ginblock.h:42

GinGetPostingTree
#define GinGetPostingTree(itup)
Definition: ginblock.h:233

GinNullCategory
signed char GinNullCategory
Definition: ginblock.h:206

GinSetPostingTree
#define GinSetPostingTree(itup, blkno)
Definition: ginblock.h:232

GinMaxItemSize
#define GinMaxItemSize
Definition: ginblock.h:248

freeGinBtreeStack
void freeGinBtreeStack(GinBtreeStack *stack)
Definition: ginbtree.c:198

ginInsertValue
void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats)
Definition: ginbtree.c:816

ginFindLeafPage
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, bool rootConflictCheck)
Definition: ginbtree.c:83

ginBeginBAScan
void ginBeginBAScan(BuildAccumulator *accum)
Definition: ginbulk.c:257

ginGetBAEntry
ItemPointerData * ginGetBAEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *key, GinNullCategory *category, uint32 *n)
Definition: ginbulk.c:268

ginInsertBAEntries
void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum *entries, GinNullCategory *categories, int32 nentries)
Definition: ginbulk.c:210

ginInitBA
void ginInitBA(BuildAccumulator *accum)
Definition: ginbulk.c:109

createPostingTree
BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, GinStatsData *buildStats, Buffer entrybuffer)
Definition: gindatapage.c:1775

ginInsertItemPointers
void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
Definition: gindatapage.c:1908

ginReadTuple
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
Definition: ginentrypage.c:162

GinFormTuple
IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, Pointer data, Size dataSize, int nipd, bool errorTooBig)
Definition: ginentrypage.c:44

ginPrepareEntryScan
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
Definition: ginentrypage.c:747

ginHeapTupleFastCollect
void ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, OffsetNumber attnum, Datum value, bool isNull, ItemPointer ht_ctid)
Definition: ginfast.c:483

ginHeapTupleFastInsert
void ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Definition: ginfast.c:219

ginBuildCallbackParallel
static void ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: gininsert.c:533

PARALLEL_KEY_BUFFER_USAGE
#define PARALLEL_KEY_BUFFER_USAGE
Definition: gininsert.c:43

AssertCheckItemPointers
static void AssertCheckItemPointers(GinBuffer *buffer)
Definition: gininsert.c:1173

_gin_compare_tuples
int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
Definition: gininsert.c:2402

GinBuffer
struct GinBuffer GinBuffer

GinLeader
struct GinLeader GinLeader

addItemPointersToLeafTuple
static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition: gininsert.c:209

GinBufferIsEmpty
static bool GinBufferIsEmpty(GinBuffer *buffer)
Definition: gininsert.c:1297

PARALLEL_KEY_GIN_SHARED
#define PARALLEL_KEY_GIN_SHARED
Definition: gininsert.c:39

ginbuild
IndexBuildResult * ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: gininsert.c:581

GinBufferInit
static GinBuffer * GinBufferInit(Relation index)
Definition: gininsert.c:1229

buildFreshLeafTuple
static IndexTuple buildFreshLeafTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer)
Definition: gininsert.c:289

GinBufferReset
static void GinBufferReset(GinBuffer *buffer)
Definition: gininsert.c:1525

ginHeapTupleBulkInsert
static void ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr)
Definition: gininsert.c:416

AssertCheckGinBuffer
static void AssertCheckGinBuffer(GinBuffer *buffer)
Definition: gininsert.c:1200

_gin_parallel_scan_and_build
static void _gin_parallel_scan_and_build(GinBuildState *state, GinBuildShared *ginshared, Sharedsort *sharedsort, Relation heap, Relation index, int sortmem, bool progress)
Definition: gininsert.c:1997

ginEntryInsert
void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
Definition: gininsert.c:339

GinBufferTrim
static void GinBufferTrim(GinBuffer *buffer)
Definition: gininsert.c:1555

_gin_parallel_estimate_shared
static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
Definition: gininsert.c:1775

_gin_end_parallel
static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
Definition: gininsert.c:1075

_gin_begin_parallel
static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, bool isconcurrent, int request)
Definition: gininsert.c:896

ginFlushBuildState
static void ginFlushBuildState(GinBuildState *buildstate, Relation index)
Definition: gininsert.c:484

GinBuildShared
struct GinBuildShared GinBuildShared

_gin_process_worker_data
static void _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, bool progress)
Definition: gininsert.c:1819

GinBufferKeyEquals
static bool GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1313

_gin_parallel_merge
static double _gin_parallel_merge(GinBuildState *state)
Definition: gininsert.c:1613

ginHeapTupleInsert
static void ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, ItemPointer item)
Definition: gininsert.c:806

GinBufferCanAddKey
static bool GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1591

_gin_parallel_build_main
void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition: gininsert.c:2072

ginbuildempty
void ginbuildempty(Relation index)
Definition: gininsert.c:775

gininsert
bool gininsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: gininsert.c:824

_gin_leader_participate_as_worker
static void _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
Definition: gininsert.c:1786

_gin_build_tuple
static GinTuple * _gin_build_tuple(OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len)
Definition: gininsert.c:2200

_gin_parse_tuple_key
static Datum _gin_parse_tuple_key(GinTuple *a)
Definition: gininsert.c:2349

PARALLEL_KEY_TUPLESORT
#define PARALLEL_KEY_TUPLESORT
Definition: gininsert.c:40

GinBufferShouldTrim
static bool GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1375

PARALLEL_KEY_QUERY_TEXT
#define PARALLEL_KEY_QUERY_TEXT
Definition: gininsert.c:41

GinBufferFree
static void GinBufferFree(GinBuffer *buffer)
Definition: gininsert.c:1571

_gin_parse_tuple_items
static ItemPointer _gin_parse_tuple_items(GinTuple *a)
Definition: gininsert.c:2370

ParallelTableScanFromGinBuildShared
#define ParallelTableScanFromGinBuildShared(shared)
Definition: gininsert.c:104

PARALLEL_KEY_WAL_USAGE
#define PARALLEL_KEY_WAL_USAGE
Definition: gininsert.c:42

GinBufferStoreTuple
static void GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
Definition: gininsert.c:1416

ginBuildCallback
static void ginBuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: gininsert.c:440

_gin_parallel_heapscan
static double _gin_parallel_heapscan(GinBuildState *state)
Definition: gininsert.c:1106

ginPostingListDecodeAllSegments
ItemPointer ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
Definition: ginpostinglist.c:297

ginCompressPostingList
GinPostingList * ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize, int *nwritten)
Definition: ginpostinglist.c:197

ginMergeItemPointers
ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb, int *nmerged)
Definition: ginpostinglist.c:378

gintuple_get_attrnum
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
Definition: ginutil.c:231

GinNewBuffer
Buffer GinNewBuffer(Relation index)
Definition: ginutil.c:305

GinInitBuffer
void GinInitBuffer(Buffer b, uint32 f)
Definition: ginutil.c:355

ginExtractEntries
Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories)
Definition: ginutil.c:488

gintuple_get_key
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Definition: ginutil.c:264

GinInitMetabuffer
void GinInitMetabuffer(Buffer b)
Definition: ginutil.c:361

initGinState
void initGinState(GinState *state, Relation index)
Definition: ginutil.c:102

ginUpdateStats
void ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
Definition: ginutil.c:655

maintenance_work_mem
int maintenance_work_mem
Definition: globals.c:133

Assert
Assert(PointerIsAligned(start, uint64))

dlist_init
static void dlist_init(dlist_head *head)
Definition: ilist.h:314

dlist_delete
static void dlist_delete(dlist_node *node)
Definition: ilist.h:405

dlist_foreach_modify
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:640

dlist_push_tail
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition: ilist.h:364

dlist_container
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593

parallel.h

nitems
#define nitems(x)
Definition: indent.h:31

BuildIndexInfo
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2428

index.h

index_close
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177

index_getprocid
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:873

index_open
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133

value
static struct @165 value

InstrAccumParallelQuery
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:218

InstrEndParallelQuery
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:208

InstrStartParallelQuery
void InstrStartParallelQuery(void)
Definition: instrument.c:200

b
int b
Definition: isn.c:74

a
int a
Definition: isn.c:73

i
int i
Definition: isn.c:77

if
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81

ItemPointerCompare
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51

ItemPointerIsValid
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83

IndexTuple
IndexTupleData * IndexTuple
Definition: itup.h:53

LOCKMODE
int LOCKMODE
Definition: lockdefs.h:26

AccessExclusiveLock
#define AccessExclusiveLock
Definition: lockdefs.h:43

ShareUpdateExclusiveLock
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39

ShareLock
#define ShareLock
Definition: lockdefs.h:40

RowExclusiveLock
#define RowExclusiveLock
Definition: lockdefs.h:38

MemoryContextReset
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383

repalloc
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1548

pfree
void pfree(void *pointer)
Definition: mcxt.c:1528

palloc0
void * palloc0(Size size)
Definition: mcxt.c:1351

palloc
void * palloc(Size size)
Definition: mcxt.c:1321

CurrentMemoryContext
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143

MemoryContextDelete
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454

memutils.h

AllocSetContextCreate
#define AllocSetContextCreate
Definition: memutils.h:129

ALLOCSET_DEFAULT_SIZES
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

miscadmin.h

START_CRIT_SECTION
#define START_CRIT_SECTION()
Definition: miscadmin.h:149

CHECK_FOR_INTERRUPTS
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122

END_CRIT_SECTION
#define END_CRIT_SECTION()
Definition: miscadmin.h:151

sort-test.key
key
Definition: sort-test.py:19

sort-test.list
list
Definition: sort-test.py:13

OffsetNumber
uint16 OffsetNumber
Definition: off.h:24

MemoryContextSwitchTo
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124

attnum
int16 attnum
Definition: pg_attribute.h:74

Form_pg_attribute
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:202

pg_collation.h

len
const void size_t len
Definition: pg_crc32c_sse42.c:28

data
const void * data
Definition: pg_crc32c_sse42.c:27

progress
static int progress
Definition: pgbench.c:262

pgstat.h

debug_query_string
const char * debug_query_string
Definition: postgres.c:88

postgres.h

PointerGetDatum
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327

Datum
uintptr_t Datum
Definition: postgres.h:69

DatumGetPointer
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317

InvalidOid
#define InvalidOid
Definition: postgres_ext.h:35

Oid
unsigned int Oid
Definition: postgres_ext.h:30

CheckForSerializableConflictIn
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4336

predicate.h

PROC_IN_SAFE_IC
#define PROC_IN_SAFE_IC
Definition: proc.h:59

progress.h

PROGRESS_CREATEIDX_TUPLES_TOTAL
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:89

PROGRESS_SCAN_BLOCKS_DONE
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:125

PROGRESS_CREATEIDX_TUPLES_DONE
#define PROGRESS_CREATEIDX_TUPLES_DONE
Definition: progress.h:90

PROGRESS_CREATEIDX_SUBPHASE
#define PROGRESS_CREATEIDX_SUBPHASE
Definition: progress.h:88

PROGRESS_SCAN_BLOCKS_TOTAL
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:124

rel.h

RelationGetRelid
#define RelationGetRelid(relation)
Definition: rel.h:516

RelationGetDescr
#define RelationGetDescr(relation)
Definition: rel.h:542

RelationGetRelationName
#define RelationGetRelationName(relation)
Definition: rel.h:550

RelationNeedsWAL
#define RelationNeedsWAL(relation)
Definition: rel.h:639

IndexRelationGetNumberOfKeyAttributes
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:535

MAIN_FORKNUM
@ MAIN_FORKNUM
Definition: relpath.h:58

INIT_FORKNUM
@ INIT_FORKNUM
Definition: relpath.h:61

shm_toc_allocate
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88

shm_toc_insert
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171

shm_toc_lookup
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232

shm_toc_estimate_chunk
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51

shm_toc_estimate_keys
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53

add_size
Size add_size(Size s1, Size s2)
Definition: shmem.c:493

mul_size
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

GetTransactionSnapshot
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:271

UnregisterSnapshot
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:853

RegisterSnapshot
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:811

SnapshotAny
#define SnapshotAny
Definition: snapmgr.h:33

IsMVCCSnapshot
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55

PrepareSortSupportComparisonShim
void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup)
Definition: sortsupport.c:68

ApplySortComparator
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

SpinLockInit
#define SpinLockInit(lock)
Definition: spin.h:57

SpinLockRelease
#define SpinLockRelease(lock)
Definition: spin.h:61

SpinLockAcquire
#define SpinLockAcquire(lock)
Definition: spin.h:59

MyProc
PGPROC * MyProc
Definition: proc.c:66

BufferUsage
Definition: instrument.h:25

BuildAccumulator
Definition: gin_private.h:440

BuildAccumulator::allocatedMemory
Size allocatedMemory
Definition: gin_private.h:442

BuildAccumulator::ginstate
GinState * ginstate
Definition: gin_private.h:441

ConditionVariable
Definition: condition_variable.h:29

FmgrInfo::fn_oid
Oid fn_oid
Definition: fmgr.h:59

GinBtreeData
Definition: gin_private.h:152

GinBtreeData::isBuild
bool isBuild
Definition: gin_private.h:172

GinBtreeData::findItem
bool(* findItem)(GinBtree, GinBtreeStack *)
Definition: gin_private.h:157

GinBtreeEntryInsertData
Definition: gin_private.h:185

GinBtreeEntryInsertData::entry
IndexTuple entry
Definition: gin_private.h:186

GinBtreeEntryInsertData::isDelete
bool isDelete
Definition: gin_private.h:187

GinBtreeStack
Definition: gin_private.h:131

GinBtreeStack::off
OffsetNumber off
Definition: gin_private.h:134

GinBtreeStack::buffer
Buffer buffer
Definition: gin_private.h:133

GinBuffer
Definition: gininsert.c:1148

GinBuffer::nfrozen
int nfrozen
Definition: gininsert.c:1163

GinBuffer::keylen
Size keylen
Definition: gininsert.c:1152

GinBuffer::maxitems
int maxitems
Definition: gininsert.c:1159

GinBuffer::nitems
int nitems
Definition: gininsert.c:1162

GinBuffer::category
GinNullCategory category
Definition: gininsert.c:1150

GinBuffer::attnum
OffsetNumber attnum
Definition: gininsert.c:1149

GinBuffer::key
Datum key
Definition: gininsert.c:1151

GinBuffer::ssup
SortSupport ssup
Definition: gininsert.c:1164

GinBuffer::items
ItemPointerData * items
Definition: gininsert.c:1165

GinBuffer::typlen
int16 typlen
Definition: gininsert.c:1155

GinBuffer::typbyval
bool typbyval
Definition: gininsert.c:1156

GinBuildShared
Definition: gininsert.c:50

GinBuildShared::reltuples
double reltuples
Definition: gininsert.c:88

GinBuildShared::scantuplesortstates
int scantuplesortstates
Definition: gininsert.c:59

GinBuildShared::heaprelid
Oid heaprelid
Definition: gininsert.c:56

GinBuildShared::indtuples
double indtuples
Definition: gininsert.c:89

GinBuildShared::isconcurrent
bool isconcurrent
Definition: gininsert.c:58

GinBuildShared::mutex
slock_t mutex
Definition: gininsert.c:75

GinBuildShared::workersdonecv
ConditionVariable workersdonecv
Definition: gininsert.c:67

GinBuildShared::nparticipantsdone
int nparticipantsdone
Definition: gininsert.c:87

GinBuildShared::indexrelid
Oid indexrelid
Definition: gininsert.c:57

GinBuildState
Definition: gininsert.c:140

GinBuildState::indtuples
double indtuples
Definition: gininsert.c:142

GinBuildState::ginstate
GinState ginstate
Definition: gininsert.c:141

GinBuildState::bs_reltuples
double bs_reltuples
Definition: gininsert.c:159

GinBuildState::tmpCtx
MemoryContext tmpCtx
Definition: gininsert.c:144

GinBuildState::bs_leader
GinLeader * bs_leader
Definition: gininsert.c:154

GinBuildState::buildStats
GinStatsData buildStats
Definition: gininsert.c:143

GinBuildState::bs_worker_sort
Tuplesortstate * bs_worker_sort
Definition: gininsert.c:174

GinBuildState::tid
ItemPointerData tid
Definition: gininsert.c:147

GinBuildState::bs_numtuples
double bs_numtuples
Definition: gininsert.c:158

GinBuildState::bs_worker_id
int bs_worker_id
Definition: gininsert.c:155

GinBuildState::work_mem
int work_mem
Definition: gininsert.c:148

GinBuildState::bs_sortstate
Tuplesortstate * bs_sortstate
Definition: gininsert.c:166

GinBuildState::funcCtx
MemoryContext funcCtx
Definition: gininsert.c:145

GinBuildState::accum
BuildAccumulator accum
Definition: gininsert.c:146

GinLeader
Definition: gininsert.c:111

GinLeader::sharedsort
Sharedsort * sharedsort
Definition: gininsert.c:133

GinLeader::nparticipanttuplesorts
int nparticipanttuplesorts
Definition: gininsert.c:121

GinLeader::pcxt
ParallelContext * pcxt
Definition: gininsert.c:113

GinLeader::bufferusage
BufferUsage * bufferusage
Definition: gininsert.c:136

GinLeader::snapshot
Snapshot snapshot
Definition: gininsert.c:134

GinLeader::ginshared
GinBuildShared * ginshared
Definition: gininsert.c:132

GinLeader::walusage
WalUsage * walusage
Definition: gininsert.c:135

GinPostingList
Definition: ginblock.h:336

GinSegmentInfo
Definition: gininsert.c:2176

GinSegmentInfo::node
dlist_node node
Definition: gininsert.c:2177

GinSegmentInfo::seg
GinPostingList * seg
Definition: gininsert.c:2178

GinState
Definition: gin_private.h:58

GinState::origTupdesc
TupleDesc origTupdesc
Definition: gin_private.h:73

GinState::index
Relation index
Definition: gin_private.h:59

GinStatsData
Definition: gin.h:56

GinStatsData::nEntryPages
BlockNumber nEntryPages
Definition: gin.h:59

GinStatsData::nEntries
int64 nEntries
Definition: gin.h:61

GinStatsData::nTotalPages
BlockNumber nTotalPages
Definition: gin.h:58

GinTupleCollector
Definition: gin_private.h:462

GinTuple
Definition: gin_tuple.h:21

GinTuple::data
char data[FLEXIBLE_ARRAY_MEMBER]
Definition: gin_tuple.h:29

GinTuple::nitems
int nitems
Definition: gin_tuple.h:28

GinTuple::typlen
int16 typlen
Definition: gin_tuple.h:25

GinTuple::typbyval
bool typbyval
Definition: gin_tuple.h:26

GinTuple::category
signed char category
Definition: gin_tuple.h:27

GinTuple::tuplen
int tuplen
Definition: gin_tuple.h:22

GinTuple::keylen
uint16 keylen
Definition: gin_tuple.h:24

GinTuple::attrnum
OffsetNumber attrnum
Definition: gin_tuple.h:23

IndexBuildResult
Definition: genam.h:54

IndexBuildResult::heap_tuples
double heap_tuples
Definition: genam.h:55

IndexBuildResult::index_tuples
double index_tuples
Definition: genam.h:56

IndexInfo
Definition: execnodes.h:193

IndexInfo::ii_AmCache
void * ii_AmCache
Definition: execnodes.h:219

IndexInfo::ii_ParallelWorkers
int ii_ParallelWorkers
Definition: execnodes.h:217

IndexInfo::ii_Concurrent
bool ii_Concurrent
Definition: execnodes.h:213

IndexInfo::ii_Context
MemoryContext ii_Context
Definition: execnodes.h:220

IndexTupleData
Definition: itup.h:36

ItemPointerData
Definition: itemptr.h:37

MemoryContextData
Definition: memnodes.h:118

PGPROC::statusFlags
uint8 statusFlags
Definition: proc.h:251

ParallelContext
Definition: parallel.h:32

ParallelContext::seg
dsm_segment * seg
Definition: parallel.h:42

ParallelContext::estimator
shm_toc_estimator estimator
Definition: parallel.h:41

ParallelContext::toc
shm_toc * toc
Definition: parallel.h:44

ParallelContext::nworkers_launched
int nworkers_launched
Definition: parallel.h:37

ParallelContext::nworkers
int nworkers
Definition: parallel.h:35

RelationData
Definition: rel.h:56

Sharedsort
Definition: tuplesort.c:344

SnapshotData
Definition: snapshot.h:139

SortCoordinateData
Definition: tuplesort.h:47

SortCoordinateData::sharedsort
Sharedsort * sharedsort
Definition: tuplesort.h:59

SortCoordinateData::isWorker
bool isWorker
Definition: tuplesort.h:49

SortCoordinateData::nParticipants
int nParticipants
Definition: tuplesort.h:56

SortSupportData
Definition: sortsupport.h:61

SortSupportData::abbreviate
bool abbreviate
Definition: sortsupport.h:155

SortSupportData::ssup_attno
AttrNumber ssup_attno
Definition: sortsupport.h:81

SortSupportData::ssup_nulls_first
bool ssup_nulls_first
Definition: sortsupport.h:75

SortSupportData::ssup_collation
Oid ssup_collation
Definition: sortsupport.h:67

SortSupportData::ssup_cxt
MemoryContext ssup_cxt
Definition: sortsupport.h:66

TableScanDescData
Definition: relscan.h:34

TupleDescData
Definition: tupdesc.h:136

TupleDescData::natts
int natts
Definition: tupdesc.h:137

Tuplesortstate
Definition: tuplesort.c:186

TypeCacheEntry
Definition: typcache.h:32

TypeCacheEntry::cmp_proc_finfo
FmgrInfo cmp_proc_finfo
Definition: typcache.h:77

WalUsage
Definition: instrument.h:52

dlist_head
Definition: ilist.h:152

dlist_mutable_iter
Definition: ilist.h:199

dlist_mutable_iter::cur
dlist_node * cur
Definition: ilist.h:200

dlist_node
Definition: ilist.h:138

dsm_segment
Definition: dsm.c:67

index
Definition: type.h:96

shm_toc
Definition: shm_toc.c:27

state
Definition: regguts.h:323

table_close
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126

table_open
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40

table.h

table_beginscan_parallel
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition: tableam.c:166

table_parallelscan_estimate
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:131

table_parallelscan_initialize
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition: tableam.c:146

tableam.h

table_index_build_scan
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1735

tcopprot.h

items
static ItemArray items
Definition: test_tidstore.c:48

TupleDescAttr
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:160

tuplesort_performsort
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1363

tuplesort_initialize_shared
void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg)
Definition: tuplesort.c:2938

tuplesort_estimate_shared
Size tuplesort_estimate_shared(int nWorkers)
Definition: tuplesort.c:2917

tuplesort_end
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:951

tuplesort_attach_shared
void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
Definition: tuplesort.c:2961

SortCoordinate
struct SortCoordinateData * SortCoordinate
Definition: tuplesort.h:62

TUPLESORT_NONE
#define TUPLESORT_NONE
Definition: tuplesort.h:94

tuplesort_begin_index_gin
Tuplesortstate * tuplesort_begin_index_gin(Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, int sortopt)
Definition: tuplesortvariants.c:580

tuplesort_getgintuple
GinTuple * tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
Definition: tuplesortvariants.c:1089

tuplesort_putgintuple
void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
Definition: tuplesortvariants.c:886

lookup_type_cache
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386

TYPECACHE_CMP_PROC_FINFO
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:144

VARSIZE_ANY
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311

ExitParallelMode
void ExitParallelMode(void)
Definition: xact.c:1064

EnterParallelMode
void EnterParallelMode(void)
Definition: xact.c:1051

log_newpage_range
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
Definition: xloginsert.c:1270

log_newpage_buffer
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237

xloginsert.h