#include "postgres.h"
#include <limits.h>
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tuplesort.h"
#include "lib/sort_template.h"

Include dependency graph for tuplesort.c:

Data Structures
union	SlabSlot

struct	Tuplesortstate

struct	Sharedsort

Macros
#define	INITIAL_MEMTUPSIZE

#define	SLAB_SLOT_SIZE 1024

#define	MINORDER 6 /* minimum merge order */

#define	MAXORDER 500 /* maximum merge order */

#define	TAPE_BUFFER_OVERHEAD BLCKSZ

#define	MERGE_BUFFER_SIZE (BLCKSZ * 32)

#define	IS_SLAB_SLOT(state, tuple)

#define	RELEASE_SLAB_SLOT(state, tuple)

#define	REMOVEABBREV(state, stup, count) ((*(state)->base.removeabbrev) (state, stup, count))

#define	COMPARETUP(state, a, b) ((*(state)->base.comparetup) (a, b, state))

#define	WRITETUP(state, tape, stup) ((*(state)->base.writetup) (state, tape, stup))

#define	READTUP(state, stup, tape, len) ((*(state)->base.readtup) (state, stup, tape, len))

#define	FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)

#define	LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed)

#define	USEMEM(state, amt) ((state)->availMem -= (amt))

#define	FREEMEM(state, amt) ((state)->availMem += (amt))

#define	SERIAL(state) ((state)->shared == NULL)

#define	WORKER(state) ((state)->shared && (state)->worker != -1)

#define	LEADER(state) ((state)->shared && (state)->worker == -1)

#define	ST_SORT qsort_tuple_unsigned

#define	ST_ELEMENT_TYPE SortTuple

#define	ST_COMPARE(a, b, state) qsort_tuple_unsigned_compare(a, b, state)

#define	ST_COMPARE_ARG_TYPE Tuplesortstate

#define	ST_CHECK_FOR_INTERRUPTS

#define	ST_SCOPE static

#define	ST_DEFINE

#define	ST_SORT qsort_tuple_int32

#define	ST_ELEMENT_TYPE SortTuple

#define	ST_COMPARE(a, b, state) qsort_tuple_int32_compare(a, b, state)

#define	ST_COMPARE_ARG_TYPE Tuplesortstate

#define	ST_CHECK_FOR_INTERRUPTS

#define	ST_SCOPE static

#define	ST_DEFINE

#define	ST_SORT qsort_tuple

#define	ST_ELEMENT_TYPE SortTuple

#define	ST_COMPARE_RUNTIME_POINTER

#define	ST_COMPARE_ARG_TYPE Tuplesortstate

#define	ST_CHECK_FOR_INTERRUPTS

#define	ST_SCOPE static

#define	ST_DECLARE

#define	ST_DEFINE

#define	ST_SORT qsort_ssup

#define	ST_ELEMENT_TYPE SortTuple

#define	ST_COMPARE(a, b, ssup)

#define	ST_COMPARE_ARG_TYPE SortSupportData

#define	ST_CHECK_FOR_INTERRUPTS

#define	ST_SCOPE static

#define	ST_DEFINE

Typedefs
typedef union SlabSlot	SlabSlot

Enumerations
enum	TupSortStatus { TSS_INITIAL , TSS_BOUNDED , TSS_BUILDRUNS , TSS_SORTEDINMEM , TSS_SORTEDONTAPE , TSS_FINALMERGE }

Functions
static void	tuplesort_begin_batch (Tuplesortstate *state)

static bool	consider_abort_common (Tuplesortstate *state)

static void	inittapes (Tuplesortstate *state, bool mergeruns)

static void	inittapestate (Tuplesortstate *state, int maxTapes)

static void	selectnewtape (Tuplesortstate *state)

static void	init_slab_allocator (Tuplesortstate *state, int numSlots)

static void	mergeruns (Tuplesortstate *state)

static void	mergeonerun (Tuplesortstate *state)

static void	beginmerge (Tuplesortstate *state)

static bool	mergereadnext (Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup)

static void	dumptuples (Tuplesortstate *state, bool alltuples)

static void	make_bounded_heap (Tuplesortstate *state)

static void	sort_bounded_heap (Tuplesortstate *state)

static void	tuplesort_sort_memtuples (Tuplesortstate *state)

static void	tuplesort_heap_insert (Tuplesortstate *state, SortTuple *tuple)

static void	tuplesort_heap_replace_top (Tuplesortstate *state, SortTuple *tuple)

static void	tuplesort_heap_delete_top (Tuplesortstate *state)

static void	reversedirection (Tuplesortstate *state)

static unsigned int	getlen (LogicalTape *tape, bool eofOK)

static void	markrunend (LogicalTape *tape)

static int	worker_get_identifier (Tuplesortstate *state)

static void	worker_freeze_result_tape (Tuplesortstate *state)

static void	worker_nomergeruns (Tuplesortstate *state)

static void	leader_takeover_tapes (Tuplesortstate *state)

static void	free_sort_tuple (Tuplesortstate *state, SortTuple *stup)

static void	tuplesort_free (Tuplesortstate *state)

static void	tuplesort_updatemax (Tuplesortstate *state)

static pg_attribute_always_inline int	qsort_tuple_unsigned_compare (SortTuple *a, SortTuple *b, Tuplesortstate *state)

static pg_attribute_always_inline int	qsort_tuple_int32_compare (SortTuple *a, SortTuple *b, Tuplesortstate *state)

Tuplesortstate *	tuplesort_begin_common (int workMem, SortCoordinate coordinate, int sortopt)

void	tuplesort_set_bound (Tuplesortstate *state, int64 bound)

bool	tuplesort_used_bound (Tuplesortstate *state)

void	tuplesort_end (Tuplesortstate *state)

void	tuplesort_reset (Tuplesortstate *state)

static bool	grow_memtuples (Tuplesortstate *state)

void	tuplesort_puttuple_common (Tuplesortstate *state, SortTuple *tuple, bool useAbbrev, Size tuplen)

void	tuplesort_performsort (Tuplesortstate *state)

bool	tuplesort_gettuple_common (Tuplesortstate *state, bool forward, SortTuple *stup)

bool	tuplesort_skiptuples (Tuplesortstate *state, int64 ntuples, bool forward)

int	tuplesort_merge_order (int64 allowedMem)

static int64	merge_read_buffer_size (int64 avail_mem, int nInputTapes, int nInputRuns, int maxOutputTapes)

void	tuplesort_rescan (Tuplesortstate *state)

void	tuplesort_markpos (Tuplesortstate *state)

void	tuplesort_restorepos (Tuplesortstate *state)

void	tuplesort_get_stats (Tuplesortstate *state, TuplesortInstrumentation *stats)

const char *	tuplesort_method_name (TuplesortMethod m)

const char *	tuplesort_space_type_name (TuplesortSpaceType t)

void *	tuplesort_readtup_alloc (Tuplesortstate *state, Size tuplen)

Size	tuplesort_estimate_shared (int nWorkers)

void	tuplesort_initialize_shared (Sharedsort *shared, int nWorkers, dsm_segment *seg)

void	tuplesort_attach_shared (Sharedsort *shared, dsm_segment *seg)

int	ssup_datum_unsigned_cmp (Datum x, Datum y, SortSupport ssup)

int	ssup_datum_int32_cmp (Datum x, Datum y, SortSupport ssup)

Variables
bool	trace_sort = false

Macro Definition Documentation

◆ COMPARETUP

#define COMPARETUP	(	state,
		a,
		b
	)	((*(state)->base.comparetup) (a, b, state))

Definition at line 396 of file tuplesort.c.

◆ FREEMEM

#define FREEMEM	(	state,
		amt
	)	((state)->availMem += (amt))

Definition at line 402 of file tuplesort.c.

◆ FREESTATE

#define FREESTATE ( state ) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)

Definition at line 399 of file tuplesort.c.

◆ INITIAL_MEMTUPSIZE

#define INITIAL_MEMTUPSIZE

Value:

Max(1024, \

ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1)

Max

#define Max(x, y)

Definition: c.h:969

ALLOCSET_SEPARATE_THRESHOLD

#define ALLOCSET_SEPARATE_THRESHOLD

Definition: memutils.h:187

SortTuple

Definition: tuplesort.h:149

Definition at line 120 of file tuplesort.c.

◆ IS_SLAB_SLOT

#define IS_SLAB_SLOT	(	state,
		tuple
	)

Value:

((char *) (tuple) >= (state)->slabMemoryBegin && \

(char *) (tuple) < (state)->slabMemoryEnd)

state

Definition: regguts.h:323

Definition at line 375 of file tuplesort.c.

◆ LACKMEM

#define LACKMEM ( state ) ((state)->availMem < 0 && !(state)->slabAllocatorUsed)

Definition at line 400 of file tuplesort.c.

◆ LEADER

#define LEADER ( state ) ((state)->shared && (state)->worker == -1)

Definition at line 405 of file tuplesort.c.

◆ MAXORDER

#define MAXORDER 500 /* maximum merge order */

Definition at line 177 of file tuplesort.c.

◆ MERGE_BUFFER_SIZE

#define MERGE_BUFFER_SIZE (BLCKSZ * 32)

Definition at line 179 of file tuplesort.c.

◆ MINORDER

#define MINORDER 6 /* minimum merge order */

Definition at line 176 of file tuplesort.c.

◆ READTUP

#define READTUP	(	state,
		stup,
		tape,
		len
	)	((*(state)->base.readtup) (state, stup, tape, len))

Definition at line 398 of file tuplesort.c.

◆ RELEASE_SLAB_SLOT

#define RELEASE_SLAB_SLOT	(	state,
		tuple
	)

Value:

    do { \
        /* \
         * Return 'tuple' to wherever it came from: a pointer inside the \
         * slab arena goes back on the slab free list, anything else is \
         * pfree'd.  The argument is parenthesized so that an expression \
         * argument is cast as a whole, and it is evaluated only once. \
         */ \
        SlabSlot *buf = (SlabSlot *) (tuple); \
        \
        if (IS_SLAB_SLOT((state), buf)) \
        { \
            /* push the slot onto the head of the free list */ \
            buf->nextfree = (state)->slabFreeHead; \
            (state)->slabFreeHead = buf; \
        } else \
            pfree(buf); \
    } while(0)

Definition at line 383 of file tuplesort.c.

◆ REMOVEABBREV

#define REMOVEABBREV	(	state,
		stup,
		count
	)	((*(state)->base.removeabbrev) (state, stup, count))

Definition at line 395 of file tuplesort.c.

◆ SERIAL

#define SERIAL ( state ) ((state)->shared == NULL)

Definition at line 403 of file tuplesort.c.

◆ SLAB_SLOT_SIZE

#define SLAB_SLOT_SIZE 1024

Definition at line 142 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [1/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [2/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [3/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [4/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_COMPARE [1/3]

#define ST_COMPARE	(	a,
		b,
		ssup
	)

Value:

ApplySortComparator((a)->datum1, (a)->isnull1, \

(b)->datum1, (b)->isnull1, (ssup))

b

int b

Definition: isn.c:74

a

int a

Definition: isn.c:73

ApplySortComparator

static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)

Definition: sortsupport.h:200

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE [2/3]

#define ST_COMPARE	(	a,
		b,
		state
	)	qsort_tuple_unsigned_compare(a, b, state)

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE [3/3]

#define ST_COMPARE	(	a,
		b,
		state
	)	qsort_tuple_int32_compare(a, b, state)

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [1/4]

#define ST_COMPARE_ARG_TYPE Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [2/4]

#define ST_COMPARE_ARG_TYPE Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [3/4]

#define ST_COMPARE_ARG_TYPE Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [4/4]

#define ST_COMPARE_ARG_TYPE SortSupportData

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_RUNTIME_POINTER

#define ST_COMPARE_RUNTIME_POINTER

Definition at line 603 of file tuplesort.c.

◆ ST_DECLARE

#define ST_DECLARE

Definition at line 607 of file tuplesort.c.

◆ ST_DEFINE [1/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [2/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [3/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [4/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [1/4]

#define ST_ELEMENT_TYPE SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [2/4]

#define ST_ELEMENT_TYPE SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [3/4]

#define ST_ELEMENT_TYPE SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [4/4]

#define ST_ELEMENT_TYPE SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_SCOPE [1/4]

#define ST_SCOPE static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [2/4]

#define ST_SCOPE static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [3/4]

#define ST_SCOPE static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [4/4]

#define ST_SCOPE static

Definition at line 618 of file tuplesort.c.

◆ ST_SORT [1/4]

#define ST_SORT qsort_tuple_unsigned

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [2/4]

#define ST_SORT qsort_tuple_int32

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [3/4]

#define ST_SORT qsort_tuple

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [4/4]

#define ST_SORT qsort_ssup

Definition at line 611 of file tuplesort.c.

◆ TAPE_BUFFER_OVERHEAD

#define TAPE_BUFFER_OVERHEAD BLCKSZ

Definition at line 178 of file tuplesort.c.

◆ USEMEM

#define USEMEM	(	state,
		amt
	)	((state)->availMem -= (amt))

Definition at line 401 of file tuplesort.c.

◆ WORKER

#define WORKER ( state ) ((state)->shared && (state)->worker != -1)

Definition at line 404 of file tuplesort.c.

◆ WRITETUP

#define WRITETUP	(	state,
		tape,
		stup
	)	((*(state)->base.writetup) (state, tape, stup))

Definition at line 397 of file tuplesort.c.

Typedef Documentation

◆ SlabSlot

typedef union SlabSlot SlabSlot

Enumeration Type Documentation

◆ TupSortStatus

enum TupSortStatus

Enumerator
TSS_INITIAL
TSS_BOUNDED
TSS_BUILDRUNS
TSS_SORTEDINMEM
TSS_SORTEDONTAPE
TSS_FINALMERGE

Definition at line 154 of file tuplesort.c.

{
    /*
     * Values held in a Tuplesortstate's "status" field.  In the code on this
     * page, inittapes() and leader_takeover_tapes() set TSS_BUILDRUNS, and
     * make_bounded_heap() moves TSS_INITIAL to TSS_BOUNDED.
     */
    TSS_INITIAL,                /* Loading tuples; still within memory limit */
    TSS_BOUNDED,                /* Loading tuples into bounded-size heap */
    TSS_BUILDRUNS,              /* Loading tuples; writing to tape */
    TSS_SORTEDINMEM,            /* Sort completed entirely in memory */
    TSS_SORTEDONTAPE,           /* Sort completed, final run is on tape */
    TSS_FINALMERGE,             /* Performing final merge on-the-fly */
} TupSortStatus;

Function Documentation

◆ beginmerge()

static void beginmerge ( Tuplesortstate * state )

static

Definition at line 2260 of file tuplesort.c.

{
    /*
     * Prime the merge heap: read one tuple from each of the first
     * Min(nInputTapes, nInputRuns) input tapes and insert it into the heap,
     * tagged with the index of the tape it came from.
     */
    int         nActive;
    int         tapeno;

    /* The heap is expected to be empty on entry */
    Assert(state->memtupcount == 0);

    nActive = Min(state->nInputTapes, state->nInputRuns);

    for (tapeno = 0; tapeno < nActive; tapeno++)
    {
        SortTuple   first;

        /* A tape that yields no tuple contributes no heap entry */
        if (!mergereadnext(state, state->inputTapes[tapeno], &first))
            continue;

        first.srctape = tapeno;
        tuplesort_heap_insert(state, &first);
    }
}

References Assert(), mergereadnext(), Min, SortTuple::srctape, and tuplesort_heap_insert().

Referenced by mergeonerun(), and mergeruns().

◆ consider_abort_common()

static bool consider_abort_common ( Tuplesortstate * state )

static

Definition at line 1319 of file tuplesort.c.

{
    /*
     * Decide whether to give up on abbreviated keys for the leading sort
     * key.  Returns true when abbreviation has just been abandoned (the
     * full comparator is reinstated and the abbreviation hooks are cleared),
     * false otherwise.
     */
    Assert(state->base.sortKeys[0].abbrev_converter != NULL);
    Assert(state->base.sortKeys[0].abbrev_abort != NULL);
    Assert(state->base.sortKeys[0].abbrev_full_comparator != NULL);

    /*
     * Only evaluate the optimization while still loading within the memory
     * limit, and only once the tuple count has reached the next checkpoint.
     */
    if (state->status != TSS_INITIAL ||
        state->memtupcount < state->abbrevNext)
        return false;

    /* Each check doubles the tuple count required for the next one */
    state->abbrevNext *= 2;

    /*
     * Ask the opclass-supplied abort routine; if it does not request an
     * abort, abbreviation stays in effect.
     */
    if (!state->base.sortKeys->abbrev_abort(state->memtupcount,
                                            state->base.sortKeys))
        return false;

    /*
     * Abort: switch back to the authoritative comparator and signal that
     * abbreviation is off by clearing abbrev_converter.
     */
    state->base.sortKeys[0].comparator = state->base.sortKeys[0].abbrev_full_comparator;
    state->base.sortKeys[0].abbrev_converter = NULL;

    /* Clearing the remaining hooks is not required, just tidy */
    state->base.sortKeys[0].abbrev_abort = NULL;
    state->base.sortKeys[0].abbrev_full_comparator = NULL;

    /* Caller should now expect the original pass-by-value representation */
    return true;
}

References Assert(), and TSS_INITIAL.

Referenced by tuplesort_puttuple_common().

◆ dumptuples()

static void dumptuples	(	Tuplesortstate *	state,
		bool	alltuples
	)

static

Definition at line 2307 of file tuplesort.c.

{
    /*
     * Sort the tuples currently held in memtuples[] and write them out as
     * one run on state->destTape, then release the tuple memory.
     *
     * state     - sort state; its status must be TSS_BUILDRUNS once we get
     *             past the early exits
     * alltuples - when true, dump even if there is still room in memory
     */
    int         memtupwrite;
    int         i;

    /*
     * Nothing to do if we still fit in available memory and have array slots,
     * unless this is the final call during initial run generation.
     */
    if (state->memtupcount < state->memtupsize && !LACKMEM(state) &&
        !alltuples)
        return;

    /*
     * Final call might require no sorting, in rare cases where we just so
     * happen to have previously LACKMEM()'d at the point where exactly all
     * remaining tuples are loaded into memory, just before input was
     * exhausted.  In general, short final runs are quite possible, but avoid
     * creating a completely empty run.  In a worker, though, we must produce
     * at least one tape, even if it's empty.
     */
    if (state->memtupcount == 0 && state->currentRun > 0)
        return;

    Assert(state->status == TSS_BUILDRUNS);

    /*
     * It seems unlikely that this limit will ever be exceeded, but take no
     * chances
     */
    if (state->currentRun == INT_MAX)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("cannot have more than %d runs for an external sort",
                        INT_MAX)));

    /*
     * Only runs after the first need a new destination tape here;
     * inittapes() resets currentRun to 0 and already calls selectnewtape().
     */
    if (state->currentRun > 0)
        selectnewtape(state);

    state->currentRun++;

    if (trace_sort)
        elog(LOG, "worker %d starting quicksort of run %d: %s",
             state->worker, state->currentRun,
             pg_rusage_show(&state->ru_start));

    /*
     * Sort all tuples accumulated within the allowed amount of memory for
     * this run using quicksort
     */
    tuplesort_sort_memtuples(state);

    if (trace_sort)
        elog(LOG, "worker %d finished quicksort of run %d: %s",
             state->worker, state->currentRun,
             pg_rusage_show(&state->ru_start));

    /* Write every in-memory tuple, in sorted order, to the destination tape */
    memtupwrite = state->memtupcount;
    for (i = 0; i < memtupwrite; i++)
    {
        SortTuple  *stup = &state->memtuples[i];

        WRITETUP(state, state->destTape, stup);
    }

    state->memtupcount = 0;

    /*
     * Reset tuple memory.  We've freed all of the tuples that we previously
     * allocated.  It's important to avoid fragmentation when there is a stark
     * change in the sizes of incoming tuples.  In bounded sorts,
     * fragmentation due to AllocSetFree's bucketing by size class might be
     * particularly bad if this step wasn't taken.
     */
    MemoryContextReset(state->base.tuplecontext);

    /*
     * Now update the memory accounting to subtract the memory used by the
     * tuple.
     */
    FREEMEM(state, state->tupleMem);
    state->tupleMem = 0;

    /* Terminate the run on tape (markrunend() writes a zero length word) */
    markrunend(state->destTape);

    /*
     * The tape number in this message is computed from the run number and
     * nOutputTapes; it is not read back from destTape.
     */
    if (trace_sort)
        elog(LOG, "worker %d finished writing run %d to tape %d: %s",
             state->worker, state->currentRun, (state->currentRun - 1) % state->nOutputTapes + 1,
             pg_rusage_show(&state->ru_start));
}

References Assert(), elog, ereport, errcode(), errmsg(), ERROR, FREEMEM, i, LACKMEM, LOG, markrunend(), MemoryContextReset(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_sort_memtuples(), and WRITETUP.

Referenced by tuplesort_performsort(), and tuplesort_puttuple_common().

◆ free_sort_tuple()

static void free_sort_tuple	(	Tuplesortstate *	state,
		SortTuple *	stup
	)

static

Definition at line 3128 of file tuplesort.c.

{
    /*
     * Release the out-of-line tuple attached to *stup, if any: credit its
     * chunk space back to the sort's memory budget, free it, and clear the
     * pointer.
     */
    if (stup->tuple == NULL)
        return;                 /* nothing attached */

    FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
    pfree(stup->tuple);
    stup->tuple = NULL;
}

References FREEMEM, GetMemoryChunkSpace(), pfree(), and SortTuple::tuple.

Referenced by make_bounded_heap(), and tuplesort_puttuple_common().

◆ getlen()

static unsigned int getlen	(	LogicalTape *	tape,
		bool	eofOK
	)

static

Definition at line 2856 of file tuplesort.c.

{
    /*
     * Read the next length word from 'tape' and return it.
     *
     * A short read raises ERROR.  A zero length also raises ERROR unless the
     * caller passed eofOK = true.
     */
    unsigned int nbytes;
    size_t      nread;

    nread = LogicalTapeRead(tape, &nbytes, sizeof(nbytes));
    if (nread != sizeof(nbytes))
        elog(ERROR, "unexpected end of tape");

    if (!eofOK && nbytes == 0)
        elog(ERROR, "unexpected end of data");

    return nbytes;
}

References elog, ERROR, len, and LogicalTapeRead().

Referenced by mergereadnext(), and tuplesort_gettuple_common().

◆ grow_memtuples()

static bool grow_memtuples ( Tuplesortstate * state )

static

Definition at line 1052 of file tuplesort.c.

{
    /*
     * Try to enlarge the memtuples[] array.
     *
     * Returns true if the array was reallocated to a larger size.  Returns
     * false if no growth took place; on that path growmemtuples is cleared
     * (or was already clear), so later calls return false immediately.
     * Raises ERROR if the reallocation unexpectedly leaves LACKMEM() true.
     */
    int         newmemtupsize;
    int         memtupsize = state->memtupsize;
    int64       memNowUsed = state->allowedMem - state->availMem;

    /* Forget it if we've already maxed out memtuples, per comment above */
    if (!state->growmemtuples)
        return false;

    /* Select new value of memtupsize */
    if (memNowUsed <= state->availMem)
    {
        /*
         * We've used no more than half of allowedMem; double our usage,
         * clamping at INT_MAX tuples.
         */
        if (memtupsize < INT_MAX / 2)
            newmemtupsize = memtupsize * 2;
        else
        {
            newmemtupsize = INT_MAX;
            state->growmemtuples = false;
        }
    }
    else
    {
        /*
         * This will be the last increment of memtupsize.  Abandon doubling
         * strategy and instead increase as much as we safely can.
         *
         * To stay within allowedMem, we can't increase memtupsize by more
         * than availMem / sizeof(SortTuple) elements.  In practice, we want
         * to increase it by considerably less, because we need to leave some
         * space for the tuples to which the new array slots will refer.  We
         * assume the new tuples will be about the same size as the tuples
         * we've already seen, and thus we can extrapolate from the space
         * consumption so far to estimate an appropriate new size for the
         * memtuples array.  The optimal value might be higher or lower than
         * this estimate, but it's hard to know that in advance.  We again
         * clamp at INT_MAX tuples.
         *
         * This calculation is safe against enlarging the array so much that
         * LACKMEM becomes true, because the memory currently used includes
         * the present array; thus, there would be enough allowedMem for the
         * new array elements even if no other memory were currently used.
         *
         * We do the arithmetic in float8, because otherwise the product of
         * memtupsize and allowedMem could overflow.  Any inaccuracy in the
         * result should be insignificant; but even if we computed a
         * completely insane result, the checks below will prevent anything
         * really bad from happening.
         */
        double      grow_ratio;

        grow_ratio = (double) state->allowedMem / (double) memNowUsed;
        if (memtupsize * grow_ratio < INT_MAX)
            newmemtupsize = (int) (memtupsize * grow_ratio);
        else
            newmemtupsize = INT_MAX;

        /* We won't make any further enlargement attempts */
        state->growmemtuples = false;
    }

    /* Must enlarge array by at least one element, else report failure */
    if (newmemtupsize <= memtupsize)
        goto noalloc;

    /*
     * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize.  Clamp
     * to ensure our request won't be rejected.  Note that we can easily
     * exhaust address space before facing this outcome.  (This is presently
     * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
     * don't rely on that at this distance.)
     */
    if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple))
    {
        newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple));
        state->growmemtuples = false;   /* can't grow any more */
    }

    /*
     * We need to be sure that we do not cause LACKMEM to become true, else
     * the space management algorithm will go nuts.  The code above should
     * never generate a dangerous request, but to be safe, check explicitly
     * that the array growth fits within availMem.  (We could still cause
     * LACKMEM if the memory chunk overhead associated with the memtuples
     * array were to increase.  That shouldn't happen because we chose the
     * initial array size large enough to ensure that palloc will be treating
     * both old and new arrays as separate chunks.  But we'll check LACKMEM
     * explicitly below just in case.)
     */
    if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple)))
        goto noalloc;

    /*
     * OK, do it.  The old chunk's space is credited back before the
     * repalloc, and the new chunk's actual space is charged afterwards, so
     * availMem reflects whatever size the allocator really handed out.
     */
    FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
    state->memtupsize = newmemtupsize;
    state->memtuples = (SortTuple *)
        repalloc_huge(state->memtuples,
                      state->memtupsize * sizeof(SortTuple));
    USEMEM(state, GetMemoryChunkSpace(state->memtuples));
    if (LACKMEM(state))
        elog(ERROR, "unexpected out-of-memory situation in tuplesort");
    return true;

noalloc:
    /* If for any reason we didn't realloc, shut off future attempts */
    state->growmemtuples = false;
    return false;
}

References elog, ERROR, FREEMEM, GetMemoryChunkSpace(), LACKMEM, MaxAllocHugeSize, repalloc_huge(), and USEMEM.

Referenced by tuplesort_puttuple_common().

◆ init_slab_allocator()

static void init_slab_allocator	(	Tuplesortstate *	state,
		int	numSlots
	)

static

Definition at line 1981 of file tuplesort.c.

{
    /*
     * Set up the slab allocator with 'numSlots' slots of SLAB_SLOT_SIZE
     * bytes each, all chained onto the free list.  With numSlots <= 0 no
     * arena is allocated and the free list is empty.  In both cases
     * slabAllocatorUsed is set, which makes LACKMEM() evaluate to false from
     * then on.
     */
    if (numSlots <= 0)
    {
        /* No arena at all */
        state->slabMemoryBegin = NULL;
        state->slabMemoryEnd = NULL;
        state->slabFreeHead = NULL;
    }
    else
    {
        char       *slot;
        int         linksLeft;

        /* One contiguous arena, charged against the memory budget */
        state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE);
        state->slabMemoryEnd = state->slabMemoryBegin +
            numSlots * SLAB_SLOT_SIZE;
        state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin;
        USEMEM(state, numSlots * SLAB_SLOT_SIZE);

        /* Link each slot to its successor; the last slot ends the list */
        slot = state->slabMemoryBegin;
        for (linksLeft = numSlots - 1; linksLeft > 0; linksLeft--)
        {
            ((SlabSlot *) slot)->nextfree = (SlabSlot *) (slot + SLAB_SLOT_SIZE);
            slot += SLAB_SLOT_SIZE;
        }
        ((SlabSlot *) slot)->nextfree = NULL;
    }

    state->slabAllocatorUsed = true;
}

References i, palloc(), SLAB_SLOT_SIZE, and USEMEM.

Referenced by mergeruns().

◆ inittapes()

static void inittapes	(	Tuplesortstate *	state,
		bool	mergeruns
	)

static

Definition at line 1865 of file tuplesort.c.

{
    /*
     * Switch the sort to external (tape-based) mode: choose the tape count,
     * create the tape set, reset the run/tape bookkeeping, set status to
     * TSS_BUILDRUNS and select the first output tape.
     *
     * 'mergeruns' says whether a merge will follow; if not, the caller must
     * be a parallel worker and the minimum tape count is used.
     */
    Assert(!LEADER(state));

    if (!mergeruns)
    {
        /* Workers can sometimes produce single run, output without merge */
        Assert(WORKER(state));
        state->maxTapes = MINORDER;
    }
    else
    {
        /* Derive the number of input tapes for merging from allowedMem */
        state->maxTapes = tuplesort_merge_order(state->allowedMem);
    }

    if (trace_sort)
        elog(LOG, "worker %d switching to external sort with %d tapes: %s",
             state->worker, state->maxTapes, pg_rusage_show(&state->ru_start));

    /* Account for tape buffers, then build the tape set itself */
    inittapestate(state, state->maxTapes);
    state->tapeset =
        LogicalTapeSetCreate(false,
                             state->shared ? &state->shared->fileset : NULL,
                             state->worker);

    /* No runs written yet */
    state->currentRun = 0;

    /* No input side yet */
    state->inputTapes = NULL;
    state->nInputTapes = 0;
    state->nInputRuns = 0;

    /* Output side: zeroed array of maxTapes tape pointers, none in use */
    state->outputTapes = palloc0(state->maxTapes * sizeof(LogicalTape *));
    state->nOutputTapes = 0;
    state->nOutputRuns = 0;

    state->status = TSS_BUILDRUNS;

    selectnewtape(state);
}

References Assert(), elog, inittapestate(), LEADER, LOG, LogicalTapeSetCreate(), mergeruns(), MINORDER, palloc0(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_merge_order(), and WORKER.

Referenced by tuplesort_performsort(), and tuplesort_puttuple_common().

◆ inittapestate()

static void inittapestate	(	Tuplesortstate *	state,
		int	maxTapes
	)

static

Definition at line 1914 of file tuplesort.c.

{
    /*
     * Charge the memory budget for 'maxTapes' tape buffers of
     * TAPE_BUFFER_OVERHEAD bytes each, and prepare temp tablespaces.
     */
    int64       bufferBytes = (int64) maxTapes * TAPE_BUFFER_OVERHEAD;

    /*
     * Skip the charge when the buffers plus the memtuples[] array would
     * reach allowedMem, so that room is still left for tuples.  (That is
     * only likely when sorting pass-by-value Datums, where tuple-space
     * accounting is unimportant and an inaccurate LACKMEM is harmless.)
     */
    if (bufferBytes + GetMemoryChunkSpace(state->memtuples) < state->allowedMem)
        USEMEM(state, bufferBytes);

    /*
     * The temp files behind the tape set must land in suitable temp
     * tablespaces.  Parallel sorts should already have done this, and a
     * repeated call is harmless.
     */
    PrepareTempTablespaces();
}

References GetMemoryChunkSpace(), PrepareTempTablespaces(), TAPE_BUFFER_OVERHEAD, and USEMEM.

Referenced by inittapes(), and leader_takeover_tapes().

◆ leader_takeover_tapes()

static void leader_takeover_tapes ( Tuplesortstate * state )

static

Definition at line 3069 of file tuplesort.c.

{
    /*
     * Leader-side setup for a parallel sort: once every participant has
     * reported completion, create the leader's tape set, import one tape per
     * participant as an output tape, and leave the state looking as if the
     * initial runs had just been built (status TSS_BUILDRUNS).
     *
     * Raises ERROR if not all participants have finished yet.
     */
    Sharedsort *shared = state->shared;
    int         nParticipants = state->nParticipants;
    int         workersFinished;
    int         j;

    Assert(LEADER(state));
    Assert(nParticipants >= 1);

    /* Read the finished-worker count under the shared mutex */
    SpinLockAcquire(&shared->mutex);
    workersFinished = shared->workersFinished;
    SpinLockRelease(&shared->mutex);

    if (nParticipants != workersFinished)
        elog(ERROR, "cannot take over tapes before all workers finish");

    /*
     * Create the tapeset from worker tapes, including a leader-owned tape at
     * the end.  Parallel workers are far more expensive than logical tapes,
     * so the number of tapes allocated here should never be excessive.
     */
    inittapestate(state, nParticipants);
    state->tapeset = LogicalTapeSetCreate(false, &shared->fileset, -1);

    /*
     * Set currentRun to reflect the number of runs we will merge (it's not
     * used for anything, this is just pro forma)
     */
    state->currentRun = nParticipants;

    /*
     * Initialize the state to look the same as after building the initial
     * runs.
     *
     * There will always be exactly 1 run per worker, and exactly one input
     * tape per run, because workers always output exactly 1 run, even when
     * there were no input tuples for workers to sort.
     */
    state->inputTapes = NULL;
    state->nInputTapes = 0;
    state->nInputRuns = 0;

    state->outputTapes = palloc0(nParticipants * sizeof(LogicalTape *));
    state->nOutputTapes = nParticipants;
    state->nOutputRuns = nParticipants;

    /* Import participant j's shared tape as output tape j */
    for (j = 0; j < nParticipants; j++)
    {
        state->outputTapes[j] = LogicalTapeImport(state->tapeset, j, &shared->tapes[j]);
    }

    state->status = TSS_BUILDRUNS;
}

References Assert(), elog, ERROR, Sharedsort::fileset, inittapestate(), j, LEADER, LogicalTapeImport(), LogicalTapeSetCreate(), Sharedsort::mutex, palloc0(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, TSS_BUILDRUNS, and Sharedsort::workersFinished.

Referenced by tuplesort_performsort().

◆ make_bounded_heap()

static void make_bounded_heap ( Tuplesortstate * state )

static

Definition at line 2587 of file tuplesort.c.

{
    /*
     * Convert the unsorted memtuples[] array into a heap of exactly
     * state->bound entries, freeing the tuples that do not make the cut,
     * and move the sort to TSS_BOUNDED.
     */
    int         ninput = state->memtupcount;
    int         next;

    Assert(state->status == TSS_INITIAL);
    Assert(state->bounded);
    Assert(ninput >= state->bound);
    Assert(SERIAL(state));

    /* Flip the sort direction so that the largest entry sits at the root */
    reversedirection(state);

    /* Start from an empty heap and feed the old array contents through it */
    state->memtupcount = 0;
    for (next = 0; next < ninput; next++)
    {
        if (state->memtupcount < state->bound)
        {
            /*
             * Still filling the heap.  Insert from a local copy, since the
             * insertion may overwrite the source slot.
             */
            SortTuple   copy = state->memtuples[next];

            tuplesort_heap_insert(state, &copy);
            continue;
        }

        /*
         * Heap is full.  A candidate that compares greater than the root
         * (under the reversed direction) displaces it; any other candidate
         * is larger than everything kept so far and is simply discarded.
         */
        if (COMPARETUP(state, &state->memtuples[next], &state->memtuples[0]) > 0)
            tuplesort_heap_replace_top(state, &state->memtuples[next]);
        else
        {
            free_sort_tuple(state, &state->memtuples[next]);
            CHECK_FOR_INTERRUPTS();
        }
    }

    Assert(state->memtupcount == state->bound);
    state->status = TSS_BOUNDED;
}

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, free_sort_tuple(), i, reversedirection(), SERIAL, TSS_BOUNDED, TSS_INITIAL, tuplesort_heap_insert(), and tuplesort_heap_replace_top().

Referenced by tuplesort_puttuple_common().

◆ markrunend()

static void markrunend ( LogicalTape * tape )

static

Definition at line 2869 of file tuplesort.c.

{
    /* A zero length word on the tape serves as the end-of-run marker. */
    unsigned int marker = 0;

    LogicalTapeWrite(tape, &marker, sizeof(marker));
}

References len, and LogicalTapeWrite().

Referenced by dumptuples(), and mergeonerun().

◆ merge_read_buffer_size()

static int64 merge_read_buffer_size	(	int64	avail_mem,
		int	nInputTapes,
		int	nInputRuns,
		int	maxOutputTapes
	)

static

Definition at line 1833 of file tuplesort.c.

{
    /*
     * Work out how many bytes of read buffer each input tape gets in a merge
     * pass, given avail_mem bytes to share between the pass's input and
     * output tapes.  Never returns a negative value.
     */
    int         runsThisPass;
    int         outTapes;
    int64       perInputTape;

    /*
     * Number of runs the pass will write: nInputRuns / nInputTapes, rounded
     * up.
     */
    runsThisPass = (nInputRuns + nInputTapes - 1) / nInputTapes;

    /* Those runs are spread over at most maxOutputTapes output tapes. */
    outTapes = (runsThisPass < maxOutputTapes) ? runsThisPass : maxOutputTapes;

    /*
     * Reserve TAPE_BUFFER_OVERHEAD bytes for every output tape and split
     * whatever is left evenly between the input tapes.
     *
     * This also follows from the formula in tuplesort_merge_order, but here
     * the input buffer size is derived from the memory available and the
     * tape counts.
     */
    perInputTape = (avail_mem - TAPE_BUFFER_OVERHEAD * outTapes) / nInputTapes;

    /* Clamp at zero when the output tapes alone exceed the budget. */
    if (perInputTape > 0)
        return perInputTape;
    return 0;
}

References Max, Min, and TAPE_BUFFER_OVERHEAD.

Referenced by mergeruns().

◆ mergeonerun()

static void mergeonerun ( Tuplesortstate * state )

static

Definition at line 2200 of file tuplesort.c.

{
    /*
     * Merge one run from each active input tape into a single run on
     * state->destTape.
     *
     * beginmerge() primes the heap with one tuple per active source tape.
     */
    beginmerge(state);

    Assert(state->slabAllocatorUsed);

    /*
     * Drain the heap: emit the root, then refill from the tape the root came
     * from, or shrink the heap when that tape has no more tuples.
     */
    while (state->memtupcount > 0)
    {
        int         fromTape = state->memtuples[0].srctape;
        LogicalTape *inTape = state->inputTapes[fromTape];
        SortTuple   next;

        /* emit the current heap root on the destination tape */
        WRITETUP(state, state->destTape, &state->memtuples[0]);

        /* its storage can now be handed back, ready for the next read */
        if (state->memtuples[0].tuple)
            RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple);

        if (!mergereadnext(state, inTape, &next))
        {
            /* nothing further to read from this tape: drop the root */
            tuplesort_heap_delete_top(state);
            state->nInputRuns--;
            continue;
        }

        /* the successor from the same tape takes over the root position */
        next.srctape = fromTape;
        tuplesort_heap_replace_top(state, &next);
    }

    /* The heap is empty, so the output run is complete; terminate it. */
    markrunend(state->destTape);
}

References Assert(), beginmerge(), markrunend(), mergereadnext(), RELEASE_SLAB_SLOT, SortTuple::srctape, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), and WRITETUP.

Referenced by mergeruns().

◆ mergereadnext()

static bool mergereadnext	(	Tuplesortstate *	state,
		LogicalTape *	srcTape,
		SortTuple *	stup
	)

static

Definition at line 2288 of file tuplesort.c.

{
    /*
     * Fetch the next tuple from srcTape into *stup.  Returns false, leaving
     * *stup untouched, when the length word read is zero (markrunend() writes
     * such a zero length word); returns true otherwise.
     *
     * NOTE(review): the 'true' argument presumably tells getlen() that
     * hitting the end marker is acceptable -- confirm against getlen().
     */
    unsigned int len = getlen(srcTape, true);

    if (len == 0)
        return false;

    READTUP(state, stup, srcTape, len);
    return true;
}

References getlen(), and READTUP.

Referenced by beginmerge(), mergeonerun(), and tuplesort_gettuple_common().

◆ mergeruns()

static void mergeruns ( Tuplesortstate * state )

static

Definition at line 2017 of file tuplesort.c.

{
    /*
     * Merge the runs that were written to tape while in TSS_BUILDRUNS status.
     *
     * Two outcomes are possible:
     *   - TSS_FINALMERGE: only one merge pass remains and the caller did not
     *     request random access (and we are not a parallel worker), so the
     *     last merge is left to be done on-the-fly; no result tape is built.
     *   - TSS_SORTEDONTAPE: everything was merged into a single run, which is
     *     frozen and recorded in state->result_tape.
     */
    int         tapenum;

    Assert(state->status == TSS_BUILDRUNS);
    Assert(state->memtupcount == 0);

    if (state->base.sortKeys != NULL && state->base.sortKeys->abbrev_converter != NULL)
    {
        /*
         * If there are multiple runs to be merged, when we go to read back
         * tuples from disk, abbreviated keys will not have been stored, and
         * we don't care to regenerate them.  Disable abbreviation from this
         * point on.
         */
        state->base.sortKeys->abbrev_converter = NULL;
        state->base.sortKeys->comparator = state->base.sortKeys->abbrev_full_comparator;

        /* Not strictly necessary, but be tidy */
        state->base.sortKeys->abbrev_abort = NULL;
        state->base.sortKeys->abbrev_full_comparator = NULL;
    }

    /*
     * Reset tuple memory.  We've freed all the tuples that we previously
     * allocated.  We will use the slab allocator from now on.
     */
    MemoryContextResetOnly(state->base.tuplecontext);

    /*
     * We no longer need a large memtuples array.  (We will allocate a smaller
     * one for the heap later.)  Credit its space back to the budget before
     * releasing it.
     */
    FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
    pfree(state->memtuples);
    state->memtuples = NULL;

    /*
     * Initialize the slab allocator.  We need one slab slot per input tape,
     * for the tuples in the heap, plus one to hold the tuple last returned
     * from tuplesort_gettuple.  (If we're sorting pass-by-val Datums,
     * however, we don't need to allocate anything.)
     *
     * In a multi-pass merge, we could shrink this allocation for the last
     * merge pass, if it has fewer tapes than previous passes, but we don't
     * bother.
     *
     * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism
     * to track memory usage of individual tuples.
     */
    if (state->base.tuples)
        init_slab_allocator(state, state->nOutputTapes + 1);
    else
        init_slab_allocator(state, 0);

    /*
     * Allocate a new 'memtuples' array, for the heap.  It will hold one tuple
     * from each input tape.
     *
     * We could shrink this, too, between passes in a multi-pass merge, but we
     * don't bother.  (The initial input tapes are still in outputTapes.  The
     * number of input tapes will not increase between passes.)
     */
    state->memtupsize = state->nOutputTapes;
    state->memtuples = (SortTuple *) MemoryContextAlloc(state->base.maincontext,
                                                        state->nOutputTapes * sizeof(SortTuple));
    USEMEM(state, GetMemoryChunkSpace(state->memtuples));

    /*
     * Use all the remaining memory we have available for tape buffers among
     * all the input tapes.  At the beginning of each merge pass, we will
     * divide this memory between the input and output tapes in the pass.
     */
    state->tape_buffer_mem = state->availMem;
    /* charge everything that was left in the budget to the tape buffers */
    USEMEM(state, state->tape_buffer_mem);
    if (trace_sort)
        elog(LOG, "worker %d using %zu KB of memory for tape buffers",
             state->worker, state->tape_buffer_mem / 1024);

    /* Each iteration merges one run from every input tape onto one output tape. */
    for (;;)
    {
        /*
         * On the first iteration, or if we have read all the runs from the
         * input tapes in a multi-pass merge, it's time to start a new pass.
         * Rewind all the output tapes, and make them inputs for the next
         * pass.
         */
        if (state->nInputRuns == 0)
        {
            int64       input_buffer_size;

            /* Close the old, emptied, input tapes */
            if (state->nInputTapes > 0)
            {
                for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
                    LogicalTapeClose(state->inputTapes[tapenum]);
                pfree(state->inputTapes);
            }

            /* Previous pass's outputs become next pass's inputs. */
            state->inputTapes = state->outputTapes;
            state->nInputTapes = state->nOutputTapes;
            state->nInputRuns = state->nOutputRuns;

            /*
             * Reset output tape variables.  The actual LogicalTapes will be
             * created as needed, here we only allocate the array to hold
             * them.
             */
            state->outputTapes = palloc0(state->nInputTapes * sizeof(LogicalTape *));
            state->nOutputTapes = 0;
            state->nOutputRuns = 0;

            /*
             * Redistribute the memory allocated for tape buffers, among the
             * new input and output tapes.
             */
            input_buffer_size = merge_read_buffer_size(state->tape_buffer_mem,
                                                       state->nInputTapes,
                                                       state->nInputRuns,
                                                       state->maxTapes);

            if (trace_sort)
                elog(LOG, "starting merge pass of %d input runs on %d tapes, " INT64_FORMAT " KB of memory for each input tape: %s",
                     state->nInputRuns, state->nInputTapes, input_buffer_size / 1024,
                     pg_rusage_show(&state->ru_start));

            /* Prepare the new input tapes for merge pass. */
            for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
                LogicalTapeRewindForRead(state->inputTapes[tapenum], input_buffer_size);

            /*
             * If there's just one run left on each input tape, then only one
             * merge pass remains.  If we don't have to produce a materialized
             * sorted tape, we can stop at this point and do the final merge
             * on-the-fly.
             */
            if ((state->base.sortopt & TUPLESORT_RANDOMACCESS) == 0
                && state->nInputRuns <= state->nInputTapes
                && !WORKER(state))
            {
                /* Tell logtape.c we won't be writing anymore */
                LogicalTapeSetForgetFreeSpace(state->tapeset);
                /* Initialize for the final merge pass */
                beginmerge(state);
                state->status = TSS_FINALMERGE;
                /* early exit: result_tape is not set on this path */
                return;
            }
        }

        /* Select an output tape */
        selectnewtape(state);

        /* Merge one run from each input tape. */
        mergeonerun(state);

        /*
         * If the input tapes are empty, and we output only one output run,
         * we're done.  The current output tape contains the final result.
         */
        if (state->nInputRuns == 0 && state->nOutputRuns <= 1)
            break;
    }

    /*
     * Done.  The result is on a single run on a single tape.  A parallel
     * worker freezes its result through worker_freeze_result_tape(); everyone
     * else freezes the tape directly.
     */
    state->result_tape = state->outputTapes[0];
    if (!WORKER(state))
        LogicalTapeFreeze(state->result_tape, NULL);
    else
        worker_freeze_result_tape(state);
    state->status = TSS_SORTEDONTAPE;

    /* Close all the now-empty input tapes, to release their read buffers. */
    for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
        LogicalTapeClose(state->inputTapes[tapenum]);
}

References Assert(), beginmerge(), elog, FREEMEM, GetMemoryChunkSpace(), init_slab_allocator(), INT64_FORMAT, LOG, LogicalTapeClose(), LogicalTapeFreeze(), LogicalTapeRewindForRead(), LogicalTapeSetForgetFreeSpace(), MemoryContextAlloc(), MemoryContextResetOnly(), merge_read_buffer_size(), mergeonerun(), palloc0(), pfree(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_SORTEDONTAPE, TUPLESORT_RANDOMACCESS, USEMEM, WORKER, and worker_freeze_result_tape().

Referenced by inittapes(), and tuplesort_performsort().

◆ qsort_tuple_int32_compare()

static pg_attribute_always_inline int qsort_tuple_int32_compare	(	SortTuple *	a,
		SortTuple *	b,
		Tuplesortstate *	state
	)

static

Definition at line 542 of file tuplesort.c.

{
    /*
     * Comparator for the int32-specialized qsort: compare the leading key
     * (datum1/isnull1) first and fall back to the tiebreak callback only when
     * that is inconclusive and there are further keys.
     */
    int         result = ApplyInt32SortComparator(a->datum1, a->isnull1,
                                                  b->datum1, b->isnull1,
                                                  &state->base.sortKeys[0]);

    /*
     * The tiebreak function is worth calling only for equal leading keys,
     * and only when onlyKey is unset; otherwise the leading-key result
     * (possibly zero) is already the answer.
     */
    if (result == 0 && state->base.onlyKey == NULL)
        return state->base.comparetup_tiebreak(a, b, state);

    return result;
}

References a, ApplyInt32SortComparator(), b, and compare().

◆ qsort_tuple_unsigned_compare()

static pg_attribute_always_inline int qsort_tuple_unsigned_compare	(	SortTuple *	a,
		SortTuple *	b,
		Tuplesortstate *	state
	)

static

Definition at line 495 of file tuplesort.c.

{
    /*
     * Comparator for the unsigned-specialized qsort: the leading key
     * (datum1/isnull1) decides unless it compares equal, in which case the
     * tiebreak callback is consulted if there are further keys.
     */
    int         firstKey;

    firstKey = ApplyUnsignedSortComparator(a->datum1, a->isnull1,
                                           b->datum1, b->isnull1,
                                           &state->base.sortKeys[0]);

    /* A non-zero result on the leading key is final. */
    if (firstKey != 0)
        return firstKey;

    /*
     * Equal leading keys.  With onlyKey set there is nothing else to sort
     * on, so don't waste effort on the tiebreak function.
     */
    return (state->base.onlyKey != NULL)
        ? 0
        : state->base.comparetup_tiebreak(a, b, state);
}

References a, ApplyUnsignedSortComparator(), b, and compare().

◆ reversedirection()

static void reversedirection ( Tuplesortstate * state )

static

Definition at line 2838 of file tuplesort.c.

{
    /*
     * Invert the ordering of every sort key by toggling both its
     * ssup_reverse and ssup_nulls_first flags.  Calling this twice restores
     * the original flags.
     */
    SortSupport keys = state->base.sortKeys;
    int         k;

    for (k = 0; k < state->base.nKeys; k++)
    {
        keys[k].ssup_reverse = !keys[k].ssup_reverse;
        keys[k].ssup_nulls_first = !keys[k].ssup_nulls_first;
    }
}

References SortSupportData::ssup_nulls_first, and SortSupportData::ssup_reverse.

Referenced by make_bounded_heap(), and sort_bounded_heap().

◆ selectnewtape()

static void selectnewtape ( Tuplesortstate * state )

static

Definition at line 1948 of file tuplesort.c.

{
    /*
     * Choose state->destTape, the tape that will receive the next run.
     *
     * At the beginning of each merge pass, nOutputTapes and nOutputRuns are
     * both zero.  Until maxTapes tapes exist, every call creates a fresh
     * tape; afterwards runs are appended to the existing tapes in round
     * robin order.
     */
    if (state->nOutputTapes >= state->maxTapes)
    {
        /* All tapes are in use: append to one of them. */
        state->destTape = state->outputTapes[state->nOutputRuns % state->nOutputTapes];
    }
    else
    {
        /* There is still room for a new tape to hold the next run. */
        Assert(state->outputTapes[state->nOutputRuns] == NULL);
        Assert(state->nOutputRuns == state->nOutputTapes);
        state->destTape = LogicalTapeCreate(state->tapeset);
        state->outputTapes[state->nOutputTapes] = state->destTape;
        state->nOutputTapes++;
    }

    /* Either way, one more run is about to be written. */
    state->nOutputRuns++;
}

References Assert(), and LogicalTapeCreate().

Referenced by dumptuples(), inittapes(), and mergeruns().

◆ sort_bounded_heap()

static void sort_bounded_heap ( Tuplesortstate * state )

static

Definition at line 2636 of file tuplesort.c.

{
    /*
     * Convert the bounded heap built by make_bounded_heap() into a sorted
     * array, in place, and move the sort to TSS_SORTEDINMEM status.
     */
    const int   nkept = state->memtupcount;

    Assert(state->status == TSS_BOUNDED);
    Assert(state->bounded);
    Assert(nkept == state->bound);
    Assert(SERIAL(state));

    /*
     * Unheapify in place.  Every delete-top removes the largest entry and
     * frees the last slot of the heap, which is exactly where that entry
     * belongs.  A heap of one entry needs no further work.
     */
    while (state->memtupcount > 1)
    {
        SortTuple   largest = state->memtuples[0];

        /* sifts up the next-largest entry and decrements memtupcount */
        tuplesort_heap_delete_top(state);
        state->memtuples[state->memtupcount] = largest;
    }

    /* The loop consumed the count; put the real number of tuples back. */
    state->memtupcount = nkept;

    /*
     * Restore the original sort direction.  Not actually necessary, but it
     * keeps the state tidy.
     */
    reversedirection(state);

    state->boundUsed = true;
    state->status = TSS_SORTEDINMEM;
}

References Assert(), reversedirection(), SERIAL, TSS_BOUNDED, TSS_SORTEDINMEM, and tuplesort_heap_delete_top().

Referenced by tuplesort_performsort().

◆ ssup_datum_int32_cmp()

int ssup_datum_int32_cmp	(	Datum	x,
		Datum	y,
		SortSupport	ssup
	)

Definition at line 3166 of file tuplesort.c.

{
    /*
     * Three-way comparison of two Datums interpreted as int32 values.
     * Returns -1, 0 or 1; ssup is not consulted.
     */
    int32       lhs = DatumGetInt32(x);
    int32       rhs = DatumGetInt32(y);

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}

References DatumGetInt32(), x, and y.

Referenced by btint4sortsupport(), date_sortsupport(), and tuplesort_sort_memtuples().

◆ ssup_datum_unsigned_cmp()

int ssup_datum_unsigned_cmp	(	Datum	x,
		Datum	y,
		SortSupport	ssup
	)

Definition at line 3139 of file tuplesort.c.

{
    /*
     * Three-way comparison of the two Datum values themselves.  Returns -1,
     * 0 or 1; ssup is not consulted.
     */
    return (x < y) ? -1 : ((x > y) ? 1 : 0);
}

References x, and y.

Referenced by gist_point_sortsupport(), macaddr_sortsupport(), network_sortsupport(), tuplesort_sort_memtuples(), uuid_sortsupport(), and varstr_sortsupport().

◆ tuplesort_attach_shared()

void tuplesort_attach_shared	(	Sharedsort *	shared,
		dsm_segment *	seg
	)

Definition at line 2961 of file tuplesort.c.

{
    /*
     * Attach to the SharedFileSet embedded in the shared sort state, passing
     * along the caller's DSM segment.  Nothing else in 'shared' is touched
     * here.
     */
    SharedFileSetAttach(&shared->fileset, seg);
}

References Sharedsort::fileset, and SharedFileSetAttach().

Referenced by _brin_parallel_build_main(), _bt_parallel_build_main(), and _gin_parallel_build_main().

◆ tuplesort_begin_batch()

static void tuplesort_begin_batch ( Tuplesortstate * state )

static

Definition at line 752 of file tuplesort.c.

{
    /*
     * Set up all the state needed to sort one batch of tuples.  This runs
     * with the main context as the current memory context, and the caller's
     * context is restored before returning.
     */
    MemoryContext callercxt = MemoryContextSwitchTo(state->base.maincontext);
    MemoryContext tuplecxt;

    /*
     * Context for caller-passed tuples (e.g. IndexTuple).
     *
     * Keeping these in a dedicated child context eases memory management,
     * and resetting it at key points reduces fragmentation.  The memtuples
     * array of SortTuples is deliberately not kept here: it is allocated
     * while the main context is current, because there is no need to free it
     * early.  Bounded sorts may pfree tuples in any order, so they get a
     * regular aset.c context that can reuse freed memory; otherwise a bump.c
     * context is used, which keeps allocations more compact with less
     * wastage and is slightly cheaper on CPU.
     */
    if (TupleSortUseBumpTupleCxt(state->base.sortopt))
        tuplecxt = BumpContextCreate(state->base.sortcontext,
                                     "Caller tuples",
                                     ALLOCSET_DEFAULT_SIZES);
    else
        tuplecxt = AllocSetContextCreate(state->base.sortcontext,
                                         "Caller tuples",
                                         ALLOCSET_DEFAULT_SIZES);
    state->base.tuplecontext = tuplecxt;

    /* A fresh batch: nothing sorted, no bound, no tapes, no tuples held. */
    state->status = TSS_INITIAL;
    state->bounded = false;
    state->boundUsed = false;
    state->tapeset = NULL;
    state->memtupcount = 0;
    state->growmemtuples = true;
    state->slabAllocatorUsed = false;

    /* The full memory budget is available again. */
    state->availMem = state->allowedMem;

    /*
     * Make sure memtuples[] exists at its initial size.  (The initial size
     * must be more than ALLOCSET_SEPARATE_THRESHOLD; see comments in
     * grow_memtuples().)  An array of any other size left over from an
     * earlier batch is discarded and replaced.
     *
     * NOTE(review): an array retained at the initial size is not charged to
     * availMem here -- confirm that this is intended.
     */
    if (state->memtuples != NULL && state->memtupsize != INITIAL_MEMTUPSIZE)
    {
        pfree(state->memtuples);
        state->memtuples = NULL;
        state->memtupsize = INITIAL_MEMTUPSIZE;
    }
    if (state->memtuples == NULL)
    {
        state->memtuples = (SortTuple *) palloc(sizeof(SortTuple) * state->memtupsize);
        USEMEM(state, GetMemoryChunkSpace(state->memtuples));
    }

    /* The budget has to cover at least the minimal memtuples array. */
    if (LACKMEM(state))
        elog(ERROR, "insufficient memory allowed for sort");

    state->currentRun = 0;

    /*
     * Tape variables (inputTapes, outputTapes, etc.) are left for inittapes()
     * to initialize, if they turn out to be needed.
     */

    /* NULL flags that no result tape has been formed yet */
    state->result_tape = NULL;

    MemoryContextSwitchTo(callercxt);
}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BumpContextCreate(), elog, ERROR, GetMemoryChunkSpace(), INITIAL_MEMTUPSIZE, LACKMEM, MemoryContextSwitchTo(), palloc(), pfree(), TSS_INITIAL, TupleSortUseBumpTupleCxt, and USEMEM.

Referenced by tuplesort_begin_common(), and tuplesort_reset().

◆ tuplesort_begin_common()

Tuplesortstate * tuplesort_begin_common	(	int	workMem,
		SortCoordinate	coordinate,
		int	sortopt
	)

Definition at line 642 of file tuplesort.c.

{
    /*
     * Create and initialize a Tuplesortstate with the state that does not
     * depend on the kind of tuple being sorted.
     *
     * workMem is the memory budget in kilobytes, coordinate is NULL for a
     * serial sort or describes this process's role in a parallel sort, and
     * sortopt is a bitmask of TUPLESORT_* options.
     */
    Tuplesortstate *state;
    MemoryContext maincxt;
    MemoryContext sortcxt;
    MemoryContext callercxt;

    /* See leader_takeover_tapes() remarks on random access support */
    if (coordinate != NULL && (sortopt & TUPLESORT_RANDOMACCESS) != 0)
        elog(ERROR, "random access disallowed under parallel sort");

    /*
     * This context survives tuplesort_reset; it holds data that is useful to
     * keep while sorting multiple similar batches.
     */
    maincxt = AllocSetContextCreate(CurrentMemoryContext,
                                    "TupleSort main",
                                    ALLOCSET_DEFAULT_SIZES);

    /*
     * Working memory for a single sort operation; its content is deleted by
     * tuplesort_reset.  (A further context for the tuples themselves is set
     * up in tuplesort_begin_batch.)
     */
    sortcxt = AllocSetContextCreate(maincxt,
                                    "TupleSort sort",
                                    ALLOCSET_DEFAULT_SIZES);

    /*
     * The Tuplesortstate itself is allocated in the main context, so no
     * separate pfree() is needed for it at shutdown.
     */
    callercxt = MemoryContextSwitchTo(maincxt);

    state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));

    if (trace_sort)
        pg_rusage_init(&state->ru_start);

    state->base.maincontext = maincxt;
    state->base.sortcontext = sortcxt;
    state->base.sortopt = sortopt;
    state->base.tuples = true;
    state->abbrevNext = 10;

    /*
     * Never run with less than 64KB, the current minimum valid value for the
     * work_mem GUC.  This guards against parallel sort callers that split
     * memory between many workers and leave each with very little.
     */
    state->allowedMem = Max(workMem, 64) * (int64) 1024;

    /*
     * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
     * see comments in grow_memtuples().  The array itself is allocated by
     * tuplesort_begin_batch.
     */
    state->memtupsize = INITIAL_MEMTUPSIZE;
    state->memtuples = NULL;

    /* With the non-parallel state in place, set up the per-batch state. */
    tuplesort_begin_batch(state);

    /* Finally, the parallel-related state, taken from the coordinate. */
    if (coordinate == NULL)
    {
        /* Serial sort */
        state->shared = NULL;
        state->worker = -1;
        state->nParticipants = -1;
    }
    else
    {
        state->shared = coordinate->sharedsort;

        if (coordinate->isWorker)
        {
            /* Parallel worker produces exactly one final run from all input */
            state->worker = worker_get_identifier(state);
            state->nParticipants = -1;
        }
        else
        {
            /* Parallel leader state only used for final merge */
            state->worker = -1;
            state->nParticipants = coordinate->nParticipants;
            Assert(state->nParticipants >= 1);
        }
    }

    MemoryContextSwitchTo(callercxt);

    return state;
}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), CurrentMemoryContext, elog, ERROR, INITIAL_MEMTUPSIZE, SortCoordinateData::isWorker, Max, MemoryContextSwitchTo(), SortCoordinateData::nParticipants, palloc0(), pg_rusage_init(), SortCoordinateData::sharedsort, trace_sort, tuplesort_begin_batch(), TUPLESORT_RANDOMACCESS, and worker_get_identifier().

Referenced by tuplesort_begin_cluster(), tuplesort_begin_datum(), tuplesort_begin_heap(), tuplesort_begin_index_brin(), tuplesort_begin_index_btree(), tuplesort_begin_index_gin(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

◆ tuplesort_end()

void tuplesort_end ( Tuplesortstate * state )

Definition at line 951 of file tuplesort.c.

{
    /*
     * Shut down the sort and release everything it owns.  First let
     * tuplesort_free() release the per-sort resources (tape files and the
     * sort context's contents).
     */
    tuplesort_free(state);

    /*
     * Free the main memory context, including the Tuplesortstate struct
     * itself.  'state' was allocated in this context by
     * tuplesort_begin_common(), so the caller must not use it afterwards.
     */
    MemoryContextDelete(state->base.maincontext);
}

References MemoryContextDelete(), and tuplesort_free().

Referenced by _brin_parallel_merge(), _brin_parallel_scan_and_build(), _bt_parallel_scan_and_sort(), _bt_spooldestroy(), _gin_parallel_merge(), _gin_parallel_scan_and_build(), _gin_process_worker_data(), _h_spooldestroy(), array_sort_internal(), ExecEndAgg(), ExecEndIncrementalSort(), ExecEndSort(), ExecReScanAgg(), ExecReScanSort(), gistbuild(), heapam_relation_copy_for_cluster(), initialize_aggregate(), initialize_phase(), ordered_set_shutdown(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), and validate_index().

◆ tuplesort_estimate_shared()

Size tuplesort_estimate_shared ( int nWorkers )

Definition at line 2917 of file tuplesort.c.

{
    /*
     * Compute the number of bytes needed for a Sharedsort that carries one
     * TapeShare per worker.  The size arithmetic goes through mul_size() and
     * add_size().
     */
    Size        tapeArrayBytes;

    Assert(nWorkers > 0);

    /* room for the per-worker TapeShare array ... */
    tapeArrayBytes = mul_size(sizeof(TapeShare), nWorkers);

    /*
     * ... on top of the fixed part of the struct, rounded up so that BufFile
     * shared state is MAXALIGN'd.
     */
    return MAXALIGN(add_size(tapeArrayBytes, offsetof(Sharedsort, tapes)));
}

References add_size(), Assert(), MAXALIGN, and mul_size().

Referenced by _brin_begin_parallel(), _bt_begin_parallel(), and _gin_begin_parallel().

◆ tuplesort_free()

static void tuplesort_free ( Tuplesortstate * state )

static

Definition at line 897 of file tuplesort.c.

{
    /*
     * Release everything belonging to the current sort operation: close the
     * tape set if there is one, report usage, invoke the freestate callback
     * and reset the sort context.  The Tuplesortstate itself and the main
     * context are left alone.
     */

    /* context swap probably not needed, but let's be safe */
    MemoryContext callercxt = MemoryContextSwitchTo(state->base.sortcontext);
    int64       spaceUsed;

    if (state->tapeset != NULL)
    {
        /* On-disk sort: usage is reported in tape set blocks. */
        spaceUsed = LogicalTapeSetBlocks(state->tapeset);

        /*
         * Delete the temporary "tape" files.
         *
         * We don't bother to destroy the individual tapes here.  They will
         * go away with the sortcontext.  (In TSS_FINALMERGE state, finished
         * tapes have been closed already.)
         */
        LogicalTapeSetClose(state->tapeset);
    }
    else
    {
        /* In-memory sort: usage is the consumed budget, rounded up to KB. */
        spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
    }

    if (trace_sort)
    {
        if (state->tapeset != NULL)
            elog(LOG, "%s of worker %d ended, %" PRId64 " disk blocks used: %s",
                 SERIAL(state) ? "external sort" : "parallel external sort",
                 state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
        else
            elog(LOG, "%s of worker %d ended, %" PRId64 " KB used: %s",
                 SERIAL(state) ? "internal sort" : "unperformed parallel sort",
                 state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
    }

    TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed);

    /* Let the tuple-kind-specific code clean up, if it registered a hook. */
    FREESTATE(state);
    MemoryContextSwitchTo(callercxt);

    /*
     * Resetting the per-sort memory context releases all working memory in
     * one go.
     */
    MemoryContextReset(state->base.sortcontext);
}

References elog, FREESTATE, LOG, LogicalTapeSetBlocks(), LogicalTapeSetClose(), MemoryContextReset(), MemoryContextSwitchTo(), pg_rusage_show(), SERIAL, and trace_sort.

Referenced by tuplesort_end(), and tuplesort_reset().

◆ tuplesort_get_stats()

void tuplesort_get_stats	(	Tuplesortstate *	state,
		TuplesortInstrumentation *	stats
	)

Definition at line 2499 of file tuplesort.c.

{
    /*
     * Fill *stats with the space type, space used (in KB, rounded up) and
     * sort method, based on the maximum-space figures kept in the state.
     *
     * Note: it might seem we should provide both memory and disk usage for a
     * disk-based sort.  However, memory space is not tracked accurately once
     * tuples have begun to be returned to the caller (pfree's the caller is
     * expected to do are not accounted for), so availMem cannot be relied on
     * in a disk sort.  Fixing that does not seem worth the overhead.  Is it
     * worth creating an API for the memory context code to tell us how much
     * is actually used in sortcontext?
     */
    tuplesort_updatemax(state);

    stats->spaceType = state->isMaxSpaceDisk
        ? SORT_SPACE_TYPE_DISK
        : SORT_SPACE_TYPE_MEMORY;
    stats->spaceUsed = (state->maxSpace + 1023) / 1024;

    /* Map the recorded status onto the sort method to report. */
    if (state->maxSpaceStatus == TSS_SORTEDINMEM)
    {
        /* an in-memory result came from either the bounded heap or qsort */
        stats->sortMethod = state->boundUsed
            ? SORT_TYPE_TOP_N_HEAPSORT
            : SORT_TYPE_QUICKSORT;
    }
    else if (state->maxSpaceStatus == TSS_SORTEDONTAPE)
        stats->sortMethod = SORT_TYPE_EXTERNAL_SORT;
    else if (state->maxSpaceStatus == TSS_FINALMERGE)
        stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE;
    else
        stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS;
}

References SORT_SPACE_TYPE_DISK, SORT_SPACE_TYPE_MEMORY, SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, SORT_TYPE_TOP_N_HEAPSORT, TuplesortInstrumentation::sortMethod, TuplesortInstrumentation::spaceType, TuplesortInstrumentation::spaceUsed, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and tuplesort_updatemax().

Referenced by ExecSort(), instrumentSortedGroup(), and show_sort_info().

◆ tuplesort_gettuple_common()

bool tuplesort_gettuple_common	(	Tuplesortstate *	state,
		bool	forward,
		SortTuple *	stup
	)

Definition at line 1470 of file tuplesort.c.

{
    /*
     * Fetch one tuple from a finished sort, moving forward or backward as
     * requested by "forward".  On success the tuple is stored in *stup and
     * true is returned; false means there is no tuple to return in that
     * direction.
     *
     * The work is dispatched on state->status: the sorted in-memory array
     * (TSS_SORTEDINMEM), a single result tape (TSS_SORTEDONTAPE), or an
     * on-the-fly final merge of the input tapes (TSS_FINALMERGE).  Any other
     * status raises an error.
     */
    unsigned int tuplen;        /* length word read from the result tape */
    size_t      nmoved;         /* bytes actually backed up on the tape */

    /* must not be called on a parallel worker's sort state */
    Assert(!WORKER(state));

    switch (state->status)
    {
        case TSS_SORTEDINMEM:
            Assert(forward || state->base.sortopt & TUPLESORT_RANDOMACCESS);
            Assert(!state->slabAllocatorUsed);
            if (forward)
            {
                if (state->current < state->memtupcount)
                {
                    *stup = state->memtuples[state->current++];
                    return true;
                }
                state->eof_reached = true;

                /*
                 * Complain if caller tries to retrieve more tuples than
                 * originally asked for in a bounded sort.  This is because
                 * returning EOF here might be the wrong thing.
                 */
                if (state->bounded && state->current >= state->bound)
                    elog(ERROR, "retrieved too many tuples in a bounded sort");

                return false;
            }
            else
            {
                if (state->current <= 0)
                    return false;

                /*
                 * if all tuples are fetched already then we return last
                 * tuple, else - tuple before last returned.
                 */
                if (state->eof_reached)
                    state->eof_reached = false;
                else
                {
                    state->current--;   /* last returned tuple */
                    if (state->current <= 0)
                        return false;
                }

                /*
                 * "current" is the index of the next tuple to return in the
                 * forward direction, so the tuple wanted here is the one
                 * just before it.
                 */
                *stup = state->memtuples[state->current - 1];
                return true;
            }
            break;              /* not reached: both branches above return */

        case TSS_SORTEDONTAPE:
            Assert(forward || state->base.sortopt & TUPLESORT_RANDOMACCESS);
            Assert(state->slabAllocatorUsed);

            /*
             * The slot that held the tuple that we returned in previous
             * gettuple call can now be reused.
             */
            if (state->lastReturnedTuple)
            {
                RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
                state->lastReturnedTuple = NULL;
            }

            if (forward)
            {
                if (state->eof_reached)
                    return false;

                /* a zero length word marks the end of the data */
                if ((tuplen = getlen(state->result_tape, true)) != 0)
                {
                    READTUP(state, stup, state->result_tape, tuplen);

                    /*
                     * Remember the tuple we return, so that we can recycle
                     * its memory on next call.  (This can be NULL, in the
                     * !state->tuples case).
                     */
                    state->lastReturnedTuple = stup->tuple;

                    return true;
                }
                else
                {
                    state->eof_reached = true;
                    return false;
                }
            }

            /*
             * Backward.
             *
             * if all tuples are fetched already then we return last tuple,
             * else - tuple before last returned.
             */
            if (state->eof_reached)
            {
                /*
                 * Seek position is pointing just past the zero tuplen at the
                 * end of file; back up to fetch last tuple's ending length
                 * word.  If seek fails we must have a completely empty file.
                 */
                nmoved = LogicalTapeBackspace(state->result_tape,
                                              2 * sizeof(unsigned int));
                if (nmoved == 0)
                    return false;
                else if (nmoved != 2 * sizeof(unsigned int))
                    elog(ERROR, "unexpected tape position");
                state->eof_reached = false;
            }
            else
            {
                /*
                 * Back up and fetch previously-returned tuple's ending length
                 * word.  If seek fails, assume we are at start of file.
                 */
                nmoved = LogicalTapeBackspace(state->result_tape,
                                              sizeof(unsigned int));
                if (nmoved == 0)
                    return false;
                else if (nmoved != sizeof(unsigned int))
                    elog(ERROR, "unexpected tape position");
                tuplen = getlen(state->result_tape, false);

                /*
                 * Back up to get ending length word of tuple before it.
                 */
                nmoved = LogicalTapeBackspace(state->result_tape,
                                              tuplen + 2 * sizeof(unsigned int));
                if (nmoved == tuplen + sizeof(unsigned int))
                {
                    /*
                     * We backed up over the previous tuple, but there was no
                     * ending length word before it.  That means that the prev
                     * tuple is the first tuple in the file.  It is now the
                     * next to read in forward direction (not obviously right,
                     * but that is what in-memory case does).
                     */
                    return false;
                }
                else if (nmoved != tuplen + 2 * sizeof(unsigned int))
                    elog(ERROR, "bogus tuple length in backward scan");
            }

            /* read the ending length word of the tuple to be returned */
            tuplen = getlen(state->result_tape, false);

            /*
             * Now we have the length of the prior tuple, back up and read it.
             * Note: READTUP expects we are positioned after the initial
             * length word of the tuple, so back up to that point.
             */
            nmoved = LogicalTapeBackspace(state->result_tape,
                                          tuplen);
            if (nmoved != tuplen)
                elog(ERROR, "bogus tuple length in backward scan");
            READTUP(state, stup, state->result_tape, tuplen);

            /*
             * Remember the tuple we return, so that we can recycle its memory
             * on next call. (This can be NULL, in the Datum case).
             */
            state->lastReturnedTuple = stup->tuple;

            return true;

        case TSS_FINALMERGE:
            /* only forward fetches are possible during a final merge */
            Assert(forward);
            /* We are managing memory ourselves, with the slab allocator. */
            Assert(state->slabAllocatorUsed);

            /*
             * The slab slot holding the tuple that we returned in previous
             * gettuple call can now be reused.
             */
            if (state->lastReturnedTuple)
            {
                RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
                state->lastReturnedTuple = NULL;
            }

            /*
             * This code should match the inner loop of mergeonerun().
             */
            if (state->memtupcount > 0)
            {
                int         srcTapeIndex = state->memtuples[0].srctape;
                LogicalTape *srcTape = state->inputTapes[srcTapeIndex];
                SortTuple   newtup;

                /* the heap's top entry is the tuple to return */
                *stup = state->memtuples[0];

                /*
                 * Remember the tuple we return, so that we can recycle its
                 * memory on next call. (This can be NULL, in the Datum case).
                 */
                state->lastReturnedTuple = stup->tuple;

                /*
                 * Pull next tuple from tape, and replace the returned tuple
                 * at top of the heap with it.
                 */
                if (!mergereadnext(state, srcTape, &newtup))
                {
                    /*
                     * If no more data, we've reached end of run on this tape.
                     * Remove the top node from the heap.
                     */
                    tuplesort_heap_delete_top(state);
                    state->nInputRuns--;

                    /*
                     * Close the tape.  It'd go away at the end of the sort
                     * anyway, but better to release the memory early.
                     */
                    LogicalTapeClose(srcTape);
                    return true;
                }
                newtup.srctape = srcTapeIndex;
                tuplesort_heap_replace_top(state, &newtup);
                return true;
            }
            /* heap is empty: nothing left to return */
            return false;

        default:
            elog(ERROR, "invalid tuplesort state");
            return false;       /* keep compiler quiet */
    }
}

References Assert(), elog, ERROR, getlen(), LogicalTapeBackspace(), LogicalTapeClose(), mergereadnext(), READTUP, RELEASE_SLAB_SLOT, SortTuple::srctape, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, SortTuple::tuple, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), TUPLESORT_RANDOMACCESS, and WORKER.

Referenced by tuplesort_getbrintuple(), tuplesort_getdatum(), tuplesort_getgintuple(), tuplesort_getheaptuple(), tuplesort_getindextuple(), tuplesort_gettupleslot(), and tuplesort_skiptuples().

◆ tuplesort_heap_delete_top()

static void tuplesort_heap_delete_top ( Tuplesortstate * state )

static

Definition at line 2774 of file tuplesort.c.

{
    int         remaining;

    /*
     * Drop the top entry of the heap in state->memtuples.  Shrink the heap
     * by one; if that leaves it empty there is nothing to re-arrange.
     */
    state->memtupcount -= 1;
    remaining = state->memtupcount;
    if (remaining <= 0)
        return;

    /*
     * The entry that used to be last now lies just past the end of the
     * shrunken heap.  Re-insert it by letting it take the place of the
     * current top node.
     */
    tuplesort_heap_replace_top(state, &state->memtuples[remaining]);
}

References tuplesort_heap_replace_top().

Referenced by mergeonerun(), sort_bounded_heap(), and tuplesort_gettuple_common().

◆ tuplesort_heap_insert()

static void tuplesort_heap_insert	(	Tuplesortstate *	state,
		SortTuple *	tuple
	)

static

Definition at line 2739 of file tuplesort.c.

{
    SortTuple  *heap = state->memtuples;
    int         hole;

    /* the array must have room for one more entry */
    Assert(state->memtupcount < state->memtupsize);

    CHECK_FOR_INTERRUPTS();

    /* The new entry starts out in the slot just past the current heap. */
    hole = state->memtupcount;
    state->memtupcount = hole + 1;

    /*
     * Sift-up, per Knuth 5.2.3 exercise 16 (Knuth uses 1-based array
     * indexes, this code is 0-based): move parents down into the hole until
     * the new tuple no longer compares less than the hole's parent.
     */
    while (hole > 0)
    {
        int         parent = (hole - 1) / 2;

        if (COMPARETUP(state, tuple, &heap[parent]) >= 0)
            break;
        heap[hole] = heap[parent];
        hole = parent;
    }

    heap[hole] = *tuple;
}

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by beginmerge() and make_bounded_heap().

◆ tuplesort_heap_replace_top()

static void tuplesort_heap_replace_top	(	Tuplesortstate *	state,
		SortTuple *	tuple
	)

static

Definition at line 2798 of file tuplesort.c.

{
    SortTuple  *heap = state->memtuples;
    unsigned int hole = 0;      /* slot currently waiting to be filled */
    unsigned int count;
    unsigned int child;

    /* there must be a top entry to replace */
    Assert(state->memtupcount >= 1);

    CHECK_FOR_INTERRUPTS();

    /*
     * state->memtupcount is an "int", but the indexes here are "unsigned
     * int".  That keeps "2 * hole + 1" from overflowing, because whenever it
     * is computed we have hole < count <= INT_MAX <= UINT_MAX/2.
     */
    count = state->memtupcount;

    /*
     * Sift-down: starting from the root, pull the smaller child up into the
     * hole until the new tuple compares <= that child or the hole has no
     * children left.
     */
    for (child = 1; child < count; child = 2 * hole + 1)
    {
        unsigned int sibling = child + 1;

        /* pick the smaller of the two children, if there are two */
        if (sibling < count &&
            COMPARETUP(state, &heap[child], &heap[sibling]) > 0)
            child = sibling;

        if (COMPARETUP(state, tuple, &heap[child]) <= 0)
            break;

        heap[hole] = heap[child];
        hole = child;
    }

    heap[hole] = *tuple;
}

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by make_bounded_heap(), mergeonerun(), tuplesort_gettuple_common(), tuplesort_heap_delete_top(), and tuplesort_puttuple_common().

◆ tuplesort_initialize_shared()

void tuplesort_initialize_shared	(	Sharedsort *	shared,
		int	nWorkers,
		dsm_segment *	seg
	)

Definition at line 2938 of file tuplesort.c.

{
    int         tapeno = 0;

    Assert(nWorkers > 0);

    /* Set up the lock and the worker counters. */
    SpinLockInit(&shared->mutex);
    shared->currentWorker = 0;
    shared->workersFinished = 0;

    /* Initialize the shared fileset, passing along the given segment. */
    SharedFileSetInit(&shared->fileset, seg);

    /* One tape entry per worker, each starting out at block number zero. */
    shared->nTapes = nWorkers;
    while (tapeno < nWorkers)
    {
        shared->tapes[tapeno].firstblocknumber = 0L;
        tapeno++;
    }
}

References Assert(), Sharedsort::currentWorker, Sharedsort::fileset, TapeShare::firstblocknumber, i, Sharedsort::mutex, Sharedsort::nTapes, SharedFileSetInit(), SpinLockInit, Sharedsort::tapes, and Sharedsort::workersFinished.

Referenced by _brin_begin_parallel(), _bt_begin_parallel(), and _gin_begin_parallel().

◆ tuplesort_markpos()

void tuplesort_markpos ( Tuplesortstate * state )

Definition at line 2435 of file tuplesort.c.

{
    MemoryContext oldcontext;

    /* Work inside the sort's own memory context; restored before return. */
    oldcontext = MemoryContextSwitchTo(state->base.sortcontext);

    /* marking a position requires a random-access sort */
    Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);

    if (state->status == TSS_SORTEDINMEM)
    {
        /* in memory, the array index is all that needs remembering */
        state->markpos_offset = state->current;
        state->markpos_eof = state->eof_reached;
    }
    else if (state->status == TSS_SORTEDONTAPE)
    {
        /* on tape, record the block and offset reported by the tape */
        LogicalTapeTell(state->result_tape,
                        &state->markpos_block,
                        &state->markpos_offset);
        state->markpos_eof = state->eof_reached;
    }
    else
    {
        elog(ERROR, "invalid tuplesort state");
    }

    MemoryContextSwitchTo(oldcontext);
}

References Assert(), elog, ERROR, LogicalTapeTell(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecSortMarkPos().

◆ tuplesort_merge_order()

int tuplesort_merge_order ( int64 allowedMem )

Definition at line 1778 of file tuplesort.c.

{
    /*
     * Compute the merge order (number of input tapes per merge pass) to use
     * for a sort that may consume allowedMem bytes.  The result is always
     * within [MINORDER, MAXORDER].
     *
     * The quotient is kept in 64 bits until after it has been clamped.
     * Narrowing it to "int" first would be an implementation-defined
     * conversion for very large allowedMem values, and could produce a
     * bogus merge order.
     */
    int64       mOrder;

    /*----------
     * In the merge phase, we need buffer space for each input and output tape.
     * Each pass in the balanced merge algorithm reads from M input tapes, and
     * writes to N output tapes.  Each tape consumes TAPE_BUFFER_OVERHEAD bytes
     * of memory.  In addition to that, we want MERGE_BUFFER_SIZE workspace per
     * input tape.
     *
     * totalMem = M * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE) +
     *            N * TAPE_BUFFER_OVERHEAD
     *
     * Except for the last and next-to-last merge passes, where there can be
     * fewer tapes left to process, M = N.  We choose M so that we have the
     * desired amount of memory available for the input buffers
     * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE), given the total memory
     * available for the tape buffers (allowedMem).
     *
     * Note: you might be thinking we need to account for the memtuples[]
     * array in this calculation, but we effectively treat that as part of the
     * MERGE_BUFFER_SIZE workspace.
     *----------
     */
    mOrder = allowedMem /
        (2 * TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE);

    /*
     * Even in minimum memory, use at least a MINORDER merge.  On the other
     * hand, even when we have lots of memory, do not use more than a MAXORDER
     * merge.  Tapes are pretty cheap, but they're not entirely free.  Each
     * additional tape reduces the amount of memory available to build runs,
     * which in turn can cause the same sort to need more runs, which makes
     * merging slower even if it can still be done in a single pass.  Also,
     * high order merges are quite slow due to CPU cache effects; it can be
     * faster to pay the I/O cost of a multi-pass merge than to perform a
     * single merge pass across many hundreds of tapes.
     */
    mOrder = Max(mOrder, MINORDER);
    mOrder = Min(mOrder, MAXORDER);

    /* now known to lie in [MINORDER, MAXORDER], so the cast is lossless */
    return (int) mOrder;
}

References Max, MAXORDER, MERGE_BUFFER_SIZE, Min, MINORDER, and TAPE_BUFFER_OVERHEAD.

Referenced by cost_tuplesort() and inittapes().

◆ tuplesort_method_name()

const char * tuplesort_method_name ( TuplesortMethod m )

Definition at line 2543 of file tuplesort.c.

{
    /* Text returned for any value not matched by a case below. */
    const char *label = "unknown";

    /*
     * Map a sort method code to its human-readable name.  There is
     * deliberately no "default" arm, matching the original code.
     */
    switch (m)
    {
        case SORT_TYPE_STILL_IN_PROGRESS:
            label = "still in progress";
            break;
        case SORT_TYPE_TOP_N_HEAPSORT:
            label = "top-N heapsort";
            break;
        case SORT_TYPE_QUICKSORT:
            label = "quicksort";
            break;
        case SORT_TYPE_EXTERNAL_SORT:
            label = "external sort";
            break;
        case SORT_TYPE_EXTERNAL_MERGE:
            label = "external merge";
            break;
    }

    return label;
}

References SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, and SORT_TYPE_TOP_N_HEAPSORT.

Referenced by show_incremental_sort_group_info() and show_sort_info().

◆ tuplesort_performsort()

void tuplesort_performsort ( Tuplesortstate * state )

Definition at line 1363 of file tuplesort.c.

{
    /*
     * Finish the sort once all input tuples have been supplied.  What needs
     * doing depends on state->status at the time of the call: TSS_INITIAL
     * (everything is still in memory, or this is a parallel leader),
     * TSS_BOUNDED (bounded heap in use) or TSS_BUILDRUNS (runs are being
     * written to tape).  Any other status raises an error.  On return the
     * read position and mark fields have been reset.
     *
     * All work is done in the sort's own memory context; the caller's
     * context is restored before returning.
     */
    MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);

    if (trace_sort)
        elog(LOG, "performsort of worker %d starting: %s",
             state->worker, pg_rusage_show(&state->ru_start));

    switch (state->status)
    {
        case TSS_INITIAL:

            /*
             * We were able to accumulate all the tuples within the allowed
             * amount of memory, or leader to take over worker tapes
             */
            if (SERIAL(state))
            {
                /* Just qsort 'em and we're done */
                tuplesort_sort_memtuples(state);
                state->status = TSS_SORTEDINMEM;
            }
            else if (WORKER(state))
            {
                /*
                 * Parallel workers must still dump out tuples to tape.  No
                 * merge is required to produce single output run, though.
                 */
                inittapes(state, false);
                dumptuples(state, true);
                worker_nomergeruns(state);
                state->status = TSS_SORTEDONTAPE;
            }
            else
            {
                /*
                 * Leader will take over worker tapes and merge worker runs.
                 * Note that mergeruns sets the correct state->status.
                 */
                leader_takeover_tapes(state);
                mergeruns(state);
            }
            /* start reading from the beginning, with no mark set */
            state->current = 0;
            state->eof_reached = false;
            state->markpos_block = 0L;
            state->markpos_offset = 0;
            state->markpos_eof = false;
            break;

        case TSS_BOUNDED:

            /*
             * We were able to accumulate all the tuples required for output
             * in memory, using a heap to eliminate excess tuples.  Now we
             * have to transform the heap to a properly-sorted array. Note
             * that sort_bounded_heap sets the correct state->status.
             */
            sort_bounded_heap(state);
            state->current = 0;
            state->eof_reached = false;
            state->markpos_offset = 0;
            state->markpos_eof = false;
            break;

        case TSS_BUILDRUNS:

            /*
             * Finish tape-based sort.  First, flush all tuples remaining in
             * memory out to tape; then merge until we have a single remaining
             * run (or, if !randomAccess and !WORKER(), one run per tape).
             * Note that mergeruns sets the correct state->status.
             */
            dumptuples(state, true);
            mergeruns(state);
            state->eof_reached = false;
            state->markpos_block = 0L;
            state->markpos_offset = 0;
            state->markpos_eof = false;
            break;

        default:
            elog(ERROR, "invalid tuplesort state");
            break;
    }

    if (trace_sort)
    {
        /* a pending final merge is reported differently from a finished sort */
        if (state->status == TSS_FINALMERGE)
            elog(LOG, "performsort of worker %d done (except %d-way final merge): %s",
                 state->worker, state->nInputTapes,
                 pg_rusage_show(&state->ru_start));
        else
            elog(LOG, "performsort of worker %d done: %s",
                 state->worker, pg_rusage_show(&state->ru_start));
    }

    MemoryContextSwitchTo(oldcontext);
}

References dumptuples(), elog, ERROR, inittapes(), leader_takeover_tapes(), LOG, MemoryContextSwitchTo(), mergeruns(), pg_rusage_show(), SERIAL, sort_bounded_heap(), trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_INITIAL, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_sort_memtuples(), WORKER, and worker_nomergeruns().

Referenced by _brin_parallel_merge(), _brin_parallel_scan_and_build(), _bt_leafbuild(), _bt_parallel_scan_and_sort(), _gin_parallel_merge(), _gin_parallel_scan_and_build(), _gin_process_worker_data(), _h_indexbuild(), array_sort_internal(), ExecIncrementalSort(), ExecSort(), gistbuild(), heapam_relation_copy_for_cluster(), hypothetical_dense_rank_final(), hypothetical_rank_common(), initialize_phase(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), percentile_disc_multi_final(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), switchToPresortedPrefixMode(), and validate_index().

◆ tuplesort_puttuple_common()

void tuplesort_puttuple_common	(	Tuplesortstate *	state,
		SortTuple *	tuple,
		bool	useAbbrev,
		Size	tuplen
	)

Definition at line 1169 of file tuplesort.c.

{
    /*
     * Add one tuple to the sort.  "tuplen" is the amount of memory charged
     * against the sort's budget for this tuple, and "useAbbrev" says whether
     * an abbreviated key should be considered for tuple->datum1.
     *
     * After the memory accounting and abbreviation handling, what happens to
     * the tuple depends on state->status: it is appended to the in-memory
     * array (TSS_INITIAL, TSS_BUILDRUNS) or offered to the bounded heap
     * (TSS_BOUNDED).  Any other status raises an error.
     *
     * All work is done in the sort's own memory context; every exit path
     * restores the caller's context first.
     */
    MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);

    /* must not be called on a parallel leader's sort state */
    Assert(!LEADER(state));

    /* account for the memory used for this tuple */
    USEMEM(state, tuplen);
    state->tupleMem += tuplen;

    if (!useAbbrev)
    {
        /*
         * Leave ordinary Datum representation, or NULL value.  If there is a
         * converter it won't expect NULL values, and cost model is not
         * required to account for NULL, so in that case we avoid calling
         * converter and just set datum1 to zeroed representation (to be
         * consistent, and to support cheap inequality tests for NULL
         * abbreviated keys).
         */
    }
    else if (!consider_abort_common(state))
    {
        /* Store abbreviated key representation */
        tuple->datum1 = state->base.sortKeys->abbrev_converter(tuple->datum1,
                                                               state->base.sortKeys);
    }
    else
    {
        /*
         * Set state to be consistent with never trying abbreviation.
         *
         * Alter datum1 representation in already-copied tuples, so as to
         * ensure a consistent representation (current tuple was just
         * handled).  It does not matter if some dumped tuples are already
         * sorted on tape, since serialized tuples lack abbreviated keys
         * (TSS_BUILDRUNS state prevents control reaching here in any case).
         */
        REMOVEABBREV(state, state->memtuples, state->memtupcount);
    }

    switch (state->status)
    {
        case TSS_INITIAL:

            /*
             * Save the tuple into the unsorted array.  First, grow the array
             * as needed.  Note that we try to grow the array when there is
             * still one free slot remaining --- if we fail, there'll still be
             * room to store the incoming tuple, and then we'll switch to
             * tape-based operation.
             */
            if (state->memtupcount >= state->memtupsize - 1)
            {
                (void) grow_memtuples(state);
                Assert(state->memtupcount < state->memtupsize);
            }
            state->memtuples[state->memtupcount++] = *tuple;

            /*
             * Check if it's time to switch over to a bounded heapsort. We do
             * so if the input tuple count exceeds twice the desired tuple
             * count (this is a heuristic for where heapsort becomes cheaper
             * than a quicksort), or if we've just filled workMem and have
             * enough tuples to meet the bound.
             *
             * Note that once we enter TSS_BOUNDED state we will always try to
             * complete the sort that way.  In the worst case, if later input
             * tuples are larger than earlier ones, this might cause us to
             * exceed workMem significantly.
             */
            if (state->bounded &&
                (state->memtupcount > state->bound * 2 ||
                 (state->memtupcount > state->bound && LACKMEM(state))))
            {
                if (trace_sort)
                    elog(LOG, "switching to bounded heapsort at %d tuples: %s",
                         state->memtupcount,
                         pg_rusage_show(&state->ru_start));
                make_bounded_heap(state);
                /* early exit: restore the caller's memory context */
                MemoryContextSwitchTo(oldcontext);
                return;
            }

            /*
             * Done if we still fit in available memory and have array slots.
             */
            if (state->memtupcount < state->memtupsize && !LACKMEM(state))
            {
                /* early exit: restore the caller's memory context */
                MemoryContextSwitchTo(oldcontext);
                return;
            }

            /*
             * Nope; time to switch to tape-based operation.
             */
            inittapes(state, true);

            /*
             * Dump all tuples.
             */
            dumptuples(state, false);
            break;

        case TSS_BOUNDED:

            /*
             * We don't want to grow the array here, so check whether the new
             * tuple can be discarded before putting it in.  This should be a
             * good speed optimization, too, since when there are many more
             * input tuples than the bound, most input tuples can be discarded
             * with just this one comparison.  Note that because we currently
             * have the sort direction reversed, we must check for <= not >=.
             */
            if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0)
            {
                /* new tuple <= top of the heap, so we can discard it */
                free_sort_tuple(state, tuple);
                CHECK_FOR_INTERRUPTS();
            }
            else
            {
                /* discard top of heap, replacing it with the new tuple */
                free_sort_tuple(state, &state->memtuples[0]);
                tuplesort_heap_replace_top(state, tuple);
            }
            break;

        case TSS_BUILDRUNS:

            /*
             * Save the tuple into the unsorted array (there must be space)
             */
            state->memtuples[state->memtupcount++] = *tuple;

            /*
             * If we are over the memory limit, dump all tuples.
             */
            dumptuples(state, false);
            break;

        default:
            elog(ERROR, "invalid tuplesort state");
            break;
    }
    MemoryContextSwitchTo(oldcontext);
}

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, consider_abort_common(), SortTuple::datum1, dumptuples(), elog, ERROR, free_sort_tuple(), grow_memtuples(), inittapes(), LACKMEM, LEADER, LOG, make_bounded_heap(), MemoryContextSwitchTo(), pg_rusage_show(), REMOVEABBREV, trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_INITIAL, tuplesort_heap_replace_top(), and USEMEM.

Referenced by tuplesort_putbrintuple(), tuplesort_putdatum(), tuplesort_putgintuple(), tuplesort_putheaptuple(), tuplesort_putindextuplevalues(), and tuplesort_puttupleslot().

◆ tuplesort_readtup_alloc()

void * tuplesort_readtup_alloc	(	Tuplesortstate *	state,
		Size	tuplen
	)

Definition at line 2883 of file tuplesort.c.

{
    SlabSlot   *slot;

    /*
     * Enough slots are pre-allocated in the slab arena that the free list
     * should never be found empty.
     */
    Assert(state->slabFreeHead);

    slot = state->slabFreeHead;

    /*
     * Hand out ordinary memory from the sort context when the tuple is too
     * big for a slab slot, or when there is no free slot after all.
     */
    if (slot == NULL || tuplen > SLAB_SLOT_SIZE)
        return MemoryContextAlloc(state->base.sortcontext, tuplen);

    /* Otherwise unlink the first free slot and reuse it. */
    state->slabFreeHead = slot->nextfree;

    return slot;
}

References Assert(), buf, MemoryContextAlloc(), and SLAB_SLOT_SIZE.

Referenced by readtup_cluster(), readtup_datum(), readtup_heap(), readtup_index(), readtup_index_brin(), and readtup_index_gin().

◆ tuplesort_rescan()

void tuplesort_rescan ( Tuplesortstate * state )

Definition at line 2402 of file tuplesort.c.

{
    MemoryContext oldcontext;

    /* Work inside the sort's own memory context; restored before return. */
    oldcontext = MemoryContextSwitchTo(state->base.sortcontext);

    /* rescanning requires a random-access sort */
    Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);

    if (state->status == TSS_SORTEDINMEM)
    {
        /* go back to the first array entry and forget any mark */
        state->current = 0;
        state->eof_reached = false;
        state->markpos_offset = 0;
        state->markpos_eof = false;
    }
    else if (state->status == TSS_SORTEDONTAPE)
    {
        /* rewind the result tape and forget any mark */
        LogicalTapeRewindForRead(state->result_tape, 0);
        state->eof_reached = false;
        state->markpos_block = 0L;
        state->markpos_offset = 0;
        state->markpos_eof = false;
    }
    else
    {
        elog(ERROR, "invalid tuplesort state");
    }

    MemoryContextSwitchTo(oldcontext);
}

References Assert(), elog, ERROR, LogicalTapeRewindForRead(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecReScanSort(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().

◆ tuplesort_reset()

void tuplesort_reset ( Tuplesortstate * state )

Definition at line 1019 of file tuplesort.c.

{
    /*
     * Update the space statistics first, then free the current batch.
     * NOTE(review): presumably the statistics must be captured before the
     * memory is released -- confirm against tuplesort_updatemax().
     */
    tuplesort_updatemax(state);
    tuplesort_free(state);

    /*
     * With the per-batch memory gone, set up again all of the state that is
     * common to the first batch and every later batch.
     */
    tuplesort_begin_batch(state);

    /* Clear the slab allocator fields and the last-returned-tuple pointer. */
    state->slabFreeHead = NULL;
    state->slabMemoryBegin = NULL;
    state->slabMemoryEnd = NULL;
    state->lastReturnedTuple = NULL;
}

References tuplesort_begin_batch(), tuplesort_free(), and tuplesort_updatemax().

Referenced by ExecIncrementalSort(), ExecReScanIncrementalSort(), and switchToPresortedPrefixMode().

◆ tuplesort_restorepos()

void tuplesort_restorepos ( Tuplesortstate * state )

Definition at line 2466 of file tuplesort.c.

{
    MemoryContext oldcontext;

    /* Work inside the sort's own memory context; restored before return. */
    oldcontext = MemoryContextSwitchTo(state->base.sortcontext);

    /* restoring a position requires a random-access sort */
    Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);

    if (state->status == TSS_SORTEDINMEM)
    {
        /* in memory, the saved offset is the array index to resume from */
        state->current = state->markpos_offset;
        state->eof_reached = state->markpos_eof;
    }
    else if (state->status == TSS_SORTEDONTAPE)
    {
        /* on tape, seek back to the saved block and offset */
        LogicalTapeSeek(state->result_tape,
                        state->markpos_block,
                        state->markpos_offset);
        state->eof_reached = state->markpos_eof;
    }
    else
    {
        elog(ERROR, "invalid tuplesort state");
    }

    MemoryContextSwitchTo(oldcontext);
}

References Assert(), elog, ERROR, LogicalTapeSeek(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecSortRestrPos().

◆ tuplesort_set_bound()

void tuplesort_set_bound	(	Tuplesortstate *	state,
		int64	bound
	)

Definition at line 838 of file tuplesort.c.

{
    /*
     * Preconditions: no tuples have been loaded yet, the sort was created
     * with bounded sorts allowed, no bound has been set before, and this is
     * not a parallel worker.
     */
    Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
    Assert(state->base.sortopt & TUPLESORT_ALLOWBOUNDED);
    Assert(!state->bounded);
    Assert(!WORKER(state));

    /* A parallel leader accepts the hint but does nothing with it. */
    if (LEADER(state))
        return;

#ifdef DEBUG_BOUNDED_SORT
    /* The feature can be switched off through a GUC, for easy testing. */
    if (!optimize_bounded_sort)
        return;
#endif

    /*
     * The bound is only honored when bound * 2 can still be computed as an
     * int; larger values are silently ignored.
     */
    if (bound <= (int64) (INT_MAX / 2))
    {
        state->bound = (int) bound;
        state->bounded = true;

        /*
         * Abbreviated keys are not an effective optimization for bounded
         * sorts.  Put the sort key into the state it would have had without
         * abbreviation support: if there is a full comparator, make it the
         * comparator in use, then drop the converter.
         */
        if (state->base.sortKeys->abbrev_full_comparator)
            state->base.sortKeys->comparator = state->base.sortKeys->abbrev_full_comparator;
        state->base.sortKeys->abbrev_converter = NULL;

        /* Not strictly necessary, but be tidy */
        state->base.sortKeys->abbrev_abort = NULL;
        state->base.sortKeys->abbrev_full_comparator = NULL;
    }
}

References Assert(), LEADER, TSS_INITIAL, TUPLESORT_ALLOWBOUNDED, and WORKER.

Referenced by ExecIncrementalSort(), ExecSort(), and switchToPresortedPrefixMode().

◆ tuplesort_skiptuples()

bool tuplesort_skiptuples	(	Tuplesortstate *	state,
		int64	ntuples,
		bool	forward
	)

Definition at line 1710 of file tuplesort.c.

{
    MemoryContext callercontext;

    /*
     * Only forward skips are implemented, since no caller needs the
     * backward direction yet; the signature leaves room to add it.
     */
    Assert(forward);
    Assert(ntuples >= 0);
    Assert(!WORKER(state));

    switch (state->status)
    {
        case TSS_SORTEDINMEM:
            if (ntuples > state->memtupcount - state->current)
            {
                /* Not enough tuples left: move to the end and flag EOF */
                state->current = state->memtupcount;
                state->eof_reached = true;

                /*
                 * In a bounded sort, running off the end at or past the
                 * bound is an error, because reporting EOF here might be
                 * the wrong answer.
                 */
                if (state->bounded && state->current >= state->bound)
                    elog(ERROR, "retrieved too many tuples in a bounded sort");

                return false;
            }

            /* Enough tuples remain; just advance the read position */
            state->current += ntuples;
            return true;

        case TSS_SORTEDONTAPE:
        case TSS_FINALMERGE:
            {
                /*
                 * No shortcut here: fetch and discard tuples one at a time.
                 * This could probably be made smarter, but it has not been
                 * worth the trouble so far.
                 */
                bool        skipped_all = true;
                int64       remaining;

                callercontext = MemoryContextSwitchTo(state->base.sortcontext);
                for (remaining = ntuples; remaining > 0; remaining--)
                {
                    SortTuple   discarded;

                    if (!tuplesort_gettuple_common(state, forward, &discarded))
                    {
                        skipped_all = false;
                        break;
                    }
                    CHECK_FOR_INTERRUPTS();
                }
                MemoryContextSwitchTo(callercontext);
                return skipped_all;
            }

        default:
            elog(ERROR, "invalid tuplesort state");
            return false;       /* keep compiler quiet */
    }
}

References Assert(), CHECK_FOR_INTERRUPTS, elog, ERROR, MemoryContextSwitchTo(), TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_gettuple_common(), and WORKER.

Referenced by percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().

◆ tuplesort_sort_memtuples()

static void tuplesort_sort_memtuples ( Tuplesortstate * state )

static

Definition at line 2676 of file tuplesort.c.

{
    Assert(!LEADER(state));

    /* Nothing to do for zero or one tuple */
    if (state->memtupcount <= 1)
        return;

    /*
     * When datum1 holds the leading column's value or abbreviation and the
     * leading key uses one of the comparators we have a specialized qsort
     * for, use that specialization and return.
     */
    if (state->base.haveDatum1 && state->base.sortKeys)
    {
        if (state->base.sortKeys[0].comparator == ssup_datum_unsigned_cmp)
        {
            qsort_tuple_unsigned(state->memtuples, state->memtupcount, state);
            return;
        }
#if SIZEOF_DATUM >= 8
        if (state->base.sortKeys[0].comparator == ssup_datum_signed_cmp)
        {
            qsort_tuple_signed(state->memtuples, state->memtupcount, state);
            return;
        }
#endif
        if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp)
        {
            qsort_tuple_int32(state->memtuples, state->memtupcount, state);
            return;
        }
    }

    /*
     * No specialization applied.  Use the single-key sort when onlyKey is
     * set; otherwise use the general sort driven by the comparetup callback.
     */
    if (state->base.onlyKey == NULL)
        qsort_tuple(state->memtuples,
                    state->memtupcount,
                    state->base.comparetup,
                    state);
    else
        qsort_ssup(state->memtuples,
                   state->memtupcount,
                   state->base.onlyKey);
}

References Assert(), LEADER, ssup_datum_int32_cmp(), and ssup_datum_unsigned_cmp().

Referenced by dumptuples() and tuplesort_performsort().

◆ tuplesort_space_type_name()

const char * tuplesort_space_type_name ( TuplesortSpaceType t )

Definition at line 2566 of file tuplesort.c.

{
    /* Only the disk and memory space types are expected here */
    Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY);

    /* Anything that is not the disk type is reported as "Memory" */
    if (t == SORT_SPACE_TYPE_DISK)
        return "Disk";
    return "Memory";
}

References Assert(), SORT_SPACE_TYPE_DISK, and SORT_SPACE_TYPE_MEMORY.

Referenced by show_incremental_sort_group_info() and show_sort_info().

◆ tuplesort_updatemax()

static void tuplesort_updatemax ( Tuplesortstate * state )

static

Definition at line 968 of file tuplesort.c.

{
    bool        onDisk;
    int64       bytesUsed;
    bool        replaceMax;

    /*
     * Report disk usage when a tape set exists, memory usage otherwise;
     * never both.  Once tuples start going back to the caller, memory
     * accounting is no longer accurate (the caller's pfree's are not
     * tracked), so availMem cannot be relied on for a disk sort, and fixing
     * that has not seemed worth the overhead.
     */
    onDisk = (state->tapeset != NULL);
    if (onDisk)
        bytesUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ;
    else
        bytesUsed = state->allowedMem - state->availMem;

    /*
     * Disk usage outranks memory usage: data only goes to disk because it
     * did not fit in memory, so disk space is treated as the more important
     * figure for resource tracking, even though the same tuples may take
     * less room on disk thanks to a more compact representation.  Hence a
     * disk figure always replaces a recorded memory figure, a figure of the
     * same kind replaces the record only when it is larger, and a memory
     * figure never replaces a recorded disk figure.
     */
    if (onDisk && !state->isMaxSpaceDisk)
        replaceMax = true;
    else if (onDisk == state->isMaxSpaceDisk)
        replaceMax = (bytesUsed > state->maxSpace);
    else
        replaceMax = false;

    if (replaceMax)
    {
        state->maxSpaceStatus = state->status;
        state->isMaxSpaceDisk = onDisk;
        state->maxSpace = bytesUsed;
    }
}

References LogicalTapeSetBlocks().

Referenced by tuplesort_get_stats() and tuplesort_reset().

◆ tuplesort_used_bound()

bool tuplesort_used_bound ( Tuplesortstate * state )

Definition at line 886 of file tuplesort.c.

{
    /* Plain accessor: hand back the state's boundUsed flag unchanged */
    bool        bound_was_used = state->boundUsed;

    return bound_was_used;
}

Referenced by ExecIncrementalSort().

◆ worker_freeze_result_tape()

static void worker_freeze_result_tape ( Tuplesortstate * state )

static

Definition at line 3009 of file tuplesort.c.

{
    Sharedsort *coordination = state->shared;
    TapeShare   frozen;

    /* Worker only, with a result tape in hand and no tuples left in memory */
    Assert(WORKER(state));
    Assert(state->result_tape != NULL);
    Assert(state->memtupcount == 0);

    /*
     * Give back the memtuples array in case the caller cares about us
     * holding on to memory; at this point it is not necessarily just a
     * tiny merge heap.  Reset the bookkeeping fields to match.
     */
    pfree(state->memtuples);
    state->memtuples = NULL;
    state->memtupsize = 0;

    /*
     * Freeze the result tape, collecting the tape metadata the leader will
     * need; that metadata is published through shared memory below.
     */
    LogicalTapeFreeze(state->result_tape, &frozen);

    /*
     * Under the shared mutex, record this worker's tape properties in its
     * slot and bump the count of finished workers.
     */
    SpinLockAcquire(&coordination->mutex);
    coordination->tapes[state->worker] = frozen;
    coordination->workersFinished += 1;
    SpinLockRelease(&coordination->mutex);
}

References Assert(), LogicalTapeFreeze(), Sharedsort::mutex, output, pfree(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, WORKER, and Sharedsort::workersFinished.

Referenced by mergeruns() and worker_nomergeruns().

◆ worker_get_identifier()

static int worker_get_identifier ( Tuplesortstate * state )

static

Definition at line 2981 of file tuplesort.c.

{
    Sharedsort *coordination = state->shared;
    int         assigned;

    Assert(WORKER(state));

    /*
     * Take the current value of the shared counter as this worker's
     * identifier and advance the counter, all under the shared mutex.
     */
    SpinLockAcquire(&coordination->mutex);
    assigned = coordination->currentWorker;
    coordination->currentWorker = assigned + 1;
    SpinLockRelease(&coordination->mutex);

    return assigned;
}

References Assert(), Sharedsort::currentWorker, Sharedsort::mutex, SpinLockAcquire, SpinLockRelease, and WORKER.

Referenced by tuplesort_begin_common().

◆ worker_nomergeruns()

static void worker_nomergeruns ( Tuplesortstate * state )

static

Definition at line 3047 of file tuplesort.c.

{
    /*
     * Expected state: we are a parallel worker, no result tape has been
     * chosen yet, and there is exactly one output run.
     */
    Assert(WORKER(state));
    Assert(state->result_tape == NULL);
    Assert(state->nOutputRuns == 1);

    /* Adopt the destination tape as the result tape, then freeze it */
    state->result_tape = state->destTape;
    worker_freeze_result_tape(state);
}

References Assert(), WORKER, and worker_freeze_result_tape().

Referenced by tuplesort_performsort().

Data Structures

Macros

Typedefs

Enumerations

Functions

Variables

Macro Definition Documentation

◆ COMPARETUP

◆ FREEMEM

◆ FREESTATE

◆ INITIAL_MEMTUPSIZE

◆ IS_SLAB_SLOT

◆ LACKMEM

◆ LEADER

◆ MAXORDER

◆ MERGE_BUFFER_SIZE

◆ MINORDER

◆ READTUP

◆ RELEASE_SLAB_SLOT

◆ REMOVEABBREV

◆ SERIAL

◆ SLAB_SLOT_SIZE

◆ ST_CHECK_FOR_INTERRUPTS [1/4]

◆ ST_CHECK_FOR_INTERRUPTS [2/4]

◆ ST_CHECK_FOR_INTERRUPTS [3/4]

◆ ST_CHECK_FOR_INTERRUPTS [4/4]

◆ ST_COMPARE [1/3]

◆ ST_COMPARE [2/3]

◆ ST_COMPARE [3/3]

◆ ST_COMPARE_ARG_TYPE [1/4]

◆ ST_COMPARE_ARG_TYPE [2/4]

◆ ST_COMPARE_ARG_TYPE [3/4]

◆ ST_COMPARE_ARG_TYPE [4/4]

◆ ST_COMPARE_RUNTIME_POINTER

◆ ST_DECLARE

◆ ST_DEFINE [1/4]

◆ ST_DEFINE [2/4]

◆ ST_DEFINE [3/4]

◆ ST_DEFINE [4/4]

◆ ST_ELEMENT_TYPE [1/4]

◆ ST_ELEMENT_TYPE [2/4]

◆ ST_ELEMENT_TYPE [3/4]

◆ ST_ELEMENT_TYPE [4/4]

◆ ST_SCOPE [1/4]

◆ ST_SCOPE [2/4]

◆ ST_SCOPE [3/4]

◆ ST_SCOPE [4/4]

◆ ST_SORT [1/4]

◆ ST_SORT [2/4]

◆ ST_SORT [3/4]

◆ ST_SORT [4/4]

◆ TAPE_BUFFER_OVERHEAD

◆ USEMEM

◆ WORKER

◆ WRITETUP

Typedef Documentation

◆ SlabSlot

Enumeration Type Documentation

◆ TupSortStatus

Function Documentation

◆ beginmerge()

◆ consider_abort_common()

◆ dumptuples()

◆ free_sort_tuple()

◆ getlen()

◆ grow_memtuples()

◆ init_slab_allocator()

◆ inittapes()

◆ inittapestate()

◆ leader_takeover_tapes()

◆ make_bounded_heap()

◆ markrunend()

◆ merge_read_buffer_size()

◆ mergeonerun()

◆ mergereadnext()

◆ mergeruns()

◆ qsort_tuple_int32_compare()

◆ qsort_tuple_unsigned_compare()

◆ reversedirection()

◆ selectnewtape()