PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
tuplesort.c File Reference
#include "postgres.h"
#include <limits.h>
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tuplesort.h"
#include "lib/sort_template.h"
Include dependency graph for tuplesort.c:

Go to the source code of this file.

Data Structures

union  SlabSlot
 
struct  Tuplesortstate
 
struct  Sharedsort
 

Macros

#define INITIAL_MEMTUPSIZE
 
#define SLAB_SLOT_SIZE   1024
 
#define MINORDER   6 /* minimum merge order */
 
#define MAXORDER   500 /* maximum merge order */
 
#define TAPE_BUFFER_OVERHEAD   BLCKSZ
 
#define MERGE_BUFFER_SIZE   (BLCKSZ * 32)
 
#define IS_SLAB_SLOT(state, tuple)
 
#define RELEASE_SLAB_SLOT(state, tuple)
 
#define REMOVEABBREV(state, stup, count)   ((*(state)->base.removeabbrev) (state, stup, count))
 
#define COMPARETUP(state, a, b)   ((*(state)->base.comparetup) (a, b, state))
 
#define WRITETUP(state, tape, stup)   ((*(state)->base.writetup) (state, tape, stup))
 
#define READTUP(state, stup, tape, len)   ((*(state)->base.readtup) (state, stup, tape, len))
 
#define FREESTATE(state)   ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)
 
#define LACKMEM(state)   ((state)->availMem < 0 && !(state)->slabAllocatorUsed)
 
#define USEMEM(state, amt)   ((state)->availMem -= (amt))
 
#define FREEMEM(state, amt)   ((state)->availMem += (amt))
 
#define SERIAL(state)   ((state)->shared == NULL)
 
#define WORKER(state)   ((state)->shared && (state)->worker != -1)
 
#define LEADER(state)   ((state)->shared && (state)->worker == -1)
 
#define ST_SORT   qsort_tuple_unsigned
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE(a, b, state)   qsort_tuple_unsigned_compare(a, b, state)
 
#define ST_COMPARE_ARG_TYPE   Tuplesortstate
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   qsort_tuple_int32
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE(a, b, state)   qsort_tuple_int32_compare(a, b, state)
 
#define ST_COMPARE_ARG_TYPE   Tuplesortstate
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   qsort_tuple
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE_RUNTIME_POINTER
 
#define ST_COMPARE_ARG_TYPE   Tuplesortstate
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DECLARE
 
#define ST_DEFINE
 
#define ST_SORT   qsort_ssup
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE(a, b, ssup)
 
#define ST_COMPARE_ARG_TYPE   SortSupportData
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef union SlabSlot SlabSlot
 

Enumerations

enum  TupSortStatus {
  TSS_INITIAL , TSS_BOUNDED , TSS_BUILDRUNS , TSS_SORTEDINMEM ,
  TSS_SORTEDONTAPE , TSS_FINALMERGE
}
 

Functions

static void tuplesort_begin_batch (Tuplesortstate *state)
 
static bool consider_abort_common (Tuplesortstate *state)
 
static void inittapes (Tuplesortstate *state, bool mergeruns)
 
static void inittapestate (Tuplesortstate *state, int maxTapes)
 
static void selectnewtape (Tuplesortstate *state)
 
static void init_slab_allocator (Tuplesortstate *state, int numSlots)
 
static void mergeruns (Tuplesortstate *state)
 
static void mergeonerun (Tuplesortstate *state)
 
static void beginmerge (Tuplesortstate *state)
 
static bool mergereadnext (Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup)
 
static void dumptuples (Tuplesortstate *state, bool alltuples)
 
static void make_bounded_heap (Tuplesortstate *state)
 
static void sort_bounded_heap (Tuplesortstate *state)
 
static void tuplesort_sort_memtuples (Tuplesortstate *state)
 
static void tuplesort_heap_insert (Tuplesortstate *state, SortTuple *tuple)
 
static void tuplesort_heap_replace_top (Tuplesortstate *state, SortTuple *tuple)
 
static void tuplesort_heap_delete_top (Tuplesortstate *state)
 
static void reversedirection (Tuplesortstate *state)
 
static unsigned int getlen (LogicalTape *tape, bool eofOK)
 
static void markrunend (LogicalTape *tape)
 
static int worker_get_identifier (Tuplesortstate *state)
 
static void worker_freeze_result_tape (Tuplesortstate *state)
 
static void worker_nomergeruns (Tuplesortstate *state)
 
static void leader_takeover_tapes (Tuplesortstate *state)
 
static void free_sort_tuple (Tuplesortstate *state, SortTuple *stup)
 
static void tuplesort_free (Tuplesortstate *state)
 
static void tuplesort_updatemax (Tuplesortstate *state)
 
static pg_attribute_always_inline int qsort_tuple_unsigned_compare (SortTuple *a, SortTuple *b, Tuplesortstate *state)
 
static pg_attribute_always_inline int qsort_tuple_int32_compare (SortTuple *a, SortTuple *b, Tuplesortstate *state)
 
Tuplesortstate * tuplesort_begin_common (int workMem, SortCoordinate coordinate, int sortopt)
 
void tuplesort_set_bound (Tuplesortstate *state, int64 bound)
 
bool tuplesort_used_bound (Tuplesortstate *state)
 
void tuplesort_end (Tuplesortstate *state)
 
void tuplesort_reset (Tuplesortstate *state)
 
static bool grow_memtuples (Tuplesortstate *state)
 
void tuplesort_puttuple_common (Tuplesortstate *state, SortTuple *tuple, bool useAbbrev, Size tuplen)
 
void tuplesort_performsort (Tuplesortstate *state)
 
bool tuplesort_gettuple_common (Tuplesortstate *state, bool forward, SortTuple *stup)
 
bool tuplesort_skiptuples (Tuplesortstate *state, int64 ntuples, bool forward)
 
int tuplesort_merge_order (int64 allowedMem)
 
static int64 merge_read_buffer_size (int64 avail_mem, int nInputTapes, int nInputRuns, int maxOutputTapes)
 
void tuplesort_rescan (Tuplesortstate *state)
 
void tuplesort_markpos (Tuplesortstate *state)
 
void tuplesort_restorepos (Tuplesortstate *state)
 
void tuplesort_get_stats (Tuplesortstate *state, TuplesortInstrumentation *stats)
 
const char * tuplesort_method_name (TuplesortMethod m)
 
const char * tuplesort_space_type_name (TuplesortSpaceType t)
 
void * tuplesort_readtup_alloc (Tuplesortstate *state, Size tuplen)
 
Size tuplesort_estimate_shared (int nWorkers)
 
void tuplesort_initialize_shared (Sharedsort *shared, int nWorkers, dsm_segment *seg)
 
void tuplesort_attach_shared (Sharedsort *shared, dsm_segment *seg)
 
int ssup_datum_unsigned_cmp (Datum x, Datum y, SortSupport ssup)
 
int ssup_datum_int32_cmp (Datum x, Datum y, SortSupport ssup)
 

Variables

bool trace_sort = false
 

Macro Definition Documentation

◆ COMPARETUP

#define COMPARETUP (   state,
  a,
  b 
)    ((*(state)->base.comparetup) (a, b, state))

Definition at line 396 of file tuplesort.c.

◆ FREEMEM

#define FREEMEM (   state,
  amt 
)    ((state)->availMem += (amt))

Definition at line 402 of file tuplesort.c.

◆ FREESTATE

#define FREESTATE (   state)    ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)

Definition at line 399 of file tuplesort.c.

◆ INITIAL_MEMTUPSIZE

#define INITIAL_MEMTUPSIZE
Value:
Max(1024, \
ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1)
#define Max(x, y)
Definition: c.h:952
#define ALLOCSET_SEPARATE_THRESHOLD
Definition: memutils.h:187

Definition at line 120 of file tuplesort.c.

◆ IS_SLAB_SLOT

#define IS_SLAB_SLOT (   state,
  tuple 
)
Value:
((char *) (tuple) >= (state)->slabMemoryBegin && \
(char *) (tuple) < (state)->slabMemoryEnd)
Definition: regguts.h:323

Definition at line 375 of file tuplesort.c.

◆ LACKMEM

#define LACKMEM (   state)    ((state)->availMem < 0 && !(state)->slabAllocatorUsed)

Definition at line 400 of file tuplesort.c.

◆ LEADER

#define LEADER (   state)    ((state)->shared && (state)->worker == -1)

Definition at line 405 of file tuplesort.c.

◆ MAXORDER

#define MAXORDER   500 /* maximum merge order */

Definition at line 177 of file tuplesort.c.

◆ MERGE_BUFFER_SIZE

#define MERGE_BUFFER_SIZE   (BLCKSZ * 32)

Definition at line 179 of file tuplesort.c.

◆ MINORDER

#define MINORDER   6 /* minimum merge order */

Definition at line 176 of file tuplesort.c.

◆ READTUP

#define READTUP (   state,
  stup,
  tape,
  len 
)    ((*(state)->base.readtup) (state, stup, tape, len))

Definition at line 398 of file tuplesort.c.

◆ RELEASE_SLAB_SLOT

#define RELEASE_SLAB_SLOT (   state,
  tuple 
)
Value:
do { \
SlabSlot *buf = (SlabSlot *) tuple; \
\
if (IS_SLAB_SLOT((state), buf)) \
{ \
buf->nextfree = (state)->slabFreeHead; \
(state)->slabFreeHead = buf; \
} else \
pfree(buf); \
} while(0)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void pfree(void *pointer)
Definition: mcxt.c:1521
static char * buf
Definition: pg_test_fsync.c:72
#define IS_SLAB_SLOT(state, tuple)
Definition: tuplesort.c:375

Definition at line 383 of file tuplesort.c.

◆ REMOVEABBREV

#define REMOVEABBREV (   state,
  stup,
  count 
)    ((*(state)->base.removeabbrev) (state, stup, count))

Definition at line 395 of file tuplesort.c.

◆ SERIAL

#define SERIAL (   state)    ((state)->shared == NULL)

Definition at line 403 of file tuplesort.c.

◆ SLAB_SLOT_SIZE

#define SLAB_SLOT_SIZE   1024

Definition at line 142 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [1/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [2/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [3/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [4/4]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 617 of file tuplesort.c.

◆ ST_COMPARE [1/3]

#define ST_COMPARE (   a,
  b,
  ssup 
)
Value:
ApplySortComparator((a)->datum1, (a)->isnull1, \
(b)->datum1, (b)->isnull1, (ssup))
int b
Definition: isn.c:69
int a
Definition: isn.c:68
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE [2/3]

#define ST_COMPARE (   a,
  b,
  state 
)    qsort_tuple_unsigned_compare(a, b, state)

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE [3/3]

#define ST_COMPARE (   a,
  b,
  state 
)    qsort_tuple_int32_compare(a, b, state)

Definition at line 613 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [1/4]

#define ST_COMPARE_ARG_TYPE   Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [2/4]

#define ST_COMPARE_ARG_TYPE   Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [3/4]

#define ST_COMPARE_ARG_TYPE   Tuplesortstate

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [4/4]

#define ST_COMPARE_ARG_TYPE   SortSupportData

Definition at line 616 of file tuplesort.c.

◆ ST_COMPARE_RUNTIME_POINTER

#define ST_COMPARE_RUNTIME_POINTER

Definition at line 603 of file tuplesort.c.

◆ ST_DECLARE

#define ST_DECLARE

Definition at line 607 of file tuplesort.c.

◆ ST_DEFINE [1/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [2/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [3/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_DEFINE [4/4]

#define ST_DEFINE

Definition at line 619 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [1/4]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [2/4]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [3/4]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [4/4]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 612 of file tuplesort.c.

◆ ST_SCOPE [1/4]

#define ST_SCOPE   static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [2/4]

#define ST_SCOPE   static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [3/4]

#define ST_SCOPE   static

Definition at line 618 of file tuplesort.c.

◆ ST_SCOPE [4/4]

#define ST_SCOPE   static

Definition at line 618 of file tuplesort.c.

◆ ST_SORT [1/4]

#define ST_SORT   qsort_tuple_unsigned

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [2/4]

#define ST_SORT   qsort_tuple_int32

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [3/4]

#define ST_SORT   qsort_tuple

Definition at line 611 of file tuplesort.c.

◆ ST_SORT [4/4]

#define ST_SORT   qsort_ssup

Definition at line 611 of file tuplesort.c.

◆ TAPE_BUFFER_OVERHEAD

#define TAPE_BUFFER_OVERHEAD   BLCKSZ

Definition at line 178 of file tuplesort.c.

◆ USEMEM

#define USEMEM (   state,
  amt 
)    ((state)->availMem -= (amt))

Definition at line 401 of file tuplesort.c.

◆ WORKER

#define WORKER (   state)    ((state)->shared && (state)->worker != -1)

Definition at line 404 of file tuplesort.c.

◆ WRITETUP

#define WRITETUP (   state,
  tape,
  stup 
)    ((*(state)->base.writetup) (state, tape, stup))

Definition at line 397 of file tuplesort.c.

Typedef Documentation

◆ SlabSlot

typedef union SlabSlot SlabSlot

Enumeration Type Documentation

◆ TupSortStatus

Enumerator
TSS_INITIAL 
TSS_BOUNDED 
TSS_BUILDRUNS 
TSS_SORTEDINMEM 
TSS_SORTEDONTAPE 
TSS_FINALMERGE 

Definition at line 154 of file tuplesort.c.

155{
156 TSS_INITIAL, /* Loading tuples; still within memory limit */
157 TSS_BOUNDED, /* Loading tuples into bounded-size heap */
158 TSS_BUILDRUNS, /* Loading tuples; writing to tape */
159 TSS_SORTEDINMEM, /* Sort completed entirely in memory */
160 TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */
161 TSS_FINALMERGE, /* Performing final merge on-the-fly */
162} TupSortStatus;
TupSortStatus
Definition: tuplesort.c:155
@ TSS_SORTEDONTAPE
Definition: tuplesort.c:160
@ TSS_SORTEDINMEM
Definition: tuplesort.c:159
@ TSS_INITIAL
Definition: tuplesort.c:156
@ TSS_FINALMERGE
Definition: tuplesort.c:161
@ TSS_BUILDRUNS
Definition: tuplesort.c:158
@ TSS_BOUNDED
Definition: tuplesort.c:157

Function Documentation

◆ beginmerge()

static void beginmerge ( Tuplesortstate * state)
static

Definition at line 2260 of file tuplesort.c.

2261{
2262 int activeTapes;
2263 int srcTapeIndex;
2264
2265 /* Heap should be empty here */
2266 Assert(state->memtupcount == 0);
2267
2268 activeTapes = Min(state->nInputTapes, state->nInputRuns);
2269
2270 for (srcTapeIndex = 0; srcTapeIndex < activeTapes; srcTapeIndex++)
2271 {
2272 SortTuple tup;
2273
2274 if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup))
2275 {
2276 tup.srctape = srcTapeIndex;
2277 tuplesort_heap_insert(state, &tup);
2278 }
2279 }
2280}
#define Min(x, y)
Definition: c.h:958
#define Assert(condition)
Definition: c.h:812
int srctape
Definition: tuplesort.h:152
static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple)
Definition: tuplesort.c:2739
static bool mergereadnext(Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup)
Definition: tuplesort.c:2288

References Assert, mergereadnext(), Min, SortTuple::srctape, and tuplesort_heap_insert().

Referenced by mergeonerun(), and mergeruns().

◆ consider_abort_common()

static bool consider_abort_common ( Tuplesortstate * state)
static

Definition at line 1319 of file tuplesort.c.

1320{
1321 Assert(state->base.sortKeys[0].abbrev_converter != NULL);
1322 Assert(state->base.sortKeys[0].abbrev_abort != NULL);
1323 Assert(state->base.sortKeys[0].abbrev_full_comparator != NULL);
1324
1325 /*
1326 * Check effectiveness of abbreviation optimization. Consider aborting
1327 * when still within memory limit.
1328 */
1329 if (state->status == TSS_INITIAL &&
1330 state->memtupcount >= state->abbrevNext)
1331 {
1332 state->abbrevNext *= 2;
1333
1334 /*
1335 * Check opclass-supplied abbreviation abort routine. It may indicate
1336 * that abbreviation should not proceed.
1337 */
1338 if (!state->base.sortKeys->abbrev_abort(state->memtupcount,
1339 state->base.sortKeys))
1340 return false;
1341
1342 /*
1343 * Finally, restore authoritative comparator, and indicate that
1344 * abbreviation is not in play by setting abbrev_converter to NULL
1345 */
1346 state->base.sortKeys[0].comparator = state->base.sortKeys[0].abbrev_full_comparator;
1347 state->base.sortKeys[0].abbrev_converter = NULL;
1348 /* Not strictly necessary, but be tidy */
1349 state->base.sortKeys[0].abbrev_abort = NULL;
1350 state->base.sortKeys[0].abbrev_full_comparator = NULL;
1351
1352 /* Give up - expect original pass-by-value representation */
1353 return true;
1354 }
1355
1356 return false;
1357}

References Assert, and TSS_INITIAL.

Referenced by tuplesort_puttuple_common().

◆ dumptuples()

static void dumptuples ( Tuplesortstate * state,
bool  alltuples 
)
static

Definition at line 2307 of file tuplesort.c.

2308{
2309 int memtupwrite;
2310 int i;
2311
2312 /*
2313 * Nothing to do if we still fit in available memory and have array slots,
2314 * unless this is the final call during initial run generation.
2315 */
2316 if (state->memtupcount < state->memtupsize && !LACKMEM(state) &&
2317 !alltuples)
2318 return;
2319
2320 /*
2321 * Final call might require no sorting, in rare cases where we just so
2322 * happen to have previously LACKMEM()'d at the point where exactly all
2323 * remaining tuples are loaded into memory, just before input was
2324 * exhausted. In general, short final runs are quite possible, but avoid
2325 * creating a completely empty run. In a worker, though, we must produce
2326 * at least one tape, even if it's empty.
2327 */
2328 if (state->memtupcount == 0 && state->currentRun > 0)
2329 return;
2330
2331 Assert(state->status == TSS_BUILDRUNS);
2332
2333 /*
2334 * It seems unlikely that this limit will ever be exceeded, but take no
2335 * chances
2336 */
2337 if (state->currentRun == INT_MAX)
2338 ereport(ERROR,
2339 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2340 errmsg("cannot have more than %d runs for an external sort",
2341 INT_MAX)));
2342
2343 if (state->currentRun > 0)
2344 selectnewtape(state);
2345
2346 state->currentRun++;
2347
2348 if (trace_sort)
2349 elog(LOG, "worker %d starting quicksort of run %d: %s",
2350 state->worker, state->currentRun,
2351 pg_rusage_show(&state->ru_start));
2352
2353 /*
2354 * Sort all tuples accumulated within the allowed amount of memory for
2355 * this run using quicksort
2356 */
2357 tuplesort_sort_memtuples(state);
2358
2359 if (trace_sort)
2360 elog(LOG, "worker %d finished quicksort of run %d: %s",
2361 state->worker, state->currentRun,
2362 pg_rusage_show(&state->ru_start));
2363
2364 memtupwrite = state->memtupcount;
2365 for (i = 0; i < memtupwrite; i++)
2366 {
2367 SortTuple *stup = &state->memtuples[i];
2368
2369 WRITETUP(state, state->destTape, stup);
2370 }
2371
2372 state->memtupcount = 0;
2373
2374 /*
2375 * Reset tuple memory. We've freed all of the tuples that we previously
2376 * allocated. It's important to avoid fragmentation when there is a stark
2377 * change in the sizes of incoming tuples. In bounded sorts,
2378 * fragmentation due to AllocSetFree's bucketing by size class might be
2379 * particularly bad if this step wasn't taken.
2380 */
2381 MemoryContextReset(state->base.tuplecontext);
2382
2383 /*
2384 * Now update the memory accounting to subtract the memory used by the
2385 * tuple.
2386 */
2387 FREEMEM(state, state->tupleMem);
2388 state->tupleMem = 0;
2389
2390 markrunend(state->destTape);
2391
2392 if (trace_sort)
2393 elog(LOG, "worker %d finished writing run %d to tape %d: %s",
2394 state->worker, state->currentRun, (state->currentRun - 1) % state->nOutputTapes + 1,
2395 pg_rusage_show(&state->ru_start));
2396}
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
int i
Definition: isn.c:72
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
static void selectnewtape(Tuplesortstate *state)
Definition: tuplesort.c:1948
static void markrunend(LogicalTape *tape)
Definition: tuplesort.c:2869
#define LACKMEM(state)
Definition: tuplesort.c:400
#define WRITETUP(state, tape, stup)
Definition: tuplesort.c:397
#define FREEMEM(state, amt)
Definition: tuplesort.c:402
static void tuplesort_sort_memtuples(Tuplesortstate *state)
Definition: tuplesort.c:2676
bool trace_sort
Definition: tuplesort.c:124

References Assert, elog, ereport, errcode(), errmsg(), ERROR, FREEMEM, i, LACKMEM, LOG, markrunend(), MemoryContextReset(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_sort_memtuples(), and WRITETUP.

Referenced by tuplesort_performsort(), and tuplesort_puttuple_common().

◆ free_sort_tuple()

static void free_sort_tuple ( Tuplesortstate * state,
SortTuple * stup 
)
static

Definition at line 3128 of file tuplesort.c.

3129{
3130 if (stup->tuple)
3131 {
3132 FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
3133 pfree(stup->tuple);
3134 stup->tuple = NULL;
3135 }
3136}
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
void * tuple
Definition: tuplesort.h:149

References FREEMEM, GetMemoryChunkSpace(), pfree(), and SortTuple::tuple.

Referenced by make_bounded_heap(), and tuplesort_puttuple_common().

◆ getlen()

static unsigned int getlen ( LogicalTape * tape,
bool  eofOK 
)
static

Definition at line 2856 of file tuplesort.c.

2857{
2858 unsigned int len;
2859
2860 if (LogicalTapeRead(tape,
2861 &len, sizeof(len)) != sizeof(len))
2862 elog(ERROR, "unexpected end of tape");
2863 if (len == 0 && !eofOK)
2864 elog(ERROR, "unexpected end of data");
2865 return len;
2866}
size_t LogicalTapeRead(LogicalTape *lt, void *ptr, size_t size)
Definition: logtape.c:928
const void size_t len

References elog, ERROR, len, and LogicalTapeRead().

Referenced by mergereadnext(), and tuplesort_gettuple_common().

◆ grow_memtuples()

static bool grow_memtuples ( Tuplesortstate * state)
static

Definition at line 1052 of file tuplesort.c.

1053{
1054 int newmemtupsize;
1055 int memtupsize = state->memtupsize;
1056 int64 memNowUsed = state->allowedMem - state->availMem;
1057
1058 /* Forget it if we've already maxed out memtuples, per comment above */
1059 if (!state->growmemtuples)
1060 return false;
1061
1062 /* Select new value of memtupsize */
1063 if (memNowUsed <= state->availMem)
1064 {
1065 /*
1066 * We've used no more than half of allowedMem; double our usage,
1067 * clamping at INT_MAX tuples.
1068 */
1069 if (memtupsize < INT_MAX / 2)
1070 newmemtupsize = memtupsize * 2;
1071 else
1072 {
1073 newmemtupsize = INT_MAX;
1074 state->growmemtuples = false;
1075 }
1076 }
1077 else
1078 {
1079 /*
1080 * This will be the last increment of memtupsize. Abandon doubling
1081 * strategy and instead increase as much as we safely can.
1082 *
1083 * To stay within allowedMem, we can't increase memtupsize by more
1084 * than availMem / sizeof(SortTuple) elements. In practice, we want
1085 * to increase it by considerably less, because we need to leave some
1086 * space for the tuples to which the new array slots will refer. We
1087 * assume the new tuples will be about the same size as the tuples
1088 * we've already seen, and thus we can extrapolate from the space
1089 * consumption so far to estimate an appropriate new size for the
1090 * memtuples array. The optimal value might be higher or lower than
1091 * this estimate, but it's hard to know that in advance. We again
1092 * clamp at INT_MAX tuples.
1093 *
1094 * This calculation is safe against enlarging the array so much that
1095 * LACKMEM becomes true, because the memory currently used includes
1096 * the present array; thus, there would be enough allowedMem for the
1097 * new array elements even if no other memory were currently used.
1098 *
1099 * We do the arithmetic in float8, because otherwise the product of
1100 * memtupsize and allowedMem could overflow. Any inaccuracy in the
1101 * result should be insignificant; but even if we computed a
1102 * completely insane result, the checks below will prevent anything
1103 * really bad from happening.
1104 */
1105 double grow_ratio;
1106
1107 grow_ratio = (double) state->allowedMem / (double) memNowUsed;
1108 if (memtupsize * grow_ratio < INT_MAX)
1109 newmemtupsize = (int) (memtupsize * grow_ratio);
1110 else
1111 newmemtupsize = INT_MAX;
1112
1113 /* We won't make any further enlargement attempts */
1114 state->growmemtuples = false;
1115 }
1116
1117 /* Must enlarge array by at least one element, else report failure */
1118 if (newmemtupsize <= memtupsize)
1119 goto noalloc;
1120
1121 /*
1122 * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp
1123 * to ensure our request won't be rejected. Note that we can easily
1124 * exhaust address space before facing this outcome. (This is presently
1125 * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
1126 * don't rely on that at this distance.)
1127 */
1128 if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple))
1129 {
1130 newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple));
1131 state->growmemtuples = false; /* can't grow any more */
1132 }
1133
1134 /*
1135 * We need to be sure that we do not cause LACKMEM to become true, else
1136 * the space management algorithm will go nuts. The code above should
1137 * never generate a dangerous request, but to be safe, check explicitly
1138 * that the array growth fits within availMem. (We could still cause
1139 * LACKMEM if the memory chunk overhead associated with the memtuples
1140 * array were to increase. That shouldn't happen because we chose the
1141 * initial array size large enough to ensure that palloc will be treating
1142 * both old and new arrays as separate chunks. But we'll check LACKMEM
1143 * explicitly below just in case.)
1144 */
1145 if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple)))
1146 goto noalloc;
1147
1148 /* OK, do it */
1149 FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
1150 state->memtupsize = newmemtupsize;
1151 state->memtuples = (SortTuple *)
1152 repalloc_huge(state->memtuples,
1153 state->memtupsize * sizeof(SortTuple));
1154 USEMEM(state, GetMemoryChunkSpace(state->memtuples));
1155 if (LACKMEM(state))
1156 elog(ERROR, "unexpected out-of-memory situation in tuplesort");
1157 return true;
1158
1159noalloc:
1160 /* If for any reason we didn't realloc, shut off future attempts */
1161 state->growmemtuples = false;
1162 return false;
1163}
int64_t int64
Definition: c.h:482
size_t Size
Definition: c.h:559
void * repalloc_huge(void *pointer, Size size)
Definition: mcxt.c:1672
#define MaxAllocHugeSize
Definition: memutils.h:45
#define USEMEM(state, amt)
Definition: tuplesort.c:401

References elog, ERROR, FREEMEM, GetMemoryChunkSpace(), LACKMEM, MaxAllocHugeSize, repalloc_huge(), and USEMEM.

Referenced by tuplesort_puttuple_common().

◆ init_slab_allocator()

static void init_slab_allocator ( Tuplesortstate * state,
int  numSlots 
)
static

Definition at line 1981 of file tuplesort.c.

1982{
1983 if (numSlots > 0)
1984 {
1985 char *p;
1986 int i;
1987
1988 state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE);
1989 state->slabMemoryEnd = state->slabMemoryBegin +
1990 numSlots * SLAB_SLOT_SIZE;
1991 state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin;
1992 USEMEM(state, numSlots * SLAB_SLOT_SIZE);
1993
1994 p = state->slabMemoryBegin;
1995 for (i = 0; i < numSlots - 1; i++)
1996 {
1997 ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE);
1998 p += SLAB_SLOT_SIZE;
1999 }
2000 ((SlabSlot *) p)->nextfree = NULL;
2001 }
2002 else
2003 {
2004 state->slabMemoryBegin = state->slabMemoryEnd = NULL;
2005 state->slabFreeHead = NULL;
2006 }
2007 state->slabAllocatorUsed = true;
2008}
void * palloc(Size size)
Definition: mcxt.c:1317
#define SLAB_SLOT_SIZE
Definition: tuplesort.c:142

References i, palloc(), SLAB_SLOT_SIZE, and USEMEM.

Referenced by mergeruns().

◆ inittapes()

static void inittapes ( Tuplesortstate * state,
bool  mergeruns 
)
static

Definition at line 1865 of file tuplesort.c.

1866{
1867 Assert(!LEADER(state));
1868
1869 if (mergeruns)
1870 {
1871 /* Compute number of input tapes to use when merging */
1872 state->maxTapes = tuplesort_merge_order(state->allowedMem);
1873 }
1874 else
1875 {
1876 /* Workers can sometimes produce single run, output without merge */
1877 Assert(WORKER(state));
1878 state->maxTapes = MINORDER;
1879 }
1880
1881 if (trace_sort)
1882 elog(LOG, "worker %d switching to external sort with %d tapes: %s",
1883 state->worker, state->maxTapes, pg_rusage_show(&state->ru_start));
1884
1885 /* Create the tape set */
1886 inittapestate(state, state->maxTapes);
1887 state->tapeset =
1888 LogicalTapeSetCreate(false,
1889 state->shared ? &state->shared->fileset : NULL,
1890 state->worker);
1891
1892 state->currentRun = 0;
1893
1894 /*
1895 * Initialize logical tape arrays.
1896 */
1897 state->inputTapes = NULL;
1898 state->nInputTapes = 0;
1899 state->nInputRuns = 0;
1900
1901 state->outputTapes = palloc0(state->maxTapes * sizeof(LogicalTape *));
1902 state->nOutputTapes = 0;
1903 state->nOutputRuns = 0;
1904
1905 state->status = TSS_BUILDRUNS;
1906
1907 selectnewtape(state);
1908}
LogicalTapeSet * LogicalTapeSetCreate(bool preallocate, SharedFileSet *fileset, int worker)
Definition: logtape.c:556
void * palloc0(Size size)
Definition: mcxt.c:1347
int tuplesort_merge_order(int64 allowedMem)
Definition: tuplesort.c:1778
static void inittapestate(Tuplesortstate *state, int maxTapes)
Definition: tuplesort.c:1914
#define LEADER(state)
Definition: tuplesort.c:405
#define WORKER(state)
Definition: tuplesort.c:404
static void mergeruns(Tuplesortstate *state)
Definition: tuplesort.c:2017
#define MINORDER
Definition: tuplesort.c:176

References Assert, elog, inittapestate(), LEADER, LOG, LogicalTapeSetCreate(), mergeruns(), MINORDER, palloc0(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_merge_order(), and WORKER.

Referenced by tuplesort_performsort(), and tuplesort_puttuple_common().

◆ inittapestate()

static void inittapestate ( Tuplesortstate * state,
int  maxTapes 
)
static

Definition at line 1914 of file tuplesort.c.

1915{
1916 int64 tapeSpace;
1917
1918 /*
1919 * Decrease availMem to reflect the space needed for tape buffers; but
1920 * don't decrease it to the point that we have no room for tuples. (That
1921 * case is only likely to occur if sorting pass-by-value Datums; in all
1922 * other scenarios the memtuples[] array is unlikely to occupy more than
1923 * half of allowedMem. In the pass-by-value case it's not important to
1924 * account for tuple space, so we don't care if LACKMEM becomes
1925 * inaccurate.)
1926 */
1927 tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD;
1928
1929 if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem)
1930 USEMEM(state, tapeSpace);
1931
1932 /*
1933 * Make sure that the temp file(s) underlying the tape set are created in
1934 * suitable temp tablespaces. For parallel sorts, this should have been
1935 * called already, but it doesn't matter if it is called a second time.
1936 */
1937 PrepareTempTablespaces();
1938}
void PrepareTempTablespaces(void)
Definition: tablespace.c:1331
#define TAPE_BUFFER_OVERHEAD
Definition: tuplesort.c:178

References GetMemoryChunkSpace(), PrepareTempTablespaces(), TAPE_BUFFER_OVERHEAD, and USEMEM.

Referenced by inittapes(), and leader_takeover_tapes().

◆ leader_takeover_tapes()

static void leader_takeover_tapes ( Tuplesortstate state)
static

Definition at line 3069 of file tuplesort.c.

3070{
3071 Sharedsort *shared = state->shared;
3072 int nParticipants = state->nParticipants;
3073 int workersFinished;
3074 int j;
3075
3076 Assert(LEADER(state));
3077 Assert(nParticipants >= 1);
3078
3079 SpinLockAcquire(&shared->mutex);
3080 workersFinished = shared->workersFinished;
3081 SpinLockRelease(&shared->mutex);
3082
3083 if (nParticipants != workersFinished)
3084 elog(ERROR, "cannot take over tapes before all workers finish");
3085
3086 /*
3087 * Create the tapeset from worker tapes, including a leader-owned tape at
3088 * the end. Parallel workers are far more expensive than logical tapes,
3089 * so the number of tapes allocated here should never be excessive.
3090 */
3091 inittapestate(state, nParticipants);
3092 state->tapeset = LogicalTapeSetCreate(false, &shared->fileset, -1);
3093
3094 /*
3095 * Set currentRun to reflect the number of runs we will merge (it's not
3096 * used for anything, this is just pro forma)
3097 */
3098 state->currentRun = nParticipants;
3099
3100 /*
3101 * Initialize the state to look the same as after building the initial
3102 * runs.
3103 *
3104 * There will always be exactly 1 run per worker, and exactly one input
3105 * tape per run, because workers always output exactly 1 run, even when
3106 * there were no input tuples for workers to sort.
3107 */
3108 state->inputTapes = NULL;
3109 state->nInputTapes = 0;
3110 state->nInputRuns = 0;
3111
3112 state->outputTapes = palloc0(nParticipants * sizeof(LogicalTape *));
3113 state->nOutputTapes = nParticipants;
3114 state->nOutputRuns = nParticipants;
3115
3116 for (j = 0; j < nParticipants; j++)
3117 {
3118 state->outputTapes[j] = LogicalTapeImport(state->tapeset, j, &shared->tapes[j]);
3119 }
3120
3121 state->status = TSS_BUILDRUNS;
3122}
int j
Definition: isn.c:73
LogicalTape * LogicalTapeImport(LogicalTapeSet *lts, int worker, TapeShare *shared)
Definition: logtape.c:609
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
SharedFileSet fileset
Definition: tuplesort.c:360
TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]
Definition: tuplesort.c:369
int workersFinished
Definition: tuplesort.c:357
slock_t mutex
Definition: tuplesort.c:346

References Assert, elog, ERROR, Sharedsort::fileset, inittapestate(), j, LEADER, LogicalTapeImport(), LogicalTapeSetCreate(), Sharedsort::mutex, palloc0(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, TSS_BUILDRUNS, and Sharedsort::workersFinished.

Referenced by tuplesort_performsort().

◆ make_bounded_heap()

static void make_bounded_heap ( Tuplesortstate state)
static

Definition at line 2587 of file tuplesort.c.

2588{
2589 int tupcount = state->memtupcount;
2590 int i;
2591
2592 Assert(state->status == TSS_INITIAL);
2593 Assert(state->bounded);
2594 Assert(tupcount >= state->bound);
2595 Assert(SERIAL(state));
2596
2597 /* Reverse sort direction so largest entry will be at root */
2598 reversedirection(state);
2599
2600 state->memtupcount = 0; /* make the heap empty */
2601 for (i = 0; i < tupcount; i++)
2602 {
2603 if (state->memtupcount < state->bound)
2604 {
2605 /* Insert next tuple into heap */
2606 /* Must copy source tuple to avoid possible overwrite */
2607 SortTuple stup = state->memtuples[i];
2608
2609 tuplesort_heap_insert(state, &stup);
2610 }
2611 else
2612 {
2613 /*
2614 * The heap is full. Replace the largest entry with the new
2615 * tuple, or just discard it, if it's larger than anything already
2616 * in the heap.
2617 */
2618 if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
2619 {
2620 free_sort_tuple(state, &state->memtuples[i]);
2621 CHECK_FOR_INTERRUPTS();
2622 }
2623 else
2624 tuplesort_heap_replace_top(state, &state->memtuples[i]);
2625 }
2626 }
2627
2628 Assert(state->memtupcount == state->bound);
2629 state->status = TSS_BOUNDED;
2630}
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define COMPARETUP(state, a, b)
Definition: tuplesort.c:396
#define SERIAL(state)
Definition: tuplesort.c:403
static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup)
Definition: tuplesort.c:3128
static void reversedirection(Tuplesortstate *state)
Definition: tuplesort.c:2838
static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
Definition: tuplesort.c:2798

References Assert, CHECK_FOR_INTERRUPTS, COMPARETUP, free_sort_tuple(), i, reversedirection(), SERIAL, TSS_BOUNDED, TSS_INITIAL, tuplesort_heap_insert(), and tuplesort_heap_replace_top().

Referenced by tuplesort_puttuple_common().

◆ markrunend()

static void markrunend ( LogicalTape * tape)
static

Definition at line 2869 of file tuplesort.c.

2870{
2871 unsigned int len = 0;
2872
2873 LogicalTapeWrite(tape, &len, sizeof(len));
2874}
void LogicalTapeWrite(LogicalTape *lt, const void *ptr, size_t size)
Definition: logtape.c:761

References len, and LogicalTapeWrite().

Referenced by dumptuples(), and mergeonerun().

◆ merge_read_buffer_size()

static int64 merge_read_buffer_size ( int64  avail_mem,
int  nInputTapes,
int  nInputRuns,
int  maxOutputTapes 
)
static

Definition at line 1833 of file tuplesort.c.

1835{
1836 int nOutputRuns;
1837 int nOutputTapes;
1838
1839 /*
1840 * How many output tapes will we produce in this pass?
1841 *
1842 * This is nInputRuns / nInputTapes, rounded up.
1843 */
1844 nOutputRuns = (nInputRuns + nInputTapes - 1) / nInputTapes;
1845
1846 nOutputTapes = Min(nOutputRuns, maxOutputTapes);
1847
1848 /*
1849 * Each output tape consumes TAPE_BUFFER_OVERHEAD bytes of memory. All
1850 * remaining memory is divided evenly between the input tapes.
1851 *
1852 * This also follows from the formula in tuplesort_merge_order, but here
1853 * we derive the input buffer size from the amount of memory available,
1854 * and M and N.
1855 */
1856 return Max((avail_mem - TAPE_BUFFER_OVERHEAD * nOutputTapes) / nInputTapes, 0);
1857}

References Max, Min, and TAPE_BUFFER_OVERHEAD.

Referenced by mergeruns().

◆ mergeonerun()

static void mergeonerun ( Tuplesortstate * state)
static

Definition at line 2200 of file tuplesort.c.

2201{
2202 int srcTapeIndex;
2203 LogicalTape *srcTape;
2204
2205 /*
2206 * Start the merge by loading one tuple from each active source tape into
2207 * the heap.
2208 */
2209 beginmerge(state);
2210
2211 Assert(state->slabAllocatorUsed);
2212
2213 /*
2214 * Execute merge by repeatedly extracting lowest tuple in heap, writing it
2215 * out, and replacing it with next tuple from same tape (if there is
2216 * another one).
2217 */
2218 while (state->memtupcount > 0)
2219 {
2220 SortTuple stup;
2221
2222 /* write the tuple to destTape */
2223 srcTapeIndex = state->memtuples[0].srctape;
2224 srcTape = state->inputTapes[srcTapeIndex];
2225 WRITETUP(state, state->destTape, &state->memtuples[0]);
2226
2227 /* recycle the slot of the tuple we just wrote out, for the next read */
2228 if (state->memtuples[0].tuple)
2229 RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple);
2230
2231 /*
2232 * pull next tuple from the tape, and replace the written-out tuple in
2233 * the heap with it.
2234 */
2235 if (mergereadnext(state, srcTape, &stup))
2236 {
2237 stup.srctape = srcTapeIndex;
2238 tuplesort_heap_replace_top(state, &stup);
2239 }
2240 else
2241 {
2242 tuplesort_heap_delete_top(state);
2243 state->nInputRuns--;
2244 }
2245 }
2246
2247 /*
2248 * When the heap empties, we're done. Write an end-of-run marker on the
2249 * output tape.
2250 */
2251 markrunend(state->destTape);
2252}
static void tuplesort_heap_delete_top(Tuplesortstate *state)
Definition: tuplesort.c:2774
static void beginmerge(Tuplesortstate *state)
Definition: tuplesort.c:2260
#define RELEASE_SLAB_SLOT(state, tuple)
Definition: tuplesort.c:383

References Assert, beginmerge(), markrunend(), mergereadnext(), RELEASE_SLAB_SLOT, SortTuple::srctape, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), and WRITETUP.

Referenced by mergeruns().

◆ mergereadnext()

static bool mergereadnext ( Tuplesortstate * state,
LogicalTape * srcTape,
SortTuple * stup 
)
static

Definition at line 2288 of file tuplesort.c.

2289{
2290 unsigned int tuplen;
2291
2292 /* read next tuple, if any */
2293 if ((tuplen = getlen(srcTape, true)) == 0)
2294 return false;
2295 READTUP(state, stup, srcTape, tuplen);
2296
2297 return true;
2298}
static unsigned int getlen(LogicalTape *tape, bool eofOK)
Definition: tuplesort.c:2856
#define READTUP(state, stup, tape, len)
Definition: tuplesort.c:398

References getlen(), and READTUP.

Referenced by beginmerge(), mergeonerun(), and tuplesort_gettuple_common().

◆ mergeruns()

static void mergeruns ( Tuplesortstate * state)
static

Definition at line 2017 of file tuplesort.c.

2018{
2019 int tapenum;
2020
2021 Assert(state->status == TSS_BUILDRUNS);
2022 Assert(state->memtupcount == 0);
2023
2024 if (state->base.sortKeys != NULL && state->base.sortKeys->abbrev_converter != NULL)
2025 {
2026 /*
2027 * If there are multiple runs to be merged, when we go to read back
2028 * tuples from disk, abbreviated keys will not have been stored, and
2029 * we don't care to regenerate them. Disable abbreviation from this
2030 * point on.
2031 */
2032 state->base.sortKeys->abbrev_converter = NULL;
2033 state->base.sortKeys->comparator = state->base.sortKeys->abbrev_full_comparator;
2034
2035 /* Not strictly necessary, but be tidy */
2036 state->base.sortKeys->abbrev_abort = NULL;
2037 state->base.sortKeys->abbrev_full_comparator = NULL;
2038 }
2039
2040 /*
2041 * Reset tuple memory. We've freed all the tuples that we previously
2042 * allocated. We will use the slab allocator from now on.
2043 */
2044 MemoryContextResetOnly(state->base.tuplecontext);
2045
2046 /*
2047 * We no longer need a large memtuples array. (We will allocate a smaller
2048 * one for the heap later.)
2049 */
2050 FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
2051 pfree(state->memtuples);
2052 state->memtuples = NULL;
2053
2054 /*
2055 * Initialize the slab allocator. We need one slab slot per input tape,
2056 * for the tuples in the heap, plus one to hold the tuple last returned
2057 * from tuplesort_gettuple. (If we're sorting pass-by-val Datums,
2058 * however, we don't need to allocate anything.)
2059 *
2060 * In a multi-pass merge, we could shrink this allocation for the last
2061 * merge pass, if it has fewer tapes than previous passes, but we don't
2062 * bother.
2063 *
2064 * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism
2065 * to track memory usage of individual tuples.
2066 */
2067 if (state->base.tuples)
2068 init_slab_allocator(state, state->nOutputTapes + 1);
2069 else
2070 init_slab_allocator(state, 0);
2071
2072 /*
2073 * Allocate a new 'memtuples' array, for the heap. It will hold one tuple
2074 * from each input tape.
2075 *
2076 * We could shrink this, too, between passes in a multi-pass merge, but we
2077 * don't bother. (The initial input tapes are still in outputTapes. The
2078 * number of input tapes will not increase between passes.)
2079 */
2080 state->memtupsize = state->nOutputTapes;
2081 state->memtuples = (SortTuple *) MemoryContextAlloc(state->base.maincontext,
2082 state->nOutputTapes * sizeof(SortTuple));
2083 USEMEM(state, GetMemoryChunkSpace(state->memtuples));
2084
2085 /*
2086 * Use all the remaining memory we have available for tape buffers among
2087 * all the input tapes. At the beginning of each merge pass, we will
2088 * divide this memory between the input and output tapes in the pass.
2089 */
2090 state->tape_buffer_mem = state->availMem;
2091 USEMEM(state, state->tape_buffer_mem);
2092 if (trace_sort)
2093 elog(LOG, "worker %d using %zu KB of memory for tape buffers",
2094 state->worker, state->tape_buffer_mem / 1024);
2095
2096 for (;;)
2097 {
2098 /*
2099 * On the first iteration, or if we have read all the runs from the
2100 * input tapes in a multi-pass merge, it's time to start a new pass.
2101 * Rewind all the output tapes, and make them inputs for the next
2102 * pass.
2103 */
2104 if (state->nInputRuns == 0)
2105 {
2106 int64 input_buffer_size;
2107
2108 /* Close the old, emptied, input tapes */
2109 if (state->nInputTapes > 0)
2110 {
2111 for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
2112 LogicalTapeClose(state->inputTapes[tapenum]);
2113 pfree(state->inputTapes);
2114 }
2115
2116 /* Previous pass's outputs become next pass's inputs. */
2117 state->inputTapes = state->outputTapes;
2118 state->nInputTapes = state->nOutputTapes;
2119 state->nInputRuns = state->nOutputRuns;
2120
2121 /*
2122 * Reset output tape variables. The actual LogicalTapes will be
2123 * created as needed, here we only allocate the array to hold
2124 * them.
2125 */
2126 state->outputTapes = palloc0(state->nInputTapes * sizeof(LogicalTape *));
2127 state->nOutputTapes = 0;
2128 state->nOutputRuns = 0;
2129
2130 /*
2131 * Redistribute the memory allocated for tape buffers, among the
2132 * new input and output tapes.
2133 */
2134 input_buffer_size = merge_read_buffer_size(state->tape_buffer_mem,
2135 state->nInputTapes,
2136 state->nInputRuns,
2137 state->maxTapes);
2138
2139 if (trace_sort)
2140 elog(LOG, "starting merge pass of %d input runs on %d tapes, " INT64_FORMAT " KB of memory for each input tape: %s",
2141 state->nInputRuns, state->nInputTapes, input_buffer_size / 1024,
2142 pg_rusage_show(&state->ru_start));
2143
2144 /* Prepare the new input tapes for merge pass. */
2145 for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
2146 LogicalTapeRewindForRead(state->inputTapes[tapenum], input_buffer_size);
2147
2148 /*
2149 * If there's just one run left on each input tape, then only one
2150 * merge pass remains. If we don't have to produce a materialized
2151 * sorted tape, we can stop at this point and do the final merge
2152 * on-the-fly.
2153 */
2154 if ((state->base.sortopt & TUPLESORT_RANDOMACCESS) == 0
2155 && state->nInputRuns <= state->nInputTapes
2156 && !WORKER(state))
2157 {
2158 /* Tell logtape.c we won't be writing anymore */
2159 LogicalTapeSetForgetFreeSpace(state->tapeset);
2160 /* Initialize for the final merge pass */
2161 beginmerge(state);
2162 state->status = TSS_FINALMERGE;
2163 return;
2164 }
2165 }
2166
2167 /* Select an output tape */
2168 selectnewtape(state);
2169
2170 /* Merge one run from each input tape. */
2171 mergeonerun(state);
2172
2173 /*
2174 * If the input tapes are empty, and we output only one output run,
2175 * we're done. The current output tape contains the final result.
2176 */
2177 if (state->nInputRuns == 0 && state->nOutputRuns <= 1)
2178 break;
2179 }
2180
2181 /*
2182 * Done. The result is on a single run on a single tape.
2183 */
2184 state->result_tape = state->outputTapes[0];
2185 if (!WORKER(state))
2186 LogicalTapeFreeze(state->result_tape, NULL);
2187 else
2188 worker_freeze_result_tape(state);
2189 state->status = TSS_SORTEDONTAPE;
2190
2191 /* Close all the now-empty input tapes, to release their read buffers. */
2192 for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
2193 LogicalTapeClose(state->inputTapes[tapenum]);
2194}
#define INT64_FORMAT
Definition: c.h:503
void LogicalTapeRewindForRead(LogicalTape *lt, size_t buffer_size)
Definition: logtape.c:846
void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
Definition: logtape.c:750
void LogicalTapeClose(LogicalTape *lt)
Definition: logtape.c:733
void LogicalTapeFreeze(LogicalTape *lt, TapeShare *share)
Definition: logtape.c:981
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
void MemoryContextResetOnly(MemoryContext context)
Definition: mcxt.c:402
static void mergeonerun(Tuplesortstate *state)
Definition: tuplesort.c:2200
static int64 merge_read_buffer_size(int64 avail_mem, int nInputTapes, int nInputRuns, int maxOutputTapes)
Definition: tuplesort.c:1833
static void worker_freeze_result_tape(Tuplesortstate *state)
Definition: tuplesort.c:3009
static void init_slab_allocator(Tuplesortstate *state, int numSlots)
Definition: tuplesort.c:1981
#define TUPLESORT_RANDOMACCESS
Definition: tuplesort.h:96

References Assert, beginmerge(), elog, FREEMEM, GetMemoryChunkSpace(), init_slab_allocator(), INT64_FORMAT, LOG, LogicalTapeClose(), LogicalTapeFreeze(), LogicalTapeRewindForRead(), LogicalTapeSetForgetFreeSpace(), MemoryContextAlloc(), MemoryContextResetOnly(), merge_read_buffer_size(), mergeonerun(), palloc0(), pfree(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_SORTEDONTAPE, TUPLESORT_RANDOMACCESS, USEMEM, WORKER, and worker_freeze_result_tape().

Referenced by inittapes(), and tuplesort_performsort().

◆ qsort_tuple_int32_compare()

static pg_attribute_always_inline int qsort_tuple_int32_compare ( SortTuple a,
SortTuple b,
Tuplesortstate state 
)
static

Definition at line 542 of file tuplesort.c.

543{
544 int compare;
545
546 compare = ApplyInt32SortComparator(a->datum1, a->isnull1,
547 b->datum1, b->isnull1,
548 &state->base.sortKeys[0]);
549
550 if (compare != 0)
551 return compare;
552
553 /*
554 * No need to waste effort calling the tiebreak function when there are no
555 * other keys to sort on.
556 */
557 if (state->base.onlyKey != NULL)
558 return 0;
559
560 return state->base.comparetup_tiebreak(a, b, state);
561}
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145
static int ApplyInt32SortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:302

References a, ApplyInt32SortComparator(), b, and compare().

◆ qsort_tuple_unsigned_compare()

static pg_attribute_always_inline int qsort_tuple_unsigned_compare ( SortTuple a,
SortTuple b,
Tuplesortstate state 
)
static

Definition at line 495 of file tuplesort.c.

496{
497 int compare;
498
499 compare = ApplyUnsignedSortComparator(a->datum1, a->isnull1,
500 b->datum1, b->isnull1,
501 &state->base.sortKeys[0]);
502 if (compare != 0)
503 return compare;
504
505 /*
506 * No need to waste effort calling the tiebreak function when there are no
507 * other keys to sort on.
508 */
509 if (state->base.onlyKey != NULL)
510 return 0;
511
512 return state->base.comparetup_tiebreak(a, b, state);
513}
static int ApplyUnsignedSortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:233

References a, ApplyUnsignedSortComparator(), b, and compare().

◆ reversedirection()

static void reversedirection ( Tuplesortstate * state)
static

Definition at line 2838 of file tuplesort.c.

2839{
2840 SortSupport sortKey = state->base.sortKeys;
2841 int nkey;
2842
2843 for (nkey = 0; nkey < state->base.nKeys; nkey++, sortKey++)
2844 {
2845 sortKey->ssup_reverse = !sortKey->ssup_reverse;
2846 sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first;
2847 }
2848}
bool ssup_nulls_first
Definition: sortsupport.h:75

References SortSupportData::ssup_nulls_first, and SortSupportData::ssup_reverse.

Referenced by make_bounded_heap(), and sort_bounded_heap().

◆ selectnewtape()

static void selectnewtape ( Tuplesortstate state)
static

Definition at line 1948 of file tuplesort.c.

1949{
1950 /*
1951 * At the beginning of each merge pass, nOutputTapes and nOutputRuns are
1952 * both zero. On each call, we create a new output tape to hold the next
1953 * run, until maxTapes is reached. After that, we assign new runs to the
1954 * existing tapes in a round robin fashion.
1955 */
1956 if (state->nOutputTapes < state->maxTapes)
1957 {
1958 /* Create a new tape to hold the next run */
1959 Assert(state->outputTapes[state->nOutputRuns] == NULL);
1960 Assert(state->nOutputRuns == state->nOutputTapes);
1961 state->destTape = LogicalTapeCreate(state->tapeset);
1962 state->outputTapes[state->nOutputTapes] = state->destTape;
1963 state->nOutputTapes++;
1964 state->nOutputRuns++;
1965 }
1966 else
1967 {
1968 /*
1969 * We have reached the max number of tapes. Append to an existing
1970 * tape.
1971 */
1972 state->destTape = state->outputTapes[state->nOutputRuns % state->nOutputTapes];
1973 state->nOutputRuns++;
1974 }
1975}
LogicalTape * LogicalTapeCreate(LogicalTapeSet *lts)
Definition: logtape.c:680

References Assert, and LogicalTapeCreate().

Referenced by dumptuples(), inittapes(), and mergeruns().

◆ sort_bounded_heap()

static void sort_bounded_heap ( Tuplesortstate state)
static

Definition at line 2636 of file tuplesort.c.

2637{
2638 int tupcount = state->memtupcount;
2639
2640 Assert(state->status == TSS_BOUNDED);
2641 Assert(state->bounded);
2642 Assert(tupcount == state->bound);
2643 Assert(SERIAL(state));
2644
2645 /*
2646 * We can unheapify in place because each delete-top call will remove the
2647 * largest entry, which we can promptly store in the newly freed slot at
2648 * the end. Once we're down to a single-entry heap, we're done.
2649 */
2650 while (state->memtupcount > 1)
2651 {
2652 SortTuple stup = state->memtuples[0];
2653
2654 /* this sifts-up the next-largest entry and decreases memtupcount */
2655 tuplesort_heap_delete_top(state);
2656 state->memtuples[state->memtupcount] = stup;
2657 }
2658 state->memtupcount = tupcount;
2659
2660 /*
2661 * Reverse sort direction back to the original state. This is not
2662 * actually necessary but seems like a good idea for tidiness.
2663 */
2664 reversedirection(state);
2665
2666 state->status = TSS_SORTEDINMEM;
2667 state->boundUsed = true;
2668}

References Assert, reversedirection(), SERIAL, TSS_BOUNDED, TSS_SORTEDINMEM, and tuplesort_heap_delete_top().

Referenced by tuplesort_performsort().

◆ ssup_datum_int32_cmp()

int ssup_datum_int32_cmp ( Datum  x,
Datum  y,
SortSupport  ssup 
)

Definition at line 3166 of file tuplesort.c.

3167{
3168 int32 xx = DatumGetInt32(x);
3169 int32 yy = DatumGetInt32(y);
3170
3171 if (xx < yy)
3172 return -1;
3173 else if (xx > yy)
3174 return 1;
3175 else
3176 return 0;
3177}
int32_t int32
Definition: c.h:481
int y
Definition: isn.c:71
int x
Definition: isn.c:70
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202

References DatumGetInt32(), x, and y.

Referenced by btint4sortsupport(), date_sortsupport(), and tuplesort_sort_memtuples().

◆ ssup_datum_unsigned_cmp()

int ssup_datum_unsigned_cmp ( Datum  x,
Datum  y,
SortSupport  ssup 
)

Definition at line 3139 of file tuplesort.c.

3140{
3141 if (x < y)
3142 return -1;
3143 else if (x > y)
3144 return 1;
3145 else
3146 return 0;
3147}

References x, and y.

Referenced by gist_point_sortsupport(), macaddr_sortsupport(), network_sortsupport(), tuplesort_sort_memtuples(), uuid_sortsupport(), and varstr_sortsupport().

◆ tuplesort_attach_shared()

void tuplesort_attach_shared ( Sharedsort shared,
dsm_segment seg 
)

Definition at line 2961 of file tuplesort.c.

2962{
2963 /* Attach to SharedFileSet */
2964 SharedFileSetAttach(&shared->fileset, seg);
2965}
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:56

References Sharedsort::fileset, and SharedFileSetAttach().

Referenced by _brin_parallel_build_main(), and _bt_parallel_build_main().

◆ tuplesort_begin_batch()

static void tuplesort_begin_batch ( Tuplesortstate * state)
static

Definition at line 752 of file tuplesort.c.

753{
754 MemoryContext oldcontext;
755
756 oldcontext = MemoryContextSwitchTo(state->base.maincontext);
757
758 /*
759 * Caller tuple (e.g. IndexTuple) memory context.
760 *
761 * A dedicated child context used exclusively for caller passed tuples
762 * eases memory management. Resetting at key points reduces
763 * fragmentation. Note that the memtuples array of SortTuples is allocated
764 * in the parent context, not this context, because there is no need to
765 * free memtuples early. For bounded sorts, tuples may be pfreed in any
766 * order, so we use a regular aset.c context so that it can make use of
767 * free'd memory. When the sort is not bounded, we make use of a bump.c
768 * context as this keeps allocations more compact with less wastage.
769 * Allocations are also slightly more CPU efficient.
770 */
771 if (TupleSortUseBumpTupleCxt(state->base.sortopt))
772 state->base.tuplecontext = BumpContextCreate(state->base.sortcontext,
773 "Caller tuples",
774 ALLOCSET_DEFAULT_SIZES);
775 else
776 state->base.tuplecontext = AllocSetContextCreate(state->base.sortcontext,
777 "Caller tuples",
778 ALLOCSET_DEFAULT_SIZES);
779
780
781 state->status = TSS_INITIAL;
782 state->bounded = false;
783 state->boundUsed = false;
784
785 state->availMem = state->allowedMem;
786
787 state->tapeset = NULL;
788
789 state->memtupcount = 0;
790
791 /*
792 * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
793 * see comments in grow_memtuples().
794 */
795 state->growmemtuples = true;
796 state->slabAllocatorUsed = false;
797 if (state->memtuples != NULL && state->memtupsize != INITIAL_MEMTUPSIZE)
798 {
799 pfree(state->memtuples);
800 state->memtuples = NULL;
801 state->memtupsize = INITIAL_MEMTUPSIZE;
802 }
803 if (state->memtuples == NULL)
804 {
805 state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple));
806 USEMEM(state, GetMemoryChunkSpace(state->memtuples));
807 }
808
809 /* workMem must be large enough for the minimal memtuples array */
810 if (LACKMEM(state))
811 elog(ERROR, "insufficient memory allowed for sort");
812
813 state->currentRun = 0;
814
815 /*
816 * Tape variables (inputTapes, outputTapes, etc.) will be initialized by
817 * inittapes(), if needed.
818 */
819
820 state->result_tape = NULL; /* flag that result tape has not been formed */
821
822 MemoryContextSwitchTo(oldcontext);
823}
MemoryContext BumpContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: bump.c:131
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
MemoryContextSwitchTo(old_ctx)
#define INITIAL_MEMTUPSIZE
Definition: tuplesort.c:120
#define TupleSortUseBumpTupleCxt(opt)
Definition: tuplesort.h:108

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BumpContextCreate(), elog, ERROR, GetMemoryChunkSpace(), INITIAL_MEMTUPSIZE, LACKMEM, MemoryContextSwitchTo(), palloc(), pfree(), TSS_INITIAL, TupleSortUseBumpTupleCxt, and USEMEM.

Referenced by tuplesort_begin_common(), and tuplesort_reset().

◆ tuplesort_begin_common()

Tuplesortstate * tuplesort_begin_common ( int  workMem,
SortCoordinate  coordinate,
int  sortopt 
)

Definition at line 642 of file tuplesort.c.

643{
644 Tuplesortstate *state;
645 MemoryContext maincontext;
646 MemoryContext sortcontext;
647 MemoryContext oldcontext;
648
649 /* See leader_takeover_tapes() remarks on random access support */
650 if (coordinate && (sortopt & TUPLESORT_RANDOMACCESS))
651 elog(ERROR, "random access disallowed under parallel sort");
652
653 /*
654 * Memory context surviving tuplesort_reset. This memory context holds
655 * data which is useful to keep while sorting multiple similar batches.
656 */
657 maincontext = AllocSetContextCreate(CurrentMemoryContext,
658 "TupleSort main",
659 ALLOCSET_DEFAULT_SIZES);
660
661 /*
662 * Create a working memory context for one sort operation. The content of
663 * this context is deleted by tuplesort_reset.
664 */
665 sortcontext = AllocSetContextCreate(maincontext,
666 "TupleSort sort",
667 ALLOCSET_DEFAULT_SIZES);
668
669 /*
670 * Additionally a working memory context for tuples is setup in
671 * tuplesort_begin_batch.
672 */
673
674 /*
675 * Make the Tuplesortstate within the per-sortstate context. This way, we
676 * don't need a separate pfree() operation for it at shutdown.
677 */
678 oldcontext = MemoryContextSwitchTo(maincontext);
679
680 state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));
681
682 if (trace_sort)
683 pg_rusage_init(&state->ru_start);
684
685 state->base.sortopt = sortopt;
686 state->base.tuples = true;
687 state->abbrevNext = 10;
688
689 /*
690 * workMem is forced to be at least 64KB, the current minimum valid value
691 * for the work_mem GUC. This is a defense against parallel sort callers
692 * that divide out memory among many workers in a way that leaves each
693 * with very little memory.
694 */
695 state->allowedMem = Max(workMem, 64) * (int64) 1024;
696 state->base.sortcontext = sortcontext;
697 state->base.maincontext = maincontext;
698
699 /*
700 * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
701 * see comments in grow_memtuples().
702 */
703 state->memtupsize = INITIAL_MEMTUPSIZE;
704 state->memtuples = NULL;
705
706 /*
707 * After all of the other non-parallel-related state, we setup all of the
708 * state needed for each batch.
709 */
710 tuplesort_begin_batch(state);
711
712 /*
713 * Initialize parallel-related state based on coordination information
714 * from caller
715 */
716 if (!coordinate)
717 {
718 /* Serial sort */
719 state->shared = NULL;
720 state->worker = -1;
721 state->nParticipants = -1;
722 }
723 else if (coordinate->isWorker)
724 {
725 /* Parallel worker produces exactly one final run from all input */
726 state->shared = coordinate->sharedsort;
727 state->worker = worker_get_identifier(state);
728 state->nParticipants = -1;
729 }
730 else
731 {
732 /* Parallel leader state only used for final merge */
733 state->shared = coordinate->sharedsort;
734 state->worker = -1;
735 state->nParticipants = coordinate->nParticipants;
736 Assert(state->nParticipants >= 1);
737 }
738
739 MemoryContextSwitchTo(oldcontext);
740
741 return state;
742}
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
Sharedsort * sharedsort
Definition: tuplesort.h:58
static int worker_get_identifier(Tuplesortstate *state)
Definition: tuplesort.c:2981
static void tuplesort_begin_batch(Tuplesortstate *state)
Definition: tuplesort.c:752

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, CurrentMemoryContext, elog, ERROR, INITIAL_MEMTUPSIZE, SortCoordinateData::isWorker, Max, MemoryContextSwitchTo(), SortCoordinateData::nParticipants, palloc0(), pg_rusage_init(), SortCoordinateData::sharedsort, trace_sort, tuplesort_begin_batch(), TUPLESORT_RANDOMACCESS, and worker_get_identifier().

Referenced by tuplesort_begin_cluster(), tuplesort_begin_datum(), tuplesort_begin_heap(), tuplesort_begin_index_brin(), tuplesort_begin_index_btree(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

◆ tuplesort_end()

void tuplesort_end ( Tuplesortstate state)

Definition at line 951 of file tuplesort.c.

952{
953 tuplesort_free(state);
954
955 /*
956 * Free the main memory context, including the Tuplesortstate struct
957 * itself.
958 */
959 MemoryContextDelete(state->base.maincontext);
960}
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
static void tuplesort_free(Tuplesortstate *state)
Definition: tuplesort.c:897

References MemoryContextDelete(), and tuplesort_free().

Referenced by _brin_parallel_merge(), _brin_parallel_scan_and_build(), _bt_parallel_scan_and_sort(), _bt_spooldestroy(), _h_spooldestroy(), ExecEndAgg(), ExecEndIncrementalSort(), ExecEndSort(), ExecReScanAgg(), ExecReScanSort(), gistbuild(), heapam_relation_copy_for_cluster(), initialize_aggregate(), initialize_phase(), ordered_set_shutdown(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), and validate_index().

◆ tuplesort_estimate_shared()

Size tuplesort_estimate_shared ( int  nWorkers)

Definition at line 2917 of file tuplesort.c.

2918{
2919 Size tapesSize;
2920
2921 Assert(nWorkers > 0);
2922
2923 /* Make sure that BufFile shared state is MAXALIGN'd */
2924 tapesSize = mul_size(sizeof(TapeShare), nWorkers);
2925 tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes)));
2926
2927 return tapesSize;
2928}
#define MAXALIGN(LEN)
Definition: c.h:765
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505

References add_size(), Assert, MAXALIGN, and mul_size().

Referenced by _brin_begin_parallel(), and _bt_begin_parallel().

◆ tuplesort_free()

static void tuplesort_free ( Tuplesortstate * state)
static

Definition at line 897 of file tuplesort.c.

898{
899 /* context swap probably not needed, but let's be safe */
900 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
901 int64 spaceUsed;
902
903 if (state->tapeset)
904 spaceUsed = LogicalTapeSetBlocks(state->tapeset);
905 else
906 spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
907
908 /*
909 * Delete temporary "tape" files, if any.
910 *
911 * We don't bother to destroy the individual tapes here. They will go away
912 * with the sortcontext. (In TSS_FINALMERGE state, we have closed
913 * finished tapes already.)
914 */
915 if (state->tapeset)
916 LogicalTapeSetClose(state->tapeset);
917
918 if (trace_sort)
919 {
920 if (state->tapeset)
921 elog(LOG, "%s of worker %d ended, %lld disk blocks used: %s",
922 SERIAL(state) ? "external sort" : "parallel external sort",
923 state->worker, (long long) spaceUsed, pg_rusage_show(&state->ru_start));
924 else
925 elog(LOG, "%s of worker %d ended, %lld KB used: %s",
926 SERIAL(state) ? "internal sort" : "unperformed parallel sort",
927 state->worker, (long long) spaceUsed, pg_rusage_show(&state->ru_start));
928 }
929
930 TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed);
931
932 FREESTATE(state);
933 MemoryContextSwitchTo(oldcontext);
934
935 /*
936 * Free the per-sort memory context, thereby releasing all working memory.
937 */
938 MemoryContextReset(state->base.sortcontext);
939}
int64 LogicalTapeSetBlocks(LogicalTapeSet *lts)
Definition: logtape.c:1181
void LogicalTapeSetClose(LogicalTapeSet *lts)
Definition: logtape.c:667
#define FREESTATE(state)
Definition: tuplesort.c:399

References elog, FREESTATE, LOG, LogicalTapeSetBlocks(), LogicalTapeSetClose(), MemoryContextReset(), MemoryContextSwitchTo(), pg_rusage_show(), SERIAL, and trace_sort.

Referenced by tuplesort_end(), and tuplesort_reset().

◆ tuplesort_get_stats()

void tuplesort_get_stats ( Tuplesortstate *state,
TuplesortInstrumentation *stats 
)

Definition at line 2499 of file tuplesort.c.

2501{
2502 /*
2503 * Note: it might seem we should provide both memory and disk usage for a
2504 * disk-based sort. However, the current code doesn't track memory space
2505 * accurately once we have begun to return tuples to the caller (since we
2506 * don't account for pfree's the caller is expected to do), so we cannot
2507 * rely on availMem in a disk sort. This does not seem worth the overhead
2508 * to fix. Is it worth creating an API for the memory context code to
2509 * tell us how much is actually used in sortcontext?
2510 */
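2511 tuplesort_updatemax(state);
2512
2513 if (state->isMaxSpaceDisk)
2514 stats->spaceType = SORT_SPACE_TYPE_DISK;
2515 else
2516 stats->spaceType = SORT_SPACE_TYPE_MEMORY;
2517 stats->spaceUsed = (state->maxSpace + 1023) / 1024;
2518
2519 switch (state->maxSpaceStatus)
2520 {
2521 case TSS_SORTEDINMEM:
2522 if (state->boundUsed)
2523 stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT;
2524 else
2525 stats->sortMethod = SORT_TYPE_QUICKSORT;
2526 break;
2527 case TSS_SORTEDONTAPE:
2528 stats->sortMethod = SORT_TYPE_EXTERNAL_SORT;
2529 break;
2530 case TSS_FINALMERGE:
2531 stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE;
2532 break;
2533 default:
2534 stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS;
2535 break;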
2536 }
2537}
TuplesortMethod sortMethod
Definition: tuplesort.h:112
TuplesortSpaceType spaceType
Definition: tuplesort.h:113
static void tuplesort_updatemax(Tuplesortstate *state)
Definition: tuplesort.c:968
@ SORT_SPACE_TYPE_DISK
Definition: tuplesort.h:88
@ SORT_SPACE_TYPE_MEMORY
Definition: tuplesort.h:89
@ SORT_TYPE_EXTERNAL_SORT
Definition: tuplesort.h:80
@ SORT_TYPE_TOP_N_HEAPSORT
Definition: tuplesort.h:78
@ SORT_TYPE_QUICKSORT
Definition: tuplesort.h:79
@ SORT_TYPE_STILL_IN_PROGRESS
Definition: tuplesort.h:77
@ SORT_TYPE_EXTERNAL_MERGE
Definition: tuplesort.h:81

References SORT_SPACE_TYPE_DISK, SORT_SPACE_TYPE_MEMORY, SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, SORT_TYPE_TOP_N_HEAPSORT, TuplesortInstrumentation::sortMethod, TuplesortInstrumentation::spaceType, TuplesortInstrumentation::spaceUsed, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and tuplesort_updatemax().

Referenced by ExecSort(), instrumentSortedGroup(), and show_sort_info().

◆ tuplesort_gettuple_common()

bool tuplesort_gettuple_common ( Tuplesortstate *state,
bool  forward,
SortTuple *stup 
)

Definition at line 1470 of file tuplesort.c.

1472{
1473 unsigned int tuplen;
1474 size_t nmoved;
1475
1476 Assert(!WORKER(state));
1477
1478 switch (state->status)
1479 {
1480 case TSS_SORTEDINMEM:
1481 Assert(forward || state->base.sortopt & TUPLESORT_RANDOMACCESS);
1482 Assert(!state->slabAllocatorUsed);
1483 if (forward)
1484 {
1485 if (state->current < state->memtupcount)
1486 {
1487 *stup = state->memtuples[state->current++];
1488 return true;
1489 }
1490 state->eof_reached = true;
1491
1492 /*
1493 * Complain if caller tries to retrieve more tuples than
1494 * originally asked for in a bounded sort. This is because
1495 * returning EOF here might be the wrong thing.
1496 */
1497 if (state->bounded && state->current >= state->bound)
1498 elog(ERROR, "retrieved too many tuples in a bounded sort");
1499
1500 return false;
1501 }
1502 else
1503 {
1504 if (state->current <= 0)
1505 return false;
1506
1507 /*
1508 * if all tuples are fetched already then we return last
1509 * tuple, else - tuple before last returned.
1510 */
1511 if (state->eof_reached)
1512 state->eof_reached = false;
1513 else
1514 {
1515 state->current--; /* last returned tuple */
1516 if (state->current <= 0)
1517 return false;
1518 }
1519 *stup = state->memtuples[state->current - 1];
1520 return true;
1521 }
1522 break;
1523
1524 case TSS_SORTEDONTAPE:
1525 Assert(forward || state->base.sortopt & TUPLESORT_RANDOMACCESS);
1526 Assert(state->slabAllocatorUsed);
1527
1528 /*
1529 * The slot that held the tuple that we returned in previous
1530 * gettuple call can now be reused.
1531 */
1532 if (state->lastReturnedTuple)
1533 {
1534 RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
1535 state->lastReturnedTuple = NULL;
1536 }
1537
1538 if (forward)
1539 {
1540 if (state->eof_reached)
1541 return false;
1542
1543 if ((tuplen = getlen(state->result_tape, true)) != 0)
1544 {
1545 READTUP(state, stup, state->result_tape, tuplen);
1546
1547 /*
1548 * Remember the tuple we return, so that we can recycle
1549 * its memory on next call. (This can be NULL, in the
1550 * !state->tuples case).
1551 */
1552 state->lastReturnedTuple = stup->tuple;
1553
1554 return true;
1555 }
1556 else
1557 {
1558 state->eof_reached = true;
1559 return false;
1560 }
1561 }
1562
1563 /*
1564 * Backward.
1565 *
1566 * if all tuples are fetched already then we return last tuple,
1567 * else - tuple before last returned.
1568 */
1569 if (state->eof_reached)
1570 {
1571 /*
1572 * Seek position is pointing just past the zero tuplen at the
1573 * end of file; back up to fetch last tuple's ending length
1574 * word. If seek fails we must have a completely empty file.
1575 */
1576 nmoved = LogicalTapeBackspace(state->result_tape,
1577 2 * sizeof(unsigned int));
1578 if (nmoved == 0)
1579 return false;
1580 else if (nmoved != 2 * sizeof(unsigned int))
1581 elog(ERROR, "unexpected tape position");
1582 state->eof_reached = false;
1583 }
1584 else
1585 {
1586 /*
1587 * Back up and fetch previously-returned tuple's ending length
1588 * word. If seek fails, assume we are at start of file.
1589 */
1590 nmoved = LogicalTapeBackspace(state->result_tape,
1591 sizeof(unsigned int));
1592 if (nmoved == 0)
1593 return false;
1594 else if (nmoved != sizeof(unsigned int))
1595 elog(ERROR, "unexpected tape position");
1596 tuplen = getlen(state->result_tape, false);
1597
1598 /*
1599 * Back up to get ending length word of tuple before it.
1600 */
1601 nmoved = LogicalTapeBackspace(state->result_tape,
1602 tuplen + 2 * sizeof(unsigned int));
1603 if (nmoved == tuplen + sizeof(unsigned int))
1604 {
1605 /*
1606 * We backed up over the previous tuple, but there was no
1607 * ending length word before it. That means that the prev
1608 * tuple is the first tuple in the file. It is now the
1609 * next to read in forward direction (not obviously right,
1610 * but that is what in-memory case does).
1611 */
1612 return false;
1613 }
1614 else if (nmoved != tuplen + 2 * sizeof(unsigned int))
1615 elog(ERROR, "bogus tuple length in backward scan");
1616 }
1617
1618 tuplen = getlen(state->result_tape, false);
1619
1620 /*
1621 * Now we have the length of the prior tuple, back up and read it.
1622 * Note: READTUP expects we are positioned after the initial
1623 * length word of the tuple, so back up to that point.
1624 */
1625 nmoved = LogicalTapeBackspace(state->result_tape,
1626 tuplen);
1627 if (nmoved != tuplen)
1628 elog(ERROR, "bogus tuple length in backward scan");
1629 READTUP(state, stup, state->result_tape, tuplen);
1630
1631 /*
1632 * Remember the tuple we return, so that we can recycle its memory
1633 * on next call. (This can be NULL, in the Datum case).
1634 */
1635 state->lastReturnedTuple = stup->tuple;
1636
1637 return true;
1638
1639 case TSS_FINALMERGE:
1640 Assert(forward);
1641 /* We are managing memory ourselves, with the slab allocator. */
1642 Assert(state->slabAllocatorUsed);
1643
1644 /*
1645 * The slab slot holding the tuple that we returned in previous
1646 * gettuple call can now be reused.
1647 */
1648 if (state->lastReturnedTuple)
1649 {
1650 RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
1651 state->lastReturnedTuple = NULL;
1652 }
1653
1654 /*
1655 * This code should match the inner loop of mergeonerun().
1656 */
1657 if (state->memtupcount > 0)
1658 {
1659 int srcTapeIndex = state->memtuples[0].srctape;
1660 LogicalTape *srcTape = state->inputTapes[srcTapeIndex];
1661 SortTuple newtup;
1662
1663 *stup = state->memtuples[0];
1664
1665 /*
1666 * Remember the tuple we return, so that we can recycle its
1667 * memory on next call. (This can be NULL, in the Datum case).
1668 */
1669 state->lastReturnedTuple = stup->tuple;
1670
1671 /*
1672 * Pull next tuple from tape, and replace the returned tuple
1673 * at top of the heap with it.
1674 */
1675 if (!mergereadnext(state, srcTape, &newtup))
1676 {
1677 /*
1678 * If no more data, we've reached end of run on this tape.
1679 * Remove the top node from the heap.
1680 */
1681 tuplesort_heap_delete_top(state);
1682 state->nInputRuns--;
1683
1684 /*
1685 * Close the tape. It'd go away at the end of the sort
1686 * anyway, but better to release the memory early.
1687 */
1688 LogicalTapeClose(srcTape);
1689 return true;
1690 }
1691 newtup.srctape = srcTapeIndex;
1692 tuplesort_heap_replace_top(state, &newtup);
1693 return true;
1694 }
1695 return false;
1696
1697 default:
1698 elog(ERROR, "invalid tuplesort state");
1699 return false; /* keep compiler quiet */
1700 }
1701}
size_t LogicalTapeBackspace(LogicalTape *lt, size_t size)
Definition: logtape.c:1062

References Assert, elog, ERROR, getlen(), LogicalTapeBackspace(), LogicalTapeClose(), mergereadnext(), READTUP, RELEASE_SLAB_SLOT, SortTuple::srctape, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, SortTuple::tuple, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), TUPLESORT_RANDOMACCESS, and WORKER.

Referenced by tuplesort_getbrintuple(), tuplesort_getdatum(), tuplesort_getheaptuple(), tuplesort_getindextuple(), tuplesort_gettupleslot(), and tuplesort_skiptuples().

◆ tuplesort_heap_delete_top()

static void tuplesort_heap_delete_top ( Tuplesortstate *state)
static

Definition at line 2774 of file tuplesort.c.

2775{
2776 SortTuple *memtuples = state->memtuples;
2777 SortTuple *tuple;
2778
2779 if (--state->memtupcount <= 0)
2780 return;
2781
2782 /*
2783 * Remove the last tuple in the heap, and re-insert it, by replacing the
2784 * current top node with it.
2785 */
2786 tuple = &memtuples[state->memtupcount];
2787 tuplesort_heap_replace_top(state, tuple);
2788}

References tuplesort_heap_replace_top().

Referenced by mergeonerun(), sort_bounded_heap(), and tuplesort_gettuple_common().

◆ tuplesort_heap_insert()

static void tuplesort_heap_insert ( Tuplesortstate *state,
SortTuple *tuple 
)
static

Definition at line 2739 of file tuplesort.c.

2740{
2741 SortTuple *memtuples;
2742 int j;
2743
2744 memtuples = state->memtuples;
2745 Assert(state->memtupcount < state->memtupsize);
2746
2747 CHECK_FOR_INTERRUPTS();
2748
2749 /*
2750 * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is
2751 * using 1-based array indexes, not 0-based.
2752 */
2753 j = state->memtupcount++;
2754 while (j > 0)
2755 {
2756 int i = (j - 1) >> 1;
2757
2758 if (COMPARETUP(state, tuple, &memtuples[i]) >= 0)
2759 break;
2760 memtuples[j] = memtuples[i];
2761 j = i;
2762 }
2763 memtuples[j] = *tuple;
2764}

References Assert, CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by beginmerge(), and make_bounded_heap().

◆ tuplesort_heap_replace_top()

static void tuplesort_heap_replace_top ( Tuplesortstate *state,
SortTuple *tuple 
)
static

Definition at line 2798 of file tuplesort.c.

2799{
2800 SortTuple *memtuples = state->memtuples;
2801 unsigned int i,
2802 n;
2803
2804 Assert(state->memtupcount >= 1);
2805
2806 CHECK_FOR_INTERRUPTS();
2807
2808 /*
2809 * state->memtupcount is "int", but we use "unsigned int" for i, j, n.
2810 * This prevents overflow in the "2 * i + 1" calculation, since at the top
2811 * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2.
2812 */
2813 n = state->memtupcount;
2814 i = 0; /* i is where the "hole" is */
2815 for (;;)
2816 {
2817 unsigned int j = 2 * i + 1;
2818
2819 if (j >= n)
2820 break;
2821 if (j + 1 < n &&
2822 COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0)
2823 j++;
2824 if (COMPARETUP(state, tuple, &memtuples[j]) <= 0)
2825 break;
2826 memtuples[i] = memtuples[j];
2827 i = j;
2828 }
2829 memtuples[i] = *tuple;
2830}

References Assert, CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by make_bounded_heap(), mergeonerun(), tuplesort_gettuple_common(), tuplesort_heap_delete_top(), and tuplesort_puttuple_common().

◆ tuplesort_initialize_shared()

void tuplesort_initialize_shared ( Sharedsort *shared,
int  nWorkers,
dsm_segment *seg 
)

Definition at line 2938 of file tuplesort.c.

2939{
2940 int i;
2941
2942 Assert(nWorkers > 0);
2943
2944 SpinLockInit(&shared->mutex);
2945 shared->currentWorker = 0;
2946 shared->workersFinished = 0;
2947 SharedFileSetInit(&shared->fileset, seg);
2948 shared->nTapes = nWorkers;
2949 for (i = 0; i < nWorkers; i++)
2950 {
2951 shared->tapes[i].firstblocknumber = 0L;
2952 }
2953}
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:38
#define SpinLockInit(lock)
Definition: spin.h:57
int nTapes
Definition: tuplesort.c:363
int currentWorker
Definition: tuplesort.c:356
int64 firstblocknumber
Definition: logtape.h:54

References Assert, Sharedsort::currentWorker, Sharedsort::fileset, TapeShare::firstblocknumber, i, Sharedsort::mutex, Sharedsort::nTapes, SharedFileSetInit(), SpinLockInit, Sharedsort::tapes, and Sharedsort::workersFinished.

Referenced by _brin_begin_parallel(), and _bt_begin_parallel().

◆ tuplesort_markpos()

void tuplesort_markpos ( Tuplesortstate *state)

Definition at line 2435 of file tuplesort.c.

2436{
2437 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
2438
2439 Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);
2440
2441 switch (state->status)
2442 {
2443 case TSS_SORTEDINMEM:
2444 state->markpos_offset = state->current;
2445 state->markpos_eof = state->eof_reached;
2446 break;
2447 case TSS_SORTEDONTAPE:
2448 LogicalTapeTell(state->result_tape,
2449 &state->markpos_block,
2450 &state->markpos_offset);
2451 state->markpos_eof = state->eof_reached;
2452 break;
2453 default:
2454 elog(ERROR, "invalid tuplesort state");
2455 break;
2456 }
2457
2458 MemoryContextSwitchTo(oldcontext);
2459}
void LogicalTapeTell(LogicalTape *lt, int64 *blocknum, int *offset)
Definition: logtape.c:1162

References Assert, elog, ERROR, LogicalTapeTell(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecSortMarkPos().

◆ tuplesort_merge_order()

int tuplesort_merge_order ( int64  allowedMem)

Definition at line 1778 of file tuplesort.c.

1779{
1780 int mOrder;
1781
1782 /*----------
1783 * In the merge phase, we need buffer space for each input and output tape.
1784 * Each pass in the balanced merge algorithm reads from M input tapes, and
1785 * writes to N output tapes. Each tape consumes TAPE_BUFFER_OVERHEAD bytes
1786 * of memory. In addition to that, we want MERGE_BUFFER_SIZE workspace per
1787 * input tape.
1788 *
1789 * totalMem = M * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE) +
1790 * N * TAPE_BUFFER_OVERHEAD
1791 *
1792 * Except for the last and next-to-last merge passes, where there can be
1793 * fewer tapes left to process, M = N. We choose M so that we have the
1794 * desired amount of memory available for the input buffers
1795 * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE), given the total memory
1796 * available for the tape buffers (allowedMem).
1797 *
1798 * Note: you might be thinking we need to account for the memtuples[]
1799 * array in this calculation, but we effectively treat that as part of the
1800 * MERGE_BUFFER_SIZE workspace.
1801 *----------
1802 */
1803 mOrder = allowedMem /
1804 (2 * TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE);
1805
1806 /*
1807 * Even in minimum memory, use at least a MINORDER merge. On the other
1808 * hand, even when we have lots of memory, do not use more than a MAXORDER
1809 * merge. Tapes are pretty cheap, but they're not entirely free. Each
1810 * additional tape reduces the amount of memory available to build runs,
1811 * which in turn can cause the same sort to need more runs, which makes
1812 * merging slower even if it can still be done in a single pass. Also,
1813 * high order merges are quite slow due to CPU cache effects; it can be
1814 * faster to pay the I/O cost of a multi-pass merge than to perform a
1815 * single merge pass across many hundreds of tapes.
1816 */
1817 mOrder = Max(mOrder, MINORDER);
1818 mOrder = Min(mOrder, MAXORDER);
1819
1820 return mOrder;
1821}
#define MAXORDER
Definition: tuplesort.c:177
#define MERGE_BUFFER_SIZE
Definition: tuplesort.c:179

References Max, MAXORDER, MERGE_BUFFER_SIZE, Min, MINORDER, and TAPE_BUFFER_OVERHEAD.

Referenced by cost_tuplesort(), and inittapes().

◆ tuplesort_method_name()

const char * tuplesort_method_name ( TuplesortMethod  m)

Definition at line 2543 of file tuplesort.c.

2544{
2545 switch (m)
2546 {
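2547 case SORT_TYPE_STILL_IN_PROGRESS:
2548 return "still in progress";
2549 case SORT_TYPE_TOP_N_HEAPSORT:
2550 return "top-N heapsort";
2551 case SORT_TYPE_QUICKSORT:
2552 return "quicksort";
2553 case SORT_TYPE_EXTERNAL_SORT:
2554 return "external sort";
2555 case SORT_TYPE_EXTERNAL_MERGE:
2556 return "external merge";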
2557 }
2558
2559 return "unknown";
2560}

References SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, and SORT_TYPE_TOP_N_HEAPSORT.

Referenced by show_incremental_sort_group_info(), and show_sort_info().

◆ tuplesort_performsort()

void tuplesort_performsort ( Tuplesortstate *state)

Definition at line 1363 of file tuplesort.c.

1364{
1365 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
1366
1367 if (trace_sort)
1368 elog(LOG, "performsort of worker %d starting: %s",
1369 state->worker, pg_rusage_show(&state->ru_start));
1370
1371 switch (state->status)
1372 {
1373 case TSS_INITIAL:
1374
1375 /*
1376 * We were able to accumulate all the tuples within the allowed
1377 * amount of memory, or leader to take over worker tapes
1378 */
1379 if (SERIAL(state))
1380 {
1381 /* Just qsort 'em and we're done */
1382 tuplesort_sort_memtuples(state);
1383 state->status = TSS_SORTEDINMEM;
1384 }
1385 else if (WORKER(state))
1386 {
1387 /*
1388 * Parallel workers must still dump out tuples to tape. No
1389 * merge is required to produce single output run, though.
1390 */
1391 inittapes(state, false);
1392 dumptuples(state, true);
1393 worker_nomergeruns(state);
1394 state->status = TSS_SORTEDONTAPE;
1395 }
1396 else
1397 {
1398 /*
1399 * Leader will take over worker tapes and merge worker runs.
1400 * Note that mergeruns sets the correct state->status.
1401 */
1402 leader_takeover_tapes(state);
1403 mergeruns(state);
1404 }
1405 state->current = 0;
1406 state->eof_reached = false;
1407 state->markpos_block = 0L;
1408 state->markpos_offset = 0;
1409 state->markpos_eof = false;
1410 break;
1411
1412 case TSS_BOUNDED:
1413
1414 /*
1415 * We were able to accumulate all the tuples required for output
1416 * in memory, using a heap to eliminate excess tuples. Now we
1417 * have to transform the heap to a properly-sorted array. Note
1418 * that sort_bounded_heap sets the correct state->status.
1419 */
1420 sort_bounded_heap(state);
1421 state->current = 0;
1422 state->eof_reached = false;
1423 state->markpos_offset = 0;
1424 state->markpos_eof = false;
1425 break;
1426
1427 case TSS_BUILDRUNS:
1428
1429 /*
1430 * Finish tape-based sort. First, flush all tuples remaining in
1431 * memory out to tape; then merge until we have a single remaining
1432 * run (or, if !randomAccess and !WORKER(), one run per tape).
1433 * Note that mergeruns sets the correct state->status.
1434 */
1435 dumptuples(state, true);
1436 mergeruns(state);
1437 state->eof_reached = false;
1438 state->markpos_block = 0L;
1439 state->markpos_offset = 0;
1440 state->markpos_eof = false;
1441 break;
1442
1443 default:
1444 elog(ERROR, "invalid tuplesort state");
1445 break;
1446 }
1447
1448 if (trace_sort)
1449 {
1450 if (state->status == TSS_FINALMERGE)
1451 elog(LOG, "performsort of worker %d done (except %d-way final merge): %s",
1452 state->worker, state->nInputTapes,
1453 pg_rusage_show(&state->ru_start));
1454 else
1455 elog(LOG, "performsort of worker %d done: %s",
1456 state->worker, pg_rusage_show(&state->ru_start));
1457 }
1458
1459 MemoryContextSwitchTo(oldcontext);
1460}
static void sort_bounded_heap(Tuplesortstate *state)
Definition: tuplesort.c:2636
static void leader_takeover_tapes(Tuplesortstate *state)
Definition: tuplesort.c:3069
static void inittapes(Tuplesortstate *state, bool mergeruns)
Definition: tuplesort.c:1865
static void worker_nomergeruns(Tuplesortstate *state)
Definition: tuplesort.c:3047
static void dumptuples(Tuplesortstate *state, bool alltuples)
Definition: tuplesort.c:2307

References dumptuples(), elog, ERROR, inittapes(), leader_takeover_tapes(), LOG, MemoryContextSwitchTo(), mergeruns(), pg_rusage_show(), SERIAL, sort_bounded_heap(), trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_INITIAL, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_sort_memtuples(), WORKER, and worker_nomergeruns().

Referenced by _brin_parallel_merge(), _brin_parallel_scan_and_build(), _bt_leafbuild(), _bt_parallel_scan_and_sort(), _h_indexbuild(), ExecIncrementalSort(), ExecSort(), gistbuild(), heapam_relation_copy_for_cluster(), hypothetical_dense_rank_final(), hypothetical_rank_common(), initialize_phase(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), percentile_disc_multi_final(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), switchToPresortedPrefixMode(), and validate_index().

◆ tuplesort_puttuple_common()

void tuplesort_puttuple_common ( Tuplesortstate *state,
SortTuple *tuple,
bool  useAbbrev,
Size  tuplen 
)

Definition at line 1169 of file tuplesort.c.

1171{
1172 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
1173
1174 Assert(!LEADER(state));
1175
1176 /* account for the memory used for this tuple */
1177 USEMEM(state, tuplen);
1178 state->tupleMem += tuplen;
1179
1180 if (!useAbbrev)
1181 {
1182 /*
1183 * Leave ordinary Datum representation, or NULL value. If there is a
1184 * converter it won't expect NULL values, and cost model is not
1185 * required to account for NULL, so in that case we avoid calling
1186 * converter and just set datum1 to zeroed representation (to be
1187 * consistent, and to support cheap inequality tests for NULL
1188 * abbreviated keys).
1189 */
1190 }
1191 else if (!consider_abort_common(state))
1192 {
1193 /* Store abbreviated key representation */
1194 tuple->datum1 = state->base.sortKeys->abbrev_converter(tuple->datum1,
1195 state->base.sortKeys);
1196 }
1197 else
1198 {
1199 /*
1200 * Set state to be consistent with never trying abbreviation.
1201 *
1202 * Alter datum1 representation in already-copied tuples, so as to
1203 * ensure a consistent representation (current tuple was just
1204 * handled). It does not matter if some dumped tuples are already
1205 * sorted on tape, since serialized tuples lack abbreviated keys
1206 * (TSS_BUILDRUNS state prevents control reaching here in any case).
1207 */
1208 REMOVEABBREV(state, state->memtuples, state->memtupcount);
1209 }
1210
1211 switch (state->status)
1212 {
1213 case TSS_INITIAL:
1214
1215 /*
1216 * Save the tuple into the unsorted array. First, grow the array
1217 * as needed. Note that we try to grow the array when there is
1218 * still one free slot remaining --- if we fail, there'll still be
1219 * room to store the incoming tuple, and then we'll switch to
1220 * tape-based operation.
1221 */
1222 if (state->memtupcount >= state->memtupsize - 1)
1223 {
1224 (void) grow_memtuples(state);
1225 Assert(state->memtupcount < state->memtupsize);
1226 }
1227 state->memtuples[state->memtupcount++] = *tuple;
1228
1229 /*
1230 * Check if it's time to switch over to a bounded heapsort. We do
1231 * so if the input tuple count exceeds twice the desired tuple
1232 * count (this is a heuristic for where heapsort becomes cheaper
1233 * than a quicksort), or if we've just filled workMem and have
1234 * enough tuples to meet the bound.
1235 *
1236 * Note that once we enter TSS_BOUNDED state we will always try to
1237 * complete the sort that way. In the worst case, if later input
1238 * tuples are larger than earlier ones, this might cause us to
1239 * exceed workMem significantly.
1240 */
1241 if (state->bounded &&
1242 (state->memtupcount > state->bound * 2 ||
1243 (state->memtupcount > state->bound && LACKMEM(state))))
1244 {
1245 if (trace_sort)
1246 elog(LOG, "switching to bounded heapsort at %d tuples: %s",
1247 state->memtupcount,
1248 pg_rusage_show(&state->ru_start));
1249 make_bounded_heap(state);
1250 MemoryContextSwitchTo(oldcontext);
1251 return;
1252 }
1253
1254 /*
1255 * Done if we still fit in available memory and have array slots.
1256 */
1257 if (state->memtupcount < state->memtupsize && !LACKMEM(state))
1258 {
1259 MemoryContextSwitchTo(oldcontext);
1260 return;
1261 }
1262
1263 /*
1264 * Nope; time to switch to tape-based operation.
1265 */
1266 inittapes(state, true);
1267
1268 /*
1269 * Dump all tuples.
1270 */
1271 dumptuples(state, false);
1272 break;
1273
1274 case TSS_BOUNDED:
1275
1276 /*
1277 * We don't want to grow the array here, so check whether the new
1278 * tuple can be discarded before putting it in. This should be a
1279 * good speed optimization, too, since when there are many more
1280 * input tuples than the bound, most input tuples can be discarded
1281 * with just this one comparison. Note that because we currently
1282 * have the sort direction reversed, we must check for <= not >=.
1283 */
1284 if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0)
1285 {
1286 /* new tuple <= top of the heap, so we can discard it */
1287 free_sort_tuple(state, tuple);
1288 CHECK_FOR_INTERRUPTS();
1289 }
1290 else
1291 {
1292 /* discard top of heap, replacing it with the new tuple */
1293 free_sort_tuple(state, &state->memtuples[0]);
1294 tuplesort_heap_replace_top(state, tuple);
1295 }
1296 break;
1297
1298 case TSS_BUILDRUNS:
1299
1300 /*
1301 * Save the tuple into the unsorted array (there must be space)
1302 */
1303 state->memtuples[state->memtupcount++] = *tuple;
1304
1305 /*
1306 * If we are over the memory limit, dump all tuples.
1307 */
1308 dumptuples(state, false);
1309 break;
1310
1311 default:
1312 elog(ERROR, "invalid tuplesort state");
1313 break;
1314 }
1315 MemoryContextSwitchTo(oldcontext);
1316}
Datum datum1
Definition: tuplesort.h:150
#define REMOVEABBREV(state, stup, count)
Definition: tuplesort.c:395
static bool grow_memtuples(Tuplesortstate *state)
Definition: tuplesort.c:1052
static void make_bounded_heap(Tuplesortstate *state)
Definition: tuplesort.c:2587
static bool consider_abort_common(Tuplesortstate *state)
Definition: tuplesort.c:1319

References Assert, CHECK_FOR_INTERRUPTS, COMPARETUP, consider_abort_common(), SortTuple::datum1, dumptuples(), elog, ERROR, free_sort_tuple(), grow_memtuples(), inittapes(), LACKMEM, LEADER, LOG, make_bounded_heap(), MemoryContextSwitchTo(), pg_rusage_show(), REMOVEABBREV, trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_INITIAL, tuplesort_heap_replace_top(), and USEMEM.

Referenced by tuplesort_putbrintuple(), tuplesort_putdatum(), tuplesort_putheaptuple(), tuplesort_putindextuplevalues(), and tuplesort_puttupleslot().

◆ tuplesort_readtup_alloc()

void * tuplesort_readtup_alloc ( Tuplesortstate *state,
Size  tuplen 
)

Definition at line 2883 of file tuplesort.c.

2884{
2885 SlabSlot *buf;
2886
2887 /*
2888 * We pre-allocate enough slots in the slab arena that we should never run
2889 * out.
2890 */
2891 Assert(state->slabFreeHead);
2892
2893 if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead)
2894 return MemoryContextAlloc(state->base.sortcontext, tuplen);
2895 else
2896 {
2897 buf = state->slabFreeHead;
2898 /* Reuse this slot */
2899 state->slabFreeHead = buf->nextfree;
2900
2901 return buf;
2902 }
2903}

References Assert, buf, MemoryContextAlloc(), and SLAB_SLOT_SIZE.

Referenced by readtup_cluster(), readtup_datum(), readtup_heap(), readtup_index(), and readtup_index_brin().

◆ tuplesort_rescan()

void tuplesort_rescan ( Tuplesortstate *state)

Definition at line 2402 of file tuplesort.c.

2403{
2404 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
2405
2406 Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);
2407
2408 switch (state->status)
2409 {
2410 case TSS_SORTEDINMEM:
2411 state->current = 0;
2412 state->eof_reached = false;
2413 state->markpos_offset = 0;
2414 state->markpos_eof = false;
2415 break;
2416 case TSS_SORTEDONTAPE:
2417 LogicalTapeRewindForRead(state->result_tape, 0);
2418 state->eof_reached = false;
2419 state->markpos_block = 0L;
2420 state->markpos_offset = 0;
2421 state->markpos_eof = false;
2422 break;
2423 default:
2424 elog(ERROR, "invalid tuplesort state");
2425 break;
2426 }
2427
2428 MemoryContextSwitchTo(oldcontext);
2429}

References Assert, elog, ERROR, LogicalTapeRewindForRead(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecReScanSort(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().

◆ tuplesort_reset()

void tuplesort_reset ( Tuplesortstate *state)

Definition at line 1019 of file tuplesort.c.

1020{
1021 tuplesort_updatemax(state);
1022 tuplesort_free(state);
1023
1024 /*
1025 * After we've freed up per-batch memory, re-setup all of the state common
1026 * to both the first batch and any subsequent batch.
1027 */
1028 tuplesort_begin_batch(state);
1029
1030 state->lastReturnedTuple = NULL;
1031 state->slabMemoryBegin = NULL;
1032 state->slabMemoryEnd = NULL;
1033 state->slabFreeHead = NULL;
1034}

References tuplesort_begin_batch(), tuplesort_free(), and tuplesort_updatemax().

Referenced by ExecIncrementalSort(), ExecReScanIncrementalSort(), and switchToPresortedPrefixMode().

◆ tuplesort_restorepos()

void tuplesort_restorepos ( Tuplesortstate *state)

Definition at line 2466 of file tuplesort.c.

2467{
2468 MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
2469
2470 Assert(state->base.sortopt & TUPLESORT_RANDOMACCESS);
2471
2472 switch (state->status)
2473 {
2474 case TSS_SORTEDINMEM:
2475 state->current = state->markpos_offset;
2476 state->eof_reached = state->markpos_eof;
2477 break;
2478 case TSS_SORTEDONTAPE:
2479 LogicalTapeSeek(state->result_tape,
2480 state->markpos_block,
2481 state->markpos_offset);
2482 state->eof_reached = state->markpos_eof;
2483 break;
2484 default:
2485 elog(ERROR, "invalid tuplesort state");
2486 break;
2487 }
2488
2489 MemoryContextSwitchTo(oldcontext);
2490}
void LogicalTapeSeek(LogicalTape *lt, int64 blocknum, int offset)
Definition: logtape.c:1133

References Assert, elog, ERROR, LogicalTapeSeek(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and TUPLESORT_RANDOMACCESS.

Referenced by ExecSortRestrPos().

◆ tuplesort_set_bound()

void tuplesort_set_bound ( Tuplesortstate *state,
int64  bound 
)

Definition at line 838 of file tuplesort.c.

839{
840 /* Assert we're called before loading any tuples */
841 Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
842 /* Assert we allow bounded sorts */
843 Assert(state->base.sortopt & TUPLESORT_ALLOWBOUNDED);
844 /* Can't set the bound twice, either */
845 Assert(!state->bounded);
846 /* Also, this shouldn't be called in a parallel worker */
847 Assert(!WORKER(state));
848
849 /* Parallel leader allows but ignores hint */
850 if (LEADER(state))
851 return;
852
853#ifdef DEBUG_BOUNDED_SORT
854 /* Honor GUC setting that disables the feature (for easy testing) */
855 if (!optimize_bounded_sort)
856 return;
857#endif
858
859 /* We want to be able to compute bound * 2, so limit the setting */
860 if (bound > (int64) (INT_MAX / 2))
861 return;
862
863 state->bounded = true;
864 state->bound = (int) bound;
865
866 /*
867 * Bounded sorts are not an effective target for abbreviated key
868 * optimization. Disable by setting state to be consistent with no
869 * abbreviation support.
870 */
871 state->base.sortKeys->abbrev_converter = NULL;
872 if (state->base.sortKeys->abbrev_full_comparator)
873 state->base.sortKeys->comparator = state->base.sortKeys->abbrev_full_comparator;
874
875 /* Not strictly necessary, but be tidy */
876 state->base.sortKeys->abbrev_abort = NULL;
877 state->base.sortKeys->abbrev_full_comparator = NULL;
878}
#define TUPLESORT_ALLOWBOUNDED
Definition: tuplesort.h:99

References Assert, LEADER, TSS_INITIAL, TUPLESORT_ALLOWBOUNDED, and WORKER.

Referenced by ExecIncrementalSort(), ExecSort(), and switchToPresortedPrefixMode().

◆ tuplesort_skiptuples()

bool tuplesort_skiptuples ( Tuplesortstate * state,
int64  ntuples,
bool  forward 
)

Definition at line 1710 of file tuplesort.c.

1711{
1712 MemoryContext oldcontext;
1713
1714 /*
1715 * We don't actually support backwards skip yet, because no callers need
1716 * it. The API is designed to allow for that later, though.
1717 */
1718 Assert(forward);
1719 Assert(ntuples >= 0);
1720 Assert(!WORKER(state));
1721
1722 switch (state->status)
1723 {
1724 case TSS_SORTEDINMEM:
1725 if (state->memtupcount - state->current >= ntuples)
1726 {
1727 state->current += ntuples;
1728 return true;
1729 }
1730 state->current = state->memtupcount;
1731 state->eof_reached = true;
1732
1733 /*
1734 * Complain if caller tries to retrieve more tuples than
1735 * originally asked for in a bounded sort. This is because
1736 * returning EOF here might be the wrong thing.
1737 */
1738 if (state->bounded && state->current >= state->bound)
1739 elog(ERROR, "retrieved too many tuples in a bounded sort");
1740
1741 return false;
1742
1743 case TSS_SORTEDONTAPE:
1744 case TSS_FINALMERGE:
1745
1746 /*
1747 * We could probably optimize these cases better, but for now it's
1748 * not worth the trouble.
1749 */
1750 oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
1751 while (ntuples-- > 0)
1752 {
1753 SortTuple stup;
1754
1755 if (!tuplesort_gettuple_common(state, forward, &stup))
1756 {
1757 MemoryContextSwitchTo(oldcontext);
1758 return false;
1759 }
1760 CHECK_FOR_INTERRUPTS();
1761 }
1762 MemoryContextSwitchTo(oldcontext);
1763 return true;
1764
1765 default:
1766 elog(ERROR, "invalid tuplesort state");
1767 return false; /* keep compiler quiet */
1768 }
1769}
bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward, SortTuple *stup)
Definition: tuplesort.c:1470

References Assert, CHECK_FOR_INTERRUPTS, elog, ERROR, MemoryContextSwitchTo(), TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_gettuple_common(), and WORKER.

Referenced by percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().

◆ tuplesort_sort_memtuples()

static void tuplesort_sort_memtuples ( Tuplesortstate * state)
static

Definition at line 2676 of file tuplesort.c.

2677{
2678 Assert(!LEADER(state));
2679
2680 if (state->memtupcount > 1)
2681 {
2682 /*
2683 * Do we have the leading column's value or abbreviation in datum1,
2684 * and is there a specialization for its comparator?
2685 */
2686 if (state->base.haveDatum1 && state->base.sortKeys)
2687 {
2688 if (state->base.sortKeys[0].comparator == ssup_datum_unsigned_cmp)
2689 {
2690 qsort_tuple_unsigned(state->memtuples,
2691 state->memtupcount,
2692 state);
2693 return;
2694 }
2695#if SIZEOF_DATUM >= 8
2696 else if (state->base.sortKeys[0].comparator == ssup_datum_signed_cmp)
2697 {
2698 qsort_tuple_signed(state->memtuples,
2699 state->memtupcount,
2700 state);
2701 return;
2702 }
2703#endif
2704 else if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp)
2705 {
2706 qsort_tuple_int32(state->memtuples,
2707 state->memtupcount,
2708 state);
2709 return;
2710 }
2711 }
2712
2713 /* Can we use the single-key sort function? */
2714 if (state->base.onlyKey != NULL)
2715 {
2716 qsort_ssup(state->memtuples, state->memtupcount,
2717 state->base.onlyKey);
2718 }
2719 else
2720 {
2721 qsort_tuple(state->memtuples,
2722 state->memtupcount,
2723 state->base.comparetup,
2724 state);
2725 }
2726 }
2727}
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3139
int ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3166

References Assert, LEADER, ssup_datum_int32_cmp(), and ssup_datum_unsigned_cmp().

Referenced by dumptuples(), and tuplesort_performsort().

◆ tuplesort_space_type_name()

const char * tuplesort_space_type_name ( TuplesortSpaceType  t)

Definition at line 2566 of file tuplesort.c.

2567{
2568 Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY);
2569 return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory";
2570}

References Assert, SORT_SPACE_TYPE_DISK, and SORT_SPACE_TYPE_MEMORY.

Referenced by show_incremental_sort_group_info(), and show_sort_info().

◆ tuplesort_updatemax()

static void tuplesort_updatemax ( Tuplesortstate * state)
static

Definition at line 968 of file tuplesort.c.

969{
970 int64 spaceUsed;
971 bool isSpaceDisk;
972
973 /*
974 * Note: it might seem we should provide both memory and disk usage for a
975 * disk-based sort. However, the current code doesn't track memory space
976 * accurately once we have begun to return tuples to the caller (since we
977 * don't account for pfree's the caller is expected to do), so we cannot
978 * rely on availMem in a disk sort. This does not seem worth the overhead
979 * to fix. Is it worth creating an API for the memory context code to
980 * tell us how much is actually used in sortcontext?
981 */
982 if (state->tapeset)
983 {
984 isSpaceDisk = true;
985 spaceUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ;
986 }
987 else
988 {
989 isSpaceDisk = false;
990 spaceUsed = state->allowedMem - state->availMem;
991 }
992
993 /*
994 * Sort evicts data to the disk when it wasn't able to fit that data into
995 * main memory. This is why we assume space used on the disk to be more
996 * important for tracking resource usage than space used in memory. Note
997 * that the amount of space occupied by some tupleset on the disk might be
998 * less than amount of space occupied by the same tupleset in memory due
999 * to more compact representation.
1000 */
1001 if ((isSpaceDisk && !state->isMaxSpaceDisk) ||
1002 (isSpaceDisk == state->isMaxSpaceDisk && spaceUsed > state->maxSpace))
1003 {
1004 state->maxSpace = spaceUsed;
1005 state->isMaxSpaceDisk = isSpaceDisk;
1006 state->maxSpaceStatus = state->status;
1007 }
1008}

References LogicalTapeSetBlocks().

Referenced by tuplesort_get_stats(), and tuplesort_reset().

◆ tuplesort_used_bound()

bool tuplesort_used_bound ( Tuplesortstate * state)

Definition at line 886 of file tuplesort.c.

887{
888 return state->boundUsed;
889}

Referenced by ExecIncrementalSort().

◆ worker_freeze_result_tape()

static void worker_freeze_result_tape ( Tuplesortstate * state)
static

Definition at line 3009 of file tuplesort.c.

3010{
3011 Sharedsort *shared = state->shared;
3012 TapeShare output;
3013
3014 Assert(WORKER(state));
3015 Assert(state->result_tape != NULL);
3016 Assert(state->memtupcount == 0);
3017
3018 /*
3019 * Free most remaining memory, in case caller is sensitive to our holding
3020 * on to it. memtuples may not be a tiny merge heap at this point.
3021 */
3022 pfree(state->memtuples);
3023 /* Be tidy */
3024 state->memtuples = NULL;
3025 state->memtupsize = 0;
3026
3027 /*
3028 * Parallel worker requires result tape metadata, which is to be stored in
3029 * shared memory for leader
3030 */
3031 LogicalTapeFreeze(state->result_tape, &output);
3032
3033 /* Store properties of output tape, and update finished worker count */
3034 SpinLockAcquire(&shared->mutex);
3035 shared->tapes[state->worker] = output;
3036 shared->workersFinished++;
3037 SpinLockRelease(&shared->mutex);
3038}
FILE * output

References Assert, LogicalTapeFreeze(), Sharedsort::mutex, output, pfree(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, WORKER, and Sharedsort::workersFinished.

Referenced by mergeruns(), and worker_nomergeruns().

◆ worker_get_identifier()

static int worker_get_identifier ( Tuplesortstate * state)
static

Definition at line 2981 of file tuplesort.c.

2982{
2983 Sharedsort *shared = state->shared;
2984 int worker;
2985
2986 Assert(WORKER(state));
2987
2988 SpinLockAcquire(&shared->mutex);
2989 worker = shared->currentWorker++;
2990 SpinLockRelease(&shared->mutex);
2991
2992 return worker;
2993}

References Assert, Sharedsort::currentWorker, Sharedsort::mutex, SpinLockAcquire, SpinLockRelease, and WORKER.

Referenced by tuplesort_begin_common().

◆ worker_nomergeruns()

static void worker_nomergeruns ( Tuplesortstate * state)
static

Definition at line 3047 of file tuplesort.c.

3048{
3049 Assert(WORKER(state));
3050 Assert(state->result_tape == NULL);
3051 Assert(state->nOutputRuns == 1);
3052
3053 state->result_tape = state->destTape;
3054 worker_freeze_result_tape(state);
3055}

References Assert, WORKER, and worker_freeze_result_tape().

Referenced by tuplesort_performsort().

Variable Documentation

◆ trace_sort