PostgreSQL Source Code  git master
tuplesort.c File Reference
#include "postgres.h"
#include <limits.h>
#include "access/hash.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/tablespace.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "utils/datum.h"
#include "utils/logtape.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/rel.h"
#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
#include "lib/sort_template.h"
Include dependency graph for tuplesort.c (graph omitted).


Data Structures

struct  SortTuple
 
union  SlabSlot
 
struct  Tuplesortstate
 
struct  Sharedsort
 

Macros

#define HEAP_SORT   0
 
#define INDEX_SORT   1
 
#define DATUM_SORT   2
 
#define CLUSTER_SORT   3
 
#define PARALLEL_SORT(state)
 
#define INITIAL_MEMTUPSIZE
 
#define SLAB_SLOT_SIZE   1024
 
#define MINORDER   6 /* minimum merge order */
 
#define MAXORDER   500 /* maximum merge order */
 
#define TAPE_BUFFER_OVERHEAD   BLCKSZ
 
#define MERGE_BUFFER_SIZE   (BLCKSZ * 32)
 
#define IS_SLAB_SLOT(state, tuple)
 
#define RELEASE_SLAB_SLOT(state, tuple)
 
#define COMPARETUP(state, a, b)   ((*(state)->comparetup) (a, b, state))
 
#define COPYTUP(state, stup, tup)   ((*(state)->copytup) (state, stup, tup))
 
#define WRITETUP(state, tape, stup)   ((*(state)->writetup) (state, tape, stup))
 
#define READTUP(state, stup, tape, len)   ((*(state)->readtup) (state, stup, tape, len))
 
#define LACKMEM(state)   ((state)->availMem < 0 && !(state)->slabAllocatorUsed)
 
#define USEMEM(state, amt)   ((state)->availMem -= (amt))
 
#define FREEMEM(state, amt)   ((state)->availMem += (amt))
 
#define SERIAL(state)   ((state)->shared == NULL)
 
#define WORKER(state)   ((state)->shared && (state)->worker != -1)
 
#define LEADER(state)   ((state)->shared && (state)->worker == -1)
 
#define LogicalTapeReadExact(tape, ptr, len)
 
#define ST_SORT   qsort_tuple
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE_RUNTIME_POINTER
 
#define ST_COMPARE_ARG_TYPE   Tuplesortstate
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DECLARE
 
#define ST_DEFINE
 
#define ST_SORT   qsort_ssup
 
#define ST_ELEMENT_TYPE   SortTuple
 
#define ST_COMPARE(a, b, ssup)
 
#define ST_COMPARE_ARG_TYPE   SortSupportData
 
#define ST_CHECK_FOR_INTERRUPTS
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef union SlabSlot SlabSlot
 
typedef int(* SortTupleComparator) (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 

Enumerations

enum  TupSortStatus {
  TSS_INITIAL , TSS_BOUNDED , TSS_BUILDRUNS , TSS_SORTEDINMEM ,
  TSS_SORTEDONTAPE , TSS_FINALMERGE
}
 

Functions

static Tuplesortstate * tuplesort_begin_common (int workMem, SortCoordinate coordinate, bool randomAccess)
 
static void tuplesort_begin_batch (Tuplesortstate *state)
 
static void puttuple_common (Tuplesortstate *state, SortTuple *tuple)
 
static bool consider_abort_common (Tuplesortstate *state)
 
static void inittapes (Tuplesortstate *state, bool mergeruns)
 
static void inittapestate (Tuplesortstate *state, int maxTapes)
 
static void selectnewtape (Tuplesortstate *state)
 
static void init_slab_allocator (Tuplesortstate *state, int numSlots)
 
static void mergeruns (Tuplesortstate *state)
 
static void mergeonerun (Tuplesortstate *state)
 
static void beginmerge (Tuplesortstate *state)
 
static bool mergereadnext (Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup)
 
static void dumptuples (Tuplesortstate *state, bool alltuples)
 
static void make_bounded_heap (Tuplesortstate *state)
 
static void sort_bounded_heap (Tuplesortstate *state)
 
static void tuplesort_sort_memtuples (Tuplesortstate *state)
 
static void tuplesort_heap_insert (Tuplesortstate *state, SortTuple *tuple)
 
static void tuplesort_heap_replace_top (Tuplesortstate *state, SortTuple *tuple)
 
static void tuplesort_heap_delete_top (Tuplesortstate *state)
 
static void reversedirection (Tuplesortstate *state)
 
static unsigned int getlen (LogicalTape *tape, bool eofOK)
 
static void markrunend (LogicalTape *tape)
 
static void * readtup_alloc (Tuplesortstate *state, Size tuplen)
 
static int comparetup_heap (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 
static void copytup_heap (Tuplesortstate *state, SortTuple *stup, void *tup)
 
static void writetup_heap (Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
 
static void readtup_heap (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
 
static int comparetup_cluster (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 
static void copytup_cluster (Tuplesortstate *state, SortTuple *stup, void *tup)
 
static void writetup_cluster (Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
 
static void readtup_cluster (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
 
static int comparetup_index_btree (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 
static int comparetup_index_hash (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 
static void copytup_index (Tuplesortstate *state, SortTuple *stup, void *tup)
 
static void writetup_index (Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
 
static void readtup_index (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
 
static int comparetup_datum (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 
static void copytup_datum (Tuplesortstate *state, SortTuple *stup, void *tup)
 
static void writetup_datum (Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
 
static void readtup_datum (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
 
static int worker_get_identifier (Tuplesortstate *state)
 
static void worker_freeze_result_tape (Tuplesortstate *state)
 
static void worker_nomergeruns (Tuplesortstate *state)
 
static void leader_takeover_tapes (Tuplesortstate *state)
 
static void free_sort_tuple (Tuplesortstate *state, SortTuple *stup)
 
static void tuplesort_free (Tuplesortstate *state)
 
static void tuplesort_updatemax (Tuplesortstate *state)
 
Tuplesortstate * tuplesort_begin_heap (TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, int workMem, SortCoordinate coordinate, bool randomAccess)
 
Tuplesortstate * tuplesort_begin_cluster (TupleDesc tupDesc, Relation indexRel, int workMem, SortCoordinate coordinate, bool randomAccess)
 
Tuplesortstate * tuplesort_begin_index_btree (Relation heapRel, Relation indexRel, bool enforceUnique, int workMem, SortCoordinate coordinate, bool randomAccess)
 
Tuplesortstate * tuplesort_begin_index_hash (Relation heapRel, Relation indexRel, uint32 high_mask, uint32 low_mask, uint32 max_buckets, int workMem, SortCoordinate coordinate, bool randomAccess)
 
Tuplesortstate * tuplesort_begin_index_gist (Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, bool randomAccess)
 
Tuplesortstate * tuplesort_begin_datum (Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, SortCoordinate coordinate, bool randomAccess)
 
void tuplesort_set_bound (Tuplesortstate *state, int64 bound)
 
bool tuplesort_used_bound (Tuplesortstate *state)
 
void tuplesort_end (Tuplesortstate *state)
 
void tuplesort_reset (Tuplesortstate *state)
 
static bool grow_memtuples (Tuplesortstate *state)
 
void tuplesort_puttupleslot (Tuplesortstate *state, TupleTableSlot *slot)
 
void tuplesort_putheaptuple (Tuplesortstate *state, HeapTuple tup)
 
void tuplesort_putindextuplevalues (Tuplesortstate *state, Relation rel, ItemPointer self, Datum *values, bool *isnull)
 
void tuplesort_putdatum (Tuplesortstate *state, Datum val, bool isNull)
 
void tuplesort_performsort (Tuplesortstate *state)
 
static bool tuplesort_gettuple_common (Tuplesortstate *state, bool forward, SortTuple *stup)
 
bool tuplesort_gettupleslot (Tuplesortstate *state, bool forward, bool copy, TupleTableSlot *slot, Datum *abbrev)
 
HeapTuple tuplesort_getheaptuple (Tuplesortstate *state, bool forward)
 
IndexTuple tuplesort_getindextuple (Tuplesortstate *state, bool forward)
 
bool tuplesort_getdatum (Tuplesortstate *state, bool forward, Datum *val, bool *isNull, Datum *abbrev)
 
bool tuplesort_skiptuples (Tuplesortstate *state, int64 ntuples, bool forward)
 
int tuplesort_merge_order (int64 allowedMem)
 
static int64 merge_read_buffer_size (int64 avail_mem, int nInputTapes, int nInputRuns, int maxOutputTapes)
 
void tuplesort_rescan (Tuplesortstate *state)
 
void tuplesort_markpos (Tuplesortstate *state)
 
void tuplesort_restorepos (Tuplesortstate *state)
 
void tuplesort_get_stats (Tuplesortstate *state, TuplesortInstrumentation *stats)
 
const char * tuplesort_method_name (TuplesortMethod m)
 
const char * tuplesort_space_type_name (TuplesortSpaceType t)
 
Size tuplesort_estimate_shared (int nWorkers)
 
void tuplesort_initialize_shared (Sharedsort *shared, int nWorkers, dsm_segment *seg)
 
void tuplesort_attach_shared (Sharedsort *shared, dsm_segment *seg)
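
A typical caller drives these entry points through a begin / put / performsort / get / end life cycle. A minimal serial sketch (the tuple descriptor, sort-key arrays, slot, work_mem value, and the fetch_next_slot()/process_slot() helpers are caller-side placeholders, not part of this file; error handling omitted):

    Tuplesortstate *sortstate;

    /* begin: no parallel coordinator, no random access to the result */
    sortstate = tuplesort_begin_heap(tupDesc, nkeys, attNums,
                                     sortOperators, sortCollations,
                                     nullsFirstFlags, work_mem,
                                     NULL, false);

    /* put: feed every input tuple into the sort */
    while (fetch_next_slot(slot))               /* hypothetical input loop */
        tuplesort_puttupleslot(sortstate, slot);

    /* sort */
    tuplesort_performsort(sortstate);

    /* get: read tuples back in sorted order */
    while (tuplesort_gettupleslot(sortstate, true, false, slot, NULL))
        process_slot(slot);                     /* hypothetical consumer */

    /* end: release memory and temporary files */
    tuplesort_end(sortstate);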
 

Variables

bool trace_sort = false
 

Macro Definition Documentation

◆ CLUSTER_SORT

#define CLUSTER_SORT   3

Definition at line 126 of file tuplesort.c.

◆ COMPARETUP

#define COMPARETUP (   state,
  a,
  b 
)    ((*(state)->comparetup) (a, b, state))

Definition at line 540 of file tuplesort.c.

◆ COPYTUP

#define COPYTUP (   state,
  stup,
  tup 
)    ((*(state)->copytup) (state, stup, tup))

Definition at line 541 of file tuplesort.c.

◆ DATUM_SORT

#define DATUM_SORT   2

Definition at line 125 of file tuplesort.c.

◆ FREEMEM

#define FREEMEM (   state,
  amt 
)    ((state)->availMem += (amt))

Definition at line 546 of file tuplesort.c.

◆ HEAP_SORT

#define HEAP_SORT   0

Definition at line 123 of file tuplesort.c.

◆ INDEX_SORT

#define INDEX_SORT   1

Definition at line 124 of file tuplesort.c.

◆ INITIAL_MEMTUPSIZE

#define INITIAL_MEMTUPSIZE
Value:
Max(1024, \
ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1)

Definition at line 139 of file tuplesort.c.

◆ IS_SLAB_SLOT

#define IS_SLAB_SLOT (   state,
  tuple 
)
Value:
((char *) (tuple) >= (state)->slabMemoryBegin && \
(char *) (tuple) < (state)->slabMemoryEnd)

Definition at line 520 of file tuplesort.c.

◆ LACKMEM

#define LACKMEM (   state)    ((state)->availMem < 0 && !(state)->slabAllocatorUsed)

Definition at line 544 of file tuplesort.c.

◆ LEADER

#define LEADER (   state)    ((state)->shared && (state)->worker == -1)

Definition at line 549 of file tuplesort.c.

◆ LogicalTapeReadExact

#define LogicalTapeReadExact (   tape,
  ptr,
  len 
)
Value:
do { \
if (LogicalTapeRead(tape, ptr, len) != (size_t) (len)) \
elog(ERROR, "unexpected end of data"); \
} while(0)

Definition at line 601 of file tuplesort.c.
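
A sketch of the intended call pattern, as in the readtup_* routines once getlen() has supplied a tuple length (tape, tupbody, and tuplen stand for the caller's variables):

    /* read exactly tuplen bytes, or elog(ERROR) on a short read */
    LogicalTapeReadExact(tape, tupbody, tuplen);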

◆ MAXORDER

#define MAXORDER   500 /* maximum merge order */

Definition at line 235 of file tuplesort.c.

◆ MERGE_BUFFER_SIZE

#define MERGE_BUFFER_SIZE   (BLCKSZ * 32)

Definition at line 237 of file tuplesort.c.

◆ MINORDER

#define MINORDER   6 /* minimum merge order */

Definition at line 234 of file tuplesort.c.
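
These bounds clamp the merge order computed by tuplesort_merge_order(). As a rough worked example, assuming the default BLCKSZ of 8192: each merge input costs about MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD = 256 kB + 8 kB, so roughly 4 MB of allowedMem yields a merge order near 4096/264 ≈ 15, while small memory budgets are clamped up to MINORDER (6) and huge ones are capped at MAXORDER (500).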

◆ PARALLEL_SORT

#define PARALLEL_SORT (   state)
Value:
((state)->shared == NULL ? 0 : \
(state)->worker >= 0 ? 1 : 2)

Definition at line 129 of file tuplesort.c.

◆ READTUP

#define READTUP (   state,
  stup,
  tape,
  len 
)    ((*(state)->readtup) (state, stup, tape, len))

Definition at line 543 of file tuplesort.c.

◆ RELEASE_SLAB_SLOT

#define RELEASE_SLAB_SLOT (   state,
  tuple 
)
Value:
do { \
SlabSlot *buf = (SlabSlot *) tuple; \
\
if (IS_SLAB_SLOT((state), buf)) \
{ \
buf->nextfree = (state)->slabFreeHead; \
(state)->slabFreeHead = buf; \
} \
else \
pfree(buf); \
} while(0)

Definition at line 528 of file tuplesort.c.
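
A sketch of the call pattern during a merge, where each tuple's slot is recycled as soon as the tuple has been handed onward (stup as in mergeonerun()):

    /* done with the tuple: recycle its slab slot, or pfree() an oversized one */
    RELEASE_SLAB_SLOT(state, stup.tuple);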

◆ SERIAL

#define SERIAL (   state)    ((state)->shared == NULL)

Definition at line 547 of file tuplesort.c.

◆ SLAB_SLOT_SIZE

#define SLAB_SLOT_SIZE   1024

Definition at line 200 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [1/2]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 695 of file tuplesort.c.

◆ ST_CHECK_FOR_INTERRUPTS [2/2]

#define ST_CHECK_FOR_INTERRUPTS

Definition at line 695 of file tuplesort.c.

◆ ST_COMPARE

#define ST_COMPARE (   a,
  b,
  ssup 
)
Value:
ApplySortComparator((a)->datum1, (a)->isnull1, \
(b)->datum1, (b)->isnull1, (ssup))

Definition at line 691 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [1/2]

#define ST_COMPARE_ARG_TYPE   Tuplesortstate

Definition at line 694 of file tuplesort.c.

◆ ST_COMPARE_ARG_TYPE [2/2]

#define ST_COMPARE_ARG_TYPE   SortSupportData

Definition at line 694 of file tuplesort.c.

◆ ST_COMPARE_RUNTIME_POINTER

#define ST_COMPARE_RUNTIME_POINTER

Definition at line 681 of file tuplesort.c.

◆ ST_DECLARE

#define ST_DECLARE

Definition at line 685 of file tuplesort.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 697 of file tuplesort.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 697 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 690 of file tuplesort.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   SortTuple

Definition at line 690 of file tuplesort.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 696 of file tuplesort.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 696 of file tuplesort.c.

◆ ST_SORT [1/2]

#define ST_SORT   qsort_tuple

Definition at line 689 of file tuplesort.c.

◆ ST_SORT [2/2]

#define ST_SORT   qsort_ssup

Definition at line 689 of file tuplesort.c.

◆ TAPE_BUFFER_OVERHEAD

#define TAPE_BUFFER_OVERHEAD   BLCKSZ

Definition at line 236 of file tuplesort.c.

◆ USEMEM

#define USEMEM (   state,
  amt 
)    ((state)->availMem -= (amt))

Definition at line 545 of file tuplesort.c.

◆ WORKER

#define WORKER (   state)    ((state)->shared && (state)->worker != -1)

Definition at line 548 of file tuplesort.c.

◆ WRITETUP

#define WRITETUP (   state,
  tape,
  stup 
)    ((*(state)->writetup) (state, tape, stup))

Definition at line 542 of file tuplesort.c.

Typedef Documentation

◆ SlabSlot

typedef union SlabSlot SlabSlot

◆ SortTupleComparator

typedef int(* SortTupleComparator) (const SortTuple *a, const SortTuple *b, Tuplesortstate *state)

Definition at line 239 of file tuplesort.c.

Enumeration Type Documentation

◆ TupSortStatus

Enumerator
TSS_INITIAL 
TSS_BOUNDED 
TSS_BUILDRUNS 
TSS_SORTEDINMEM 
TSS_SORTEDONTAPE 
TSS_FINALMERGE 

Definition at line 212 of file tuplesort.c.

213 {
214  TSS_INITIAL, /* Loading tuples; still within memory limit */
215  TSS_BOUNDED, /* Loading tuples into bounded-size heap */
216  TSS_BUILDRUNS, /* Loading tuples; writing to tape */
217  TSS_SORTEDINMEM, /* Sort completed entirely in memory */
218  TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */
219  TSS_FINALMERGE /* Performing final merge on-the-fly */
220 } TupSortStatus;
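
A sketch of the possible status transitions (every sort starts in TSS_INITIAL):

 TSS_INITIAL -> TSS_SORTEDINMEM                    (everything fit in workMem)
 TSS_INITIAL -> TSS_BOUNDED -> TSS_SORTEDINMEM     (bounded heap, e.g. LIMIT sorts)
 TSS_INITIAL -> TSS_BUILDRUNS -> TSS_SORTEDONTAPE  (external sort, result left on tape)
 TSS_INITIAL -> TSS_BUILDRUNS -> TSS_FINALMERGE    (external sort, merged on the fly)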

Function Documentation

◆ beginmerge()

static void beginmerge ( Tuplesortstate * state)
static

Definition at line 3088 of file tuplesort.c.

3089 {
3090  int activeTapes;
3091  int srcTapeIndex;
3092 
3093  /* Heap should be empty here */
3094  Assert(state->memtupcount == 0);
3095 
3096  activeTapes = Min(state->nInputTapes, state->nInputRuns);
3097 
3098  for (srcTapeIndex = 0; srcTapeIndex < activeTapes; srcTapeIndex++)
3099  {
3100  SortTuple tup;
3101 
3102  if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup))
3103  {
3104  tup.srctape = srcTapeIndex;
3105  tuplesort_heap_insert(state, &tup);
3106  }
3107  }
3108 }

References Assert(), mergereadnext(), Min, SortTuple::srctape, and tuplesort_heap_insert().

Referenced by mergeonerun(), and mergeruns().

◆ comparetup_cluster()

static int comparetup_cluster ( const SortTuple * a,
const SortTuple * b,
Tuplesortstate * state 
)
static

Definition at line 3894 of file tuplesort.c.

3896 {
3897  SortSupport sortKey = state->sortKeys;
3898  HeapTuple ltup;
3899  HeapTuple rtup;
3900  TupleDesc tupDesc;
3901  int nkey;
3902  int32 compare;
3903  Datum datum1,
3904  datum2;
3905  bool isnull1,
3906  isnull2;
3907  AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0];
3908 
3909  /* Be prepared to compare additional sort keys */
3910  ltup = (HeapTuple) a->tuple;
3911  rtup = (HeapTuple) b->tuple;
3912  tupDesc = state->tupDesc;
3913 
3914  /* Compare the leading sort key, if it's simple */
3915  if (leading != 0)
3916  {
3917  compare = ApplySortComparator(a->datum1, a->isnull1,
3918  b->datum1, b->isnull1,
3919  sortKey);
3920  if (compare != 0)
3921  return compare;
3922 
3923  if (sortKey->abbrev_converter)
3924  {
3925  datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
3926  datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
3927 
3928  compare = ApplySortAbbrevFullComparator(datum1, isnull1,
3929  datum2, isnull2,
3930  sortKey);
3931  }
3932  if (compare != 0 || state->nKeys == 1)
3933  return compare;
3934  /* Compare additional columns the hard way */
3935  sortKey++;
3936  nkey = 1;
3937  }
3938  else
3939  {
3940  /* Must compare all keys the hard way */
3941  nkey = 0;
3942  }
3943 
3944  if (state->indexInfo->ii_Expressions == NULL)
3945  {
3946  /* If not expression index, just compare the proper heap attrs */
3947 
3948  for (; nkey < state->nKeys; nkey++, sortKey++)
3949  {
3950  AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey];
3951 
3952  datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
3953  datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
3954 
3955  compare = ApplySortComparator(datum1, isnull1,
3956  datum2, isnull2,
3957  sortKey);
3958  if (compare != 0)
3959  return compare;
3960  }
3961  }
3962  else
3963  {
3964  /*
3965  * In the expression index case, compute the whole index tuple and
3966  * then compare values. It would perhaps be faster to compute only as
3967  * many columns as we need to compare, but that would require
3968  * duplicating all the logic in FormIndexDatum.
3969  */
3970  Datum l_index_values[INDEX_MAX_KEYS];
3971  bool l_index_isnull[INDEX_MAX_KEYS];
3972  Datum r_index_values[INDEX_MAX_KEYS];
3973  bool r_index_isnull[INDEX_MAX_KEYS];
3974  TupleTableSlot *ecxt_scantuple;
3975 
3976  /* Reset context each time to prevent memory leakage */
3977  ResetPerTupleExprContext(state->estate);
3978 
3979  ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple;
3980 
3981  ExecStoreHeapTuple(ltup, ecxt_scantuple, false);
3982  FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate,
3983  l_index_values, l_index_isnull);
3984 
3985  ExecStoreHeapTuple(rtup, ecxt_scantuple, false);
3986  FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate,
3987  r_index_values, r_index_isnull);
3988 
3989  for (; nkey < state->nKeys; nkey++, sortKey++)
3990  {
3991  compare = ApplySortComparator(l_index_values[nkey],
3992  l_index_isnull[nkey],
3993  r_index_values[nkey],
3994  r_index_isnull[nkey],
3995  sortKey);
3996  if (compare != 0)
3997  return compare;
3998  }
3999  }
4000 
4001  return 0;
4002 }

References a, SortSupportData::abbrev_converter, ApplySortAbbrevFullComparator(), ApplySortComparator(), b, compare(), ExecStoreHeapTuple(), FormIndexDatum(), GetPerTupleExprContext, heap_getattr, INDEX_MAX_KEYS, and ResetPerTupleExprContext.

Referenced by tuplesort_begin_cluster().

◆ comparetup_datum()

static int comparetup_datum ( const SortTuple * a,
const SortTuple * b,
Tuplesortstate * state 
)
static

Definition at line 4369 of file tuplesort.c.

4370 {
4371  int compare;
4372 
4373  compare = ApplySortComparator(a->datum1, a->isnull1,
4374  b->datum1, b->isnull1,
4375  state->sortKeys);
4376  if (compare != 0)
4377  return compare;
4378 
4379  /* if we have abbreviations, then "tuple" has the original value */
4380 
4381  if (state->sortKeys->abbrev_converter)
4382  compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
4383  PointerGetDatum(b->tuple), b->isnull1,
4384  state->sortKeys);
4385 
4386  return compare;
4387 }

References a, ApplySortAbbrevFullComparator(), ApplySortComparator(), b, compare(), and PointerGetDatum.

Referenced by tuplesort_begin_datum().

◆ comparetup_heap()

static int comparetup_heap ( const SortTuple * a,
const SortTuple * b,
Tuplesortstate * state 
)
static

Definition at line 3700 of file tuplesort.c.

3701 {
3702  SortSupport sortKey = state->sortKeys;
3703  HeapTupleData ltup;
3704  HeapTupleData rtup;
3705  TupleDesc tupDesc;
3706  int nkey;
3707  int32 compare;
3708  AttrNumber attno;
3709  Datum datum1,
3710  datum2;
3711  bool isnull1,
3712  isnull2;
3713 
3714 
3715  /* Compare the leading sort key */
3716  compare = ApplySortComparator(a->datum1, a->isnull1,
3717  b->datum1, b->isnull1,
3718  sortKey);
3719  if (compare != 0)
3720  return compare;
3721 
3722  /* Compare additional sort keys */
3723  ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
3724  ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
3725  rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
3726  rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
3727  tupDesc = state->tupDesc;
3728 
3729  if (sortKey->abbrev_converter)
3730  {
3731  attno = sortKey->ssup_attno;
3732 
3733  datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
3734  datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
3735 
3736  compare = ApplySortAbbrevFullComparator(datum1, isnull1,
3737  datum2, isnull2,
3738  sortKey);
3739  if (compare != 0)
3740  return compare;
3741  }
3742 
3743  sortKey++;
3744  for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
3745  {
3746  attno = sortKey->ssup_attno;
3747 
3748  datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
3749  datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
3750 
3751  compare = ApplySortComparator(datum1, isnull1,
3752  datum2, isnull2,
3753  sortKey);
3754  if (compare != 0)
3755  return compare;
3756  }
3757 
3758  return 0;
3759 }

References a, SortSupportData::abbrev_converter, ApplySortAbbrevFullComparator(), ApplySortComparator(), b, compare(), heap_getattr, MINIMAL_TUPLE_OFFSET, SortSupportData::ssup_attno, HeapTupleData::t_data, and HeapTupleData::t_len.

Referenced by tuplesort_begin_heap().

◆ comparetup_index_btree()

static int comparetup_index_btree ( const SortTuple * a,
const SortTuple * b,
Tuplesortstate * state 
)
static

Definition at line 4133 of file tuplesort.c.

4135 {
4136  /*
4137  * This is similar to comparetup_heap(), but expects index tuples. There
4138  * is also special handling for enforcing uniqueness, and special
4139  * treatment for equal keys at the end.
4140  */
4141  SortSupport sortKey = state->sortKeys;
4142  IndexTuple tuple1;
4143  IndexTuple tuple2;
4144  int keysz;
4145  TupleDesc tupDes;
4146  bool equal_hasnull = false;
4147  int nkey;
4148  int32 compare;
4149  Datum datum1,
4150  datum2;
4151  bool isnull1,
4152  isnull2;
4153 
4154 
4155  /* Compare the leading sort key */
4156  compare = ApplySortComparator(a->datum1, a->isnull1,
4157  b->datum1, b->isnull1,
4158  sortKey);
4159  if (compare != 0)
4160  return compare;
4161 
4162  /* Compare additional sort keys */
4163  tuple1 = (IndexTuple) a->tuple;
4164  tuple2 = (IndexTuple) b->tuple;
4165  keysz = state->nKeys;
4166  tupDes = RelationGetDescr(state->indexRel);
4167 
4168  if (sortKey->abbrev_converter)
4169  {
4170  datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
4171  datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
4172 
4173  compare = ApplySortAbbrevFullComparator(datum1, isnull1,
4174  datum2, isnull2,
4175  sortKey);
4176  if (compare != 0)
4177  return compare;
4178  }
4179 
4180  /* they are equal, so we only need to examine one null flag */
4181  if (a->isnull1)
4182  equal_hasnull = true;
4183 
4184  sortKey++;
4185  for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
4186  {
4187  datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
4188  datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
4189 
4190  compare = ApplySortComparator(datum1, isnull1,
4191  datum2, isnull2,
4192  sortKey);
4193  if (compare != 0)
4194  return compare; /* done when we find unequal attributes */
4195 
4196  /* they are equal, so we only need to examine one null flag */
4197  if (isnull1)
4198  equal_hasnull = true;
4199  }
4200 
4201  /*
4202  * If btree has asked us to enforce uniqueness, complain if two equal
4203  * tuples are detected (unless there was at least one NULL field).
4204  *
4205  * It is sufficient to make the test here, because if two tuples are equal
4206  * they *must* get compared at some stage of the sort --- otherwise the
4207  * sort algorithm wouldn't have checked whether one must appear before the
4208  * other.
4209  */
4210  if (state->enforceUnique && !equal_hasnull)
4211  {
4212  Datum values[INDEX_MAX_KEYS];
4213  bool isnull[INDEX_MAX_KEYS];
4214  char *key_desc;
4215 
4216  /*
4217  * Some rather brain-dead implementations of qsort (such as the one in
4218  * QNX 4) will sometimes call the comparison routine to compare a
4219  * value to itself, but we always use our own implementation, which
4220  * does not.
4221  */
4222  Assert(tuple1 != tuple2);
4223 
4224  index_deform_tuple(tuple1, tupDes, values, isnull);
4225 
4226  key_desc = BuildIndexValueDescription(state->indexRel, values, isnull);
4227 
4228  ereport(ERROR,
4229  (errcode(ERRCODE_UNIQUE_VIOLATION),
4230  errmsg("could not create unique index \"%s\"",
4231  RelationGetRelationName(state->indexRel)),
4232  key_desc ? errdetail("Key %s is duplicated.", key_desc) :
4233  errdetail("Duplicate keys exist."),
4234  errtableconstraint(state->heapRel,
4235  RelationGetRelationName(state->indexRel))));
4236  }
4237 
4238  /*
4239  * If key values are equal, we sort on ItemPointer. This is required for
4240  * btree indexes, since heap TID is treated as an implicit last key
4241  * attribute in order to ensure that all keys in the index are physically
4242  * unique.
4243  */
4244  {
4245  BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
4246  BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
4247 
4248  if (blk1 != blk2)
4249  return (blk1 < blk2) ? -1 : 1;
4250  }
4251  {
4252  OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
4253  OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
4254 
4255  if (pos1 != pos2)
4256  return (pos1 < pos2) ? -1 : 1;
4257  }
4258 
4259  /* ItemPointer values should never be equal */
4260  Assert(false);
4261 
4262  return 0;
4263 }

References a, SortSupportData::abbrev_converter, ApplySortAbbrevFullComparator(), ApplySortComparator(), Assert(), b, BuildIndexValueDescription(), compare(), ereport, errcode(), errdetail(), errmsg(), ERROR, errtableconstraint(), index_deform_tuple(), index_getattr, INDEX_MAX_KEYS, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, RelationGetDescr, RelationGetRelationName, IndexTupleData::t_tid, and values.

Referenced by tuplesort_begin_index_btree(), and tuplesort_begin_index_gist().

◆ comparetup_index_hash()

static int comparetup_index_hash ( const SortTuple * a,
const SortTuple * b,
Tuplesortstate * state 
)
static

Definition at line 4266 of file tuplesort.c.

4268 {
4269  Bucket bucket1;
4270  Bucket bucket2;
4271  IndexTuple tuple1;
4272  IndexTuple tuple2;
4273 
4274  /*
4275  * Fetch hash keys and mask off bits we don't want to sort by. We know
4276  * that the first column of the index tuple is the hash key.
4277  */
4278  Assert(!a->isnull1);
4279  bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
4280  state->max_buckets, state->high_mask,
4281  state->low_mask);
4282  Assert(!b->isnull1);
4283  bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
4284  state->max_buckets, state->high_mask,
4285  state->low_mask);
4286  if (bucket1 > bucket2)
4287  return 1;
4288  else if (bucket1 < bucket2)
4289  return -1;
4290 
4291  /*
4292  * If hash values are equal, we sort on ItemPointer. This does not affect
4293  * validity of the finished index, but it may be useful to have index
4294  * scans in physical order.
4295  */
4296  tuple1 = (IndexTuple) a->tuple;
4297  tuple2 = (IndexTuple) b->tuple;
4298 
4299  {
4300  BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
4301  BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
4302 
4303  if (blk1 != blk2)
4304  return (blk1 < blk2) ? -1 : 1;
4305  }
4306  {
4307  OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
4308  OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
4309 
4310  if (pos1 != pos2)
4311  return (pos1 < pos2) ? -1 : 1;
4312  }
4313 
4314  /* ItemPointer values should never be equal */
4315  Assert(false);
4316 
4317  return 0;
4318 }

References _hash_hashkey2bucket(), a, Assert(), b, DatumGetUInt32, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, and IndexTupleData::t_tid.

Referenced by tuplesort_begin_index_hash().

◆ consider_abort_common()

static bool consider_abort_common ( Tuplesortstate * state)
static

Definition at line 1999 of file tuplesort.c.

2000 {
2001  Assert(state->sortKeys[0].abbrev_converter != NULL);
2002  Assert(state->sortKeys[0].abbrev_abort != NULL);
2003  Assert(state->sortKeys[0].abbrev_full_comparator != NULL);
2004 
2005  /*
2006  * Check effectiveness of abbreviation optimization. Consider aborting
2007  * when still within memory limit.
2008  */
2009  if (state->status == TSS_INITIAL &&
2010  state->memtupcount >= state->abbrevNext)
2011  {
2012  state->abbrevNext *= 2;
2013 
2014  /*
2015  * Check opclass-supplied abbreviation abort routine. It may indicate
2016  * that abbreviation should not proceed.
2017  */
2018  if (!state->sortKeys->abbrev_abort(state->memtupcount,
2019  state->sortKeys))
2020  return false;
2021 
2022  /*
2023  * Finally, restore authoritative comparator, and indicate that
2024  * abbreviation is not in play by setting abbrev_converter to NULL
2025  */
2026  state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator;
2027  state->sortKeys[0].abbrev_converter = NULL;
2028  /* Not strictly necessary, but be tidy */
2029  state->sortKeys[0].abbrev_abort = NULL;
2030  state->sortKeys[0].abbrev_full_comparator = NULL;
2031 
2032  /* Give up - expect original pass-by-value representation */
2033  return true;
2034  }
2035 
2036  return false;
2037 }

References Assert(), and TSS_INITIAL.

Referenced by copytup_cluster(), copytup_heap(), tuplesort_putdatum(), and tuplesort_putindextuplevalues().
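
Because abbrevNext doubles on every check (it starts at 10 when the sort begins), abbreviation effectiveness is re-examined only at geometrically spaced tuple counts (10, 20, 40, 80, and so on), so the cost of the check stays negligible relative to the sort itself.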

◆ copytup_cluster()

static void copytup_cluster ( Tuplesortstate * state,
SortTuple * stup,
void *  tup 
)
static

Definition at line 4005 of file tuplesort.c.

4006 {
4007  HeapTuple tuple = (HeapTuple) tup;
4008  Datum original;
4009  MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
4010 
4011  /* copy the tuple into sort storage */
4012  tuple = heap_copytuple(tuple);
4013  stup->tuple = (void *) tuple;
4014  USEMEM(state, GetMemoryChunkSpace(tuple));
4015 
4016  MemoryContextSwitchTo(oldcontext);
4017 
4018  /*
4019  * set up first-column key value, and potentially abbreviate, if it's a
4020  * simple column
4021  */
4022  if (state->indexInfo->ii_IndexAttrNumbers[0] == 0)
4023  return;
4024 
4025  original = heap_getattr(tuple,
4026  state->indexInfo->ii_IndexAttrNumbers[0],
4027  state->tupDesc,
4028  &stup->isnull1);
4029 
4030  if (!state->sortKeys->abbrev_converter || stup->isnull1)
4031  {
4032  /*
4033  * Store ordinary Datum representation, or NULL value. If there is a
4034  * converter it won't expect NULL values, and cost model is not
4035  * required to account for NULL, so in that case we avoid calling
4036  * converter and just set datum1 to zeroed representation (to be
4037  * consistent, and to support cheap inequality tests for NULL
4038  * abbreviated keys).
4039  */
4040  stup->datum1 = original;
4041  }
4042  else if (!consider_abort_common(state))
4043  {
4044  /* Store abbreviated key representation */
4045  stup->datum1 = state->sortKeys->abbrev_converter(original,
4046  state->sortKeys);
4047  }
4048  else
4049  {
4050  /* Abort abbreviation */
4051  int i;
4052 
4053  stup->datum1 = original;
4054 
4055  /*
4056  * Set state to be consistent with never trying abbreviation.
4057  *
4058  * Alter datum1 representation in already-copied tuples, so as to
4059  * ensure a consistent representation (current tuple was just
4060  * handled). It does not matter if some dumped tuples are already
4061  * sorted on tape, since serialized tuples lack abbreviated keys
4062  * (TSS_BUILDRUNS state prevents control reaching here in any case).
4063  */
4064  for (i = 0; i < state->memtupcount; i++)
4065  {
4066  SortTuple *mtup = &state->memtuples[i];
4067 
4068  tuple = (HeapTuple) mtup->tuple;
4069  mtup->datum1 = heap_getattr(tuple,
4070  state->indexInfo->ii_IndexAttrNumbers[0],
4071  state->tupDesc,
4072  &mtup->isnull1);
4073  }
4074  }
4075 }

References consider_abort_common(), SortTuple::datum1, GetMemoryChunkSpace(), heap_copytuple(), heap_getattr, i, SortTuple::isnull1, MemoryContextSwitchTo(), SortTuple::tuple, and USEMEM.

Referenced by tuplesort_begin_cluster().

◆ copytup_datum()

static void copytup_datum ( Tuplesortstate * state,
SortTuple * stup,
void *  tup 
)
static

Definition at line 4390 of file tuplesort.c.

4391 {
4392  /* Not currently needed */
4393  elog(ERROR, "copytup_datum() should not be called");
4394 }

References elog, and ERROR.

Referenced by tuplesort_begin_datum().

◆ copytup_heap()

static void copytup_heap ( Tuplesortstate * state,
SortTuple * stup,
void *  tup 
)
static

Definition at line 3762 of file tuplesort.c.

3763 {
3764  /*
3765  * We expect the passed "tup" to be a TupleTableSlot, and form a
3766  * MinimalTuple using the exported interface for that.
3767  */
3768  TupleTableSlot *slot = (TupleTableSlot *) tup;
3769  Datum original;
3770  MinimalTuple tuple;
3771  HeapTupleData htup;
3772  MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
3773 
3774  /* copy the tuple into sort storage */
3775  tuple = ExecCopySlotMinimalTuple(slot);
3776  stup->tuple = (void *) tuple;
3777  USEMEM(state, GetMemoryChunkSpace(tuple));
3778  /* set up first-column key value */
3779  htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
3780  htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
3781  original = heap_getattr(&htup,
3782  state->sortKeys[0].ssup_attno,
3783  state->tupDesc,
3784  &stup->isnull1);
3785 
3786  MemoryContextSwitchTo(oldcontext);
3787 
3788  if (!state->sortKeys->abbrev_converter || stup->isnull1)
3789  {
3790  /*
3791  * Store ordinary Datum representation, or NULL value. If there is a
3792  * converter it won't expect NULL values, and cost model is not
3793  * required to account for NULL, so in that case we avoid calling
3794  * converter and just set datum1 to zeroed representation (to be
3795  * consistent, and to support cheap inequality tests for NULL
3796  * abbreviated keys).
3797  */
3798  stup->datum1 = original;
3799  }
3800  else if (!consider_abort_common(state))
3801  {
3802  /* Store abbreviated key representation */
3803  stup->datum1 = state->sortKeys->abbrev_converter(original,
3804  state->sortKeys);
3805  }
3806  else
3807  {
3808  /* Abort abbreviation */
3809  int i;
3810 
3811  stup->datum1 = original;
3812 
3813  /*
3814  * Set state to be consistent with never trying abbreviation.
3815  *
3816  * Alter datum1 representation in already-copied tuples, so as to
3817  * ensure a consistent representation (current tuple was just
3818  * handled). It does not matter if some dumped tuples are already
3819  * sorted on tape, since serialized tuples lack abbreviated keys
3820  * (TSS_BUILDRUNS state prevents control reaching here in any case).
3821  */
3822  for (i = 0; i < state->memtupcount; i++)
3823  {
3824  SortTuple *mtup = &state->memtuples[i];
3825 
3826  htup.t_len = ((MinimalTuple) mtup->tuple)->t_len +
3827  MINIMAL_TUPLE_OFFSET;
3828  htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple -
3829  MINIMAL_TUPLE_OFFSET);
3830 
3831  mtup->datum1 = heap_getattr(&htup,
3832  state->sortKeys[0].ssup_attno,
3833  state->tupDesc,
3834  &mtup->isnull1);
3835  }
3836  }
3837 }

References consider_abort_common(), SortTuple::datum1, ExecCopySlotMinimalTuple(), GetMemoryChunkSpace(), heap_getattr, i, SortTuple::isnull1, MemoryContextSwitchTo(), MINIMAL_TUPLE_OFFSET, HeapTupleData::t_data, HeapTupleData::t_len, MinimalTupleData::t_len, SortTuple::tuple, and USEMEM.

Referenced by tuplesort_begin_heap().

◆ copytup_index()

static void copytup_index ( Tuplesortstate * state,
SortTuple * stup,
void *  tup 
)
static

Definition at line 4321 of file tuplesort.c.

4322 {
4323  /* Not currently needed */
4324  elog(ERROR, "copytup_index() should not be called");
4325 }

References elog, and ERROR.

Referenced by tuplesort_begin_index_btree(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

◆ dumptuples()

static void dumptuples ( Tuplesortstate * state,
bool  alltuples 
)
static

Definition at line 3135 of file tuplesort.c.

3136 {
3137  int memtupwrite;
3138  int i;
3139 
3140  /*
3141  * Nothing to do if we still fit in available memory and have array slots,
3142  * unless this is the final call during initial run generation.
3143  */
3144  if (state->memtupcount < state->memtupsize && !LACKMEM(state) &&
3145  !alltuples)
3146  return;
3147 
3148  /*
3149  * Final call might require no sorting, in rare cases where we just so
3150  * happen to have previously LACKMEM()'d at the point where exactly all
3151  * remaining tuples are loaded into memory, just before input was
3152  * exhausted. In general, short final runs are quite possible, but avoid
3153  * creating a completely empty run. In a worker, though, we must produce
3154  * at least one tape, even if it's empty.
3155  */
3156  if (state->memtupcount == 0 && state->currentRun > 0)
3157  return;
3158 
3159  Assert(state->status == TSS_BUILDRUNS);
3160 
3161  /*
3162  * It seems unlikely that this limit will ever be exceeded, but take no
3163  * chances
3164  */
3165  if (state->currentRun == INT_MAX)
3166  ereport(ERROR,
3167  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
3168  errmsg("cannot have more than %d runs for an external sort",
3169  INT_MAX)));
3170 
3171  if (state->currentRun > 0)
3172  selectnewtape(state);
3173 
3174  state->currentRun++;
3175 
3176 #ifdef TRACE_SORT
3177  if (trace_sort)
3178  elog(LOG, "worker %d starting quicksort of run %d: %s",
3179  state->worker, state->currentRun,
3180  pg_rusage_show(&state->ru_start));
3181 #endif
3182 
3183  /*
3184  * Sort all tuples accumulated within the allowed amount of memory for
3185  * this run using quicksort
3186  */
3187  tuplesort_sort_memtuples(state);
3188 
3189 #ifdef TRACE_SORT
3190  if (trace_sort)
3191  elog(LOG, "worker %d finished quicksort of run %d: %s",
3192  state->worker, state->currentRun,
3193  pg_rusage_show(&state->ru_start));
3194 #endif
3195 
3196  memtupwrite = state->memtupcount;
3197  for (i = 0; i < memtupwrite; i++)
3198  {
3199  WRITETUP(state, state->destTape, &state->memtuples[i]);
3200  state->memtupcount--;
3201  }
3202 
3203  /*
3204  * Reset tuple memory. We've freed all of the tuples that we previously
3205  * allocated. It's important to avoid fragmentation when there is a stark
3206  * change in the sizes of incoming tuples. Fragmentation due to
3207  * AllocSetFree's bucketing by size class might be particularly bad if
3208  * this step wasn't taken.
3209  */
3210  MemoryContextReset(state->tuplecontext);
3211 
3212  markrunend(state->destTape);
3213 
3214 #ifdef TRACE_SORT
3215  if (trace_sort)
3216  elog(LOG, "worker %d finished writing run %d to tape %d: %s",
3217  state->worker, state->currentRun, (state->currentRun - 1) % state->nOutputTapes + 1,
3218  pg_rusage_show(&state->ru_start));
3219 #endif
3220 }

References Assert(), elog, ereport, errcode(), errmsg(), ERROR, i, LACKMEM, LOG, markrunend(), MemoryContextReset(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_sort_memtuples(), and WRITETUP.

Referenced by puttuple_common(), and tuplesort_performsort().

◆ free_sort_tuple()

static void free_sort_tuple ( Tuplesortstate * state,
SortTuple * stup 
)
static

Definition at line 4690 of file tuplesort.c.

4691 {
4692  if (stup->tuple)
4693  {
4694  FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
4695  pfree(stup->tuple);
4696  stup->tuple = NULL;
4697  }
4698 }

References FREEMEM, GetMemoryChunkSpace(), pfree(), and SortTuple::tuple.

Referenced by make_bounded_heap(), and puttuple_common().

◆ getlen()

static unsigned int getlen ( LogicalTape * tape,
bool  eofOK 
)
static

Definition at line 3645 of file tuplesort.c.

3646 {
3647  unsigned int len;
3648 
3649  if (LogicalTapeRead(tape,
3650  &len, sizeof(len)) != sizeof(len))
3651  elog(ERROR, "unexpected end of tape");
3652  if (len == 0 && !eofOK)
3653  elog(ERROR, "unexpected end of data");
3654  return len;
3655 }

References elog, ERROR, len, and LogicalTapeRead().

Referenced by mergereadnext(), and tuplesort_gettuple_common().

◆ grow_memtuples()

static bool grow_memtuples ( Tuplesortstate * state)
static

Definition at line 1568 of file tuplesort.c.

1569 {
1570  int newmemtupsize;
1571  int memtupsize = state->memtupsize;
1572  int64 memNowUsed = state->allowedMem - state->availMem;
1573 
1574  /* Forget it if we've already maxed out memtuples, per comment above */
1575  if (!state->growmemtuples)
1576  return false;
1577 
1578  /* Select new value of memtupsize */
1579  if (memNowUsed <= state->availMem)
1580  {
1581  /*
1582  * We've used no more than half of allowedMem; double our usage,
1583  * clamping at INT_MAX tuples.
1584  */
1585  if (memtupsize < INT_MAX / 2)
1586  newmemtupsize = memtupsize * 2;
1587  else
1588  {
1589  newmemtupsize = INT_MAX;
1590  state->growmemtuples = false;
1591  }
1592  }
1593  else
1594  {
1595  /*
1596  * This will be the last increment of memtupsize. Abandon doubling
1597  * strategy and instead increase as much as we safely can.
1598  *
1599  * To stay within allowedMem, we can't increase memtupsize by more
1600  * than availMem / sizeof(SortTuple) elements. In practice, we want
1601  * to increase it by considerably less, because we need to leave some
1602  * space for the tuples to which the new array slots will refer. We
1603  * assume the new tuples will be about the same size as the tuples
1604  * we've already seen, and thus we can extrapolate from the space
1605  * consumption so far to estimate an appropriate new size for the
1606  * memtuples array. The optimal value might be higher or lower than
1607  * this estimate, but it's hard to know that in advance. We again
1608  * clamp at INT_MAX tuples.
1609  *
1610  * This calculation is safe against enlarging the array so much that
1611  * LACKMEM becomes true, because the memory currently used includes
1612  * the present array; thus, there would be enough allowedMem for the
1613  * new array elements even if no other memory were currently used.
1614  *
1615  * We do the arithmetic in float8, because otherwise the product of
1616  * memtupsize and allowedMem could overflow. Any inaccuracy in the
1617  * result should be insignificant; but even if we computed a
1618  * completely insane result, the checks below will prevent anything
1619  * really bad from happening.
1620  */
1621  double grow_ratio;
1622 
1623  grow_ratio = (double) state->allowedMem / (double) memNowUsed;
1624  if (memtupsize * grow_ratio < INT_MAX)
1625  newmemtupsize = (int) (memtupsize * grow_ratio);
1626  else
1627  newmemtupsize = INT_MAX;
1628 
1629  /* We won't make any further enlargement attempts */
1630  state->growmemtuples = false;
1631  }
1632 
1633  /* Must enlarge array by at least one element, else report failure */
1634  if (newmemtupsize <= memtupsize)
1635  goto noalloc;
1636 
1637  /*
1638  * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp
1639  * to ensure our request won't be rejected. Note that we can easily
1640  * exhaust address space before facing this outcome. (This is presently
1641  * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
1642  * don't rely on that at this distance.)
1643  */
1644  if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple))
1645  {
1646  newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple));
1647  state->growmemtuples = false; /* can't grow any more */
1648  }
1649 
1650  /*
1651  * We need to be sure that we do not cause LACKMEM to become true, else
1652  * the space management algorithm will go nuts. The code above should
1653  * never generate a dangerous request, but to be safe, check explicitly
1654  * that the array growth fits within availMem. (We could still cause
1655  * LACKMEM if the memory chunk overhead associated with the memtuples
1656  * array were to increase. That shouldn't happen because we chose the
1657  * initial array size large enough to ensure that palloc will be treating
1658  * both old and new arrays as separate chunks. But we'll check LACKMEM
1659  * explicitly below just in case.)
1660  */
1661  if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple)))
1662  goto noalloc;
1663 
1664  /* OK, do it */
1665  FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
1666  state->memtupsize = newmemtupsize;
1667  state->memtuples = (SortTuple *)
1668  repalloc_huge(state->memtuples,
1669  state->memtupsize * sizeof(SortTuple));
1670  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
1671  if (LACKMEM(state))
1672  elog(ERROR, "unexpected out-of-memory situation in tuplesort");
1673  return true;
1674 
1675 noalloc:
1676  /* If for any reason we didn't realloc, shut off future attempts */
1677  state->growmemtuples = false;
1678  return false;
1679 }

References elog, ERROR, FREEMEM, GetMemoryChunkSpace(), LACKMEM, MaxAllocHugeSize, repalloc_huge(), and USEMEM.

Referenced by puttuple_common().
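
A worked example of the second branch, with illustrative numbers: if allowedMem is 64 MB and memNowUsed is 40 MB, grow_ratio = 64/40 = 1.6, so a memtuples array of 100,000 slots is enlarged one last time to 160,000 slots and growmemtuples is cleared; had less than half of allowedMem been in use, the array would simply have been doubled.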

◆ init_slab_allocator()

static void init_slab_allocator ( Tuplesortstate * state,
int  numSlots 
)
static

Definition at line 2807 of file tuplesort.c.

2808 {
2809  if (numSlots > 0)
2810  {
2811  char *p;
2812  int i;
2813 
2814  state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE);
2815  state->slabMemoryEnd = state->slabMemoryBegin +
2816  numSlots * SLAB_SLOT_SIZE;
2817  state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin;
2818  USEMEM(state, numSlots * SLAB_SLOT_SIZE);
2819 
2820  p = state->slabMemoryBegin;
2821  for (i = 0; i < numSlots - 1; i++)
2822  {
2823  ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE);
2824  p += SLAB_SLOT_SIZE;
2825  }
2826  ((SlabSlot *) p)->nextfree = NULL;
2827  }
2828  else
2829  {
2830  state->slabMemoryBegin = state->slabMemoryEnd = NULL;
2831  state->slabFreeHead = NULL;
2832  }
2833  state->slabAllocatorUsed = true;
2834 }

References i, palloc(), SLAB_SLOT_SIZE, and USEMEM.

Referenced by mergeruns().
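
After this setup, allocation is a free-list pop. A condensed sketch of how readtup_alloc() consumes the list built here (oversized requests bypass the slab entirely):

    /* pop one slot off the free list; large tuples get a plain allocation */
    if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead)
        return MemoryContextAlloc(state->sortcontext, tuplen);
    buf = state->slabFreeHead;
    state->slabFreeHead = buf->nextfree;    /* unlink the head slot */
    return buf;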

◆ inittapes()

static void inittapes ( Tuplesortstate * state,
bool  mergeruns 
)
static

Definition at line 2689 of file tuplesort.c.

2690 {
2691  Assert(!LEADER(state));
2692 
2693  if (mergeruns)
2694  {
2695  /* Compute number of input tapes to use when merging */
2696  state->maxTapes = tuplesort_merge_order(state->allowedMem);
2697  }
2698  else
2699  {
2700  /* Workers can sometimes produce single run, output without merge */
2701  Assert(WORKER(state));
2702  state->maxTapes = MINORDER;
2703  }
2704 
2705 #ifdef TRACE_SORT
2706  if (trace_sort)
2707  elog(LOG, "worker %d switching to external sort with %d tapes: %s",
2708  state->worker, state->maxTapes, pg_rusage_show(&state->ru_start));
2709 #endif
2710 
2711  /* Create the tape set */
2712  inittapestate(state, state->maxTapes);
2713  state->tapeset =
2714  LogicalTapeSetCreate(false,
2715  state->shared ? &state->shared->fileset : NULL,
2716  state->worker);
2717 
2718  state->currentRun = 0;
2719 
2720  /*
2721  * Initialize logical tape arrays.
2722  */
2723  state->inputTapes = NULL;
2724  state->nInputTapes = 0;
2725  state->nInputRuns = 0;
2726 
2727  state->outputTapes = palloc0(state->maxTapes * sizeof(LogicalTape *));
2728  state->nOutputTapes = 0;
2729  state->nOutputRuns = 0;
2730 
2731  state->status = TSS_BUILDRUNS;
2732 
2733  selectnewtape(state);
2734 }

References Assert(), elog, inittapestate(), LEADER, LOG, LogicalTapeSetCreate(), mergeruns(), MINORDER, palloc0(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, tuplesort_merge_order(), and WORKER.

Referenced by puttuple_common(), and tuplesort_performsort().

◆ inittapestate()

static void inittapestate ( Tuplesortstate * state,
int  maxTapes 
)
static

Definition at line 2740 of file tuplesort.c.

2741 {
2742  int64 tapeSpace;
2743 
2744  /*
2745  * Decrease availMem to reflect the space needed for tape buffers; but
2746  * don't decrease it to the point that we have no room for tuples. (That
2747  * case is only likely to occur if sorting pass-by-value Datums; in all
2748  * other scenarios the memtuples[] array is unlikely to occupy more than
2749  * half of allowedMem. In the pass-by-value case it's not important to
2750  * account for tuple space, so we don't care if LACKMEM becomes
2751  * inaccurate.)
2752  */
2753  tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD;
2754 
2755  if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem)
2756  USEMEM(state, tapeSpace);
2757 
2758  /*
2759  * Make sure that the temp file(s) underlying the tape set are created in
2760  * suitable temp tablespaces. For parallel sorts, this should have been
2761  * called already, but it doesn't matter if it is called a second time.
2762  */
2763  PrepareTempTablespaces();
2764 }

References GetMemoryChunkSpace(), PrepareTempTablespaces(), TAPE_BUFFER_OVERHEAD, and USEMEM.

Referenced by inittapes(), and leader_takeover_tapes().

◆ leader_takeover_tapes()

static void leader_takeover_tapes ( Tuplesortstate * state)
static

Definition at line 4631 of file tuplesort.c.

4632 {
4633  Sharedsort *shared = state->shared;
4634  int nParticipants = state->nParticipants;
4635  int workersFinished;
4636  int j;
4637 
4638  Assert(LEADER(state));
4639  Assert(nParticipants >= 1);
4640 
4641  SpinLockAcquire(&shared->mutex);
4642  workersFinished = shared->workersFinished;
4643  SpinLockRelease(&shared->mutex);
4644 
4645  if (nParticipants != workersFinished)
4646  elog(ERROR, "cannot take over tapes before all workers finish");
4647 
4648  /*
4649  * Create the tapeset from worker tapes, including a leader-owned tape at
4650  * the end. Parallel workers are far more expensive than logical tapes,
4651  * so the number of tapes allocated here should never be excessive.
4652  */
4653  inittapestate(state, nParticipants);
4654  state->tapeset = LogicalTapeSetCreate(false, &shared->fileset, -1);
4655 
4656  /*
4657  * Set currentRun to reflect the number of runs we will merge (it's not
4658  * used for anything, this is just pro forma)
4659  */
4660  state->currentRun = nParticipants;
4661 
4662  /*
4663  * Initialize the state to look the same as after building the initial
4664  * runs.
4665  *
4666  * There will always be exactly 1 run per worker, and exactly one input
4667  * tape per run, because workers always output exactly 1 run, even when
4668  * there were no input tuples for workers to sort.
4669  */
4670  state->inputTapes = NULL;
4671  state->nInputTapes = 0;
4672  state->nInputRuns = 0;
4673 
4674  state->outputTapes = palloc0(nParticipants * sizeof(LogicalTape *));
4675  state->nOutputTapes = nParticipants;
4676  state->nOutputRuns = nParticipants;
4677 
4678  for (j = 0; j < nParticipants; j++)
4679  {
4680  state->outputTapes[j] = LogicalTapeImport(state->tapeset, j, &shared->tapes[j]);
4681  }
4682 
4683  state->status = TSS_BUILDRUNS;
4684 }
int j
Definition: isn.c:74
LogicalTape * LogicalTapeImport(LogicalTapeSet *lts, int worker, TapeShare *shared)
Definition: logtape.c:610
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
SharedFileSet fileset
Definition: tuplesort.c:505
TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]
Definition: tuplesort.c:514
int workersFinished
Definition: tuplesort.c:502
slock_t mutex
Definition: tuplesort.c:491

References Assert(), elog, ERROR, Sharedsort::fileset, inittapestate(), j, LEADER, LogicalTapeImport(), LogicalTapeSetCreate(), Sharedsort::mutex, palloc0(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, TSS_BUILDRUNS, and Sharedsort::workersFinished.

Referenced by tuplesort_performsort().

◆ make_bounded_heap()

static void make_bounded_heap ( Tuplesortstate * state)
static

Definition at line 3411 of file tuplesort.c.

3412 {
3413  int tupcount = state->memtupcount;
3414  int i;
3415 
3416  Assert(state->status == TSS_INITIAL);
3417  Assert(state->bounded);
3418  Assert(tupcount >= state->bound);
3419  Assert(SERIAL(state));
3420 
3421  /* Reverse sort direction so largest entry will be at root */
3422  reversedirection(state);
3423 
3424  state->memtupcount = 0; /* make the heap empty */
3425  for (i = 0; i < tupcount; i++)
3426  {
3427  if (state->memtupcount < state->bound)
3428  {
3429  /* Insert next tuple into heap */
3430  /* Must copy source tuple to avoid possible overwrite */
3431  SortTuple stup = state->memtuples[i];
3432 
3433  tuplesort_heap_insert(state, &stup);
3434  }
3435  else
3436  {
3437  /*
3438  * The heap is full. Replace the largest entry with the new
3439  * tuple, or just discard it, if it's larger than anything already
3440  * in the heap.
3441  */
3442  if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
3443  {
3444  free_sort_tuple(state, &state->memtuples[i]);
3445  CHECK_FOR_INTERRUPTS();
3446  }
3447  else
3448  tuplesort_heap_replace_top(state, &state->memtuples[i]);
3449  }
3450  }
3451 
3452  Assert(state->memtupcount == state->bound);
3453  state->status = TSS_BOUNDED;
3454 }
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
#define COMPARETUP(state, a, b)
Definition: tuplesort.c:540
#define SERIAL(state)
Definition: tuplesort.c:547
static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup)
Definition: tuplesort.c:4690
static void reversedirection(Tuplesortstate *state)
Definition: tuplesort.c:3627
static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
Definition: tuplesort.c:3587

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, free_sort_tuple(), i, reversedirection(), SERIAL, TSS_BOUNDED, TSS_INITIAL, tuplesort_heap_insert(), and tuplesort_heap_replace_top().

Referenced by puttuple_common().
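
The bounded-heap idea can be shown without any tuplesort machinery. Below is a minimal sketch over plain ints, assuming a hand-rolled max-heap (the analogue of running with the sort direction reversed, so the largest survivor sits at the root): it keeps the BOUND smallest values of a stream, replacing the root only when a newcomer beats it, exactly as the loop above does.

    /* Illustration only, not the tuplesort implementation. */
    #include <stdio.h>

    #define BOUND 3

    static int heap[BOUND];
    static int heapcount = 0;

    static void
    sift_down(int i)
    {
        for (;;)
        {
            int largest = i, l = 2 * i + 1, r = 2 * i + 2;

            if (l < heapcount && heap[l] > heap[largest]) largest = l;
            if (r < heapcount && heap[r] > heap[largest]) largest = r;
            if (largest == i) return;
            int tmp = heap[i]; heap[i] = heap[largest]; heap[largest] = tmp;
            i = largest;
        }
    }

    static void
    put_bounded(int v)
    {
        if (heapcount < BOUND)
        {
            /* heap not full yet: sift the new value up */
            int i = heapcount++;
            heap[i] = v;
            while (i > 0 && heap[(i - 1) / 2] < heap[i])
            {
                int p = (i - 1) / 2;
                int tmp = heap[i]; heap[i] = heap[p]; heap[p] = tmp;
                i = p;
            }
        }
        else if (v < heap[0])   /* beats the current worst: replace the root */
        {
            heap[0] = v;
            sift_down(0);
        }                       /* else: discard, as in puttuple_common */
    }

    int
    main(void)
    {
        int input[] = {9, 4, 7, 1, 8, 2, 6};

        for (int i = 0; i < 7; i++)
            put_bounded(input[i]);
        for (int i = 0; i < heapcount; i++)
            printf("%d ", heap[i]);     /* the 3 smallest, heap-ordered: 4 2 1 */
        printf("\n");
        return 0;
    }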

◆ markrunend()

static void markrunend ( LogicalTape * tape)
static

Definition at line 3658 of file tuplesort.c.

3659 {
3660  unsigned int len = 0;
3661 
3662  LogicalTapeWrite(tape, (void *) &len, sizeof(len));
3663 }
void LogicalTapeWrite(LogicalTape *lt, void *ptr, size_t size)
Definition: logtape.c:762

References len, and LogicalTapeWrite().

Referenced by dumptuples(), and mergeonerun().

◆ merge_read_buffer_size()

static int64 merge_read_buffer_size ( int64  avail_mem,
int  nInputTapes,
int  nInputRuns,
int  maxOutputTapes 
)
static

Definition at line 2657 of file tuplesort.c.

2659 {
2660  int nOutputRuns;
2661  int nOutputTapes;
2662 
2663  /*
2664  * How many output tapes will we produce in this pass?
2665  *
2666  * This is nInputRuns / nInputTapes, rounded up.
2667  */
2668  nOutputRuns = (nInputRuns + nInputTapes - 1) / nInputTapes;
2669 
2670  nOutputTapes = Min(nOutputRuns, maxOutputTapes);
2671 
2672  /*
2673  * Each output tape consumes TAPE_BUFFER_OVERHEAD bytes of memory. All
2674  * remaining memory is divided evenly between the input tapes.
2675  *
2676  * This also follows from the formula in tuplesort_merge_order, but here
2677  * we derive the input buffer size from the amount of memory available,
2678  * and M and N.
2679  */
2680  return Max((avail_mem - TAPE_BUFFER_OVERHEAD * nOutputTapes) / nInputTapes, 0);
2681 }

References Max, Min, and TAPE_BUFFER_OVERHEAD.

Referenced by mergeruns().
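
A worked example of the formula, assuming TAPE_BUFFER_OVERHEAD is the default BLCKSZ of 8192: merging 18 runs from 6 input tapes needs ceil(18/6) = 3 output tapes, so 1 MB of tape memory leaves (1048576 - 3*8192)/6 = 170666 bytes per input tape.

    /* Re-derivation of the formula above with assumed constants. */
    #include <stdint.h>
    #include <stdio.h>

    #define TAPE_BUFFER_OVERHEAD 8192   /* assumed: default BLCKSZ */

    static int64_t
    merge_read_buffer_size_demo(int64_t avail_mem, int nInputTapes,
                                int nInputRuns, int maxOutputTapes)
    {
        /* output runs this pass: nInputRuns / nInputTapes, rounded up */
        int nOutputRuns = (nInputRuns + nInputTapes - 1) / nInputTapes;
        int nOutputTapes = nOutputRuns < maxOutputTapes ? nOutputRuns : maxOutputTapes;
        int64_t size = (avail_mem -
                        (int64_t) TAPE_BUFFER_OVERHEAD * nOutputTapes) / nInputTapes;

        return size > 0 ? size : 0;
    }

    int
    main(void)
    {
        printf("%lld\n",
               (long long) merge_read_buffer_size_demo(1048576, 6, 18, 6));
        return 0;   /* prints 170666 */
    }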

◆ mergeonerun()

static void mergeonerun ( Tuplesortstate * state)
static

Definition at line 3029 of file tuplesort.c.

3030 {
3031  int srcTapeIndex;
3032  LogicalTape *srcTape;
3033 
3034  /*
3035  * Start the merge by loading one tuple from each active source tape into
3036  * the heap.
3037  */
3038  beginmerge(state);
3039 
3040  /*
3041  * Execute merge by repeatedly extracting lowest tuple in heap, writing it
3042  * out, and replacing it with next tuple from same tape (if there is
3043  * another one).
3044  */
3045  while (state->memtupcount > 0)
3046  {
3047  SortTuple stup;
3048 
3049  /* write the tuple to destTape */
3050  srcTapeIndex = state->memtuples[0].srctape;
3051  srcTape = state->inputTapes[srcTapeIndex];
3052  WRITETUP(state, state->destTape, &state->memtuples[0]);
3053 
3054  /* recycle the slot of the tuple we just wrote out, for the next read */
3055  if (state->memtuples[0].tuple)
3056  RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple);
3057 
3058  /*
3059  * pull next tuple from the tape, and replace the written-out tuple in
3060  * the heap with it.
3061  */
3062  if (mergereadnext(state, srcTape, &stup))
3063  {
3064  stup.srctape = srcTapeIndex;
3065  tuplesort_heap_replace_top(state, &stup);
3066 
3067  }
3068  else
3069  {
3070  tuplesort_heap_delete_top(state);
3071  state->nInputRuns--;
3072  }
3073  }
3074 
3075  /*
3076  * When the heap empties, we're done. Write an end-of-run marker on the
3077  * output tape.
3078  */
3079  markrunend(state->destTape);
3080 }
static void tuplesort_heap_delete_top(Tuplesortstate *state)
Definition: tuplesort.c:3563
static void beginmerge(Tuplesortstate *state)
Definition: tuplesort.c:3088
#define RELEASE_SLAB_SLOT(state, tuple)
Definition: tuplesort.c:528

References beginmerge(), markrunend(), mergereadnext(), RELEASE_SLAB_SLOT, SortTuple::srctape, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), and WRITETUP.

Referenced by mergeruns().
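
Stripped of tapes and slab slots, mergeonerun() is a textbook k-way merge. A self-contained sketch over sorted int arrays follows; a linear scan stands in for the binary heap purely for brevity (tuplesort of course uses the heap, so each step costs O(log k) rather than O(k)):

    /* Schematic k-way merge in the spirit of mergeonerun(); data assumed. */
    #include <stdio.h>

    #define NTAPES 3

    int
    main(void)
    {
        int tapes[NTAPES][4] = {{1, 5, 9, 12}, {2, 6, 7, 13}, {3, 4, 10, 11}};
        int pos[NTAPES] = {0, 0, 0};
        int len[NTAPES] = {4, 4, 4};

        for (;;)
        {
            int best = -1;

            /* pick the tape whose current element is smallest ("heap root") */
            for (int t = 0; t < NTAPES; t++)
                if (pos[t] < len[t] &&
                    (best < 0 || tapes[t][pos[t]] < tapes[best][pos[best]]))
                    best = t;
            if (best < 0)
                break;          /* all inputs exhausted: end of run */
            printf("%d ", tapes[best][pos[best]]);  /* "WRITETUP" to output */
            pos[best]++;        /* "mergereadnext" from the same tape */
        }
        printf("\n");
        return 0;
    }

The replace-from-the-same-source step is what keeps exactly one element per input live at a time, which is why the slab allocator above needs only one slot per tape.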

◆ mergereadnext()

static bool mergereadnext ( Tuplesortstate * state,
LogicalTape * srcTape,
SortTuple * stup 
)
static

Definition at line 3116 of file tuplesort.c.

3117 {
3118  unsigned int tuplen;
3119 
3120  /* read next tuple, if any */
3121  if ((tuplen = getlen(srcTape, true)) == 0)
3122  return false;
3123  READTUP(state, stup, srcTape, tuplen);
3124 
3125  return true;
3126 }
static unsigned int getlen(LogicalTape *tape, bool eofOK)
Definition: tuplesort.c:3645
#define READTUP(state, stup, tape, len)
Definition: tuplesort.c:543

References getlen(), and READTUP.

Referenced by beginmerge(), mergeonerun(), and tuplesort_gettuple_common().

◆ mergeruns()

static void mergeruns ( Tuplesortstate * state)
static

Definition at line 2843 of file tuplesort.c.

2844 {
2845  int tapenum;
2846 
2847  Assert(state->status == TSS_BUILDRUNS);
2848  Assert(state->memtupcount == 0);
2849 
2850  if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL)
2851  {
2852  /*
2853  * If there are multiple runs to be merged, when we go to read back
2854  * tuples from disk, abbreviated keys will not have been stored, and
2855  * we don't care to regenerate them. Disable abbreviation from this
2856  * point on.
2857  */
2858  state->sortKeys->abbrev_converter = NULL;
2859  state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
2860 
2861  /* Not strictly necessary, but be tidy */
2862  state->sortKeys->abbrev_abort = NULL;
2863  state->sortKeys->abbrev_full_comparator = NULL;
2864  }
2865 
2866  /*
2867  * Reset tuple memory. We've freed all the tuples that we previously
2868  * allocated. We will use the slab allocator from now on.
2869  */
2870  MemoryContextResetOnly(state->tuplecontext);
2871 
2872  /*
2873  * We no longer need a large memtuples array. (We will allocate a smaller
2874  * one for the heap later.)
2875  */
2876  FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
2877  pfree(state->memtuples);
2878  state->memtuples = NULL;
2879 
2880  /*
2881  * Initialize the slab allocator. We need one slab slot per input tape,
2882  * for the tuples in the heap, plus one to hold the tuple last returned
2883  * from tuplesort_gettuple. (If we're sorting pass-by-val Datums,
2884  * however, we don't need to allocate anything.)
2885  *
2886  * In a multi-pass merge, we could shrink this allocation for the last
2887  * merge pass, if it has fewer tapes than previous passes, but we don't
2888  * bother.
2889  *
2890  * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism
2891  * to track memory usage of individual tuples.
2892  */
2893  if (state->tuples)
2894  init_slab_allocator(state, state->nOutputTapes + 1);
2895  else
2896  init_slab_allocator(state, 0);
2897 
2898  /*
2899  * Allocate a new 'memtuples' array, for the heap. It will hold one tuple
2900  * from each input tape.
2901  *
2902  * We could shrink this, too, between passes in a multi-pass merge, but we
2903  * don't bother. (The initial input tapes are still in outputTapes. The
2904  * number of input tapes will not increase between passes.)
2905  */
2906  state->memtupsize = state->nOutputTapes;
2907  state->memtuples = (SortTuple *) MemoryContextAlloc(state->maincontext,
2908  state->nOutputTapes * sizeof(SortTuple));
2909  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
2910 
2911  /*
2912  * Use all the remaining memory we have available for tape buffers among
2913  * all the input tapes. At the beginning of each merge pass, we will
2914  * divide this memory between the input and output tapes in the pass.
2915  */
2916  state->tape_buffer_mem = state->availMem;
2917  USEMEM(state, state->tape_buffer_mem);
2918 #ifdef TRACE_SORT
2919  if (trace_sort)
2920  elog(LOG, "worker %d using %zu KB of memory for tape buffers",
2921  state->worker, state->tape_buffer_mem / 1024);
2922 #endif
2923 
2924  for (;;)
2925  {
2926  /*
2927  * On the first iteration, or if we have read all the runs from the
2928  * input tapes in a multi-pass merge, it's time to start a new pass.
2929  * Rewind all the output tapes, and make them inputs for the next
2930  * pass.
2931  */
2932  if (state->nInputRuns == 0)
2933  {
2934  int64 input_buffer_size;
2935 
2936  /* Close the old, emptied, input tapes */
2937  if (state->nInputTapes > 0)
2938  {
2939  for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
2940  LogicalTapeClose(state->inputTapes[tapenum]);
2941  pfree(state->inputTapes);
2942  }
2943 
2944  /* Previous pass's outputs become next pass's inputs. */
2945  state->inputTapes = state->outputTapes;
2946  state->nInputTapes = state->nOutputTapes;
2947  state->nInputRuns = state->nOutputRuns;
2948 
2949  /*
2950  * Reset output tape variables. The actual LogicalTapes will be
2951  * created as needed, here we only allocate the array to hold
2952  * them.
2953  */
2954  state->outputTapes = palloc0(state->nInputTapes * sizeof(LogicalTape *));
2955  state->nOutputTapes = 0;
2956  state->nOutputRuns = 0;
2957 
2958  /*
2959  * Redistribute the memory allocated for tape buffers, among the
2960  * new input and output tapes.
2961  */
2962  input_buffer_size = merge_read_buffer_size(state->tape_buffer_mem,
2963  state->nInputTapes,
2964  state->nInputRuns,
2965  state->maxTapes);
2966 
2967 #ifdef TRACE_SORT
2968  if (trace_sort)
2969  elog(LOG, "starting merge pass of %d input runs on %d tapes, " INT64_FORMAT " KB of memory for each input tape: %s",
2970  state->nInputRuns, state->nInputTapes, input_buffer_size / 1024,
2971  pg_rusage_show(&state->ru_start));
2972 #endif
2973 
2974  /* Prepare the new input tapes for merge pass. */
2975  for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
2976  LogicalTapeRewindForRead(state->inputTapes[tapenum], input_buffer_size);
2977 
2978  /*
2979  * If there's just one run left on each input tape, then only one
2980  * merge pass remains. If we don't have to produce a materialized
2981  * sorted tape, we can stop at this point and do the final merge
2982  * on-the-fly.
2983  */
2984  if (!state->randomAccess && state->nInputRuns <= state->nInputTapes
2985  && !WORKER(state))
2986  {
2987  /* Tell logtape.c we won't be writing anymore */
2988  LogicalTapeSetForgetFreeSpace(state->tapeset);
2989  /* Initialize for the final merge pass */
2990  beginmerge(state);
2991  state->status = TSS_FINALMERGE;
2992  return;
2993  }
2994  }
2995 
2996  /* Select an output tape */
2997  selectnewtape(state);
2998 
2999  /* Merge one run from each input tape. */
3000  mergeonerun(state);
3001 
3002  /*
3003  * If the input tapes are empty, and we output only one output run,
3004  * we're done. The current output tape contains the final result.
3005  */
3006  if (state->nInputRuns == 0 && state->nOutputRuns <= 1)
3007  break;
3008  }
3009 
3010  /*
3011  * Done. The result is on a single run on a single tape.
3012  */
3013  state->result_tape = state->outputTapes[0];
3014  if (!WORKER(state))
3015  LogicalTapeFreeze(state->result_tape, NULL);
3016  else
3017  worker_freeze_result_tape(state);
3018  state->status = TSS_SORTEDONTAPE;
3019 
3020  /* Close all the now-empty input tapes, to release their read buffers. */
3021  for (tapenum = 0; tapenum < state->nInputTapes; tapenum++)
3022  LogicalTapeClose(state->inputTapes[tapenum]);
3023 }
#define INT64_FORMAT
Definition: c.h:483
void LogicalTapeRewindForRead(LogicalTape *lt, size_t buffer_size)
Definition: logtape.c:847
void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
Definition: logtape.c:751
void LogicalTapeClose(LogicalTape *lt)
Definition: logtape.c:734
void LogicalTapeFreeze(LogicalTape *lt, TapeShare *share)
Definition: logtape.c:982
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void MemoryContextResetOnly(MemoryContext context)
Definition: mcxt.c:162
static void mergeonerun(Tuplesortstate *state)
Definition: tuplesort.c:3029
static int64 merge_read_buffer_size(int64 avail_mem, int nInputTapes, int nInputRuns, int maxOutputTapes)
Definition: tuplesort.c:2657
static void worker_freeze_result_tape(Tuplesortstate *state)
Definition: tuplesort.c:4571
static void init_slab_allocator(Tuplesortstate *state, int numSlots)
Definition: tuplesort.c:2807

References Assert(), beginmerge(), elog, FREEMEM, GetMemoryChunkSpace(), init_slab_allocator(), INT64_FORMAT, LOG, LogicalTapeClose(), LogicalTapeFreeze(), LogicalTapeRewindForRead(), LogicalTapeSetForgetFreeSpace(), MemoryContextAlloc(), MemoryContextResetOnly(), merge_read_buffer_size(), mergeonerun(), palloc0(), pfree(), pg_rusage_show(), selectnewtape(), trace_sort, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_SORTEDONTAPE, USEMEM, WORKER, and worker_freeze_result_tape().

Referenced by inittapes(), and tuplesort_performsort().
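
Each pass shrinks the number of runs by roughly a factor of nInputTapes, so the loop above terminates after about ceil(log(nInputRuns) / log(nInputTapes)) passes. A back-of-envelope sketch with assumed numbers:

    /* Pass count for the merge loop above; 100 runs on 6 tapes is assumed. */
    #include <stdio.h>

    int
    main(void)
    {
        int nRuns = 100, nTapes = 6, passes = 0;

        while (nRuns > 1)
        {
            nRuns = (nRuns + nTapes - 1) / nTapes;  /* runs left after one pass */
            passes++;
        }
        printf("%d passes\n", passes);  /* 100 -> 17 -> 3 -> 1: 3 passes */
        return 0;
    }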

◆ puttuple_common()

static void puttuple_common ( Tuplesortstate * state,
SortTuple * tuple 
)
static

Definition at line 1890 of file tuplesort.c.

1891 {
1892  Assert(!LEADER(state));
1893 
1894  switch (state->status)
1895  {
1896  case TSS_INITIAL:
1897 
1898  /*
1899  * Save the tuple into the unsorted array. First, grow the array
1900  * as needed. Note that we try to grow the array when there is
1901  * still one free slot remaining --- if we fail, there'll still be
1902  * room to store the incoming tuple, and then we'll switch to
1903  * tape-based operation.
1904  */
1905  if (state->memtupcount >= state->memtupsize - 1)
1906  {
1907  (void) grow_memtuples(state);
1908  Assert(state->memtupcount < state->memtupsize);
1909  }
1910  state->memtuples[state->memtupcount++] = *tuple;
1911 
1912  /*
1913  * Check if it's time to switch over to a bounded heapsort. We do
1914  * so if the input tuple count exceeds twice the desired tuple
1915  * count (this is a heuristic for where heapsort becomes cheaper
1916  * than a quicksort), or if we've just filled workMem and have
1917  * enough tuples to meet the bound.
1918  *
1919  * Note that once we enter TSS_BOUNDED state we will always try to
1920  * complete the sort that way. In the worst case, if later input
1921  * tuples are larger than earlier ones, this might cause us to
1922  * exceed workMem significantly.
1923  */
1924  if (state->bounded &&
1925  (state->memtupcount > state->bound * 2 ||
1926  (state->memtupcount > state->bound && LACKMEM(state))))
1927  {
1928 #ifdef TRACE_SORT
1929  if (trace_sort)
1930  elog(LOG, "switching to bounded heapsort at %d tuples: %s",
1931  state->memtupcount,
1932  pg_rusage_show(&state->ru_start));
1933 #endif
1934  make_bounded_heap(state);
1935  return;
1936  }
1937 
1938  /*
1939  * Done if we still fit in available memory and have array slots.
1940  */
1941  if (state->memtupcount < state->memtupsize && !LACKMEM(state))
1942  return;
1943 
1944  /*
1945  * Nope; time to switch to tape-based operation.
1946  */
1947  inittapes(state, true);
1948 
1949  /*
1950  * Dump all tuples.
1951  */
1952  dumptuples(state, false);
1953  break;
1954 
1955  case TSS_BOUNDED:
1956 
1957  /*
1958  * We don't want to grow the array here, so check whether the new
1959  * tuple can be discarded before putting it in. This should be a
1960  * good speed optimization, too, since when there are many more
1961  * input tuples than the bound, most input tuples can be discarded
1962  * with just this one comparison. Note that because we currently
1963  * have the sort direction reversed, we must check for <= not >=.
1964  */
1965  if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0)
1966  {
1967  /* new tuple <= top of the heap, so we can discard it */
1968  free_sort_tuple(state, tuple);
1969  CHECK_FOR_INTERRUPTS();
1970  }
1971  else
1972  {
1973  /* discard top of heap, replacing it with the new tuple */
1974  free_sort_tuple(state, &state->memtuples[0]);
1975  tuplesort_heap_replace_top(state, tuple);
1976  }
1977  break;
1978 
1979  case TSS_BUILDRUNS:
1980 
1981  /*
1982  * Save the tuple into the unsorted array (there must be space)
1983  */
1984  state->memtuples[state->memtupcount++] = *tuple;
1985 
1986  /*
1987  * If we are over the memory limit, dump all tuples.
1988  */
1989  dumptuples(state, false);
1990  break;
1991 
1992  default:
1993  elog(ERROR, "invalid tuplesort state");
1994  break;
1995  }
1996 }
static bool grow_memtuples(Tuplesortstate *state)
Definition: tuplesort.c:1568
static void make_bounded_heap(Tuplesortstate *state)
Definition: tuplesort.c:3411
static void inittapes(Tuplesortstate *state, bool mergeruns)
Definition: tuplesort.c:2689
static void dumptuples(Tuplesortstate *state, bool alltuples)
Definition: tuplesort.c:3135

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, dumptuples(), elog, ERROR, free_sort_tuple(), grow_memtuples(), inittapes(), LACKMEM, LEADER, LOG, make_bounded_heap(), pg_rusage_show(), trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_INITIAL, and tuplesort_heap_replace_top().

Referenced by tuplesort_putdatum(), tuplesort_putheaptuple(), tuplesort_putindextuplevalues(), and tuplesort_puttupleslot().

◆ readtup_alloc()

static void * readtup_alloc ( Tuplesortstate * state,
Size  tuplen 
)
static

Definition at line 3672 of file tuplesort.c.

3673 {
3674  SlabSlot *buf;
3675 
3676  /*
3677  * We pre-allocate enough slots in the slab arena that we should never run
3678  * out.
3679  */
3680  Assert(state->slabFreeHead);
3681 
3682  if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead)
3683  return MemoryContextAlloc(state->sortcontext, tuplen);
3684  else
3685  {
3686  buf = state->slabFreeHead;
3687  /* Reuse this slot */
3688  state->slabFreeHead = buf->nextfree;
3689 
3690  return buf;
3691  }
3692 }

References Assert(), buf, MemoryContextAlloc(), and SLAB_SLOT_SIZE.

Referenced by readtup_cluster(), readtup_datum(), readtup_heap(), and readtup_index().
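
The free list behind this allocator is the usual intrusive pattern: idle slots are chained through their own storage via the union's nextfree member. A minimal standalone sketch (slot count and size here are assumptions, not the SLAB_SLOT_SIZE or state values used above):

    /* Intrusive slab free list, illustration only. */
    #include <stdlib.h>
    #include <stdio.h>

    #define SLOT_SIZE 1024
    #define NSLOTS    4

    typedef union Slot
    {
        union Slot *nextfree;       /* valid while the slot is on the free list */
        char        buffer[SLOT_SIZE];
    } Slot;

    static Slot  arena[NSLOTS];
    static Slot *freehead;

    static void *
    slab_alloc(size_t len)
    {
        Slot *s;

        if (len > SLOT_SIZE || freehead == NULL)
            return malloc(len);     /* oversized request: fall back to the heap */
        s = freehead;
        freehead = s->nextfree;     /* pop a slot off the free list */
        return s;
    }

    int
    main(void)
    {
        /* chain all slots onto the free list */
        for (int i = 0; i < NSLOTS; i++)
        {
            arena[i].nextfree = freehead;
            freehead = &arena[i];
        }
        void *p = slab_alloc(100);  /* served from the arena */
        printf("%s\n", p == (void *) &arena[NSLOTS - 1] ? "slab slot" : "heap");
        return 0;
    }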

◆ readtup_cluster()

static void readtup_cluster ( Tuplesortstate * state,
SortTuple * stup,
LogicalTape * tape,
unsigned int  tuplen 
)
static

Definition at line 4098 of file tuplesort.c.

4100 {
4101  unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
4102  HeapTuple tuple = (HeapTuple) readtup_alloc(state,
4103  t_len + HEAPTUPLESIZE);
4104 
4105  /* Reconstruct the HeapTupleData header */
4106  tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
4107  tuple->t_len = t_len;
4108  LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
4109  /* We don't currently bother to reconstruct t_tableOid */
4110  tuple->t_tableOid = InvalidOid;
4111  /* Read in the tuple body */
4112  LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len);
4113  if (state->randomAccess) /* need trailing length word? */
4114  LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
4115  stup->tuple = (void *) tuple;
4116  /* set up first-column key value, if it's a simple column */
4117  if (state->indexInfo->ii_IndexAttrNumbers[0] != 0)
4118  stup->datum1 = heap_getattr(tuple,
4119  state->indexInfo->ii_IndexAttrNumbers[0],
4120  state->tupDesc,
4121  &stup->isnull1);
4122 }
#define HEAPTUPLESIZE
Definition: htup.h:73
struct ItemPointerData ItemPointerData
#define InvalidOid
Definition: postgres_ext.h:36
ItemPointerData t_self
Definition: htup.h:65
Oid t_tableOid
Definition: htup.h:66
#define LogicalTapeReadExact(tape, ptr, len)
Definition: tuplesort.c:601
static void * readtup_alloc(Tuplesortstate *state, Size tuplen)
Definition: tuplesort.c:3672

References SortTuple::datum1, heap_getattr, HEAPTUPLESIZE, InvalidOid, SortTuple::isnull1, LogicalTapeReadExact, readtup_alloc(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, and SortTuple::tuple.

Referenced by tuplesort_begin_cluster().

◆ readtup_datum()

static void readtup_datum ( Tuplesortstate * state,
SortTuple * stup,
LogicalTape * tape,
unsigned int  len 
)
static

Definition at line 4435 of file tuplesort.c.

4437 {
4438  unsigned int tuplen = len - sizeof(unsigned int);
4439 
4440  if (tuplen == 0)
4441  {
4442  /* it's NULL */
4443  stup->datum1 = (Datum) 0;
4444  stup->isnull1 = true;
4445  stup->tuple = NULL;
4446  }
4447  else if (!state->tuples)
4448  {
4449  Assert(tuplen == sizeof(Datum));
4450  LogicalTapeReadExact(tape, &stup->datum1, tuplen);
4451  stup->isnull1 = false;
4452  stup->tuple = NULL;
4453  }
4454  else
4455  {
4456  void *raddr = readtup_alloc(state, tuplen);
4457 
4458  LogicalTapeReadExact(tape, raddr, tuplen);
4459  stup->datum1 = PointerGetDatum(raddr);
4460  stup->isnull1 = false;
4461  stup->tuple = raddr;
4462  }
4463 
4464  if (state->randomAccess) /* need trailing length word? */
4465  LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
4466 }

References Assert(), SortTuple::datum1, SortTuple::isnull1, len, LogicalTapeReadExact, PointerGetDatum, readtup_alloc(), and SortTuple::tuple.

Referenced by tuplesort_begin_datum().

◆ readtup_heap()

static void readtup_heap ( Tuplesortstate * state,
SortTuple * stup,
LogicalTape * tape,
unsigned int  len 
)
static

Definition at line 3864 of file tuplesort.c.

3866 {
3867  unsigned int tupbodylen = len - sizeof(int);
3868  unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
3869  MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen);
3870  char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
3871  HeapTupleData htup;
3872 
3873  /* read in the tuple proper */
3874  tuple->t_len = tuplen;
3875  LogicalTapeReadExact(tape, tupbody, tupbodylen);
3876  if (state->randomAccess) /* need trailing length word? */
3877  LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
3878  stup->tuple = (void *) tuple;
3879  /* set up first-column key value */
3880  htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
3881  htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
3882  stup->datum1 = heap_getattr(&htup,
3883  state->sortKeys[0].ssup_attno,
3884  state->tupDesc,
3885  &stup->isnull1);
3886 }
#define MINIMAL_TUPLE_DATA_OFFSET
Definition: htup_details.h:617

References SortTuple::datum1, heap_getattr, SortTuple::isnull1, len, LogicalTapeReadExact, MINIMAL_TUPLE_DATA_OFFSET, MINIMAL_TUPLE_OFFSET, readtup_alloc(), HeapTupleData::t_data, HeapTupleData::t_len, MinimalTupleData::t_len, and SortTuple::tuple.

Referenced by tuplesort_begin_heap().

◆ readtup_index()

static void readtup_index ( Tuplesortstate * state,
SortTuple * stup,
LogicalTape * tape,
unsigned int  len 
)
static

Definition at line 4347 of file tuplesort.c.

4349 {
4350  unsigned int tuplen = len - sizeof(unsigned int);
4351  IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen);
4352 
4353  LogicalTapeReadExact(tape, tuple, tuplen);
4354  if (state->randomAccess) /* need trailing length word? */
4355  LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
4356  stup->tuple = (void *) tuple;
4357  /* set up first-column key value */
4358  stup->datum1 = index_getattr(tuple,
4359  1,
4360  RelationGetDescr(state->indexRel),
4361  &stup->isnull1);
4362 }

References SortTuple::datum1, index_getattr, SortTuple::isnull1, len, LogicalTapeReadExact, readtup_alloc(), RelationGetDescr, and SortTuple::tuple.

Referenced by tuplesort_begin_index_btree(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

◆ reversedirection()

static void reversedirection ( Tuplesortstate * state)
static

Definition at line 3627 of file tuplesort.c.

3628 {
3629  SortSupport sortKey = state->sortKeys;
3630  int nkey;
3631 
3632  for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++)
3633  {
3634  sortKey->ssup_reverse = !sortKey->ssup_reverse;
3635  sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first;
3636  }
3637 }
bool ssup_nulls_first
Definition: sortsupport.h:75

References SortSupportData::ssup_nulls_first, and SortSupportData::ssup_reverse.

Referenced by make_bounded_heap(), and sort_bounded_heap().

◆ selectnewtape()

static void selectnewtape ( Tuplesortstate * state)
static

Definition at line 2774 of file tuplesort.c.

2775 {
2776  /*
2777  * At the beginning of each merge pass, nOutputTapes and nOutputRuns are
2778  * both zero. On each call, we create a new output tape to hold the next
2779  * run, until maxTapes is reached. After that, we assign new runs to the
2780  * existing tapes in a round robin fashion.
2781  */
2782  if (state->nOutputTapes < state->maxTapes)
2783  {
2784  /* Create a new tape to hold the next run */
2785  Assert(state->outputTapes[state->nOutputRuns] == NULL);
2786  Assert(state->nOutputRuns == state->nOutputTapes);
2787  state->destTape = LogicalTapeCreate(state->tapeset);
2788  state->outputTapes[state->nOutputTapes] = state->destTape;
2789  state->nOutputTapes++;
2790  state->nOutputRuns++;
2791  }
2792  else
2793  {
2794  /*
2795  * We have reached the max number of tapes. Append to an existing
2796  * tape.
2797  */
2798  state->destTape = state->outputTapes[state->nOutputRuns % state->nOutputTapes];
2799  state->nOutputRuns++;
2800  }
2801 }
LogicalTape * LogicalTapeCreate(LogicalTapeSet *lts)
Definition: logtape.c:681

References Assert(), and LogicalTapeCreate().

Referenced by dumptuples(), inittapes(), and mergeruns().
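
The assignment policy is easy to see with concrete numbers: with maxTapes = 4, runs 0-3 each get a fresh tape, and every later run wraps around round robin. A small sketch of just that bookkeeping (values assumed):

    /* Run-to-tape assignment: new tapes until maxTapes, then round robin. */
    #include <stdio.h>

    int
    main(void)
    {
        int maxTapes = 4, nOutputTapes = 0, nOutputRuns = 0;

        for (int run = 0; run < 10; run++)
        {
            int dest;

            if (nOutputTapes < maxTapes)
                dest = nOutputTapes++;              /* create a new tape */
            else
                dest = nOutputRuns % nOutputTapes;  /* append round robin */
            nOutputRuns++;
            printf("run %d -> tape %d\n", run, dest);
        }
        return 0;   /* runs 0-3 -> tapes 0-3, then 4->0, 5->1, 6->2, ... */
    }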

◆ sort_bounded_heap()

static void sort_bounded_heap ( Tuplesortstate * state)
static

Definition at line 3460 of file tuplesort.c.

3461 {
3462  int tupcount = state->memtupcount;
3463 
3464  Assert(state->status == TSS_BOUNDED);
3465  Assert(state->bounded);
3466  Assert(tupcount == state->bound);
3467  Assert(SERIAL(state));
3468 
3469  /*
3470  * We can unheapify in place because each delete-top call will remove the
3471  * largest entry, which we can promptly store in the newly freed slot at
3472  * the end. Once we're down to a single-entry heap, we're done.
3473  */
3474  while (state->memtupcount > 1)
3475  {
3476  SortTuple stup = state->memtuples[0];
3477 
3478  /* this sifts-up the next-largest entry and decreases memtupcount */
3479  tuplesort_heap_delete_top(state);
3480  state->memtuples[state->memtupcount] = stup;
3481  }
3482  state->memtupcount = tupcount;
3483 
3484  /*
3485  * Reverse sort direction back to the original state. This is not
3486  * actually necessary but seems like a good idea for tidiness.
3487  */
3488  reversedirection(state);
3489 
3490  state->status = TSS_SORTEDINMEM;
3491  state->boundUsed = true;
3492 }

References Assert(), reversedirection(), SERIAL, TSS_BOUNDED, TSS_SORTEDINMEM, and tuplesort_heap_delete_top().

Referenced by tuplesort_performsort().

◆ tuplesort_attach_shared()

void tuplesort_attach_shared ( Sharedsort * shared,
dsm_segment * seg 
)

Definition at line 4523 of file tuplesort.c.

4524 {
4525  /* Attach to SharedFileSet */
4526  SharedFileSetAttach(&shared->fileset, seg);
4527 }
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:62

References Sharedsort::fileset, and SharedFileSetAttach().

Referenced by _bt_parallel_build_main().

◆ tuplesort_begin_batch()

static void tuplesort_begin_batch ( Tuplesortstate * state)
static

Definition at line 832 of file tuplesort.c.

833 {
834  MemoryContext oldcontext;
835 
836  oldcontext = MemoryContextSwitchTo(state->maincontext);
837 
838  /*
839  * Caller tuple (e.g. IndexTuple) memory context.
840  *
841  * A dedicated child context used exclusively for caller passed tuples
842  * eases memory management. Resetting at key points reduces
843  * fragmentation. Note that the memtuples array of SortTuples is allocated
844  * in the parent context, not this context, because there is no need to
845  * free memtuples early.
846  */
847  state->tuplecontext = AllocSetContextCreate(state->sortcontext,
848  "Caller tuples",
849  ALLOCSET_DEFAULT_SIZES);
850 
851  state->status = TSS_INITIAL;
852  state->bounded = false;
853  state->boundUsed = false;
854 
855  state->availMem = state->allowedMem;
856 
857  state->tapeset = NULL;
858 
859  state->memtupcount = 0;
860 
861  /*
862  * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
863  * see comments in grow_memtuples().
864  */
865  state->growmemtuples = true;
866  state->slabAllocatorUsed = false;
867  if (state->memtuples != NULL && state->memtupsize != INITIAL_MEMTUPSIZE)
868  {
869  pfree(state->memtuples);
870  state->memtuples = NULL;
871  state->memtupsize = INITIAL_MEMTUPSIZE;
872  }
873  if (state->memtuples == NULL)
874  {
875  state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple));
876  USEMEM(state, GetMemoryChunkSpace(state->memtuples));
877  }
878 
879  /* workMem must be large enough for the minimal memtuples array */
880  if (LACKMEM(state))
881  elog(ERROR, "insufficient memory allowed for sort");
882 
883  state->currentRun = 0;
884 
885  /*
886  * Tape variables (inputTapes, outputTapes, etc.) will be initialized by
887  * inittapes(), if needed.
888  */
889 
890  state->result_tape = NULL; /* flag that result tape has not been formed */
891 
892  MemoryContextSwitchTo(oldcontext);
893 }
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
#define INITIAL_MEMTUPSIZE
Definition: tuplesort.c:139

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, elog, ERROR, GetMemoryChunkSpace(), INITIAL_MEMTUPSIZE, LACKMEM, MemoryContextSwitchTo(), palloc(), pfree(), TSS_INITIAL, and USEMEM.

Referenced by tuplesort_begin_common(), and tuplesort_reset().

◆ tuplesort_begin_cluster()

Tuplesortstate* tuplesort_begin_cluster ( TupleDesc  tupDesc,
Relation  indexRel,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 970 of file tuplesort.c.

974 {
975  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
976  randomAccess);
977  BTScanInsert indexScanKey;
978  MemoryContext oldcontext;
979  int i;
980 
981  Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
982 
983  oldcontext = MemoryContextSwitchTo(state->maincontext);
984 
985 #ifdef TRACE_SORT
986  if (trace_sort)
987  elog(LOG,
988  "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
989  RelationGetNumberOfAttributes(indexRel),
990  workMem, randomAccess ? 't' : 'f');
991 #endif
992 
993  state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
994 
995  TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
996  false, /* no unique check */
997  state->nKeys,
998  workMem,
999  randomAccess,
1000  PARALLEL_SORT(state));
1001 
1002  state->comparetup = comparetup_cluster;
1003  state->copytup = copytup_cluster;
1004  state->writetup = writetup_cluster;
1005  state->readtup = readtup_cluster;
1006  state->abbrevNext = 10;
1007 
1008  state->indexInfo = BuildIndexInfo(indexRel);
1009 
1010  state->tupDesc = tupDesc; /* assume we need not copy tupDesc */
1011 
1012  indexScanKey = _bt_mkscankey(indexRel, NULL);
1013 
1014  if (state->indexInfo->ii_Expressions != NULL)
1015  {
1016  TupleTableSlot *slot;
1017  ExprContext *econtext;
1018 
1019  /*
1020  * We will need to use FormIndexDatum to evaluate the index
1021  * expressions. To do that, we need an EState, as well as a
1022  * TupleTableSlot to put the table tuples into. The econtext's
1023  * scantuple has to point to that slot, too.
1024  */
1025  state->estate = CreateExecutorState();
1026  slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
1027  econtext = GetPerTupleExprContext(state->estate);
1028  econtext->ecxt_scantuple = slot;
1029  }
1030 
1031  /* Prepare SortSupport data for each column */
1032  state->sortKeys = (SortSupport) palloc0(state->nKeys *
1033  sizeof(SortSupportData));
1034 
1035  for (i = 0; i < state->nKeys; i++)
1036  {
1037  SortSupport sortKey = state->sortKeys + i;
1038  ScanKey scanKey = indexScanKey->scankeys + i;
1039  int16 strategy;
1040 
1041  sortKey->ssup_cxt = CurrentMemoryContext;
1042  sortKey->ssup_collation = scanKey->sk_collation;
1043  sortKey->ssup_nulls_first =
1044  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1045  sortKey->ssup_attno = scanKey->sk_attno;
1046  /* Convey if abbreviation optimization is applicable in principle */
1047  sortKey->abbreviate = (i == 0);
1048 
1049  AssertState(sortKey->ssup_attno != 0);
1050 
1051  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1052  BTGreaterStrategyNumber : BTLessStrategyNumber;
1053 
1054  PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
1055  }
1056 
1057  pfree(indexScanKey);
1058 
1059  MemoryContextSwitchTo(oldcontext);
1060 
1061  return state;
1062 }
#define AssertState(condition)
Definition: c.h:807
signed short int16
Definition: c.h:428
const TupleTableSlotOps TTSOpsHeapTuple
Definition: execTuples.c:84
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1238
EState * CreateExecutorState(void)
Definition: execUtils.c:90
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:2420
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:1083
#define SK_BT_DESC
Definition: nbtree.h:1082
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:90
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:484
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:497
void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy, SortSupport ssup)
Definition: sortsupport.c:162
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
ScanKeyData scankeys[INDEX_MAX_KEYS]
Definition: nbtree.h:791
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:231
Form_pg_class rd_rel
Definition: rel.h:109
int sk_flags
Definition: skey.h:66
Oid sk_collation
Definition: skey.h:70
AttrNumber sk_attno
Definition: skey.h:67
MemoryContext ssup_cxt
Definition: sortsupport.h:66
static Tuplesortstate * tuplesort_begin_common(int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:720
static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
Definition: tuplesort.c:4005
#define PARALLEL_SORT(state)
Definition: tuplesort.c:129
#define CLUSTER_SORT
Definition: tuplesort.c:126
static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
Definition: tuplesort.c:4098
static int comparetup_cluster(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
Definition: tuplesort.c:3894
static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
Definition: tuplesort.c:4078

References _bt_mkscankey(), SortSupportData::abbreviate, Assert(), AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, BuildIndexInfo(), CLUSTER_SORT, comparetup_cluster(), copytup_cluster(), CreateExecutorState(), CurrentMemoryContext, ExprContext::ecxt_scantuple, elog, GetPerTupleExprContext, i, IndexRelationGetNumberOfKeyAttributes, LOG, MakeSingleTupleTableSlot(), MemoryContextSwitchTo(), palloc0(), PARALLEL_SORT, pfree(), PrepareSortSupportFromIndexRel(), RelationData::rd_rel, readtup_cluster(), RelationGetNumberOfAttributes, BTScanInsertData::scankeys, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, trace_sort, TTSOpsHeapTuple, tuplesort_begin_common(), and writetup_cluster().

Referenced by heapam_relation_copy_for_cluster().

◆ tuplesort_begin_common()

static Tuplesortstate * tuplesort_begin_common ( int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)
static

Definition at line 720 of file tuplesort.c.

722 {
723  Tuplesortstate *state;
724  MemoryContext maincontext;
725  MemoryContext sortcontext;
726  MemoryContext oldcontext;
727 
728  /* See leader_takeover_tapes() remarks on randomAccess support */
729  if (coordinate && randomAccess)
730  elog(ERROR, "random access disallowed under parallel sort");
731 
732  /*
733  * Memory context surviving tuplesort_reset. This memory context holds
734  * data which is useful to keep while sorting multiple similar batches.
735  */
737  "TupleSort main",
739 
740  /*
741  * Create a working memory context for one sort operation. The content of
742  * this context is deleted by tuplesort_reset.
743  */
744  sortcontext = AllocSetContextCreate(maincontext,
745  "TupleSort sort",
746  ALLOCSET_DEFAULT_SIZES);
747 
748  /*
749  * Additionally a working memory context for tuples is setup in
750  * tuplesort_begin_batch.
751  */
752 
753  /*
754  * Make the Tuplesortstate within the per-sortstate context. This way, we
755  * don't need a separate pfree() operation for it at shutdown.
756  */
757  oldcontext = MemoryContextSwitchTo(maincontext);
758 
759  state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));
760 
761 #ifdef TRACE_SORT
762  if (trace_sort)
763  pg_rusage_init(&state->ru_start);
764 #endif
765 
766  state->randomAccess = randomAccess;
767  state->tuples = true;
768 
769  /*
770  * workMem is forced to be at least 64KB, the current minimum valid value
771  * for the work_mem GUC. This is a defense against parallel sort callers
772  * that divide out memory among many workers in a way that leaves each
773  * with very little memory.
774  */
775  state->allowedMem = Max(workMem, 64) * (int64) 1024;
776  state->sortcontext = sortcontext;
777  state->maincontext = maincontext;
778 
779  /*
780  * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
781  * see comments in grow_memtuples().
782  */
783  state->memtupsize = INITIAL_MEMTUPSIZE;
784  state->memtuples = NULL;
785 
786  /*
787  * After all of the other non-parallel-related state, we setup all of the
788  * state needed for each batch.
789  */
790  tuplesort_begin_batch(state);
791 
792  /*
793  * Initialize parallel-related state based on coordination information
794  * from caller
795  */
796  if (!coordinate)
797  {
798  /* Serial sort */
799  state->shared = NULL;
800  state->worker = -1;
801  state->nParticipants = -1;
802  }
803  else if (coordinate->isWorker)
804  {
805  /* Parallel worker produces exactly one final run from all input */
806  state->shared = coordinate->sharedsort;
807  state->worker = worker_get_identifier(state);
808  state->nParticipants = -1;
809  }
810  else
811  {
812  /* Parallel leader state only used for final merge */
813  state->shared = coordinate->sharedsort;
814  state->worker = -1;
815  state->nParticipants = coordinate->nParticipants;
816  Assert(state->nParticipants >= 1);
817  }
818 
819  MemoryContextSwitchTo(oldcontext);
820 
821  return state;
822 }
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
Sharedsort * sharedsort
Definition: tuplesort.h:55
static int worker_get_identifier(Tuplesortstate *state)
Definition: tuplesort.c:4543
static void tuplesort_begin_batch(Tuplesortstate *state)
Definition: tuplesort.c:832

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), CurrentMemoryContext, elog, ERROR, INITIAL_MEMTUPSIZE, SortCoordinateData::isWorker, Max, MemoryContextSwitchTo(), SortCoordinateData::nParticipants, palloc0(), pg_rusage_init(), SortCoordinateData::sharedsort, trace_sort, tuplesort_begin_batch(), and worker_get_identifier().

Referenced by tuplesort_begin_cluster(), tuplesort_begin_datum(), tuplesort_begin_heap(), tuplesort_begin_index_btree(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

◆ tuplesort_begin_datum()

Tuplesortstate* tuplesort_begin_datum ( Oid  datumType,
Oid  sortOperator,
Oid  sortCollation,
bool  nullsFirstFlag,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 1246 of file tuplesort.c.

1249 {
1250  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
1251  randomAccess);
1252  MemoryContext oldcontext;
1253  int16 typlen;
1254  bool typbyval;
1255 
1256  oldcontext = MemoryContextSwitchTo(state->maincontext);
1257 
1258 #ifdef TRACE_SORT
1259  if (trace_sort)
1260  elog(LOG,
1261  "begin datum sort: workMem = %d, randomAccess = %c",
1262  workMem, randomAccess ? 't' : 'f');
1263 #endif
1264 
1265  state->nKeys = 1; /* always a one-column sort */
1266 
1267  TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
1268  false, /* no unique check */
1269  1,
1270  workMem,
1271  randomAccess,
1272  PARALLEL_SORT(state));
1273 
1274  state->comparetup = comparetup_datum;
1275  state->copytup = copytup_datum;
1276  state->writetup = writetup_datum;
1277  state->readtup = readtup_datum;
1278  state->abbrevNext = 10;
1279 
1280  state->datumType = datumType;
1281 
1282  /* lookup necessary attributes of the datum type */
1283  get_typlenbyval(datumType, &typlen, &typbyval);
1284  state->datumTypeLen = typlen;
1285  state->tuples = !typbyval;
1286 
1287  /* Prepare SortSupport data */
1288  state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
1289 
1290  state->sortKeys->ssup_cxt = CurrentMemoryContext;
1291  state->sortKeys->ssup_collation = sortCollation;
1292  state->sortKeys->ssup_nulls_first = nullsFirstFlag;
1293 
1294  /*
1295  * Abbreviation is possible here only for by-reference types. In theory,
1296  * a pass-by-value datatype could have an abbreviated form that is cheaper
1297  * to compare. In a tuple sort, we could support that, because we can
1298  * always extract the original datum from the tuple as needed. Here, we
1299  * can't, because a datum sort only stores a single copy of the datum; the
1300  * "tuple" field of each SortTuple is NULL.
1301  */
1302  state->sortKeys->abbreviate = !typbyval;
1303 
1304  PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
1305 
1306  /*
1307  * The "onlyKey" optimization cannot be used with abbreviated keys, since
1308  * tie-breaker comparisons may be required. Typically, the optimization
1309  * is only of value to pass-by-value types anyway, whereas abbreviated
1310  * keys are typically only of value to pass-by-reference types.
1311  */
1312  if (!state->sortKeys->abbrev_converter)
1313  state->onlyKey = state->sortKeys;
1314 
1315  MemoryContextSwitchTo(oldcontext);
1316 
1317  return state;
1318 }
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2198
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
static int comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
Definition: tuplesort.c:4369
static void readtup_datum(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
Definition: tuplesort.c:4435
static void writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
Definition: tuplesort.c:4397
#define DATUM_SORT
Definition: tuplesort.c:125
static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup)
Definition: tuplesort.c:4390

References comparetup_datum(), copytup_datum(), CurrentMemoryContext, DATUM_SORT, elog, get_typlenbyval(), LOG, MemoryContextSwitchTo(), palloc0(), PARALLEL_SORT, PrepareSortSupportFromOrderingOp(), readtup_datum(), trace_sort, tuplesort_begin_common(), and writetup_datum().

Referenced by ExecSort(), initialize_aggregate(), ordered_set_startup(), and validate_index().
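
For orientation, here is a hedged caller-side sketch (not from tuplesort.c; the 1024 KB workMem and the int4 operator choice are arbitrary assumptions) showing the full life cycle of a serial, forward-only datum sort:

    #include "postgres.h"
    #include "catalog/pg_operator_d.h"  /* Int4LessOperator */
    #include "catalog/pg_type_d.h"      /* INT4OID */
    #include "utils/tuplesort.h"

    static void
    sort_some_ints(void)
    {
        Tuplesortstate *sortstate;
        Datum       val;
        bool        isnull;

        sortstate = tuplesort_begin_datum(INT4OID, Int4LessOperator,
                                          InvalidOid,   /* default collation */
                                          false,        /* nulls last */
                                          1024,         /* workMem, in KB */
                                          NULL,         /* serial: no coordinate */
                                          false);       /* no random access */

        tuplesort_putdatum(sortstate, Int32GetDatum(42), false);
        tuplesort_putdatum(sortstate, Int32GetDatum(7), false);
        tuplesort_putdatum(sortstate, Int32GetDatum(19), false);

        tuplesort_performsort(sortstate);

        while (tuplesort_getdatum(sortstate, true, &val, &isnull, NULL))
            elog(LOG, "next value: %d", DatumGetInt32(val));    /* 7, 19, 42 */

        tuplesort_end(sortstate);
    }

Real callers such as validate_index() follow the same begin/put/performsort/get/end shape, with error handling and memory-context discipline added.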

◆ tuplesort_begin_heap()

Tuplesortstate* tuplesort_begin_heap ( TupleDesc  tupDesc,
int  nkeys,
AttrNumber * attNums,
Oid * sortOperators,
Oid * sortCollations,
bool * nullsFirstFlags,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 896 of file tuplesort.c.

901 {
902  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
903  randomAccess);
904  MemoryContext oldcontext;
905  int i;
906 
907  oldcontext = MemoryContextSwitchTo(state->maincontext);
908 
909  AssertArg(nkeys > 0);
910 
911 #ifdef TRACE_SORT
912  if (trace_sort)
913  elog(LOG,
914  "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
915  nkeys, workMem, randomAccess ? 't' : 'f');
916 #endif
917 
918  state->nKeys = nkeys;
919 
920  TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
921  false, /* no unique check */
922  nkeys,
923  workMem,
924  randomAccess,
925  PARALLEL_SORT(state));
926 
927  state->comparetup = comparetup_heap;
928  state->copytup = copytup_heap;
929  state->writetup = writetup_heap;
930  state->readtup = readtup_heap;
931 
932  state->tupDesc = tupDesc; /* assume we need not copy tupDesc */
933  state->abbrevNext = 10;
934 
935  /* Prepare SortSupport data for each column */
936  state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
937 
938  for (i = 0; i < nkeys; i++)
939  {
940  SortSupport sortKey = state->sortKeys + i;
941 
942  AssertArg(attNums[i] != 0);
943  AssertArg(sortOperators[i] != 0);
944 
945  sortKey->ssup_cxt = CurrentMemoryContext;
946  sortKey->ssup_collation = sortCollations[i];
947  sortKey->ssup_nulls_first = nullsFirstFlags[i];
948  sortKey->ssup_attno = attNums[i];
949  /* Convey if abbreviation optimization is applicable in principle */
950  sortKey->abbreviate = (i == 0);
951 
952  PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
953  }
954 
955  /*
956  * The "onlyKey" optimization cannot be used with abbreviated keys, since
957  * tie-breaker comparisons may be required. Typically, the optimization
958  * is only of value to pass-by-value types anyway, whereas abbreviated
959  * keys are typically only of value to pass-by-reference types.
960  */
961  if (nkeys == 1 && !state->sortKeys->abbrev_converter)
962  state->onlyKey = state->sortKeys;
963 
964  MemoryContextSwitchTo(oldcontext);
965 
966  return state;
967 }
#define AssertArg(condition)
Definition: c.h:806
static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
Definition: tuplesort.c:3762
static void readtup_heap(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
Definition: tuplesort.c:3864
static int comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
Definition: tuplesort.c:3700
static void writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
Definition: tuplesort.c:3840
#define HEAP_SORT
Definition: tuplesort.c:123

References SortSupportData::abbreviate, AssertArg, comparetup_heap(), copytup_heap(), CurrentMemoryContext, elog, HEAP_SORT, i, LOG, MemoryContextSwitchTo(), palloc0(), PARALLEL_SORT, PrepareSortSupportFromOrderingOp(), readtup_heap(), SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, trace_sort, tuplesort_begin_common(), and writetup_heap().

Referenced by ExecIncrementalSort(), ExecSort(), initialize_aggregate(), initialize_phase(), ordered_set_startup(), and switchToPresortedPrefixMode().
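
A hedged sketch of the caller-side setup (not from tuplesort.c; the column number, operator, and helper name are assumptions) for a single-key int4 sort, in the spirit of what nodeSort.c derives from the plan:

    #include "postgres.h"
    #include "access/attnum.h"
    #include "access/tupdesc.h"
    #include "catalog/pg_operator_d.h"  /* Int4LessOperator */
    #include "utils/tuplesort.h"

    static Tuplesortstate *
    begin_single_int4_sort(TupleDesc tupDesc, int workMem)
    {
        AttrNumber  attNums[1] = {1};                   /* sort on column 1 */
        Oid         sortOperators[1] = {Int4LessOperator};
        Oid         sortCollations[1] = {InvalidOid};   /* not a collatable type */
        bool        nullsFirstFlags[1] = {false};       /* NULLS LAST */

        return tuplesort_begin_heap(tupDesc, 1, attNums,
                                    sortOperators, sortCollations,
                                    nullsFirstFlags, workMem,
                                    NULL,               /* serial sort */
                                    false);             /* forward-only access */
    }

Feeding and draining then follow the usual pattern: tuplesort_puttupleslot() per input slot, one tuplesort_performsort() call, tuplesort_gettupleslot() until it returns false, then tuplesort_end().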

◆ tuplesort_begin_index_btree()

Tuplesortstate* tuplesort_begin_index_btree ( Relation  heapRel,
Relation  indexRel,
bool  enforceUnique,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 1065 of file tuplesort.c.

1071 {
1072  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
1073  randomAccess);
1074  BTScanInsert indexScanKey;
1075  MemoryContext oldcontext;
1076  int i;
1077 
1078  oldcontext = MemoryContextSwitchTo(state->maincontext);
1079 
1080 #ifdef TRACE_SORT
1081  if (trace_sort)
1082  elog(LOG,
1083  "begin index sort: unique = %c, workMem = %d, randomAccess = %c",
1084  enforceUnique ? 't' : 'f',
1085  workMem, randomAccess ? 't' : 'f');
1086 #endif
1087 
1088  state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
1089 
1090  TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
1091  enforceUnique,
1092  state->nKeys,
1093  workMem,
1094  randomAccess,
1095  PARALLEL_SORT(state));
1096 
1097  state->comparetup = comparetup_index_btree;
1098  state->copytup = copytup_index;
1099  state->writetup = writetup_index;
1100  state->readtup = readtup_index;
1101  state->abbrevNext = 10;
1102 
1103  state->heapRel = heapRel;
1104  state->indexRel = indexRel;
1105  state->enforceUnique = enforceUnique;
1106 
1107  indexScanKey = _bt_mkscankey(indexRel, NULL);
1108 
1109  /* Prepare SortSupport data for each column */
1110  state->sortKeys = (SortSupport) palloc0(state->nKeys *
1111  sizeof(SortSupportData));
1112 
1113  for (i = 0; i < state->nKeys; i++)
1114  {
1115  SortSupport sortKey = state->sortKeys + i;
1116  ScanKey scanKey = indexScanKey->scankeys + i;
1117  int16 strategy;
1118 
1119  sortKey->ssup_cxt = CurrentMemoryContext;
1120  sortKey->ssup_collation = scanKey->sk_collation;
1121  sortKey->ssup_nulls_first =
1122  (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
1123  sortKey->ssup_attno = scanKey->sk_attno;
1124  /* Convey if abbreviation optimization is applicable in principle */
1125  sortKey->abbreviate = (i == 0);
1126 
1127  AssertState(sortKey->ssup_attno != 0);
1128 
1129  strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
1130  BTGreaterStrategyNumber : BTLessStrategyNumber;
1131 
1132  PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
1133  }
1134 
1135  pfree(indexScanKey);
1136 
1137  MemoryContextSwitchTo(oldcontext);
1138 
1139  return state;
1140 }
static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
Definition: tuplesort.c:4133
static void readtup_index(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len)
Definition: tuplesort.c:4347
static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
Definition: tuplesort.c:4321
#define INDEX_SORT
Definition: tuplesort.c:124
static void writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
Definition: tuplesort.c:4328

References _bt_mkscankey(), SortSupportData::abbreviate, AssertState, BTGreaterStrategyNumber, BTLessStrategyNumber, comparetup_index_btree(), copytup_index(), CurrentMemoryContext, elog, i, INDEX_SORT, IndexRelationGetNumberOfKeyAttributes, LOG, MemoryContextSwitchTo(), palloc0(), PARALLEL_SORT, pfree(), PrepareSortSupportFromIndexRel(), readtup_index(), BTScanInsertData::scankeys, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, trace_sort, tuplesort_begin_common(), and writetup_index().

Referenced by _bt_parallel_scan_and_sort(), and _bt_spools_heapscan().

◆ tuplesort_begin_index_gist()

Tuplesortstate* tuplesort_begin_index_gist ( Relation  heapRel,
Relation  indexRel,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 1189 of file tuplesort.c.

1194 {
1195  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
1196  randomAccess);
1197  MemoryContext oldcontext;
1198  int i;
1199 
1200  oldcontext = MemoryContextSwitchTo(state->sortcontext);
1201 
1202 #ifdef TRACE_SORT
1203  if (trace_sort)
1204  elog(LOG,
1205  "begin index sort: workMem = %d, randomAccess = %c",
1206  workMem, randomAccess ? 't' : 'f');
1207 #endif
1208 
1209  state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
1210 
1211  state->comparetup = comparetup_index_btree;
1212  state->copytup = copytup_index;
1213  state->writetup = writetup_index;
1214  state->readtup = readtup_index;
1215 
1216  state->heapRel = heapRel;
1217  state->indexRel = indexRel;
1218 
1219  /* Prepare SortSupport data for each column */
1220  state->sortKeys = (SortSupport) palloc0(state->nKeys *
1221  sizeof(SortSupportData));
1222 
1223  for (i = 0; i < state->nKeys; i++)
1224  {
1225  SortSupport sortKey = state->sortKeys + i;
1226 
1227  sortKey->ssup_cxt = CurrentMemoryContext;
1228  sortKey->ssup_collation = indexRel->rd_indcollation[i];
1229  sortKey->ssup_nulls_first = false;
1230  sortKey->ssup_attno = i + 1;
1231  /* Convey if abbreviation optimization is applicable in principle */
1232  sortKey->abbreviate = (i == 0);
1233 
1234  AssertState(sortKey->ssup_attno != 0);
1235 
1236  /* Look for a sort support function */
1237  PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
1238  }
1239 
1240  MemoryContextSwitchTo(oldcontext);
1241 
1242  return state;
1243 }
void PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup)
Definition: sortsupport.c:189
Oid * rd_indcollation
Definition: rel.h:213

References SortSupportData::abbreviate, AssertState, comparetup_index_btree(), copytup_index(), CurrentMemoryContext, elog, i, IndexRelationGetNumberOfKeyAttributes, LOG, MemoryContextSwitchTo(), palloc0(), PrepareSortSupportFromGistIndexRel(), RelationData::rd_indcollation, readtup_index(), SortSupportData::ssup_attno, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, trace_sort, tuplesort_begin_common(), and writetup_index().

Referenced by gistbuild().

◆ tuplesort_begin_index_hash()

Tuplesortstate* tuplesort_begin_index_hash ( Relation  heapRel,
Relation  indexRel,
uint32  high_mask,
uint32  low_mask,
uint32  max_buckets,
int  workMem,
SortCoordinate  coordinate,
bool  randomAccess 
)

Definition at line 1143 of file tuplesort.c.

1151 {
1152  Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
1153  randomAccess);
1154  MemoryContext oldcontext;
1155 
1156  oldcontext = MemoryContextSwitchTo(state->maincontext);
1157 
1158 #ifdef TRACE_SORT
1159  if (trace_sort)
1160  elog(LOG,
1161  "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
1162  "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
1163  high_mask,
1164  low_mask,
1165  max_buckets,
1166  workMem, randomAccess ? 't' : 'f');
1167 #endif
1168 
1169  state->nKeys = 1; /* Only one sort column, the hash code */
1170 
1171  state->comparetup = comparetup_index_hash;
1172  state->copytup = copytup_index;
1173  state->writetup = writetup_index;
1174  state->readtup = readtup_index;
1175 
1176  state->heapRel = heapRel;
1177  state->indexRel = indexRel;
1178 
1179  state->high_mask = high_mask;
1180  state->low_mask = low_mask;
1181  state->max_buckets = max_buckets;
1182 
1183  MemoryContextSwitchTo(oldcontext);
1184 
1185  return state;
1186 }
static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
Definition: tuplesort.c:4266

References comparetup_index_hash(), copytup_index(), elog, LOG, MemoryContextSwitchTo(), readtup_index(), trace_sort, tuplesort_begin_common(), and writetup_index().

Referenced by _h_spoolinit().

◆ tuplesort_end()

void tuplesort_end ( Tuplesortstate * state)

Definition at line 1467 of file tuplesort.c.

1468 {
1469  tuplesort_free(state);
1470 
1471  /*
1472  * Free the main memory context, including the Tuplesortstate struct
1473  * itself.
1474  */
1475  MemoryContextDelete(state->maincontext);
1476 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
static void tuplesort_free(Tuplesortstate *state)
Definition: tuplesort.c:1390

References MemoryContextDelete(), and tuplesort_free().

Referenced by _bt_parallel_scan_and_sort(), _bt_spooldestroy(), _h_spooldestroy(), ExecEndAgg(), ExecEndIncrementalSort(), ExecEndSort(), ExecReScanAgg(), ExecReScanSort(), gistbuild(), heapam_relation_copy_for_cluster(), initialize_aggregate(), initialize_phase(), ordered_set_shutdown(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), and validate_index().

◆ tuplesort_estimate_shared()

Size tuplesort_estimate_shared ( int  nWorkers)

Definition at line 4479 of file tuplesort.c.

4480 {
4481  Size tapesSize;
4482 
4483  Assert(nWorkers > 0);
4484 
4485  /* Make sure that BufFile shared state is MAXALIGN'd */
4486  tapesSize = mul_size(sizeof(TapeShare), nWorkers);
4487  tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes)));
4488 
4489  return tapesSize;
4490 }
#define MAXALIGN(LEN)
Definition: c.h:757
#define offsetof(type, field)
Definition: c.h:727
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519

References add_size(), Assert(), MAXALIGN, mul_size(), and offsetof.

Referenced by _bt_begin_parallel().
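
A sketch of how a leader might use this estimate when sizing a parallel DSM segment, loosely modeled on _bt_begin_parallel(); the toc key below is a hypothetical constant, not one defined by this file.

#define PARALLEL_KEY_TUPLESORT  UINT64CONST(0xA000000000000001) /* assumed */

void
reserve_sharedsort_sketch(ParallelContext *pcxt, int nworkers)
{
    Size        estsort = tuplesort_estimate_shared(nworkers);
    Sharedsort *sharedsort;

    shm_toc_estimate_chunk(&pcxt->estimator, estsort);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
    InitializeParallelDSM(pcxt);
    sharedsort = shm_toc_allocate(pcxt->toc, estsort);
    tuplesort_initialize_shared(sharedsort, nworkers, pcxt->seg);
    shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
}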

◆ tuplesort_free()

static void tuplesort_free ( Tuplesortstate state)
static

Definition at line 1390 of file tuplesort.c.

1391 {
1392  /* context swap probably not needed, but let's be safe */
1393  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
1394 
1395 #ifdef TRACE_SORT
1396  long spaceUsed;
1397 
1398  if (state->tapeset)
1399  spaceUsed = LogicalTapeSetBlocks(state->tapeset);
1400  else
1401  spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
1402 #endif
1403 
1404  /*
1405  * Delete temporary "tape" files, if any.
1406  *
1407  * Note: want to include this in reported total cost of sort, hence need
1408  * for two #ifdef TRACE_SORT sections.
1409  *
1410  * We don't bother to destroy the individual tapes here. They will go away
1411  * with the sortcontext. (In TSS_FINALMERGE state, we have closed
1412  * finished tapes already.)
1413  */
1414  if (state->tapeset)
1415  LogicalTapeSetClose(state->tapeset);
1416 
1417 #ifdef TRACE_SORT
1418  if (trace_sort)
1419  {
1420  if (state->tapeset)
1421  elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s",
1422  SERIAL(state) ? "external sort" : "parallel external sort",
1423  state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
1424  else
1425  elog(LOG, "%s of worker %d ended, %ld KB used: %s",
1426  SERIAL(state) ? "internal sort" : "unperformed parallel sort",
1427  state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
1428  }
1429 
1430  TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed);
1431 #else
1432 
1433  /*
1434  * If you disabled TRACE_SORT, you can still probe sort__done, but you
1435  * ain't getting space-used stats.
1436  */
1437  TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L);
1438 #endif
1439 
1440  /* Free any execution state created for CLUSTER case */
1441  if (state->estate != NULL)
1442  {
1443  ExprContext *econtext = GetPerTupleExprContext(state->estate);
1444 
1445  ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
1446  FreeExecutorState(state->estate);
1447  }
1448 
1449  MemoryContextSwitchTo(oldcontext);
1450 
1451  /*
1452  * Free the per-sort memory context, thereby releasing all working memory.
1453  */
1454  MemoryContextReset(state->sortcontext);
1455 }
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1254
void FreeExecutorState(EState *estate)
Definition: execUtils.c:186
void LogicalTapeSetClose(LogicalTapeSet *lts)
Definition: logtape.c:668
long LogicalTapeSetBlocks(LogicalTapeSet *lts)
Definition: logtape.c:1184

References ExprContext::ecxt_scantuple, elog, ExecDropSingleTupleTableSlot(), FreeExecutorState(), GetPerTupleExprContext, LOG, LogicalTapeSetBlocks(), LogicalTapeSetClose(), MemoryContextReset(), MemoryContextSwitchTo(), pg_rusage_show(), SERIAL, and trace_sort.

Referenced by tuplesort_end(), and tuplesort_reset().

◆ tuplesort_get_stats()

void tuplesort_get_stats ( Tuplesortstate state,
TuplesortInstrumentation stats 
)

Definition at line 3323 of file tuplesort.c.

3325 {
3326  /*
3327  * Note: it might seem we should provide both memory and disk usage for a
3328  * disk-based sort. However, the current code doesn't track memory space
3329  * accurately once we have begun to return tuples to the caller (since we
3330  * don't account for pfree's the caller is expected to do), so we cannot
3331  * rely on availMem in a disk sort. This does not seem worth the overhead
3332  * to fix. Is it worth creating an API for the memory context code to
3333  * tell us how much is actually used in sortcontext?
3334  */
3335  tuplesort_updatemax(state);
3336 
3337  if (state->isMaxSpaceDisk)
3338  stats->spaceType = SORT_SPACE_TYPE_DISK;
3339  else
3340  stats->spaceType = SORT_SPACE_TYPE_MEMORY;
3341  stats->spaceUsed = (state->maxSpace + 1023) / 1024;
3342 
3343  switch (state->maxSpaceStatus)
3344  {
3345  case TSS_SORTEDINMEM:
3346  if (state->boundUsed)
3347  stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT;
3348  else
3349  stats->sortMethod = SORT_TYPE_QUICKSORT;
3350  break;
3351  case TSS_SORTEDONTAPE:
3352  stats->sortMethod = SORT_TYPE_EXTERNAL_SORT;
3353  break;
3354  case TSS_FINALMERGE:
3355  stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE;
3356  break;
3357  default:
3358  stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS;
3359  break;
3360  }
3361 }
TuplesortMethod sortMethod
Definition: tuplesort.h:91
TuplesortSpaceType spaceType
Definition: tuplesort.h:92
static void tuplesort_updatemax(Tuplesortstate *state)
Definition: tuplesort.c:1484
@ SORT_SPACE_TYPE_DISK
Definition: tuplesort.h:85
@ SORT_SPACE_TYPE_MEMORY
Definition: tuplesort.h:86
@ SORT_TYPE_EXTERNAL_SORT
Definition: tuplesort.h:77
@ SORT_TYPE_TOP_N_HEAPSORT
Definition: tuplesort.h:75
@ SORT_TYPE_QUICKSORT
Definition: tuplesort.h:76
@ SORT_TYPE_STILL_IN_PROGRESS
Definition: tuplesort.h:74
@ SORT_TYPE_EXTERNAL_MERGE
Definition: tuplesort.h:78

References SORT_SPACE_TYPE_DISK, SORT_SPACE_TYPE_MEMORY, SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, SORT_TYPE_TOP_N_HEAPSORT, TuplesortInstrumentation::sortMethod, TuplesortInstrumentation::spaceType, TuplesortInstrumentation::spaceUsed, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, and tuplesort_updatemax().

Referenced by ExecSort(), instrumentSortedGroup(), and show_sort_info().
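
For illustration, an EXPLAIN-style consumer might read the instrumentation as in this sketch (the message format is illustrative only):

void
report_sort_stats_sketch(Tuplesortstate *ts)
{
    TuplesortInstrumentation stats;

    tuplesort_get_stats(ts, &stats);
    elog(LOG, "Sort Method: %s  %s: " INT64_FORMAT "kB",
         tuplesort_method_name(stats.sortMethod),
         tuplesort_space_type_name(stats.spaceType),
         stats.spaceUsed);
}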

◆ tuplesort_getdatum()

bool tuplesort_getdatum ( Tuplesortstate state,
bool  forward,
Datum val,
bool isNull,
Datum abbrev 
)

Definition at line 2494 of file tuplesort.c.

2496 {
2497  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
2498  SortTuple stup;
2499 
2500  if (!tuplesort_gettuple_common(state, forward, &stup))
2501  {
2502  MemoryContextSwitchTo(oldcontext);
2503  return false;
2504  }
2505 
2506  /* Ensure we copy into caller's memory context */
2507  MemoryContextSwitchTo(oldcontext);
2508 
2509  /* Record abbreviated key for caller */
2510  if (state->sortKeys->abbrev_converter && abbrev)
2511  *abbrev = stup.datum1;
2512 
2513  if (stup.isnull1 || !state->tuples)
2514  {
2515  *val = stup.datum1;
2516  *isNull = stup.isnull1;
2517  }
2518  else
2519  {
2520  /* use stup.tuple because stup.datum1 may be an abbreviation */
2521  *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen);
2522  *isNull = false;
2523  }
2524 
2525  return true;
2526 }
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
static bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward, SortTuple *stup)
Definition: tuplesort.c:2154

References SortTuple::datum1, datumCopy(), SortTuple::isnull1, MemoryContextSwitchTo(), PointerGetDatum, SortTuple::tuple, tuplesort_gettuple_common(), and val.

Referenced by ExecSort(), heapam_index_validate_scan(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), percentile_disc_multi_final(), and process_ordered_aggregate_single().
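
A minimal end-to-end Datum sort sketch, pairing tuplesort_putdatum() with tuplesort_getdatum() for pass-by-value int4 values; the tuplesort_begin_datum() signature is taken from tuplesort.h, and no abbreviated key is requested (abbrev = NULL).

void
sort_int4_datums_sketch(Datum *vals, int nvals)
{
    Tuplesortstate *ts;
    Datum       val;
    bool        isnull;

    ts = tuplesort_begin_datum(INT4OID, Int4LessOperator, InvalidOid,
                               false, work_mem, NULL, false);
    for (int i = 0; i < nvals; i++)
        tuplesort_putdatum(ts, vals[i], false);
    tuplesort_performsort(ts);
    while (tuplesort_getdatum(ts, true, &val, &isnull, NULL))
        elog(DEBUG1, "sorted value: %d", DatumGetInt32(val));
    tuplesort_end(ts);
}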

◆ tuplesort_getheaptuple()

HeapTuple tuplesort_getheaptuple ( Tuplesortstate state,
bool  forward 
)

Definition at line 2445 of file tuplesort.c.

2446 {
2447  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
2448  SortTuple stup;
2449 
2450  if (!tuplesort_gettuple_common(state, forward, &stup))
2451  stup.tuple = NULL;
2452 
2453  MemoryContextSwitchTo(oldcontext);
2454 
2455  return stup.tuple;
2456 }

References MemoryContextSwitchTo(), SortTuple::tuple, and tuplesort_gettuple_common().

Referenced by heapam_relation_copy_for_cluster().

◆ tuplesort_getindextuple()

IndexTuple tuplesort_getindextuple ( Tuplesortstate state,
bool  forward 
)

Definition at line 2465 of file tuplesort.c.

2466 {
2467  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
2468  SortTuple stup;
2469 
2470  if (!tuplesort_gettuple_common(state, forward, &stup))
2471  stup.tuple = NULL;
2472 
2473  MemoryContextSwitchTo(oldcontext);
2474 
2475  return (IndexTuple) stup.tuple;
2476 }

References MemoryContextSwitchTo(), SortTuple::tuple, and tuplesort_gettuple_common().

Referenced by _bt_load(), _h_indexbuild(), and gist_indexsortbuild().

◆ tuplesort_gettuple_common()

static bool tuplesort_gettuple_common ( Tuplesortstate state,
bool  forward,
SortTuple stup 
)
static

Definition at line 2154 of file tuplesort.c.

2156 {
2157  unsigned int tuplen;
2158  size_t nmoved;
2159 
2160  Assert(!WORKER(state));
2161 
2162  switch (state->status)
2163  {
2164  case TSS_SORTEDINMEM:
2165  Assert(forward || state->randomAccess);
2166  Assert(!state->slabAllocatorUsed);
2167  if (forward)
2168  {
2169  if (state->current < state->memtupcount)
2170  {
2171  *stup = state->memtuples[state->current++];
2172  return true;
2173  }
2174  state->eof_reached = true;
2175 
2176  /*
2177  * Complain if caller tries to retrieve more tuples than
2178  * originally asked for in a bounded sort. This is because
2179  * returning EOF here might be the wrong thing.
2180  */
2181  if (state->bounded && state->current >= state->bound)
2182  elog(ERROR, "retrieved too many tuples in a bounded sort");
2183 
2184  return false;
2185  }
2186  else
2187  {
2188  if (state->current <= 0)
2189  return false;
2190 
2191  /*
2192  * If all tuples have already been fetched, we return the last
2193  * tuple; otherwise, the tuple just before the last one returned.
2194  */
2195  if (state->eof_reached)
2196  state->eof_reached = false;
2197  else
2198  {
2199  state->current--; /* last returned tuple */
2200  if (state->current <= 0)
2201  return false;
2202  }
2203  *stup = state->memtuples[state->current - 1];
2204  return true;
2205  }
2206  break;
2207 
2208  case TSS_SORTEDONTAPE:
2209  Assert(forward || state->randomAccess);
2210  Assert(state->slabAllocatorUsed);
2211 
2212  /*
2213  * The slot that held the tuple that we returned in previous
2214  * gettuple call can now be reused.
2215  */
2216  if (state->lastReturnedTuple)
2217  {
2218  RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
2219  state->lastReturnedTuple = NULL;
2220  }
2221 
2222  if (forward)
2223  {
2224  if (state->eof_reached)
2225  return false;
2226 
2227  if ((tuplen = getlen(state->result_tape, true)) != 0)
2228  {
2229  READTUP(state, stup, state->result_tape, tuplen);
2230 
2231  /*
2232  * Remember the tuple we return, so that we can recycle
2233  * its memory on next call. (This can be NULL, in the
2234  * !state->tuples case).
2235  */
2236  state->lastReturnedTuple = stup->tuple;
2237 
2238  return true;
2239  }
2240  else
2241  {
2242  state->eof_reached = true;
2243  return false;
2244  }
2245  }
2246 
2247  /*
2248  * Backward.
2249  *
2250  * If all tuples have already been fetched, we return the last tuple;
2251  * otherwise, the tuple just before the last one returned.
2252  */
2253  if (state->eof_reached)
2254  {
2255  /*
2256  * Seek position is pointing just past the zero tuplen at the
2257  * end of file; back up to fetch last tuple's ending length
2258  * word. If seek fails we must have a completely empty file.
2259  */
2260  nmoved = LogicalTapeBackspace(state->result_tape,
2261  2 * sizeof(unsigned int));
2262  if (nmoved == 0)
2263  return false;
2264  else if (nmoved != 2 * sizeof(unsigned int))
2265  elog(ERROR, "unexpected tape position");
2266  state->eof_reached = false;
2267  }
2268  else
2269  {
2270  /*
2271  * Back up and fetch previously-returned tuple's ending length
2272  * word. If seek fails, assume we are at start of file.
2273  */
2274  nmoved = LogicalTapeBackspace(state->result_tape,
2275  sizeof(unsigned int));
2276  if (nmoved == 0)
2277  return false;
2278  else if (nmoved != sizeof(unsigned int))
2279  elog(ERROR, "unexpected tape position");
2280  tuplen = getlen(state->result_tape, false);
2281 
2282  /*
2283  * Back up to get ending length word of tuple before it.
2284  */
2285  nmoved = LogicalTapeBackspace(state->result_tape,
2286  tuplen + 2 * sizeof(unsigned int));
2287  if (nmoved == tuplen + sizeof(unsigned int))
2288  {
2289  /*
2290  * We backed up over the previous tuple, but there was no
2291  * ending length word before it. That means that the prev
2292  * tuple is the first tuple in the file. It is now the
2293  * next one to read in the forward direction (not obviously right,
2294  * but that is what the in-memory case does).
2295  */
2296  return false;
2297  }
2298  else if (nmoved != tuplen + 2 * sizeof(unsigned int))
2299  elog(ERROR, "bogus tuple length in backward scan");
2300  }
2301 
2302  tuplen = getlen(state->result_tape, false);
2303 
2304  /*
2305  * Now we have the length of the prior tuple, back up and read it.
2306  * Note: READTUP expects we are positioned after the initial
2307  * length word of the tuple, so back up to that point.
2308  */
2309  nmoved = LogicalTapeBackspace(state->result_tape,
2310  tuplen);
2311  if (nmoved != tuplen)
2312  elog(ERROR, "bogus tuple length in backward scan");
2313  READTUP(state, stup, state->result_tape, tuplen);
2314 
2315  /*
2316  * Remember the tuple we return, so that we can recycle its memory
2317  * on next call. (This can be NULL, in the Datum case).
2318  */
2319  state->lastReturnedTuple = stup->tuple;
2320 
2321  return true;
2322 
2323  case TSS_FINALMERGE:
2324  Assert(forward);
2325  /* We are managing memory ourselves, with the slab allocator. */
2326  Assert(state->slabAllocatorUsed);
2327 
2328  /*
2329  * The slab slot holding the tuple that we returned in previous
2330  * gettuple call can now be reused.
2331  */
2332  if (state->lastReturnedTuple)
2333  {
2334  RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
2335  state->lastReturnedTuple = NULL;
2336  }
2337 
2338  /*
2339  * This code should match the inner loop of mergeonerun().
2340  */
2341  if (state->memtupcount > 0)
2342  {
2343  int srcTapeIndex = state->memtuples[0].srctape;
2344  LogicalTape *srcTape = state->inputTapes[srcTapeIndex];
2345  SortTuple newtup;
2346 
2347  *stup = state->memtuples[0];
2348 
2349  /*
2350  * Remember the tuple we return, so that we can recycle its
2351  * memory on next call. (This can be NULL, in the Datum case).
2352  */
2353  state->lastReturnedTuple = stup->tuple;
2354 
2355  /*
2356  * Pull next tuple from tape, and replace the returned tuple
2357  * at top of the heap with it.
2358  */
2359  if (!mergereadnext(state, srcTape, &newtup))
2360  {
2361  /*
2362  * If no more data, we've reached end of run on this tape.
2363  * Remove the top node from the heap.
2364  */
2365  tuplesort_heap_delete_top(state);
2366  state->nInputRuns--;
2367 
2368  /*
2369  * Close the tape. It'd go away at the end of the sort
2370  * anyway, but better to release the memory early.
2371  */
2372  LogicalTapeClose(srcTape);
2373  return true;
2374  }
2375  newtup.srctape = srcTapeIndex;
2376  tuplesort_heap_replace_top(state, &newtup);
2377  return true;
2378  }
2379  return false;
2380 
2381  default:
2382  elog(ERROR, "invalid tuplesort state");
2383  return false; /* keep compiler quiet */
2384  }
2385 }
size_t LogicalTapeBackspace(LogicalTape *lt, size_t size)
Definition: logtape.c:1063

References Assert(), elog, ERROR, getlen(), LogicalTapeBackspace(), LogicalTapeClose(), mergereadnext(), READTUP, RELEASE_SLAB_SLOT, SortTuple::srctape, TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, SortTuple::tuple, tuplesort_heap_delete_top(), tuplesort_heap_replace_top(), and WORKER.

Referenced by tuplesort_getdatum(), tuplesort_getheaptuple(), tuplesort_getindextuple(), tuplesort_gettupleslot(), and tuplesort_skiptuples().
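
The backspace arithmetic above depends on the on-tape record format produced by the writetup_* routines when randomAccess is true; a descriptive sketch of that layout (not part of the generated documentation):

/*
 *    [length word][tuple body ...][trailing length word]
 *
 * The stored length counts the body plus one length word.  Backing up
 * 2 * sizeof(unsigned int) from the end of data skips the zero
 * end-of-data word plus the last record's trailing length word, whose
 * value then says how far to back up to reach the record's start.
 */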

◆ tuplesort_gettupleslot()

bool tuplesort_gettupleslot ( Tuplesortstate state,
bool  forward,
bool  copy,
TupleTableSlot slot,
Datum abbrev 
)

Definition at line 2408 of file tuplesort.c.

2410 {
2411  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
2412  SortTuple stup;
2413 
2414  if (!tuplesort_gettuple_common(state, forward, &stup))
2415  stup.tuple = NULL;
2416 
2417  MemoryContextSwitchTo(oldcontext);
2418 
2419  if (stup.tuple)
2420  {
2421  /* Record abbreviated key for caller */
2422  if (state->sortKeys->abbrev_converter && abbrev)
2423  *abbrev = stup.datum1;
2424 
2425  if (copy)
2426  stup.tuple = (void *) heap_copy_minimal_tuple((MinimalTuple) stup.tuple);
2427 
2428  ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy);
2429  return true;
2430  }
2431  else
2432  {
2433  ExecClearTuple(slot);
2434  return false;
2435  }
2436 }
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1446
MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1439
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425

References SortTuple::datum1, ExecClearTuple(), ExecStoreMinimalTuple(), heap_copy_minimal_tuple(), MemoryContextSwitchTo(), SortTuple::tuple, and tuplesort_gettuple_common().

Referenced by ExecIncrementalSort(), ExecSort(), fetch_input_tuple(), hypothetical_dense_rank_final(), hypothetical_rank_common(), process_ordered_aggregate_multi(), and switchToPresortedPrefixMode().

◆ tuplesort_heap_delete_top()

static void tuplesort_heap_delete_top ( Tuplesortstate state)
static

Definition at line 3563 of file tuplesort.c.

3564 {
3565  SortTuple *memtuples = state->memtuples;
3566  SortTuple *tuple;
3567 
3568  if (--state->memtupcount <= 0)
3569  return;
3570 
3571  /*
3572  * Remove the last tuple in the heap, and re-insert it, by replacing the
3573  * current top node with it.
3574  */
3575  tuple = &memtuples[state->memtupcount];
3576  tuplesort_heap_replace_top(state, tuple);
3577 }

References tuplesort_heap_replace_top().

Referenced by mergeonerun(), sort_bounded_heap(), and tuplesort_gettuple_common().

◆ tuplesort_heap_insert()

static void tuplesort_heap_insert ( Tuplesortstate state,
SortTuple tuple 
)
static

Definition at line 3528 of file tuplesort.c.

3529 {
3530  SortTuple *memtuples;
3531  int j;
3532 
3533  memtuples = state->memtuples;
3534  Assert(state->memtupcount < state->memtupsize);
3535 
3536  CHECK_FOR_INTERRUPTS();
3537 
3538  /*
3539  * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is
3540  * using 1-based array indexes, not 0-based.
3541  */
3542  j = state->memtupcount++;
3543  while (j > 0)
3544  {
3545  int i = (j - 1) >> 1;
3546 
3547  if (COMPARETUP(state, tuple, &memtuples[i]) >= 0)
3548  break;
3549  memtuples[j] = memtuples[i];
3550  j = i;
3551  }
3552  memtuples[j] = *tuple;
3553 }

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by beginmerge(), and make_bounded_heap().

◆ tuplesort_heap_replace_top()

static void tuplesort_heap_replace_top ( Tuplesortstate state,
SortTuple tuple 
)
static

Definition at line 3587 of file tuplesort.c.

3588 {
3589  SortTuple *memtuples = state->memtuples;
3590  unsigned int i,
3591  n;
3592 
3593  Assert(state->memtupcount >= 1);
3594 
3595  CHECK_FOR_INTERRUPTS();
3596 
3597  /*
3598  * state->memtupcount is "int", but we use "unsigned int" for i, j, n.
3599  * This prevents overflow in the "2 * i + 1" calculation, since at the top
3600  * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2.
3601  */
3602  n = state->memtupcount;
3603  i = 0; /* i is where the "hole" is */
3604  for (;;)
3605  {
3606  unsigned int j = 2 * i + 1;
3607 
3608  if (j >= n)
3609  break;
3610  if (j + 1 < n &&
3611  COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0)
3612  j++;
3613  if (COMPARETUP(state, tuple, &memtuples[j]) <= 0)
3614  break;
3615  memtuples[i] = memtuples[j];
3616  i = j;
3617  }
3618  memtuples[i] = *tuple;
3619 }

References Assert(), CHECK_FOR_INTERRUPTS, COMPARETUP, i, and j.

Referenced by make_bounded_heap(), mergeonerun(), puttuple_common(), tuplesort_gettuple_common(), and tuplesort_heap_delete_top().
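
The sift-down loop above is ordinary binary-heap maintenance; the following self-contained sketch replays the same hole-movement technique on a plain int min-heap (illustrative only; the real code compares SortTuples via COMPARETUP).

#include <stdio.h>

static void
heap_replace_top(int *h, int n, int newval)
{
    unsigned int i = 0;                      /* i is where the "hole" is */

    for (;;)
    {
        unsigned int j = 2 * i + 1;          /* left child */

        if (j >= (unsigned int) n)
            break;
        if (j + 1 < (unsigned int) n && h[j + 1] < h[j])
            j++;                             /* pick the smaller child */
        if (newval <= h[j])
            break;
        h[i] = h[j];                         /* move child up into hole */
        i = j;
    }
    h[i] = newval;
}

int
main(void)
{
    int h[] = {1, 3, 2, 7, 4};               /* already a valid min-heap */

    heap_replace_top(h, 5, 9);               /* replace root with 9 */
    for (int k = 0; k < 5; k++)
        printf("%d ", h[k]);                 /* prints "2 3 9 7 4" */
    printf("\n");
    return 0;
}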

◆ tuplesort_initialize_shared()

void tuplesort_initialize_shared ( Sharedsort shared,
int  nWorkers,
dsm_segment seg 
)

Definition at line 4500 of file tuplesort.c.

4501 {
4502  int i;
4503 
4504  Assert(nWorkers > 0);
4505 
4506  SpinLockInit(&shared->mutex);
4507  shared->currentWorker = 0;
4508  shared->workersFinished = 0;
4509  SharedFileSetInit(&shared->fileset, seg);
4510  shared->nTapes = nWorkers;
4511  for (i = 0; i < nWorkers; i++)
4512  {
4513  shared->tapes[i].firstblocknumber = 0L;
4514  }
4515 }
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:44
#define SpinLockInit(lock)
Definition: spin.h:60
int nTapes
Definition: tuplesort.c:508
int currentWorker
Definition: tuplesort.c:501
long firstblocknumber
Definition: logtape.h:54

References Assert(), Sharedsort::currentWorker, Sharedsort::fileset, TapeShare::firstblocknumber, i, Sharedsort::mutex, Sharedsort::nTapes, SharedFileSetInit(), SpinLockInit, Sharedsort::tapes, and Sharedsort::workersFinished.

Referenced by _bt_begin_parallel().
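
On the worker side, the mirror-image step is a sketch like the following: look up the leader-initialized struct and attach to it before creating the worker's Tuplesortstate (tuplesort_attach_shared() is taken from tuplesort.h; the toc key matches the hypothetical one used at estimation time).

void
worker_attach_sketch(shm_toc *toc, dsm_segment *seg)
{
    Sharedsort *sharedsort;

    sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    tuplesort_attach_shared(sharedsort, seg);
    /*
     * ...then build a SortCoordinate referencing sharedsort and pass it
     * to one of the tuplesort_begin_* routines.
     */
}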

◆ tuplesort_markpos()

void tuplesort_markpos ( Tuplesortstate state)

Definition at line 3259 of file tuplesort.c.

3260 {
3261  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
3262 
3263  Assert(state->randomAccess);
3264 
3265  switch (state->status)
3266  {
3267  case TSS_SORTEDINMEM:
3268  state->markpos_offset = state->current;
3269  state->markpos_eof = state->eof_reached;
3270  break;
3271  case TSS_SORTEDONTAPE:
3272  LogicalTapeTell(state->result_tape,
3273  &state->markpos_block,
3274  &state->markpos_offset);
3275  state->markpos_eof = state->eof_reached;
3276  break;
3277  default:
3278  elog(ERROR, "invalid tuplesort state");
3279  break;
3280  }
3281 
3282  MemoryContextSwitchTo(oldcontext);
3283 }
void LogicalTapeTell(LogicalTape *lt, long *blocknum, int *offset)
Definition: logtape.c:1163

References Assert(), elog, ERROR, LogicalTapeTell(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, and TSS_SORTEDONTAPE.

Referenced by ExecSortMarkPos().

◆ tuplesort_merge_order()

int tuplesort_merge_order ( int64  allowedMem)

Definition at line 2602 of file tuplesort.c.

2603 {
2604  int mOrder;
2605 
2606  /*----------
2607  * In the merge phase, we need buffer space for each input and output tape.
2608  * Each pass in the balanced merge algorithm reads from M input tapes, and
2609  * writes to N output tapes. Each tape consumes TAPE_BUFFER_OVERHEAD bytes
2610  * of memory. In addition to that, we want MERGE_BUFFER_SIZE workspace per
2611  * input tape.
2612  *
2613  * totalMem = M * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE) +
2614  * N * TAPE_BUFFER_OVERHEAD
2615  *
2616  * Except for the last and next-to-last merge passes, where there can be
2617  * fewer tapes left to process, M = N. We choose M so that we have the
2618  * desired amount of memory available for the input buffers
2619  * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE), given the total memory
2620  * available for the tape buffers (allowedMem).
2621  *
2622  * Note: you might be thinking we need to account for the memtuples[]
2623  * array in this calculation, but we effectively treat that as part of the
2624  * MERGE_BUFFER_SIZE workspace.
2625  *----------
2626  */
2627  mOrder = allowedMem /
2628  (2 * TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE);
2629 
2630  /*
2631  * Even in minimum memory, use at least a MINORDER merge. On the other
2632  * hand, even when we have lots of memory, do not use more than a MAXORDER
2633  * merge. Tapes are pretty cheap, but they're not entirely free. Each
2634  * additional tape reduces the amount of memory available to build runs,
2635  * which in turn can cause the same sort to need more runs, which makes
2636  * merging slower even if it can still be done in a single pass. Also,
2637  * high order merges are quite slow due to CPU cache effects; it can be
2638  * faster to pay the I/O cost of a multi-pass merge than to perform a
2639  * single merge pass across many hundreds of tapes.
2640  */
2641  mOrder = Max(mOrder, MINORDER);
2642  mOrder = Min(mOrder, MAXORDER);
2643 
2644  return mOrder;
2645 }
#define MAXORDER
Definition: tuplesort.c:235
#define MERGE_BUFFER_SIZE
Definition: tuplesort.c:237

References Max, MAXORDER, MERGE_BUFFER_SIZE, Min, MINORDER, and TAPE_BUFFER_OVERHEAD.

Referenced by cost_tuplesort(), and inittapes().
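
A worked instance of the formula, standalone with the default 8kB BLCKSZ inlined (TAPE_BUFFER_OVERHEAD = 8192, MERGE_BUFFER_SIZE = 8192 * 32): with 4MB of allowedMem the merge order comes out at 15.

#include <stdio.h>

int
main(void)
{
    long long allowedMem = 4LL * 1024 * 1024;    /* 4MB */
    long long tapeOverhead = 8192;               /* TAPE_BUFFER_OVERHEAD */
    long long mergeBuf = 8192 * 32;              /* MERGE_BUFFER_SIZE */
    long long mOrder = allowedMem / (2 * tapeOverhead + mergeBuf);

    /* 4194304 / 278528 = 15, then clamped into [MINORDER, MAXORDER] */
    if (mOrder < 6)
        mOrder = 6;                              /* MINORDER */
    if (mOrder > 500)
        mOrder = 500;                            /* MAXORDER */
    printf("merge order = %lld\n", mOrder);      /* prints 15 */
    return 0;
}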

◆ tuplesort_method_name()

const char* tuplesort_method_name ( TuplesortMethod  m)

Definition at line 3367 of file tuplesort.c.

3368 {
3369  switch (m)
3370  {
3371  case SORT_TYPE_STILL_IN_PROGRESS:
3372  return "still in progress";
3373  case SORT_TYPE_TOP_N_HEAPSORT:
3374  return "top-N heapsort";
3375  case SORT_TYPE_QUICKSORT:
3376  return "quicksort";
3377  case SORT_TYPE_EXTERNAL_SORT:
3378  return "external sort";
3379  case SORT_TYPE_EXTERNAL_MERGE:
3380  return "external merge";
3381  }
3382 
3383  return "unknown";
3384 }

References SORT_TYPE_EXTERNAL_MERGE, SORT_TYPE_EXTERNAL_SORT, SORT_TYPE_QUICKSORT, SORT_TYPE_STILL_IN_PROGRESS, and SORT_TYPE_TOP_N_HEAPSORT.

Referenced by show_incremental_sort_group_info(), and show_sort_info().

◆ tuplesort_performsort()

void tuplesort_performsort ( Tuplesortstate state)

Definition at line 2043 of file tuplesort.c.

2044 {
2045  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
2046 
2047 #ifdef TRACE_SORT
2048  if (trace_sort)
2049  elog(LOG, "performsort of worker %d starting: %s",
2050  state->worker, pg_rusage_show(&state->ru_start));
2051 #endif
2052 
2053  switch (state->status)
2054  {
2055  case TSS_INITIAL:
2056 
2057  /*
2058  * We were able to accumulate all the tuples within the allowed
2059  * amount of memory, or the leader is about to take over the worker tapes
2060  */
2061  if (SERIAL(state))
2062  {
2063  /* Just qsort 'em and we're done */
2064  tuplesort_sort_memtuples(state);
2065  state->status = TSS_SORTEDINMEM;
2066  }
2067  else if (WORKER(state))
2068  {
2069  /*
2070  * Parallel workers must still dump out tuples to tape. No
2071  * merge is required to produce single output run, though.
2072  */
2073  inittapes(state, false);
2074  dumptuples(state, true);
2075  worker_nomergeruns(state);
2076  state->status = TSS_SORTEDONTAPE;
2077  }
2078  else
2079  {
2080  /*
2081  * Leader will take over worker tapes and merge worker runs.
2082  * Note that mergeruns sets the correct state->status.
2083  */
2084  leader_takeover_tapes(state);
2085  mergeruns(state);
2086  }
2087  state->current = 0;
2088  state->eof_reached = false;
2089  state->markpos_block = 0L;
2090  state->markpos_offset = 0;
2091  state->markpos_eof = false;
2092  break;
2093 
2094  case TSS_BOUNDED:
2095 
2096  /*
2097  * We were able to accumulate all the tuples required for output
2098  * in memory, using a heap to eliminate excess tuples. Now we
2099  * have to transform the heap to a properly-sorted array.
2100  */
2101  sort_bounded_heap(state);
2102  state->current = 0;
2103  state->eof_reached = false;
2104  state->markpos_offset = 0;
2105  state->markpos_eof = false;
2106  state->status = TSS_SORTEDINMEM;
2107  break;
2108 
2109  case TSS_BUILDRUNS:
2110 
2111  /*
2112  * Finish tape-based sort. First, flush all tuples remaining in
2113  * memory out to tape; then merge until we have a single remaining
2114  * run (or, if !randomAccess and !WORKER(), one run per tape).
2115  * Note that mergeruns sets the correct state->status.
2116  */
2117  dumptuples(state, true);
2118  mergeruns(state);
2119  state->eof_reached = false;
2120  state->markpos_block = 0L;
2121  state->markpos_offset = 0;
2122  state->markpos_eof = false;
2123  break;
2124 
2125  default:
2126  elog(ERROR, "invalid tuplesort state");
2127  break;
2128  }
2129 
2130 #ifdef TRACE_SORT
2131  if (trace_sort)
2132  {
2133  if (state->status == TSS_FINALMERGE)
2134  elog(LOG, "performsort of worker %d done (except %d-way final merge): %s",
2135  state->worker, state->nInputTapes,
2136  pg_rusage_show(&state->ru_start));
2137  else
2138  elog(LOG, "performsort of worker %d done: %s",
2139  state->worker, pg_rusage_show(&state->ru_start));
2140  }
2141 #endif
2142 
2143  MemoryContextSwitchTo(oldcontext);
2144 }
static void sort_bounded_heap(Tuplesortstate *state)
Definition: tuplesort.c:3460
static void leader_takeover_tapes(Tuplesortstate *state)
Definition: tuplesort.c:4631
static void worker_nomergeruns(Tuplesortstate *state)
Definition: tuplesort.c:4609

References dumptuples(), elog, ERROR, inittapes(), leader_takeover_tapes(), LOG, MemoryContextSwitchTo(), mergeruns(), pg_rusage_show(), SERIAL, sort_bounded_heap(), trace_sort, TSS_BOUNDED, TSS_BUILDRUNS, TSS_FINALMERGE, TSS_INITIAL, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_sort_memtuples(), WORKER, and worker_nomergeruns().

Referenced by _bt_leafbuild(), _bt_parallel_scan_and_sort(), _h_indexbuild(), ExecIncrementalSort(), ExecSort(), gistbuild(), heapam_relation_copy_for_cluster(), hypothetical_dense_rank_final(), hypothetical_rank_common(), initialize_phase(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), percentile_disc_multi_final(), process_ordered_aggregate_multi(), process_ordered_aggregate_single(), switchToPresortedPrefixMode(), and validate_index().

◆ tuplesort_putdatum()

void tuplesort_putdatum ( Tuplesortstate state,
Datum  val,
bool  isNull 
)

Definition at line 1808 of file tuplesort.c.

1809 {
1810  MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
1811  SortTuple stup;
1812 
1813  /*
1814  * Pass-by-value types or null values are just stored directly in
1815  * stup.datum1 (and stup.tuple is not used and set to NULL).
1816  *
1817  * Non-null pass-by-reference values need to be copied into memory we
1818  * control, and possibly abbreviated. The copied value is pointed to by
1819  * stup.tuple and is treated as the canonical copy (e.g. to return via
1820  * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
1821  * abbreviated value if abbreviation is happening, otherwise it's
1822  * identical to stup.tuple.
1823  */
1824 
1825  if (isNull || !state->tuples)
1826  {
1827  /*
1828  * Set datum1 to zeroed representation for NULLs (to be consistent,
1829  * and to support cheap inequality tests for NULL abbreviated keys).
1830  */
1831  stup.datum1 = !isNull ? val : (Datum) 0;
1832  stup.isnull1 = isNull;
1833  stup.tuple = NULL; /* no separate storage */
1834  MemoryContextSwitchTo(state->sortcontext);
1835  }
1836  else
1837  {
1838  Datum original = datumCopy(val, false, state->datumTypeLen);
1839 
1840  stup.isnull1 = false;
1841  stup.tuple = DatumGetPointer(original);
1842  USEMEM(state, GetMemoryChunkSpace(stup.tuple));
1843  MemoryContextSwitchTo(state->sortcontext);
1844 
1845  if (!state->sortKeys->abbrev_converter)
1846  {
1847  stup.datum1 = original;
1848  }
1849  else if (!consider_abort_common(state))
1850  {
1851  /* Store abbreviated key representation */
1852  stup.datum1 = state->sortKeys->abbrev_converter(original,
1853  state->sortKeys);
1854  }
1855  else
1856  {
1857  /* Abort abbreviation */
1858  int i;
1859 
1860  stup.datum1 = original;
1861 
1862  /*
1863  * Set state to be consistent with never trying abbreviation.
1864  *
1865  * Alter datum1 representation in already-copied tuples, so as to
1866  * ensure a consistent representation (current tuple was just
1867  * handled). It does not matter if some dumped tuples are already
1868  * sorted on tape, since serialized tuples lack abbreviated keys
1869  * (TSS_BUILDRUNS state prevents control reaching here in any
1870  * case).
1871  */
1872  for (i = 0; i < state->memtupcount; i++)
1873  {
1874  SortTuple *mtup = &state->memtuples[i];
1875 
1876  mtup->datum1 = PointerGetDatum(mtup->tuple);
1877  }
1878  }
1879  }
1880 
1881  puttuple_common(state, &stup);
1882 
1883  MemoryContextSwitchTo(oldcontext);
1884 }
#define DatumGetPointer(X)
Definition: postgres.h:593
static void puttuple_common(Tuplesortstate *state, SortTuple *tuple)
Definition: tuplesort.c:1890

References consider_abort_common(), SortTuple::datum1, datumCopy(), DatumGetPointer, GetMemoryChunkSpace(), i, SortTuple::isnull1, MemoryContextSwitchTo(), PointerGetDatum, puttuple_common(), SortTuple::tuple, USEMEM, and val.

Referenced by ExecEvalAggOrderedTransDatum(), ExecSort(), ordered_set_transition(), and validate_index_callback().

◆ tuplesort_putheaptuple()

void tuplesort_putheaptuple ( Tuplesortstate state,
HeapTuple  tup 
)

Definition at line 1709 of file tuplesort.c.

1710 {
1711  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
1712  SortTuple stup;
1713 
1714  /*
1715  * Copy the given tuple into memory we control, and decrease availMem.
1716  * Then call the common code.
1717  */
1718  COPYTUP(state, &stup, (void *) tup);
1719 
1720  puttuple_common(state, &stup);
1721 
1722  MemoryContextSwitchTo(oldcontext);
1723 }
#define COPYTUP(state, stup, tup)
Definition: tuplesort.c:541

References COPYTUP, MemoryContextSwitchTo(), and puttuple_common().

Referenced by heapam_relation_copy_for_cluster().

◆ tuplesort_putindextuplevalues()

void tuplesort_putindextuplevalues ( Tuplesortstate state,
Relation  rel,
ItemPointer  self,
Datum values,
bool isnull 
)

Definition at line 1730 of file tuplesort.c.

1733 {
1734  MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
1735  SortTuple stup;
1736  Datum original;
1737  IndexTuple tuple;
1738 
1739  stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
1740  tuple = ((IndexTuple) stup.tuple);
1741  tuple->t_tid = *self;
1742  USEMEM(state, GetMemoryChunkSpace(stup.tuple));
1743  /* set up first-column key value */
1744  original = index_getattr(tuple,
1745  1,
1746  RelationGetDescr(state->indexRel),
1747  &stup.isnull1);
1748 
1749  MemoryContextSwitchTo(state->sortcontext);
1750 
1751  if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1)
1752  {
1753  /*
1754  * Store ordinary Datum representation, or NULL value. If there is a
1755  * converter it won't expect NULL values, and cost model is not
1756  * required to account for NULL, so in that case we avoid calling
1757  * converter and just set datum1 to zeroed representation (to be
1758  * consistent, and to support cheap inequality tests for NULL
1759  * abbreviated keys).
1760  */
1761  stup.datum1 = original;
1762  }
1763  else if (!consider_abort_common(state))
1764  {
1765  /* Store abbreviated key representation */
1766  stup.datum1 = state->sortKeys->abbrev_converter(original,
1767  state->sortKeys);
1768  }
1769  else
1770  {
1771  /* Abort abbreviation */
1772  int i;
1773 
1774  stup.datum1 = original;
1775 
1776  /*
1777  * Set state to be consistent with never trying abbreviation.
1778  *
1779  * Alter datum1 representation in already-copied tuples, so as to
1780  * ensure a consistent representation (current tuple was just
1781  * handled). It does not matter if some dumped tuples are already
1782  * sorted on tape, since serialized tuples lack abbreviated keys
1783  * (TSS_BUILDRUNS state prevents control reaching here in any case).
1784  */
1785  for (i = 0; i < state->memtupcount; i++)
1786  {
1787  SortTuple *mtup = &state->memtuples[i];
1788 
1789  tuple = mtup->tuple;
1790  mtup->datum1 = index_getattr(tuple,
1791  1,
1792  RelationGetDescr(state->indexRel),
1793  &mtup->isnull1);
1794  }
1795  }
1796 
1797  puttuple_common(state, &stup);
1798 
1799  MemoryContextSwitchTo(oldcontext);
1800 }
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:47

References consider_abort_common(), SortTuple::datum1, GetMemoryChunkSpace(), i, index_form_tuple(), index_getattr, SortTuple::isnull1, MemoryContextSwitchTo(), puttuple_common(), RelationGetDescr, IndexTupleData::t_tid, SortTuple::tuple, USEMEM, and values.

Referenced by _bt_spool(), _h_spool(), and gistSortedBuildCallback().

◆ tuplesort_puttupleslot()

void tuplesort_puttupleslot ( Tuplesortstate state,
TupleTableSlot slot 
)

Definition at line 1687 of file tuplesort.c.

1688 {
1689  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
1690  SortTuple stup;
1691 
1692  /*
1693  * Copy the given tuple into memory we control, and decrease availMem.
1694  * Then call the common code.
1695  */
1696  COPYTUP(state, &stup, (void *) slot);
1697 
1698  puttuple_common(state, &stup);
1699 
1700  MemoryContextSwitchTo(oldcontext);
1701 }

References COPYTUP, MemoryContextSwitchTo(), and puttuple_common().

Referenced by ExecEvalAggOrderedTransTuple(), ExecIncrementalSort(), ExecSort(), fetch_input_tuple(), hypothetical_dense_rank_final(), hypothetical_rank_common(), ordered_set_transition_multi(), and switchToPresortedPrefixMode().

◆ tuplesort_rescan()

void tuplesort_rescan ( Tuplesortstate state)

Definition at line 3226 of file tuplesort.c.

3227 {
3228  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
3229 
3230  Assert(state->randomAccess);
3231 
3232  switch (state->status)
3233  {
3234  case TSS_SORTEDINMEM:
3235  state->current = 0;
3236  state->eof_reached = false;
3237  state->markpos_offset = 0;
3238  state->markpos_eof = false;
3239  break;
3240  case TSS_SORTEDONTAPE:
3241  LogicalTapeRewindForRead(state->result_tape, 0);
3242  state->eof_reached = false;
3243  state->markpos_block = 0L;
3244  state->markpos_offset = 0;
3245  state->markpos_eof = false;
3246  break;
3247  default:
3248  elog(ERROR, "invalid tuplesort state");
3249  break;
3250  }
3251 
3252  MemoryContextSwitchTo(oldcontext);
3253 }

References Assert(), elog, ERROR, LogicalTapeRewindForRead(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, and TSS_SORTEDONTAPE.

Referenced by ExecReScanSort(), mode_final(), percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().

◆ tuplesort_reset()

void tuplesort_reset ( Tuplesortstate state)

Definition at line 1535 of file tuplesort.c.

1536 {
1537  tuplesort_updatemax(state);
1538  tuplesort_free(state);
1539 
1540  /*
1541  * After we've freed up per-batch memory, re-setup all of the state common
1542  * to both the first batch and any subsequent batch.
1543  */
1544  tuplesort_begin_batch(state);
1545 
1546  state->lastReturnedTuple = NULL;
1547  state->slabMemoryBegin = NULL;
1548  state->slabMemoryEnd = NULL;
1549  state->slabFreeHead = NULL;
1550 }

References tuplesort_begin_batch(), tuplesort_free(), and tuplesort_updatemax().

Referenced by ExecIncrementalSort(), ExecReScanIncrementalSort(), and switchToPresortedPrefixMode().

◆ tuplesort_restorepos()

void tuplesort_restorepos ( Tuplesortstate state)

Definition at line 3290 of file tuplesort.c.

3291 {
3292  MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
3293 
3294  Assert(state->randomAccess);
3295 
3296  switch (state->status)
3297  {
3298  case TSS_SORTEDINMEM:
3299  state->current = state->markpos_offset;
3300  state->eof_reached = state->markpos_eof;
3301  break;
3302  case TSS_SORTEDONTAPE:
3303  LogicalTapeSeek(state->result_tape,
3304  state->markpos_block,
3305  state->markpos_offset);
3306  state->eof_reached = state->markpos_eof;
3307  break;
3308  default:
3309  elog(ERROR, "invalid tuplesort state");
3310  break;
3311  }
3312 
3313  MemoryContextSwitchTo(oldcontext);
3314 }
void LogicalTapeSeek(LogicalTape *lt, long blocknum, int offset)
Definition: logtape.c:1134

References Assert(), elog, ERROR, LogicalTapeSeek(), MemoryContextSwitchTo(), TSS_SORTEDINMEM, and TSS_SORTEDONTAPE.

Referenced by ExecSortRestrPos().
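
A sketch of the mark/restore pattern (cf. ExecSortMarkPos() and ExecSortRestrPos()); it assumes the sort was begun with randomAccess = true.

void
mark_restore_sketch(Tuplesortstate *ts, TupleTableSlot *slot)
{
    tuplesort_markpos(ts);                               /* remember position */
    tuplesort_gettupleslot(ts, true, false, slot, NULL); /* read ahead */
    tuplesort_restorepos(ts);                            /* rewind to the mark */
    /* the next fetch will return the same tuple again */
}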

◆ tuplesort_set_bound()

void tuplesort_set_bound ( Tuplesortstate state,
int64  bound 
)

Definition at line 1333 of file tuplesort.c.

1334 {
1335  /* Assert we're called before loading any tuples */
1336  Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
1337  /* Can't set the bound twice, either */
1338  Assert(!state->bounded);
1339  /* Also, this shouldn't be called in a parallel worker */
1340  Assert(!WORKER(state));
1341 
1342  /* Parallel leader allows but ignores hint */
1343  if (LEADER(state))
1344  return;
1345 
1346 #ifdef DEBUG_BOUNDED_SORT
1347  /* Honor GUC setting that disables the feature (for easy testing) */
1348  if (!optimize_bounded_sort)
1349  return;
1350 #endif
1351 
1352  /* We want to be able to compute bound * 2, so limit the setting */
1353  if (bound > (int64) (INT_MAX / 2))
1354  return;
1355 
1356  state->bounded = true;
1357  state->bound = (int) bound;
1358 
1359  /*
1360  * Bounded sorts are not an effective target for abbreviated key
1361  * optimization. Disable by setting state to be consistent with no
1362  * abbreviation support.
1363  */
1364  state->sortKeys->abbrev_converter = NULL;
1365  if (state->sortKeys->abbrev_full_comparator)
1366  state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
1367 
1368  /* Not strictly necessary, but be tidy */
1369  state->sortKeys->abbrev_abort = NULL;
1370  state->sortKeys->abbrev_full_comparator = NULL;
1371 }

References Assert(), LEADER, TSS_INITIAL, and WORKER.

Referenced by ExecIncrementalSort(), ExecSort(), and switchToPresortedPrefixMode().
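
A bounded (top-N) sort sketch along the lines of what ExecSort() does for LIMIT queries; the bound of 100 is arbitrary, and the hint must precede all input.

void
top_n_sketch(Tuplesortstate *ts, TupleTableSlot *slot)
{
    tuplesort_set_bound(ts, 100);       /* keep only the 100 lowest tuples */
    /* ... tuplesort_puttupleslot() for every input tuple ... */
    tuplesort_performsort(ts);
    if (tuplesort_used_bound(ts))
        elog(DEBUG1, "top-N heapsort was used");
    while (tuplesort_gettupleslot(ts, true, false, slot, NULL))
        ;                               /* at most 100 tuples come back */
}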

◆ tuplesort_skiptuples()

bool tuplesort_skiptuples ( Tuplesortstate state,
int64  ntuples,
bool  forward 
)

Definition at line 2534 of file tuplesort.c.

2535 {
2536  MemoryContext oldcontext;
2537 
2538  /*
2539  * We don't actually support backwards skip yet, because no callers need
2540  * it. The API is designed to allow for that later, though.
2541  */
2542  Assert(forward);
2543  Assert(ntuples >= 0);
2544  Assert(!WORKER(state));
2545 
2546  switch (state->status)
2547  {
2548  case TSS_SORTEDINMEM:
2549  if (state->memtupcount - state->current >= ntuples)
2550  {
2551  state->current += ntuples;
2552  return true;
2553  }
2554  state->current = state->memtupcount;
2555  state->eof_reached = true;
2556 
2557  /*
2558  * Complain if caller tries to retrieve more tuples than
2559  * originally asked for in a bounded sort. This is because
2560  * returning EOF here might be the wrong thing.
2561  */
2562  if (state->bounded && state->current >= state->bound)
2563  elog(ERROR, "retrieved too many tuples in a bounded sort");
2564 
2565  return false;
2566 
2567  case TSS_SORTEDONTAPE:
2568  case TSS_FINALMERGE:
2569 
2570  /*
2571  * We could probably optimize these cases better, but for now it's
2572  * not worth the trouble.
2573  */
2574  oldcontext = MemoryContextSwitchTo(state->sortcontext);
2575  while (ntuples-- > 0)
2576  {
2577  SortTuple stup;
2578 
2579  if (!tuplesort_gettuple_common(state, forward, &stup))
2580  {
2581  MemoryContextSwitchTo(oldcontext);
2582  return false;
2583  }
2584  CHECK_FOR_INTERRUPTS();
2585  }
2586  MemoryContextSwitchTo(oldcontext);
2587  return true;
2588 
2589  default:
2590  elog(ERROR, "invalid tuplesort state");
2591  return false; /* keep compiler quiet */
2592  }
2593 }

References Assert(), CHECK_FOR_INTERRUPTS, elog, ERROR, MemoryContextSwitchTo(), TSS_FINALMERGE, TSS_SORTEDINMEM, TSS_SORTEDONTAPE, tuplesort_gettuple_common(), and WORKER.

Referenced by percentile_cont_final_common(), percentile_cont_multi_final_common(), percentile_disc_final(), and percentile_disc_multi_final().
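
A sketch of the skip-to-position idiom used by the percentile aggregates: advance k - 1 tuples, then fetch the k-th (forward only; k is assumed to be >= 1 and already validated).

bool
fetch_kth_datum_sketch(Tuplesortstate *ts, int64 k, Datum *val, bool *isnull)
{
    if (k > 1 && !tuplesort_skiptuples(ts, k - 1, true))
        return false;                   /* fewer than k tuples in the sort */
    return tuplesort_getdatum(ts, true, val, isnull, NULL);
}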

◆ tuplesort_sort_memtuples()

static void tuplesort_sort_memtuples ( Tuplesortstate state)
static

Definition at line 3500 of file tuplesort.c.

3501 {
3502  Assert(!LEADER(state));
3503 
3504  if (state->memtupcount > 1)
3505  {
3506  /* Can we use the single-key sort function? */
3507  if (state->onlyKey != NULL)
3508  qsort_ssup(state->memtuples, state->memtupcount,
3509  state->onlyKey);
3510  else
3511  qsort_tuple(state->memtuples,
3512  state->memtupcount,
3513  state->comparetup,
3514  state);
3515  }
3516 }

References Assert(), and LEADER.

Referenced by dumptuples(), and tuplesort_performsort().

◆ tuplesort_space_type_name()

const char* tuplesort_space_type_name ( TuplesortSpaceType  t)

Definition at line 3390 of file tuplesort.c.

3391 {
3392  Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY);
3393  return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory";
3394 }

References Assert(), SORT_SPACE_TYPE_DISK, and SORT_SPACE_TYPE_MEMORY.

Referenced by show_incremental_sort_group_info(), and show_sort_info().

◆ tuplesort_updatemax()

static void tuplesort_updatemax ( Tuplesortstate state)
static

Definition at line 1484 of file tuplesort.c.

1485 {
1486  int64 spaceUsed;
1487  bool isSpaceDisk;
1488 
1489  /*
1490  * Note: it might seem we should provide both memory and disk usage for a
1491  * disk-based sort. However, the current code doesn't track memory space
1492  * accurately once we have begun to return tuples to the caller (since we
1493  * don't account for pfree's the caller is expected to do), so we cannot
1494  * rely on availMem in a disk sort. This does not seem worth the overhead
1495  * to fix. Is it worth creating an API for the memory context code to
1496  * tell us how much is actually used in sortcontext?
1497  */
1498  if (state->tapeset)
1499  {
1500  isSpaceDisk = true;
1501  spaceUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ;
1502  }
1503  else
1504  {
1505  isSpaceDisk = false;
1506  spaceUsed = state->allowedMem - state->availMem;
1507  }
1508 
1509  /*
1510  * Sort evicts data to disk when it cannot fit that data into main
1511  * memory. This is why we assume space used on disk to be more important
1512  * for tracking resource usage than space used in memory. Note that the
1513  * amount of space occupied by a tupleset on disk might be less than the
1514  * amount occupied by the same tupleset in memory, due to its more
1515  * compact representation.
1516  */
1517  if ((isSpaceDisk && !state->isMaxSpaceDisk) ||
1518  (isSpaceDisk == state->isMaxSpaceDisk && spaceUsed > state->maxSpace))
1519  {
1520  state->maxSpace = spaceUsed;
1521  state->isMaxSpaceDisk = isSpaceDisk;
1522  state->maxSpaceStatus = state->status;
1523  }
1524 }

References LogicalTapeSetBlocks().

Referenced by tuplesort_get_stats(), and tuplesort_reset().

◆ tuplesort_used_bound()

bool tuplesort_used_bound ( Tuplesortstate state)

Definition at line 1379 of file tuplesort.c.

1380 {
1381  return state->boundUsed;
1382 }

Referenced by ExecIncrementalSort().

◆ worker_freeze_result_tape()

static void worker_freeze_result_tape ( Tuplesortstate state)
static

Definition at line 4571 of file tuplesort.c.

4572 {
4573  Sharedsort *shared = state->shared;
4574  TapeShare output;
4575 
4576  Assert(WORKER(state));
4577  Assert(state->result_tape != NULL);
4578  Assert(state->memtupcount == 0);
4579 
4580  /*
4581  * Free most remaining memory, in case caller is sensitive to our holding
4582  * on to it. memtuples may not be a tiny merge heap at this point.
4583  */
4584  pfree(state->memtuples);
4585  /* Be tidy */
4586  state->memtuples = NULL;
4587  state->memtupsize = 0;
4588 
4589  /*
4590  * The parallel worker's result tape metadata must be stored in shared
4591  * memory, where the leader can find it
4592  */
4593  LogicalTapeFreeze(state->result_tape, &output);
4594 
4595  /* Store properties of output tape, and update finished worker count */
4596  SpinLockAcquire(&shared->mutex);
4597  shared->tapes[state->worker] = output;
4598  shared->workersFinished++;
4599  SpinLockRelease(&shared->mutex);
4600 }

References Assert(), LogicalTapeFreeze(), Sharedsort::mutex, output(), pfree(), SpinLockAcquire, SpinLockRelease, Sharedsort::tapes, WORKER, and Sharedsort::workersFinished.

Referenced by mergeruns(), and worker_nomergeruns().

◆ worker_get_identifier()

static int worker_get_identifier ( Tuplesortstate state)
static

Definition at line 4543 of file tuplesort.c.

4544 {
4545  Sharedsort *shared = state->shared;
4546  int worker;
4547 
4548  Assert(WORKER(state));
4549 
4550  SpinLockAcquire(&shared->mutex);
4551  worker = shared->currentWorker++;
4552  SpinLockRelease(&shared->mutex);
4553 
4554  return worker;
4555 }

References Assert(), Sharedsort::currentWorker, Sharedsort::mutex, SpinLockAcquire, SpinLockRelease, and WORKER.

Referenced by tuplesort_begin_common().

◆ worker_nomergeruns()

static void worker_nomergeruns ( Tuplesortstate state)
static

Definition at line 4609 of file tuplesort.c.

4610 {
4611  Assert(WORKER(state));
4612  Assert(state->result_tape == NULL);
4613  Assert(state->nOutputRuns == 1);
4614 
4615  state->result_tape = state->destTape;
4616  worker_freeze_result_tape(state);
4617 }

References Assert(), WORKER, and worker_freeze_result_tape().

Referenced by tuplesort_performsort().

◆ writetup_cluster()

static void writetup_cluster ( Tuplesortstate state,
LogicalTape tape,
SortTuple stup 
)
static

Definition at line 4078 of file tuplesort.c.

4079 {
4080  HeapTuple tuple = (HeapTuple) stup->tuple;
4081  unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
4082 
4083  /* We need to store t_self, but not other fields of HeapTupleData */
4084  LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
4085  LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
4086  LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
4087  if (state->randomAccess) /* need trailing length word? */
4088  LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
4089 
4090  if (!state->slabAllocatorUsed)
4091  {
4092  FREEMEM(state, GetMemoryChunkSpace(tuple));
4093  heap_freetuple(tuple);
4094  }
4095 }
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338

References FREEMEM, GetMemoryChunkSpace(), heap_freetuple(), LogicalTapeWrite(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and SortTuple::tuple.

Referenced by tuplesort_begin_cluster().

◆ writetup_datum()

static void writetup_datum ( Tuplesortstate state,
LogicalTape tape,
SortTuple stup 
)
static

Definition at line 4397 of file tuplesort.c.

4398 {
4399  void *waddr;
4400  unsigned int tuplen;
4401  unsigned int writtenlen;
4402 
4403  if (stup->isnull1)
4404  {
4405  waddr = NULL;
4406  tuplen = 0;
4407  }
4408  else if (!state->tuples)
4409  {
4410  waddr = &stup->datum1;
4411  tuplen = sizeof(Datum);
4412  }
4413  else
4414  {
4415  waddr = stup->tuple;
4416  tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen);
4417  Assert(tuplen != 0);
4418  }
4419 
4420  writtenlen = tuplen + sizeof(unsigned int);
4421 
4422  LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
4423  LogicalTapeWrite(tape, waddr, tuplen);
4424  if (state->randomAccess) /* need trailing length word? */
4425  LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
4426 
4427  if (!state->slabAllocatorUsed && stup->tuple)
4428  {
4429  FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
4430  pfree(stup->tuple);
4431  }
4432 }
Size datumGetSize(Datum value, bool typByVal, int typLen)
Definition: datum.c:65

References Assert(), SortTuple::datum1, datumGetSize(), FREEMEM, GetMemoryChunkSpace(), SortTuple::isnull1, LogicalTapeWrite(), pfree(), PointerGetDatum, and SortTuple::tuple.

Referenced by tuplesort_begin_datum().

◆ writetup_heap()

static void writetup_heap ( Tuplesortstate state,
LogicalTape tape,
SortTuple stup 
)
static

Definition at line 3840 of file tuplesort.c.

3841 {
3842  MinimalTuple tuple = (MinimalTuple) stup->tuple;
3843 
3844  /* the part of the MinimalTuple we'll write: */
3845  char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
3846  unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
3847 
3848  /* total on-disk footprint: */
3849  unsigned int tuplen = tupbodylen + sizeof(int);
3850 
3851  LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
3852  LogicalTapeWrite(tape, (void *) tupbody, tupbodylen);
3853  if (state->randomAccess) /* need trailing length word? */
3854  LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
3855 
3856  if (!state->slabAllocatorUsed)
3857  {
3858  FREEMEM(state, GetMemoryChunkSpace(tuple));
3859  heap_free_minimal_tuple(tuple);
3860  }
3861 }
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1427

References FREEMEM, GetMemoryChunkSpace(), heap_free_minimal_tuple(), LogicalTapeWrite(), MINIMAL_TUPLE_DATA_OFFSET, MinimalTupleData::t_len, and SortTuple::tuple.

Referenced by tuplesort_begin_heap().

◆ writetup_index()

static void writetup_index ( Tuplesortstate state,
LogicalTape tape,
SortTuple stup 
)
static

Definition at line 4328 of file tuplesort.c.

4329 {
4330  IndexTuple tuple = (IndexTuple) stup->tuple;
4331  unsigned int tuplen;
4332 
4333  tuplen = IndexTupleSize(tuple) + sizeof(tuplen);
4334  LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
4335  LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple));
4336  if (state->randomAccess) /* need trailing length word? */
4337  LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
4338 
4339  if (!state->slabAllocatorUsed)
4340  {
4341  FREEMEM(state, GetMemoryChunkSpace(tuple));
4342  pfree(tuple);
4343  }
4344 }
#define IndexTupleSize(itup)
Definition: itup.h:70

References FREEMEM, GetMemoryChunkSpace(), IndexTupleSize, LogicalTapeWrite(), pfree(), and SortTuple::tuple.

Referenced by tuplesort_begin_index_btree(), tuplesort_begin_index_gist(), and tuplesort_begin_index_hash().

Variable Documentation

◆ trace_sort