/*-------------------------------------------------------------------------
 *
 * tuplesort.h
 *	  Generalized tuple sorting routines.
 *
 * This module handles sorting of heap tuples, index tuples, or single
 * Datums (and could easily support other kinds of sortable objects,
 * if necessary).  It works efficiently for both small and large amounts
 * of data.  Small amounts are sorted in-memory using qsort().  Large
 * amounts are sorted using temporary files and a standard external sort
 * algorithm.  Parallel sorts use a variant of this external sort
 * algorithm, and are typically only used for large amounts of data.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/utils/tuplesort.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef TUPLESORT_H
#define TUPLESORT_H

#include "access/itup.h"
#include "executor/tuptable.h"
#include "storage/dsm.h"
#include "utils/relcache.h"

/*
 * Tuplesortstate and Sharedsort are opaque types whose details are not
 * known outside tuplesort.c.
 */
typedef struct Tuplesortstate Tuplesortstate;
typedef struct Sharedsort Sharedsort;

/*
 * Tuplesort parallel coordination state, allocated by each participant in
 * local memory.  Participant caller initializes everything.  See usage notes
 * below.
 */
typedef struct SortCoordinateData
{
	/* Worker process?  If not, must be leader. */
	bool		isWorker;

	/*
	 * Leader-process-passed number of participants known launched (workers
	 * set this to -1).  Includes state within leader needed for it to
	 * participate as a worker, if any.
	 */
	int			nParticipants;

	/* Private opaque state (points to shared memory) */
	Sharedsort *sharedsort;
} SortCoordinateData;

typedef struct SortCoordinateData *SortCoordinate;

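/*
 * An illustrative sketch of how a participant might fill in a
 * SortCoordinateData before calling a tuplesort_begin* routine.  Here
 * "am_worker", "nlaunched", and "sharedsort" are caller-supplied
 * placeholders; sharedsort is assumed to point at memory the leader set up
 * with tuplesort_initialize_shared().
 *
 *		SortCoordinate coordinate = (SortCoordinate)
 *			palloc0(sizeof(SortCoordinateData));
 *
 *		coordinate->isWorker = am_worker;
 *		coordinate->nParticipants = am_worker ? -1 : nlaunched;
 *		coordinate->sharedsort = sharedsort;
 */
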
/*
 * Data structures for reporting sort statistics.  Note that
 * TuplesortInstrumentation can't contain any pointers because we
 * sometimes put it in shared memory.
 */
typedef enum
{
	SORT_TYPE_STILL_IN_PROGRESS = 0,
	SORT_TYPE_TOP_N_HEAPSORT,
	SORT_TYPE_QUICKSORT,
	SORT_TYPE_EXTERNAL_SORT,
	SORT_TYPE_EXTERNAL_MERGE
} TuplesortMethod;

typedef enum
{
	SORT_SPACE_TYPE_DISK,
	SORT_SPACE_TYPE_MEMORY
} TuplesortSpaceType;

typedef struct TuplesortInstrumentation
{
	TuplesortMethod sortMethod; /* sort algorithm used */
	TuplesortSpaceType spaceType;	/* type of space spaceUsed represents */
	long		spaceUsed;		/* space consumption, in kB */
} TuplesortInstrumentation;

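/*
 * An illustrative sketch of reporting these statistics, roughly the way
 * EXPLAIN ANALYZE formats them.  "state" is assumed to be a Tuplesortstate
 * on which tuplesort_performsort() has already been called.
 *
 *		TuplesortInstrumentation stats;
 *
 *		tuplesort_get_stats(state, &stats);
 *		elog(LOG, "Sort Method: %s  %s: %ldkB",
 *			 tuplesort_method_name(stats.sortMethod),
 *			 tuplesort_space_type_name(stats.spaceType),
 *			 stats.spaceUsed);
 */
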
/*
 * We provide multiple interfaces to what is essentially the same code,
 * since different callers have different data to be sorted and want to
 * specify the sort key information differently.  There are two APIs for
 * sorting HeapTuples and two more for sorting IndexTuples.  Yet another
 * API supports sorting bare Datums.
 *
 * Serial sort callers should pass NULL for their coordinate argument.
 *
 * The "heap" API actually stores/sorts MinimalTuples, which means it doesn't
 * preserve the system columns (tuple identity and transaction visibility
 * info).  The sort keys are specified by column numbers within the tuples
 * and sort operator OIDs.  We save some cycles by passing and returning the
 * tuples in TupleTableSlots, rather than forming actual HeapTuples (which'd
 * have to be converted to MinimalTuples).  This API works well for sorts
 * executed as parts of plan trees.
 *
 * The "cluster" API stores/sorts full HeapTuples including all visibility
 * info.  The sort keys are specified by reference to a btree index that is
 * defined on the relation to be sorted.  Note that putheaptuple/getheaptuple
 * go with this API, not the "begin_heap" one!
 *
 * The "index_btree" API stores/sorts IndexTuples (preserving all their
 * header fields).  The sort keys are specified by a btree index definition.
 *
 * The "index_hash" API is similar to index_btree, but the tuples are
 * actually sorted by their hash codes not the raw data.
 *
 * Parallel sort callers are required to coordinate multiple tuplesort states
 * in a leader process and one or more worker processes.  The leader process
 * must launch workers, and have each perform an independent "partial"
 * tuplesort, typically fed by the parallel heap interface.  The leader later
 * produces the final output (internally, it merges runs output by workers).
 *
 * Callers must do the following to perform a sort in parallel using multiple
 * worker processes:
 *
 * 1. Request tuplesort-private shared memory for n workers.  Use
 *    tuplesort_estimate_shared() to get the required size.
 * 2. Have leader process initialize allocated shared memory using
 *    tuplesort_initialize_shared().  Launch workers.
 * 3. Initialize a coordinate argument within both the leader process, and
 *    for each worker process.  This has a pointer to the shared
 *    tuplesort-private structure, as well as some caller-initialized fields.
 *    Leader's coordinate argument reliably indicates number of workers
 *    launched (this is unused by workers).
 * 4. Begin a tuplesort using some appropriate tuplesort_begin* routine,
 *    (passing the coordinate argument) within each worker.  The workMem
 *    arguments need not be identical.  All other arguments should match
 *    exactly, though.
 * 5. tuplesort_attach_shared() should be called by all workers.  Feed tuples
 *    to each worker, and call tuplesort_performsort() within each when input
 *    is exhausted.
 * 6. Call tuplesort_end() in each worker process.  Worker processes can shut
 *    down once tuplesort_end() returns.
 * 7. Begin a tuplesort in the leader using the same tuplesort_begin*
 *    routine, passing a leader-appropriate coordinate argument (this can
 *    happen as early as during step 3, actually, since we only need to know
 *    the number of workers successfully launched).  The leader must now wait
 *    for workers to finish.  Caller must use own mechanism for ensuring that
 *    next step isn't reached until all workers have called and returned from
 *    tuplesort_performsort().  (Note that it's okay if workers have already
 *    also called tuplesort_end() by then.)
 * 8. Call tuplesort_performsort() in leader.  Consume output using the
 *    appropriate tuplesort_get* routine.  Leader can skip this step if
 *    tuplesort turns out to be unnecessary.
 * 9. Call tuplesort_end() in leader.
 *
 * This division of labor assumes nothing about how input tuples are produced,
 * but does require that caller combine the state of multiple tuplesorts for
 * any purpose other than producing the final output.  For example, callers
 * must consider that tuplesort_get_stats() reports on only one worker's role
 * in a sort (or the leader's role), and not statistics for the sort as a
 * whole.
 *
 * Note that callers may use the leader process to sort runs as if it was an
 * independent worker process (prior to the process performing a leader sort
 * to produce the final sorted output).  Doing so only requires a second
 * "partial" tuplesort within the leader process, initialized like that of a
 * worker process.  The steps above don't touch on this directly.  The only
 * difference is that the tuplesort_attach_shared() call is never needed within
 * leader process, because the backend as a whole holds the shared fileset
 * reference.  A worker Tuplesortstate in leader is expected to do exactly the
 * same amount of total initial processing work as a worker process
 * Tuplesortstate, since the leader process has nothing else to do before
 * workers finish.
 *
 * Note that only a very small amount of memory will be allocated prior to
 * the leader state first consuming input, and that workers will free the
 * vast majority of their memory upon returning from tuplesort_performsort().
 * Callers can rely on this to arrange for memory to be used in a way that
 * respects a workMem-style budget across an entire parallel sort operation.
 *
 * Callers are responsible for parallel safety in general.  However, they
 * can at least rely on there being no parallel safety hazards within
 * tuplesort, because tuplesort thinks of the sort as several independent
 * sorts whose results are combined.  Since, in general, the behavior of
 * sort operators is immutable, caller need only worry about the parallel
 * safety of whatever the process is through which input tuples are
 * generated (typically, caller uses a parallel heap scan).
 */
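
/*
 * An illustrative sketch of the parallel steps above, using the "heap" API.
 * DSM/toc bookkeeping, worker launch, and the leader's wait for workers are
 * the caller's responsibility and are only hinted at; KEY_TUPLESORT_SHARED,
 * launch_workers(), wait_for_workers(), get_next_slot(), and process_slot()
 * are placeholders, not part of any PostgreSQL API.
 *
 * Leader, before launching workers (steps 1-2):
 *
 *		Size		size = tuplesort_estimate_shared(nworkers);
 *		Sharedsort *sharedsort = (Sharedsort *) shm_toc_allocate(toc, size);
 *
 *		tuplesort_initialize_shared(sharedsort, nworkers, seg);
 *		shm_toc_insert(toc, KEY_TUPLESORT_SHARED, sharedsort);
 *		launch_workers();
 *
 * Each worker (steps 3-6):
 *
 *		SortCoordinateData coordinate;
 *		Tuplesortstate *state;
 *
 *		coordinate.isWorker = true;
 *		coordinate.nParticipants = -1;
 *		coordinate.sharedsort = shm_toc_lookup(toc, KEY_TUPLESORT_SHARED, false);
 *		tuplesort_attach_shared(coordinate.sharedsort, seg);
 *
 *		state = tuplesort_begin_heap(tupDesc, nkeys, attNums, sortOperators,
 *									 sortCollations, nullsFirstFlags,
 *									 work_mem, &coordinate, false);
 *		while (get_next_slot(slot))
 *			tuplesort_puttupleslot(state, slot);
 *		tuplesort_performsort(state);
 *		tuplesort_end(state);
 *
 * Leader, once every worker has returned from tuplesort_performsort()
 * (steps 7-9):
 *
 *		coordinate.isWorker = false;
 *		coordinate.nParticipants = nworkers_launched;
 *		coordinate.sharedsort = sharedsort;
 *		state = tuplesort_begin_heap(tupDesc, nkeys, attNums, sortOperators,
 *									 sortCollations, nullsFirstFlags,
 *									 work_mem, &coordinate, false);
 *		wait_for_workers();
 *		tuplesort_performsort(state);
 *		while (tuplesort_gettupleslot(state, true, false, slot, NULL))
 *			process_slot(slot);
 *		tuplesort_end(state);
 */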

extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
											int nkeys, AttrNumber *attNums,
											Oid *sortOperators, Oid *sortCollations,
											bool *nullsFirstFlags,
											int workMem, SortCoordinate coordinate,
											bool randomAccess);
extern Tuplesortstate *tuplesort_begin_cluster(TupleDesc tupDesc,
											   Relation indexRel, int workMem,
											   SortCoordinate coordinate, bool randomAccess);
extern Tuplesortstate *tuplesort_begin_index_btree(Relation heapRel,
												   Relation indexRel,
												   bool enforceUnique,
												   int workMem, SortCoordinate coordinate,
												   bool randomAccess);
extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
												  Relation indexRel,
												  uint32 high_mask,
												  uint32 low_mask,
												  uint32 max_buckets,
												  int workMem, SortCoordinate coordinate,
												  bool randomAccess);
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
											 Oid sortOperator, Oid sortCollation,
											 bool nullsFirstFlag,
											 int workMem, SortCoordinate coordinate,
											 bool randomAccess);
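
/*
 * An illustrative serial sort through the "heap" API: a serial caller passes
 * NULL for the coordinate argument.  fetch_next_slot() and process_slot()
 * are placeholders for however the caller produces and consumes tuples
 * (typically a child plan node).  tuplesort_set_bound() may optionally be
 * called right after tuplesort_begin_heap(), before any tuples are loaded,
 * to request a bounded (top-N / LIMIT) sort.
 *
 *		Tuplesortstate *state;
 *
 *		state = tuplesort_begin_heap(tupDesc, nkeys, attNums,
 *									 sortOperators, sortCollations,
 *									 nullsFirstFlags, work_mem,
 *									 NULL, false);
 *		while (fetch_next_slot(slot))
 *			tuplesort_puttupleslot(state, slot);
 *		tuplesort_performsort(state);
 *		while (tuplesort_gettupleslot(state, true, true, slot, NULL))
 *			process_slot(slot);
 *		tuplesort_end(state);
 */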

extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);

extern void tuplesort_puttupleslot(Tuplesortstate *state,
								   TupleTableSlot *slot);
extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
										  Relation rel, ItemPointer self,
										  Datum *values, bool *isnull);
extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
							   bool isNull);

extern void tuplesort_performsort(Tuplesortstate *state);

extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
								   bool copy, TupleTableSlot *slot, Datum *abbrev);
extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
							   Datum *val, bool *isNull, Datum *abbrev);

extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
								 bool forward);

extern void tuplesort_end(Tuplesortstate *state);

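/*
 * An illustrative sketch of the bare-Datum API.  INT8OID stands in for the
 * datatype being sorted; sort_op_oid is a placeholder for that type's
 * ordering operator, and fetch_next_value()/process_value() for the caller's
 * input and output.  For pass-by-reference types, each value returned by
 * tuplesort_getdatum() is a freshly palloc'd copy owned by the caller.
 *
 *		Datum		value;
 *		bool		isnull;
 *
 *		state = tuplesort_begin_datum(INT8OID, sort_op_oid, InvalidOid,
 *									  false, work_mem, NULL, false);
 *		while (fetch_next_value(&value, &isnull))
 *			tuplesort_putdatum(state, value, isnull);
 *		tuplesort_performsort(state);
 *		while (tuplesort_getdatum(state, true, &value, &isnull, NULL))
 *			process_value(value, isnull);
 *		tuplesort_end(state);
 */
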
extern void tuplesort_get_stats(Tuplesortstate *state,
								TuplesortInstrumentation *stats);
extern const char *tuplesort_method_name(TuplesortMethod m);
extern const char *tuplesort_space_type_name(TuplesortSpaceType t);

extern int	tuplesort_merge_order(int64 allowedMem);

extern Size tuplesort_estimate_shared(int nworkers);
extern void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers,
										dsm_segment *seg);
extern void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg);

/*
 * These routines may only be called if randomAccess was specified 'true'.
 * Likewise, backwards scan in gettuple/getdatum is only allowed if
 * randomAccess was specified.  Note that parallel sorts do not support
 * randomAccess.
 */

extern void tuplesort_rescan(Tuplesortstate *state);
extern void tuplesort_markpos(Tuplesortstate *state);
extern void tuplesort_restorepos(Tuplesortstate *state);

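/*
 * An illustrative sketch of random access, as used for example by a Sort
 * node feeding the inner side of a merge join.  Only the final argument
 * (randomAccess = true) differs from a plain serial sort; the other
 * tuplesort_begin_heap() arguments are elided here, and process_slot() is a
 * placeholder.
 *
 *		state = tuplesort_begin_heap(..., work_mem, NULL, true);
 *		(feed tuples, then call tuplesort_performsort(state))
 *
 *		tuplesort_markpos(state);			(remember the current position)
 *		while (tuplesort_gettupleslot(state, true, true, slot, NULL))
 *			process_slot(slot);
 *		tuplesort_restorepos(state);		(return to the marked position)
 *		tuplesort_rescan(state);			(or rewind to the very start)
 */
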
#endif							/* TUPLESORT_H */