PostgreSQL Source Code git master
nodeSort.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeSort.c
4 * Routines to handle sorting of relations.
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeSort.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "access/parallel.h"
19#include "executor/execdebug.h"
20#include "executor/nodeSort.h"
21#include "miscadmin.h"
22#include "utils/tuplesort.h"
23
24
25/* ----------------------------------------------------------------
26 * ExecSort
27 *
28 * Sorts tuples from the outer subtree of the node using tuplesort,
29 * which saves the results in a temporary file or memory. After the
30 * initial call, returns a tuple from the file with each call.
31 *
32 * There are two distinct ways that this sort can be performed:
33 *
34 * 1) When the result is a single column we perform a Datum sort.
35 *
36 * 2) When the result contains multiple columns we perform a tuple sort.
37 *
38 * We could do this by always performing a tuple sort, however sorting
39 * Datums only can be significantly faster than sorting tuples,
40 * especially when the Datums are of a pass-by-value type.
41 *
42 * Conditions:
43 * -- none.
44 *
45 * Initial States:
46 * -- the outer child is prepared to return the first tuple.
47 * ----------------------------------------------------------------
48 */
49static TupleTableSlot *
51{
52 SortState *node = castNode(SortState, pstate);
53 EState *estate;
54 ScanDirection dir;
55 Tuplesortstate *tuplesortstate;
56 TupleTableSlot *slot;
57
59
60 /*
61 * get state info from node
62 */
63 SO1_printf("ExecSort: %s\n",
64 "entering routine");
65
66 estate = node->ss.ps.state;
67 dir = estate->es_direction;
68 tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69
70 /*
71 * If first time through, read all tuples from outer plan and pass them to
72 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73 */
74
75 if (!node->sort_Done)
76 {
77 Sort *plannode = (Sort *) node->ss.ps.plan;
78 PlanState *outerNode;
79 TupleDesc tupDesc;
80 int tuplesortopts = TUPLESORT_NONE;
81
82 SO1_printf("ExecSort: %s\n",
83 "sorting subplan");
84
85 /*
86 * Want to scan subplan in the forward direction while creating the
87 * sorted data.
88 */
90
91 /*
92 * Initialize tuplesort module.
93 */
94 SO1_printf("ExecSort: %s\n",
95 "calling tuplesort_begin");
96
97 outerNode = outerPlanState(node);
98 tupDesc = ExecGetResultType(outerNode);
99
100 if (node->randomAccess)
101 tuplesortopts |= TUPLESORT_RANDOMACCESS;
102 if (node->bounded)
103 tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104
105 if (node->datumSort)
106 tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107 plannode->sortOperators[0],
108 plannode->collations[0],
109 plannode->nullsFirst[0],
110 work_mem,
111 NULL,
112 tuplesortopts);
113 else
114 tuplesortstate = tuplesort_begin_heap(tupDesc,
115 plannode->numCols,
116 plannode->sortColIdx,
117 plannode->sortOperators,
118 plannode->collations,
119 plannode->nullsFirst,
120 work_mem,
121 NULL,
122 tuplesortopts);
123 if (node->bounded)
124 tuplesort_set_bound(tuplesortstate, node->bound);
125 node->tuplesortstate = tuplesortstate;
126
127 /*
128 * Scan the subplan and feed all the tuples to tuplesort using the
129 * appropriate method based on the type of sort we're doing.
130 */
131 if (node->datumSort)
132 {
133 for (;;)
134 {
135 slot = ExecProcNode(outerNode);
136
137 if (TupIsNull(slot))
138 break;
139 slot_getsomeattrs(slot, 1);
140 tuplesort_putdatum(tuplesortstate,
141 slot->tts_values[0],
142 slot->tts_isnull[0]);
143 }
144 }
145 else
146 {
147 for (;;)
148 {
149 slot = ExecProcNode(outerNode);
150
151 if (TupIsNull(slot))
152 break;
153 tuplesort_puttupleslot(tuplesortstate, slot);
154 }
155 }
156
157 /*
158 * Complete the sort.
159 */
160 tuplesort_performsort(tuplesortstate);
161
162 /*
163 * restore to user specified direction
164 */
165 estate->es_direction = dir;
166
167 /*
168 * finally set the sorted flag to true
169 */
170 node->sort_Done = true;
171 node->bounded_Done = node->bounded;
172 node->bound_Done = node->bound;
173 if (node->shared_info && node->am_worker)
174 {
176
178 Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
180 tuplesort_get_stats(tuplesortstate, si);
181 }
182 SO1_printf("ExecSort: %s\n", "sorting done");
183 }
184
185 SO1_printf("ExecSort: %s\n",
186 "retrieving tuple from tuplesort");
187
188 slot = node->ss.ps.ps_ResultTupleSlot;
189
190 /*
191 * Fetch the next sorted item from the appropriate tuplesort function. For
192 * datum sorts we must manage the slot ourselves and leave it clear when
193 * tuplesort_getdatum returns false to indicate there are no more datums.
194 * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195 * empties the slot when it runs out of tuples.
196 */
197 if (node->datumSort)
198 {
199 ExecClearTuple(slot);
200 if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201 false, &(slot->tts_values[0]),
202 &(slot->tts_isnull[0]), NULL))
204 }
205 else
206 (void) tuplesort_gettupleslot(tuplesortstate,
208 false, slot, NULL);
209
210 return slot;
211}
212
213/* ----------------------------------------------------------------
214 * ExecInitSort
215 *
216 * Creates the run-time state information for the sort node
217 * produced by the planner and initializes its outer subtree.
218 * ----------------------------------------------------------------
219 */
220SortState *
221ExecInitSort(Sort *node, EState *estate, int eflags)
222{
223 SortState *sortstate;
224 TupleDesc outerTupDesc;
225
226 SO1_printf("ExecInitSort: %s\n",
227 "initializing sort node");
228
229 /*
230 * create state structure
231 */
232 sortstate = makeNode(SortState);
233 sortstate->ss.ps.plan = (Plan *) node;
234 sortstate->ss.ps.state = estate;
235 sortstate->ss.ps.ExecProcNode = ExecSort;
236
237 /*
238 * We must have random access to the sort output to do backward scan or
239 * mark/restore. We also prefer to materialize the sort output if we
240 * might be called on to rewind and replay it many times.
241 */
242 sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
244 EXEC_FLAG_MARK)) != 0;
245
246 sortstate->bounded = false;
247 sortstate->sort_Done = false;
248 sortstate->tuplesortstate = NULL;
249
250 /*
251 * Miscellaneous initialization
252 *
253 * Sort nodes don't initialize their ExprContexts because they never call
254 * ExecQual or ExecProject.
255 */
256
257 /*
258 * initialize child nodes
259 *
260 * We shield the child node from the need to support REWIND, BACKWARD, or
261 * MARK/RESTORE.
262 */
264
265 outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
266
267 /*
268 * Initialize scan slot and type.
269 */
270 ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
271
272 /*
273 * Initialize return slot and type. No need to initialize projection info
274 * because this node doesn't do projections.
275 */
277 sortstate->ss.ps.ps_ProjInfo = NULL;
278
279 outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
280
281 /*
282 * We perform a Datum sort when we're sorting just a single column,
283 * otherwise we perform a tuple sort.
284 */
285 if (outerTupDesc->natts == 1)
286 sortstate->datumSort = true;
287 else
288 sortstate->datumSort = false;
289
290 SO1_printf("ExecInitSort: %s\n",
291 "sort node initialized");
292
293 return sortstate;
294}
295
296/* ----------------------------------------------------------------
297 * ExecEndSort(node)
298 * ----------------------------------------------------------------
299 */
300void
302{
303 SO1_printf("ExecEndSort: %s\n",
304 "shutting down sort node");
305
306 /*
307 * Release tuplesort resources
308 */
309 if (node->tuplesortstate != NULL)
311 node->tuplesortstate = NULL;
312
313 /*
314 * shut down the subplan
315 */
317
318 SO1_printf("ExecEndSort: %s\n",
319 "sort node shutdown");
320}
321
322/* ----------------------------------------------------------------
323 * ExecSortMarkPos
324 *
325 * Calls tuplesort to save the current position in the sorted file.
326 * ----------------------------------------------------------------
327 */
328void
330{
331 /*
332 * if we haven't sorted yet, just return
333 */
334 if (!node->sort_Done)
335 return;
336
338}
339
340/* ----------------------------------------------------------------
341 * ExecSortRestrPos
342 *
343 * Calls tuplesort to restore the last saved sort file position.
344 * ----------------------------------------------------------------
345 */
346void
348{
349 /*
350 * if we haven't sorted yet, just return.
351 */
352 if (!node->sort_Done)
353 return;
354
355 /*
356 * restore the scan to the previously marked position
357 */
359}
360
361void
363{
365
366 /*
367 * If we haven't sorted yet, just return. If outerplan's chgParam is not
368 * NULL then it will be re-scanned by ExecProcNode, else no reason to
369 * re-scan it at all.
370 */
371 if (!node->sort_Done)
372 return;
373
374 /* must drop pointer to sort result tuple */
376
377 /*
378 * If subnode is to be rescanned then we forget previous sort results; we
379 * have to re-read the subplan and re-sort. Also must re-sort if the
380 * bounded-sort parameters changed or we didn't select randomAccess.
381 *
382 * Otherwise we can just rewind and rescan the sorted output.
383 */
384 if (outerPlan->chgParam != NULL ||
385 node->bounded != node->bounded_Done ||
386 node->bound != node->bound_Done ||
387 !node->randomAccess)
388 {
389 node->sort_Done = false;
391 node->tuplesortstate = NULL;
392
393 /*
394 * if chgParam of subnode is not null then plan will be re-scanned by
395 * first ExecProcNode.
396 */
397 if (outerPlan->chgParam == NULL)
399 }
400 else
402}
403
404/* ----------------------------------------------------------------
405 * Parallel Query Support
406 * ----------------------------------------------------------------
407 */
408
409/* ----------------------------------------------------------------
410 * ExecSortEstimate
411 *
412 * Estimate space required to propagate sort statistics.
413 * ----------------------------------------------------------------
414 */
415void
417{
418 Size size;
419
420 /* don't need this if not instrumenting or no workers */
421 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
422 return;
423
425 size = add_size(size, offsetof(SharedSortInfo, sinstrument));
428}
429
430/* ----------------------------------------------------------------
431 * ExecSortInitializeDSM
432 *
433 * Initialize DSM space for sort statistics.
434 * ----------------------------------------------------------------
435 */
436void
438{
439 Size size;
440
441 /* don't need this if not instrumenting or no workers */
442 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
443 return;
444
445 size = offsetof(SharedSortInfo, sinstrument)
446 + pcxt->nworkers * sizeof(TuplesortInstrumentation);
447 node->shared_info = shm_toc_allocate(pcxt->toc, size);
448 /* ensure any unfilled slots will contain zeroes */
449 memset(node->shared_info, 0, size);
450 node->shared_info->num_workers = pcxt->nworkers;
451 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
452 node->shared_info);
453}
454
455/* ----------------------------------------------------------------
456 * ExecSortInitializeWorker
457 *
458 * Attach worker to DSM space for sort statistics.
459 * ----------------------------------------------------------------
460 */
461void
463{
464 node->shared_info =
465 shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
466 node->am_worker = true;
467}
468
469/* ----------------------------------------------------------------
470 * ExecSortRetrieveInstrumentation
471 *
472 * Transfer sort statistics from DSM to private memory.
473 * ----------------------------------------------------------------
474 */
475void
477{
478 Size size;
479 SharedSortInfo *si;
480
481 if (node->shared_info == NULL)
482 return;
483
484 size = offsetof(SharedSortInfo, sinstrument)
486 si = palloc(size);
487 memcpy(si, node->shared_info, size);
488 node->shared_info = si;
489}
int ParallelWorkerNumber
Definition: parallel.c:114
#define Assert(condition)
Definition: c.h:812
size_t Size
Definition: c.h:559
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:562
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1739
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1986
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:495
void ExecCreateScanSlotFromOuterPlan(EState *estate, ScanState *scanstate, const TupleTableSlotOps *tts_ops)
Definition: execUtils.c:704
#define SO1_printf(s, p)
Definition: execdebug.h:93
#define outerPlanState(node)
Definition: execnodes.h:1221
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
#define EXEC_FLAG_REWIND
Definition: executor.h:67
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:267
#define EXEC_FLAG_MARK
Definition: executor.h:69
int work_mem
Definition: globals.c:130
#define IsParallelWorker()
Definition: parallel.h:60
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void * palloc(Size size)
Definition: mcxt.c:1317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
SortState * ExecInitSort(Sort *node, EState *estate, int eflags)
Definition: nodeSort.c:221
void ExecSortMarkPos(SortState *node)
Definition: nodeSort.c:329
static TupleTableSlot * ExecSort(PlanState *pstate)
Definition: nodeSort.c:50
void ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
Definition: nodeSort.c:462
void ExecReScanSort(SortState *node)
Definition: nodeSort.c:362
void ExecSortRestrPos(SortState *node)
Definition: nodeSort.c:347
void ExecSortEstimate(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:416
void ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:437
void ExecSortRetrieveInstrumentation(SortState *node)
Definition: nodeSort.c:476
void ExecEndSort(SortState *node)
Definition: nodeSort.c:301
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
#define outerPlan(node)
Definition: plannodes.h:183
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
ScanDirection
Definition: sdir.h:25
@ ForwardScanDirection
Definition: sdir.h:28
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505
static pg_noinline void Size size
Definition: slab.c:607
ScanDirection es_direction
Definition: execnodes.h:631
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
Instrumentation * instrument
Definition: execnodes.h:1135
Plan * plan
Definition: execnodes.h:1125
EState * state
Definition: execnodes.h:1127
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1163
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1165
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1131
int plan_node_id
Definition: plannodes.h:152
PlanState ps
Definition: execnodes.h:1572
TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:2353
bool sort_Done
Definition: execnodes.h:2366
bool am_worker
Definition: execnodes.h:2370
int64 bound_Done
Definition: execnodes.h:2368
bool bounded_Done
Definition: execnodes.h:2367
void * tuplesortstate
Definition: execnodes.h:2369
bool randomAccess
Definition: execnodes.h:2363
SharedSortInfo * shared_info
Definition: execnodes.h:2372
bool datumSort
Definition: execnodes.h:2371
ScanState ss
Definition: execnodes.h:2362
bool bounded
Definition: execnodes.h:2364
int64 bound
Definition: execnodes.h:2365
int numCols
Definition: plannodes.h:937
bool * tts_isnull
Definition: tuptable.h:127
Datum * tts_values
Definition: tuptable.h:125
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:153
void tuplesort_rescan(Tuplesortstate *state)
Definition: tuplesort.c:2402
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1363
void tuplesort_get_stats(Tuplesortstate *state, TuplesortInstrumentation *stats)
Definition: tuplesort.c:2499
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:951
void tuplesort_markpos(Tuplesortstate *state)
Definition: tuplesort.c:2435
void tuplesort_restorepos(Tuplesortstate *state)
Definition: tuplesort.c:2466
void tuplesort_set_bound(Tuplesortstate *state, int64 bound)
Definition: tuplesort.c:838
#define TUPLESORT_NONE
Definition: tuplesort.h:93
#define TUPLESORT_RANDOMACCESS
Definition: tuplesort.h:96
#define TUPLESORT_ALLOWBOUNDED
Definition: tuplesort.h:99
struct TuplesortInstrumentation TuplesortInstrumentation
void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
void tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, int workMem, SortCoordinate coordinate, int sortopt)
bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, TupleTableSlot *slot, Datum *abbrev)
Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, SortCoordinate coordinate, int sortopt)
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev)
static void slot_getsomeattrs(TupleTableSlot *slot, int attnum)
Definition: tuptable.h:355
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define TupIsNull(slot)
Definition: tuptable.h:306