PostgreSQL Source Code  git master
nodeSort.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSort.c
4  * Routines to handle sorting of relations.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSort.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/parallel.h"
19 #include "executor/execdebug.h"
20 #include "executor/nodeSort.h"
21 #include "miscadmin.h"
22 #include "utils/tuplesort.h"
23 
24 
25 /* ----------------------------------------------------------------
26  * ExecSort
27  *
28  * Sorts tuples from the outer subtree of the node using tuplesort,
29  * which saves the results in a temporary file or memory. After the
30  * initial call, returns a tuple from the file with each call.
31  *
32  * There are two distinct ways that this sort can be performed:
33  *
34  * 1) When the result is a single column we perform a Datum sort.
35  *
36  * 2) When the result contains multiple columns we perform a tuple sort.
37  *
38  * We could do this by always performing a tuple sort, however sorting
39  * Datums only can be significantly faster than sorting tuples,
40  * especially when the Datums are of a pass-by-value type.
41  *
42  * Conditions:
43  * -- none.
44  *
45  * Initial States:
46  * -- the outer child is prepared to return the first tuple.
47  * ----------------------------------------------------------------
48  */
49 static TupleTableSlot *
51 {
52  SortState *node = castNode(SortState, pstate);
53  EState *estate;
54  ScanDirection dir;
55  Tuplesortstate *tuplesortstate;
56  TupleTableSlot *slot;
57 
59 
60  /*
61  * get state info from node
62  */
63  SO1_printf("ExecSort: %s\n",
64  "entering routine");
65 
66  estate = node->ss.ps.state;
67  dir = estate->es_direction;
68  tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 
70  /*
71  * If first time through, read all tuples from outer plan and pass them to
72  * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73  */
74 
75  if (!node->sort_Done)
76  {
77  Sort *plannode = (Sort *) node->ss.ps.plan;
78  PlanState *outerNode;
79  TupleDesc tupDesc;
80  int tuplesortopts = TUPLESORT_NONE;
81 
82  SO1_printf("ExecSort: %s\n",
83  "sorting subplan");
84 
85  /*
86  * Want to scan subplan in the forward direction while creating the
87  * sorted data.
88  */
90 
91  /*
92  * Initialize tuplesort module.
93  */
94  SO1_printf("ExecSort: %s\n",
95  "calling tuplesort_begin");
96 
97  outerNode = outerPlanState(node);
98  tupDesc = ExecGetResultType(outerNode);
99 
100  if (node->randomAccess)
101  tuplesortopts |= TUPLESORT_RANDOMACCESS;
102  if (node->bounded)
103  tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104 
105  if (node->datumSort)
106  tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107  plannode->sortOperators[0],
108  plannode->collations[0],
109  plannode->nullsFirst[0],
110  work_mem,
111  NULL,
112  tuplesortopts);
113  else
114  tuplesortstate = tuplesort_begin_heap(tupDesc,
115  plannode->numCols,
116  plannode->sortColIdx,
117  plannode->sortOperators,
118  plannode->collations,
119  plannode->nullsFirst,
120  work_mem,
121  NULL,
122  tuplesortopts);
123  if (node->bounded)
124  tuplesort_set_bound(tuplesortstate, node->bound);
125  node->tuplesortstate = (void *) tuplesortstate;
126 
127  /*
128  * Scan the subplan and feed all the tuples to tuplesort using the
129  * appropriate method based on the type of sort we're doing.
130  */
131  if (node->datumSort)
132  {
133  for (;;)
134  {
135  slot = ExecProcNode(outerNode);
136 
137  if (TupIsNull(slot))
138  break;
139  slot_getsomeattrs(slot, 1);
140  tuplesort_putdatum(tuplesortstate,
141  slot->tts_values[0],
142  slot->tts_isnull[0]);
143  }
144  }
145  else
146  {
147  for (;;)
148  {
149  slot = ExecProcNode(outerNode);
150 
151  if (TupIsNull(slot))
152  break;
153  tuplesort_puttupleslot(tuplesortstate, slot);
154  }
155  }
156 
157  /*
158  * Complete the sort.
159  */
160  tuplesort_performsort(tuplesortstate);
161 
162  /*
163  * restore to user specified direction
164  */
165  estate->es_direction = dir;
166 
167  /*
168  * finally set the sorted flag to true
169  */
170  node->sort_Done = true;
171  node->bounded_Done = node->bounded;
172  node->bound_Done = node->bound;
173  if (node->shared_info && node->am_worker)
174  {
176 
178  Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
180  tuplesort_get_stats(tuplesortstate, si);
181  }
182  SO1_printf("ExecSort: %s\n", "sorting done");
183  }
184 
185  SO1_printf("ExecSort: %s\n",
186  "retrieving tuple from tuplesort");
187 
188  slot = node->ss.ps.ps_ResultTupleSlot;
189 
190  /*
191  * Fetch the next sorted item from the appropriate tuplesort function. For
192  * datum sorts we must manage the slot ourselves and leave it clear when
193  * tuplesort_getdatum returns false to indicate there are no more datums.
194  * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195  * empties the slot when it runs out of tuples.
196  */
197  if (node->datumSort)
198  {
199  ExecClearTuple(slot);
200  if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201  false, &(slot->tts_values[0]),
202  &(slot->tts_isnull[0]), NULL))
203  ExecStoreVirtualTuple(slot);
204  }
205  else
206  (void) tuplesort_gettupleslot(tuplesortstate,
208  false, slot, NULL);
209 
210  return slot;
211 }
212 
213 /* ----------------------------------------------------------------
214  * ExecInitSort
215  *
216  * Creates the run-time state information for the sort node
217  * produced by the planner and initializes its outer subtree.
218  * ----------------------------------------------------------------
219  */
220 SortState *
221 ExecInitSort(Sort *node, EState *estate, int eflags)
222 {
223  SortState *sortstate;
224  TupleDesc outerTupDesc;
225 
226  SO1_printf("ExecInitSort: %s\n",
227  "initializing sort node");
228 
229  /*
230  * create state structure
231  */
232  sortstate = makeNode(SortState);
233  sortstate->ss.ps.plan = (Plan *) node;
234  sortstate->ss.ps.state = estate;
235  sortstate->ss.ps.ExecProcNode = ExecSort;
236 
237  /*
238  * We must have random access to the sort output to do backward scan or
239  * mark/restore. We also prefer to materialize the sort output if we
240  * might be called on to rewind and replay it many times.
241  */
242  sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
244  EXEC_FLAG_MARK)) != 0;
245 
246  sortstate->bounded = false;
247  sortstate->sort_Done = false;
248  sortstate->tuplesortstate = NULL;
249 
250  /*
251  * Miscellaneous initialization
252  *
253  * Sort nodes don't initialize their ExprContexts because they never call
254  * ExecQual or ExecProject.
255  */
256 
257  /*
258  * initialize child nodes
259  *
260  * We shield the child node from the need to support REWIND, BACKWARD, or
261  * MARK/RESTORE.
262  */
264 
265  outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
266 
267  /*
268  * Initialize scan slot and type.
269  */
270  ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
271 
272  /*
273  * Initialize return slot and type. No need to initialize projection info
274  * because this node doesn't do projections.
275  */
277  sortstate->ss.ps.ps_ProjInfo = NULL;
278 
279  outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
280 
281  /*
282  * We perform a Datum sort when we're sorting just a single column,
283  * otherwise we perform a tuple sort.
284  */
285  if (outerTupDesc->natts == 1)
286  sortstate->datumSort = true;
287  else
288  sortstate->datumSort = false;
289 
290  SO1_printf("ExecInitSort: %s\n",
291  "sort node initialized");
292 
293  return sortstate;
294 }
295 
296 /* ----------------------------------------------------------------
297  * ExecEndSort(node)
298  * ----------------------------------------------------------------
299  */
300 void
302 {
303  SO1_printf("ExecEndSort: %s\n",
304  "shutting down sort node");
305 
306  /*
307  * Release tuplesort resources
308  */
309  if (node->tuplesortstate != NULL)
311  node->tuplesortstate = NULL;
312 
313  /*
314  * shut down the subplan
315  */
317 
318  SO1_printf("ExecEndSort: %s\n",
319  "sort node shutdown");
320 }
321 
322 /* ----------------------------------------------------------------
323  * ExecSortMarkPos
324  *
325  * Calls tuplesort to save the current position in the sorted file.
326  * ----------------------------------------------------------------
327  */
328 void
330 {
331  /*
332  * if we haven't sorted yet, just return
333  */
334  if (!node->sort_Done)
335  return;
336 
338 }
339 
340 /* ----------------------------------------------------------------
341  * ExecSortRestrPos
342  *
343  * Calls tuplesort to restore the last saved sort file position.
344  * ----------------------------------------------------------------
345  */
346 void
348 {
349  /*
350  * if we haven't sorted yet, just return.
351  */
352  if (!node->sort_Done)
353  return;
354 
355  /*
356  * restore the scan to the previously marked position
357  */
359 }
360 
361 void
363 {
365 
366  /*
367  * If we haven't sorted yet, just return. If outerplan's chgParam is not
368  * NULL then it will be re-scanned by ExecProcNode, else no reason to
369  * re-scan it at all.
370  */
371  if (!node->sort_Done)
372  return;
373 
374  /* must drop pointer to sort result tuple */
376 
377  /*
378  * If subnode is to be rescanned then we forget previous sort results; we
379  * have to re-read the subplan and re-sort. Also must re-sort if the
380  * bounded-sort parameters changed or we didn't select randomAccess.
381  *
382  * Otherwise we can just rewind and rescan the sorted output.
383  */
384  if (outerPlan->chgParam != NULL ||
385  node->bounded != node->bounded_Done ||
386  node->bound != node->bound_Done ||
387  !node->randomAccess)
388  {
389  node->sort_Done = false;
391  node->tuplesortstate = NULL;
392 
393  /*
394  * if chgParam of subnode is not null then plan will be re-scanned by
395  * first ExecProcNode.
396  */
397  if (outerPlan->chgParam == NULL)
399  }
400  else
402 }
403 
404 /* ----------------------------------------------------------------
405  * Parallel Query Support
406  * ----------------------------------------------------------------
407  */
408 
409 /* ----------------------------------------------------------------
410  * ExecSortEstimate
411  *
412  * Estimate space required to propagate sort statistics.
413  * ----------------------------------------------------------------
414  */
415 void
417 {
418  Size size;
419 
420  /* don't need this if not instrumenting or no workers */
421  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
422  return;
423 
425  size = add_size(size, offsetof(SharedSortInfo, sinstrument));
427  shm_toc_estimate_keys(&pcxt->estimator, 1);
428 }
429 
430 /* ----------------------------------------------------------------
431  * ExecSortInitializeDSM
432  *
433  * Initialize DSM space for sort statistics.
434  * ----------------------------------------------------------------
435  */
436 void
438 {
439  Size size;
440 
441  /* don't need this if not instrumenting or no workers */
442  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
443  return;
444 
445  size = offsetof(SharedSortInfo, sinstrument)
446  + pcxt->nworkers * sizeof(TuplesortInstrumentation);
447  node->shared_info = shm_toc_allocate(pcxt->toc, size);
448  /* ensure any unfilled slots will contain zeroes */
449  memset(node->shared_info, 0, size);
450  node->shared_info->num_workers = pcxt->nworkers;
451  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
452  node->shared_info);
453 }
454 
455 /* ----------------------------------------------------------------
456  * ExecSortInitializeWorker
457  *
458  * Attach worker to DSM space for sort statistics.
459  * ----------------------------------------------------------------
460  */
461 void
463 {
464  node->shared_info =
465  shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
466  node->am_worker = true;
467 }
468 
469 /* ----------------------------------------------------------------
470  * ExecSortRetrieveInstrumentation
471  *
472  * Transfer sort statistics from DSM to private memory.
473  * ----------------------------------------------------------------
474  */
475 void
477 {
478  Size size;
479  SharedSortInfo *si;
480 
481  if (node->shared_info == NULL)
482  return;
483 
484  size = offsetof(SharedSortInfo, sinstrument)
486  si = palloc(size);
487  memcpy(si, node->shared_info, size);
488  node->shared_info = si;
489 }
int ParallelWorkerNumber
Definition: parallel.c:112
size_t Size
Definition: c.h:592
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1639
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1886
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:493
void ExecCreateScanSlotFromOuterPlan(EState *estate, ScanState *scanstate, const TupleTableSlotOps *tts_ops)
Definition: execUtils.c:659
#define SO1_printf(s, p)
Definition: execdebug.h:93
#define outerPlanState(node)
Definition: execnodes.h:1212
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
#define EXEC_FLAG_REWIND
Definition: executor.h:67
#define EXEC_FLAG_MARK
Definition: executor.h:69
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:269
int work_mem
Definition: globals.c:128
#define IsParallelWorker()
Definition: parallel.h:60
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc(Size size)
Definition: mcxt.c:1304
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static TupleTableSlot * ExecSort(PlanState *pstate)
Definition: nodeSort.c:50
void ExecSortMarkPos(SortState *node)
Definition: nodeSort.c:329
void ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
Definition: nodeSort.c:462
void ExecReScanSort(SortState *node)
Definition: nodeSort.c:362
void ExecSortRestrPos(SortState *node)
Definition: nodeSort.c:347
void ExecSortEstimate(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:416
void ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:437
void ExecSortRetrieveInstrumentation(SortState *node)
Definition: nodeSort.c:476
SortState * ExecInitSort(Sort *node, EState *estate, int eflags)
Definition: nodeSort.c:221
void ExecEndSort(SortState *node)
Definition: nodeSort.c:301
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
#define outerPlan(node)
Definition: plannodes.h:182
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
ScanDirection
Definition: sdir.h:25
@ ForwardScanDirection
Definition: sdir.h:28
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
ScanDirection es_direction
Definition: execnodes.h:621
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
Instrumentation * instrument
Definition: execnodes.h:1126
Plan * plan
Definition: execnodes.h:1116
EState * state
Definition: execnodes.h:1118
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1154
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1156
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1122
int plan_node_id
Definition: plannodes.h:151
PlanState ps
Definition: execnodes.h:1556
TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:2317
bool sort_Done
Definition: execnodes.h:2330
bool am_worker
Definition: execnodes.h:2334
int64 bound_Done
Definition: execnodes.h:2332
bool bounded_Done
Definition: execnodes.h:2331
void * tuplesortstate
Definition: execnodes.h:2333
bool randomAccess
Definition: execnodes.h:2327
SharedSortInfo * shared_info
Definition: execnodes.h:2336
bool datumSort
Definition: execnodes.h:2335
ScanState ss
Definition: execnodes.h:2326
bool bounded
Definition: execnodes.h:2328
int64 bound
Definition: execnodes.h:2329
int numCols
Definition: plannodes.h:934
bool * tts_isnull
Definition: tuptable.h:127
Datum * tts_values
Definition: tuptable.h:125
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
void tuplesort_rescan(Tuplesortstate *state)
Definition: tuplesort.c:2434
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1379
void tuplesort_get_stats(Tuplesortstate *state, TuplesortInstrumentation *stats)
Definition: tuplesort.c:2531
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:966
void tuplesort_markpos(Tuplesortstate *state)
Definition: tuplesort.c:2467
void tuplesort_restorepos(Tuplesortstate *state)
Definition: tuplesort.c:2498
void tuplesort_set_bound(Tuplesortstate *state, int64 bound)
Definition: tuplesort.c:838
#define TUPLESORT_NONE
Definition: tuplesort.h:93
#define TUPLESORT_RANDOMACCESS
Definition: tuplesort.h:96
#define TUPLESORT_ALLOWBOUNDED
Definition: tuplesort.h:99
struct TuplesortInstrumentation TuplesortInstrumentation
void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
void tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, SortCoordinate coordinate, int sortopt)
bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, TupleTableSlot *slot, Datum *abbrev)
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev)
Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, int workMem, SortCoordinate coordinate, int sortopt)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
static void slot_getsomeattrs(TupleTableSlot *slot, int attnum)
Definition: tuptable.h:355
#define TupIsNull(slot)
Definition: tuptable.h:306