PostgreSQL Source Code  git master
nodeSort.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSort.c
4  * Routines to handle sorting of relations.
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSort.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/parallel.h"
19 #include "executor/execdebug.h"
20 #include "executor/nodeSort.h"
21 #include "miscadmin.h"
22 #include "utils/tuplesort.h"
23 
24 
25 /* ----------------------------------------------------------------
26  * ExecSort
27  *
28  * Sorts tuples from the outer subtree of the node using tuplesort,
29  * which saves the results in a temporary file or memory. After the
30  * initial call, returns a tuple from the file with each call.
31  *
32  * There are two distinct ways that this sort can be performed:
33  *
34  * 1) When the result is a single column we perform a Datum sort.
35  *
36  * 2) When the result contains multiple columns we perform a tuple sort.
37  *
38  * We could do this by always performing a tuple sort, however sorting
39  * Datums only can be significantly faster than sorting tuples,
40  * especially when the Datums are of a pass-by-value type.
41  *
42  * Conditions:
43  * -- none.
44  *
45  * Initial States:
46  * -- the outer child is prepared to return the first tuple.
47  * ----------------------------------------------------------------
48  */
49 static TupleTableSlot *
51 {
52  SortState *node = castNode(SortState, pstate);
53  EState *estate;
54  ScanDirection dir;
55  Tuplesortstate *tuplesortstate;
56  TupleTableSlot *slot;
57 
59 
60  /*
61  * get state info from node
62  */
63  SO1_printf("ExecSort: %s\n",
64  "entering routine");
65 
66  estate = node->ss.ps.state;
67  dir = estate->es_direction;
68  tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 
70  /*
71  * If first time through, read all tuples from outer plan and pass them to
72  * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73  */
74 
75  if (!node->sort_Done)
76  {
77  Sort *plannode = (Sort *) node->ss.ps.plan;
78  PlanState *outerNode;
79  TupleDesc tupDesc;
80  int tuplesortopts = TUPLESORT_NONE;
81 
82  SO1_printf("ExecSort: %s\n",
83  "sorting subplan");
84 
85  /*
86  * Want to scan subplan in the forward direction while creating the
87  * sorted data.
88  */
90 
91  /*
92  * Initialize tuplesort module.
93  */
94  SO1_printf("ExecSort: %s\n",
95  "calling tuplesort_begin");
96 
97  outerNode = outerPlanState(node);
98  tupDesc = ExecGetResultType(outerNode);
99 
100  if (node->randomAccess)
101  tuplesortopts |= TUPLESORT_RANDOMACCESS;
102  if (node->bounded)
103  tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104 
105  if (node->datumSort)
106  tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107  plannode->sortOperators[0],
108  plannode->collations[0],
109  plannode->nullsFirst[0],
110  work_mem,
111  NULL,
112  tuplesortopts);
113  else
114  tuplesortstate = tuplesort_begin_heap(tupDesc,
115  plannode->numCols,
116  plannode->sortColIdx,
117  plannode->sortOperators,
118  plannode->collations,
119  plannode->nullsFirst,
120  work_mem,
121  NULL,
122  tuplesortopts);
123  if (node->bounded)
124  tuplesort_set_bound(tuplesortstate, node->bound);
125  node->tuplesortstate = (void *) tuplesortstate;
126 
127  /*
128  * Scan the subplan and feed all the tuples to tuplesort using the
129  * appropriate method based on the type of sort we're doing.
130  */
131  if (node->datumSort)
132  {
133  for (;;)
134  {
135  slot = ExecProcNode(outerNode);
136 
137  if (TupIsNull(slot))
138  break;
139  slot_getsomeattrs(slot, 1);
140  tuplesort_putdatum(tuplesortstate,
141  slot->tts_values[0],
142  slot->tts_isnull[0]);
143  }
144  }
145  else
146  {
147  for (;;)
148  {
149  slot = ExecProcNode(outerNode);
150 
151  if (TupIsNull(slot))
152  break;
153  tuplesort_puttupleslot(tuplesortstate, slot);
154  }
155  }
156 
157  /*
158  * Complete the sort.
159  */
160  tuplesort_performsort(tuplesortstate);
161 
162  /*
163  * restore to user specified direction
164  */
165  estate->es_direction = dir;
166 
167  /*
168  * finally set the sorted flag to true
169  */
170  node->sort_Done = true;
171  node->bounded_Done = node->bounded;
172  node->bound_Done = node->bound;
173  if (node->shared_info && node->am_worker)
174  {
176 
178  Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
180  tuplesort_get_stats(tuplesortstate, si);
181  }
182  SO1_printf("ExecSort: %s\n", "sorting done");
183  }
184 
185  SO1_printf("ExecSort: %s\n",
186  "retrieving tuple from tuplesort");
187 
188  slot = node->ss.ps.ps_ResultTupleSlot;
189 
190  /*
191  * Fetch the next sorted item from the appropriate tuplesort function. For
192  * datum sorts we must manage the slot ourselves and leave it clear when
193  * tuplesort_getdatum returns false to indicate there are no more datums.
194  * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195  * empties the slot when it runs out of tuples.
196  */
197  if (node->datumSort)
198  {
199  ExecClearTuple(slot);
200  if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201  false, &(slot->tts_values[0]),
202  &(slot->tts_isnull[0]), NULL))
203  ExecStoreVirtualTuple(slot);
204  }
205  else
206  (void) tuplesort_gettupleslot(tuplesortstate,
208  false, slot, NULL);
209 
210  return slot;
211 }
212 
213 /* ----------------------------------------------------------------
214  * ExecInitSort
215  *
216  * Creates the run-time state information for the sort node
217  * produced by the planner and initializes its outer subtree.
218  * ----------------------------------------------------------------
219  */
220 SortState *
221 ExecInitSort(Sort *node, EState *estate, int eflags)
222 {
223  SortState *sortstate;
224  TupleDesc outerTupDesc;
225 
226  SO1_printf("ExecInitSort: %s\n",
227  "initializing sort node");
228 
229  /*
230  * create state structure
231  */
232  sortstate = makeNode(SortState);
233  sortstate->ss.ps.plan = (Plan *) node;
234  sortstate->ss.ps.state = estate;
235  sortstate->ss.ps.ExecProcNode = ExecSort;
236 
237  /*
238  * We must have random access to the sort output to do backward scan or
239  * mark/restore. We also prefer to materialize the sort output if we
240  * might be called on to rewind and replay it many times.
241  */
242  sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
244  EXEC_FLAG_MARK)) != 0;
245 
246  sortstate->bounded = false;
247  sortstate->sort_Done = false;
248  sortstate->tuplesortstate = NULL;
249 
250  /*
251  * Miscellaneous initialization
252  *
253  * Sort nodes don't initialize their ExprContexts because they never call
254  * ExecQual or ExecProject.
255  */
256 
257  /*
258  * initialize child nodes
259  *
260  * We shield the child node from the need to support REWIND, BACKWARD, or
261  * MARK/RESTORE.
262  */
264 
265  outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
266 
267  /*
268  * Initialize scan slot and type.
269  */
270  ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
271 
272  /*
273  * Initialize return slot and type. No need to initialize projection info
274  * because this node doesn't do projections.
275  */
277  sortstate->ss.ps.ps_ProjInfo = NULL;
278 
279  outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
280 
281  /*
282  * We perform a Datum sort when we're sorting just a single column,
283  * otherwise we perform a tuple sort.
284  */
285  if (outerTupDesc->natts == 1)
286  sortstate->datumSort = true;
287  else
288  sortstate->datumSort = false;
289 
290  SO1_printf("ExecInitSort: %s\n",
291  "sort node initialized");
292 
293  return sortstate;
294 }
295 
296 /* ----------------------------------------------------------------
297  * ExecEndSort(node)
298  * ----------------------------------------------------------------
299  */
300 void
302 {
303  SO1_printf("ExecEndSort: %s\n",
304  "shutting down sort node");
305 
306  /*
307  * clean out the tuple table
308  */
310  /* must drop pointer to sort result tuple */
312 
313  /*
314  * Release tuplesort resources
315  */
316  if (node->tuplesortstate != NULL)
318  node->tuplesortstate = NULL;
319 
320  /*
321  * shut down the subplan
322  */
324 
325  SO1_printf("ExecEndSort: %s\n",
326  "sort node shutdown");
327 }
328 
329 /* ----------------------------------------------------------------
330  * ExecSortMarkPos
331  *
332  * Calls tuplesort to save the current position in the sorted file.
333  * ----------------------------------------------------------------
334  */
335 void
337 {
338  /*
339  * if we haven't sorted yet, just return
340  */
341  if (!node->sort_Done)
342  return;
343 
345 }
346 
347 /* ----------------------------------------------------------------
348  * ExecSortRestrPos
349  *
350  * Calls tuplesort to restore the last saved sort file position.
351  * ----------------------------------------------------------------
352  */
353 void
355 {
356  /*
357  * if we haven't sorted yet, just return.
358  */
359  if (!node->sort_Done)
360  return;
361 
362  /*
363  * restore the scan to the previously marked position
364  */
366 }
367 
368 void
370 {
372 
373  /*
374  * If we haven't sorted yet, just return. If outerplan's chgParam is not
375  * NULL then it will be re-scanned by ExecProcNode, else no reason to
376  * re-scan it at all.
377  */
378  if (!node->sort_Done)
379  return;
380 
381  /* must drop pointer to sort result tuple */
383 
384  /*
385  * If subnode is to be rescanned then we forget previous sort results; we
386  * have to re-read the subplan and re-sort. Also must re-sort if the
387  * bounded-sort parameters changed or we didn't select randomAccess.
388  *
389  * Otherwise we can just rewind and rescan the sorted output.
390  */
391  if (outerPlan->chgParam != NULL ||
392  node->bounded != node->bounded_Done ||
393  node->bound != node->bound_Done ||
394  !node->randomAccess)
395  {
396  node->sort_Done = false;
398  node->tuplesortstate = NULL;
399 
400  /*
401  * if chgParam of subnode is not null then plan will be re-scanned by
402  * first ExecProcNode.
403  */
404  if (outerPlan->chgParam == NULL)
406  }
407  else
409 }
410 
411 /* ----------------------------------------------------------------
412  * Parallel Query Support
413  * ----------------------------------------------------------------
414  */
415 
416 /* ----------------------------------------------------------------
417  * ExecSortEstimate
418  *
419  * Estimate space required to propagate sort statistics.
420  * ----------------------------------------------------------------
421  */
422 void
424 {
425  Size size;
426 
427  /* don't need this if not instrumenting or no workers */
428  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
429  return;
430 
431  size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
432  size = add_size(size, offsetof(SharedSortInfo, sinstrument));
433  shm_toc_estimate_chunk(&pcxt->estimator, size);
434  shm_toc_estimate_keys(&pcxt->estimator, 1);
435 }
436 
437 /* ----------------------------------------------------------------
438  * ExecSortInitializeDSM
439  *
440  * Initialize DSM space for sort statistics.
441  * ----------------------------------------------------------------
442  */
443 void
445 {
446  Size size;
447 
448  /* don't need this if not instrumenting or no workers */
449  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
450  return;
451 
452  size = offsetof(SharedSortInfo, sinstrument)
453  + pcxt->nworkers * sizeof(TuplesortInstrumentation);
454  node->shared_info = shm_toc_allocate(pcxt->toc, size);
455  /* ensure any unfilled slots will contain zeroes */
456  memset(node->shared_info, 0, size);
457  node->shared_info->num_workers = pcxt->nworkers;
458  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
459  node->shared_info);
460 }
461 
462 /* ----------------------------------------------------------------
463  * ExecSortInitializeWorker
464  *
465  * Attach worker to DSM space for sort statistics.
466  * ----------------------------------------------------------------
467  */
468 void
470 {
471  node->shared_info =
472  shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
473  node->am_worker = true;
474 }
475 
476 /* ----------------------------------------------------------------
477  * ExecSortRetrieveInstrumentation
478  *
479  * Transfer sort statistics from DSM to private memory.
480  * ----------------------------------------------------------------
481  */
482 void
484 {
485  Size size;
486  SharedSortInfo *si;
487 
488  if (node->shared_info == NULL)
489  return;
490 
491  size = offsetof(SharedSortInfo, sinstrument)
493  si = palloc(size);
494  memcpy(si, node->shared_info, size);
495  node->shared_info = si;
496 }
int ParallelWorkerNumber
Definition: parallel.c:113
size_t Size
Definition: c.h:541
void ExecReScan(PlanState *node)
Definition: execAmi.c:78
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:83
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1552
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1799
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:85
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:496
void ExecCreateScanSlotFromOuterPlan(EState *estate, ScanState *scanstate, const TupleTableSlotOps *tts_ops)
Definition: execUtils.c:688
#define SO1_printf(s, p)
Definition: execdebug.h:93
#define outerPlanState(node)
Definition: execnodes.h:1124
#define EXEC_FLAG_BACKWARD
Definition: executor.h:58
#define EXEC_FLAG_REWIND
Definition: executor.h:57
#define EXEC_FLAG_MARK
Definition: executor.h:59
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:257
int work_mem
Definition: globals.c:125
#define IsParallelWorker()
Definition: parallel.h:61
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc(Size size)
Definition: mcxt.c:1199
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
static TupleTableSlot * ExecSort(PlanState *pstate)
Definition: nodeSort.c:50
void ExecSortMarkPos(SortState *node)
Definition: nodeSort.c:336
void ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
Definition: nodeSort.c:469
void ExecReScanSort(SortState *node)
Definition: nodeSort.c:369
void ExecSortRestrPos(SortState *node)
Definition: nodeSort.c:354
void ExecSortEstimate(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:423
void ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:444
void ExecSortRetrieveInstrumentation(SortState *node)
Definition: nodeSort.c:483
SortState * ExecInitSort(Sort *node, EState *estate, int eflags)
Definition: nodeSort.c:221
void ExecEndSort(SortState *node)
Definition: nodeSort.c:301
#define makeNode(_type_)
Definition: nodes.h:165
#define castNode(_type_, nodeptr)
Definition: nodes.h:186
#define outerPlan(node)
Definition: plannodes.h:186
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
ScanDirection
Definition: sdir.h:23
@ ForwardScanDirection
Definition: sdir.h:26
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
ScanDirection es_direction
Definition: execnodes.h:608
shm_toc_estimator estimator
Definition: parallel.h:42
shm_toc * toc
Definition: parallel.h:45
Instrumentation * instrument
Definition: execnodes.h:1038
Plan * plan
Definition: execnodes.h:1028
EState * state
Definition: execnodes.h:1030
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1066
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1068
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1034
int plan_node_id
Definition: plannodes.h:155
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1456
PlanState ps
Definition: execnodes.h:1453
TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:2217
bool sort_Done
Definition: execnodes.h:2230
bool am_worker
Definition: execnodes.h:2234
int64 bound_Done
Definition: execnodes.h:2232
bool bounded_Done
Definition: execnodes.h:2231
void * tuplesortstate
Definition: execnodes.h:2233
bool randomAccess
Definition: execnodes.h:2227
SharedSortInfo * shared_info
Definition: execnodes.h:2236
bool datumSort
Definition: execnodes.h:2235
ScanState ss
Definition: execnodes.h:2226
bool bounded
Definition: execnodes.h:2228
int64 bound
Definition: execnodes.h:2229
int numCols
Definition: plannodes.h:935
bool * tts_isnull
Definition: tuptable.h:128
Datum * tts_values
Definition: tuptable.h:126
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
void tuplesort_rescan(Tuplesortstate *state)
Definition: tuplesort.c:2440
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1385
void tuplesort_get_stats(Tuplesortstate *state, TuplesortInstrumentation *stats)
Definition: tuplesort.c:2537
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:972
void tuplesort_markpos(Tuplesortstate *state)
Definition: tuplesort.c:2473
void tuplesort_restorepos(Tuplesortstate *state)
Definition: tuplesort.c:2504
void tuplesort_set_bound(Tuplesortstate *state, int64 bound)
Definition: tuplesort.c:844
#define TUPLESORT_NONE
Definition: tuplesort.h:92
#define TUPLESORT_RANDOMACCESS
Definition: tuplesort.h:95
#define TUPLESORT_ALLOWBOUNDED
Definition: tuplesort.h:98
struct TuplesortInstrumentation TuplesortInstrumentation
void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
void tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, SortCoordinate coordinate, int sortopt)
bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, TupleTableSlot *slot, Datum *abbrev)
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev)
Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, int workMem, SortCoordinate coordinate, int sortopt)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433
static void slot_getsomeattrs(TupleTableSlot *slot, int attnum)
Definition: tuptable.h:349
#define TupIsNull(slot)
Definition: tuptable.h:300