PostgreSQL Source Code  git master
nodeSort.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSort.c
4  * Routines to handle sorting of relations.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSort.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/parallel.h"
19 #include "executor/execdebug.h"
20 #include "executor/nodeSort.h"
21 #include "miscadmin.h"
22 #include "utils/tuplesort.h"
23 
24 
25 /* ----------------------------------------------------------------
26  * ExecSort
27  *
28  * Sorts tuples from the outer subtree of the node using tuplesort,
29  * which saves the results in a temporary file or memory. After the
30  * initial call, returns a tuple from the file with each call.
31  *
32  * There are two distinct ways that this sort can be performed:
33  *
34  * 1) When the result is a single column we perform a Datum sort.
35  *
36  * 2) When the result contains multiple columns we perform a tuple sort.
37  *
38  * We could do this by always performing a tuple sort, however sorting
39  * Datums only can be significantly faster than sorting tuples,
40  * especially when the Datums are of a pass-by-value type.
41  *
42  * Conditions:
43  * -- none.
44  *
45  * Initial States:
46  * -- the outer child is prepared to return the first tuple.
47  * ----------------------------------------------------------------
48  */
49 static TupleTableSlot *
51 {
52  SortState *node = castNode(SortState, pstate);
53  EState *estate;
54  ScanDirection dir;
55  Tuplesortstate *tuplesortstate;
56  TupleTableSlot *slot;
57 
59 
60  /*
61  * get state info from node
62  */
63  SO1_printf("ExecSort: %s\n",
64  "entering routine");
65 
66  estate = node->ss.ps.state;
67  dir = estate->es_direction;
68  tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 
70  /*
71  * If first time through, read all tuples from outer plan and pass them to
72  * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73  */
74 
75  if (!node->sort_Done)
76  {
77  Sort *plannode = (Sort *) node->ss.ps.plan;
78  PlanState *outerNode;
79  TupleDesc tupDesc;
80 
81  SO1_printf("ExecSort: %s\n",
82  "sorting subplan");
83 
84  /*
85  * Want to scan subplan in the forward direction while creating the
86  * sorted data.
87  */
89 
90  /*
91  * Initialize tuplesort module.
92  */
93  SO1_printf("ExecSort: %s\n",
94  "calling tuplesort_begin");
95 
96  outerNode = outerPlanState(node);
97  tupDesc = ExecGetResultType(outerNode);
98 
99  if (node->datumSort)
100  tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
101  plannode->sortOperators[0],
102  plannode->collations[0],
103  plannode->nullsFirst[0],
104  work_mem,
105  NULL,
106  node->randomAccess);
107  else
108  tuplesortstate = tuplesort_begin_heap(tupDesc,
109  plannode->numCols,
110  plannode->sortColIdx,
111  plannode->sortOperators,
112  plannode->collations,
113  plannode->nullsFirst,
114  work_mem,
115  NULL,
116  node->randomAccess);
117  if (node->bounded)
118  tuplesort_set_bound(tuplesortstate, node->bound);
119  node->tuplesortstate = (void *) tuplesortstate;
120 
121  /*
122  * Scan the subplan and feed all the tuples to tuplesort using the
123  * appropriate method based on the type of sort we're doing.
124  */
125  if (node->datumSort)
126  {
127  for (;;)
128  {
129  slot = ExecProcNode(outerNode);
130 
131  if (TupIsNull(slot))
132  break;
133  slot_getsomeattrs(slot, 1);
134  tuplesort_putdatum(tuplesortstate,
135  slot->tts_values[0],
136  slot->tts_isnull[0]);
137  }
138  }
139  else
140  {
141  for (;;)
142  {
143  slot = ExecProcNode(outerNode);
144 
145  if (TupIsNull(slot))
146  break;
147  tuplesort_puttupleslot(tuplesortstate, slot);
148  }
149  }
150 
151  /*
152  * Complete the sort.
153  */
154  tuplesort_performsort(tuplesortstate);
155 
156  /*
157  * restore to user specified direction
158  */
159  estate->es_direction = dir;
160 
161  /*
162  * finally set the sorted flag to true
163  */
164  node->sort_Done = true;
165  node->bounded_Done = node->bounded;
166  node->bound_Done = node->bound;
167  if (node->shared_info && node->am_worker)
168  {
170 
172  Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
174  tuplesort_get_stats(tuplesortstate, si);
175  }
176  SO1_printf("ExecSort: %s\n", "sorting done");
177  }
178 
179  SO1_printf("ExecSort: %s\n",
180  "retrieving tuple from tuplesort");
181 
182  slot = node->ss.ps.ps_ResultTupleSlot;
183 
184  /*
185  * Fetch the next sorted item from the appropriate tuplesort function. For
186  * datum sorts we must manage the slot ourselves and leave it clear when
187  * tuplesort_getdatum returns false to indicate there are no more datums.
188  * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
189  * empties the slot when it runs out of tuples.
190  */
191  if (node->datumSort)
192  {
193  ExecClearTuple(slot);
194  if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
195  &(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
196  ExecStoreVirtualTuple(slot);
197  }
198  else
199  (void) tuplesort_gettupleslot(tuplesortstate,
201  false, slot, NULL);
202 
203  return slot;
204 }
205 
206 /* ----------------------------------------------------------------
207  * ExecInitSort
208  *
209  * Creates the run-time state information for the sort node
210  * produced by the planner and initializes its outer subtree.
211  * ----------------------------------------------------------------
212  */
213 SortState *
214 ExecInitSort(Sort *node, EState *estate, int eflags)
215 {
216  SortState *sortstate;
217 
218  SO1_printf("ExecInitSort: %s\n",
219  "initializing sort node");
220 
221  /*
222  * create state structure
223  */
224  sortstate = makeNode(SortState);
225  sortstate->ss.ps.plan = (Plan *) node;
226  sortstate->ss.ps.state = estate;
227  sortstate->ss.ps.ExecProcNode = ExecSort;
228 
229  /*
230  * We must have random access to the sort output to do backward scan or
231  * mark/restore. We also prefer to materialize the sort output if we
232  * might be called on to rewind and replay it many times.
233  */
234  sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
236  EXEC_FLAG_MARK)) != 0;
237 
238  sortstate->bounded = false;
239  sortstate->sort_Done = false;
240  sortstate->tuplesortstate = NULL;
241 
242  /*
243  * Miscellaneous initialization
244  *
245  * Sort nodes don't initialize their ExprContexts because they never call
246  * ExecQual or ExecProject.
247  */
248 
249  /*
250  * initialize child nodes
251  *
252  * We shield the child node from the need to support REWIND, BACKWARD, or
253  * MARK/RESTORE.
254  */
256 
257  outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
258 
259  /*
260  * Initialize scan slot and type.
261  */
262  ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
263 
264  /*
265  * Initialize return slot and type. No need to initialize projection info
266  * because this node doesn't do projections.
267  */
269  sortstate->ss.ps.ps_ProjInfo = NULL;
270 
271  /*
272  * We perform a Datum sort when we're sorting just a single column,
273  * otherwise we perform a tuple sort.
274  */
275  if (ExecGetResultType(outerPlanState(sortstate))->natts == 1)
276  sortstate->datumSort = true;
277  else
278  sortstate->datumSort = false;
279 
280  SO1_printf("ExecInitSort: %s\n",
281  "sort node initialized");
282 
283  return sortstate;
284 }
285 
286 /* ----------------------------------------------------------------
287  * ExecEndSort(node)
288  * ----------------------------------------------------------------
289  */
290 void
292 {
293  SO1_printf("ExecEndSort: %s\n",
294  "shutting down sort node");
295 
296  /*
297  * clean out the tuple table
298  */
300  /* must drop pointer to sort result tuple */
302 
303  /*
304  * Release tuplesort resources
305  */
306  if (node->tuplesortstate != NULL)
308  node->tuplesortstate = NULL;
309 
310  /*
311  * shut down the subplan
312  */
314 
315  SO1_printf("ExecEndSort: %s\n",
316  "sort node shutdown");
317 }
318 
319 /* ----------------------------------------------------------------
320  * ExecSortMarkPos
321  *
322  * Calls tuplesort to save the current position in the sorted file.
323  * ----------------------------------------------------------------
324  */
325 void
327 {
328  /*
329  * if we haven't sorted yet, just return
330  */
331  if (!node->sort_Done)
332  return;
333 
335 }
336 
337 /* ----------------------------------------------------------------
338  * ExecSortRestrPos
339  *
340  * Calls tuplesort to restore the last saved sort file position.
341  * ----------------------------------------------------------------
342  */
343 void
345 {
346  /*
347  * if we haven't sorted yet, just return.
348  */
349  if (!node->sort_Done)
350  return;
351 
352  /*
353  * restore the scan to the previously marked position
354  */
356 }
357 
358 void
360 {
362 
363  /*
364  * If we haven't sorted yet, just return. If outerplan's chgParam is not
365  * NULL then it will be re-scanned by ExecProcNode, else no reason to
366  * re-scan it at all.
367  */
368  if (!node->sort_Done)
369  return;
370 
371  /* must drop pointer to sort result tuple */
373 
374  /*
375  * If subnode is to be rescanned then we forget previous sort results; we
376  * have to re-read the subplan and re-sort. Also must re-sort if the
377  * bounded-sort parameters changed or we didn't select randomAccess.
378  *
379  * Otherwise we can just rewind and rescan the sorted output.
380  */
381  if (outerPlan->chgParam != NULL ||
382  node->bounded != node->bounded_Done ||
383  node->bound != node->bound_Done ||
384  !node->randomAccess)
385  {
386  node->sort_Done = false;
388  node->tuplesortstate = NULL;
389 
390  /*
391  * if chgParam of subnode is not null then plan will be re-scanned by
392  * first ExecProcNode.
393  */
394  if (outerPlan->chgParam == NULL)
395  ExecReScan(outerPlan);
396  }
397  else
399 }
400 
401 /* ----------------------------------------------------------------
402  * Parallel Query Support
403  * ----------------------------------------------------------------
404  */
405 
406 /* ----------------------------------------------------------------
407  * ExecSortEstimate
408  *
409  * Estimate space required to propagate sort statistics.
410  * ----------------------------------------------------------------
411  */
412 void
414 {
415  Size size;
416 
417  /* don't need this if not instrumenting or no workers */
418  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
419  return;
420 
421  size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
422  size = add_size(size, offsetof(SharedSortInfo, sinstrument));
423  shm_toc_estimate_chunk(&pcxt->estimator, size);
424  shm_toc_estimate_keys(&pcxt->estimator, 1);
425 }
426 
427 /* ----------------------------------------------------------------
428  * ExecSortInitializeDSM
429  *
430  * Initialize DSM space for sort statistics.
431  * ----------------------------------------------------------------
432  */
433 void
435 {
436  Size size;
437 
438  /* don't need this if not instrumenting or no workers */
439  if (!node->ss.ps.instrument || pcxt->nworkers == 0)
440  return;
441 
442  size = offsetof(SharedSortInfo, sinstrument)
443  + pcxt->nworkers * sizeof(TuplesortInstrumentation);
444  node->shared_info = shm_toc_allocate(pcxt->toc, size);
445  /* ensure any unfilled slots will contain zeroes */
446  memset(node->shared_info, 0, size);
447  node->shared_info->num_workers = pcxt->nworkers;
448  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
449  node->shared_info);
450 }
451 
452 /* ----------------------------------------------------------------
453  * ExecSortInitializeWorker
454  *
455  * Attach worker to DSM space for sort statistics.
456  * ----------------------------------------------------------------
457  */
458 void
460 {
461  node->shared_info =
462  shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
463  node->am_worker = true;
464 }
465 
466 /* ----------------------------------------------------------------
467  * ExecSortRetrieveInstrumentation
468  *
469  * Transfer sort statistics from DSM to private memory.
470  * ----------------------------------------------------------------
471  */
472 void
474 {
475  Size size;
476  SharedSortInfo *si;
477 
478  if (node->shared_info == NULL)
479  return;
480 
481  size = offsetof(SharedSortInfo, sinstrument)
483  si = palloc(size);
484  memcpy(si, node->shared_info, size);
485  node->shared_info = si;
486 }
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, Datum *val, bool *isNull, Datum *abbrev)
Definition: tuplesort.c:2494
SortState * ExecInitSort(Sort *node, EState *estate, int eflags)
Definition: nodeSort.c:214
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:2043
struct TuplesortInstrumentation TuplesortInstrumentation
void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
Definition: tuplesort.c:1808
Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:1246
void tuplesort_restorepos(Tuplesortstate *state)
Definition: tuplesort.c:3284
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1007
Instrumentation * instrument
Definition: execnodes.h:977
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425
bool bounded_Done
Definition: execnodes.h:2151
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
void ExecSortEstimate(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:413
#define castNode(_type_, nodeptr)
Definition: nodes.h:605
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:556
bool bounded
Definition: execnodes.h:2148
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
shm_toc_estimator estimator
Definition: parallel.h:42
void ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
Definition: nodeSort.c:459
int64 bound
Definition: execnodes.h:2149
void ExecReScan(PlanState *node)
Definition: execAmi.c:78
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:83
int plan_node_id
Definition: plannodes.h:140
SharedSortInfo * shared_info
Definition: execnodes.h:2156
static void slot_getsomeattrs(TupleTableSlot *slot, int attnum)
Definition: tuptable.h:341
bool * nullsFirst
Definition: plannodes.h:818
Datum * tts_values
Definition: tuptable.h:126
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1381
bool am_worker
Definition: execnodes.h:2154
EState * state
Definition: execnodes.h:969
#define SO1_printf(s, p)
Definition: execdebug.h:93
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
void tuplesort_get_stats(Tuplesortstate *state, TuplesortInstrumentation *stats)
Definition: tuplesort.c:3317
Oid * sortOperators
Definition: plannodes.h:816
ScanDirection es_direction
Definition: execnodes.h:558
void ExecReScanSort(SortState *node)
Definition: nodeSort.c:359
Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:896
PlanState ps
Definition: execnodes.h:1378
void * tuplesortstate
Definition: execnodes.h:2153
void ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
Definition: nodeSort.c:434
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1005
static TupleTableSlot * ExecSort(PlanState *pstate)
Definition: nodeSort.c:50
void tuplesort_rescan(Tuplesortstate *state)
Definition: tuplesort.c:3220
void ExecEndSort(SortState *node)
Definition: nodeSort.c:291
#define EXEC_FLAG_BACKWARD
Definition: executor.h:58
#define outerPlanState(node)
Definition: execnodes.h:1063
bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, TupleTableSlot *slot, Datum *abbrev)
Definition: tuplesort.c:2408
void tuplesort_set_bound(Tuplesortstate *state, int64 bound)
Definition: tuplesort.c:1333
bool * tts_isnull
Definition: tuptable.h:128
void ExecSortMarkPos(SortState *node)
Definition: nodeSort.c:326
ScanDirection
Definition: sdir.h:22
int ParallelWorkerNumber
Definition: parallel.c:112
bool datumSort
Definition: execnodes.h:2155
#define TupIsNull(slot)
Definition: tuptable.h:292
int64 bound_Done
Definition: execnodes.h:2152
ScanState ss
Definition: execnodes.h:2146
#define EXEC_FLAG_REWIND
Definition: executor.h:57
#define IsParallelWorker()
Definition: parallel.h:61
Bitmapset * chgParam
Definition: execnodes.h:999
#define outerPlan(node)
Definition: plannodes.h:171
int numCols
Definition: plannodes.h:814
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:973
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:252
int work_mem
Definition: globals.c:124
Plan * plan
Definition: execnodes.h:967
#define makeNode(_type_)
Definition: nodes.h:584
#define Assert(condition)
Definition: c.h:804
#define EXEC_FLAG_MARK
Definition: executor.h:59
void ExecSortRestrPos(SortState *node)
Definition: nodeSort.c:344
size_t Size
Definition: c.h:540
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
bool randomAccess
Definition: execnodes.h:2147
bool sort_Done
Definition: execnodes.h:2150
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1799
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:490
AttrNumber * sortColIdx
Definition: plannodes.h:815
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * palloc(Size size)
Definition: mcxt.c:1062
void tuplesort_markpos(Tuplesortstate *state)
Definition: tuplesort.c:3253
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
void ExecSortRetrieveInstrumentation(SortState *node)
Definition: nodeSort.c:473
void ExecCreateScanSlotFromOuterPlan(EState *estate, ScanState *scanstate, const TupleTableSlotOps *tts_ops)
Definition: execUtils.c:682
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1467
Oid * collations
Definition: plannodes.h:817
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:141
TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:2137
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:85
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define offsetof(type, field)
Definition: c.h:727
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1552
shm_toc * toc
Definition: parallel.h:45
void tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
Definition: tuplesort.c:1687