PostgreSQL Source Code  git master
nodeSamplescan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  * Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/relscan.h"
18 #include "access/tableam.h"
19 #include "access/tsmapi.h"
20 #include "common/pg_prng.h"
21 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/bufmgr.h"
26 #include "storage/predicate.h"
27 #include "utils/builtins.h"
28 #include "utils/rel.h"
29 
31 static void tablesample_init(SampleScanState *scanstate);
33 
34 /* ----------------------------------------------------------------
35  * Scan Support
36  * ----------------------------------------------------------------
37  */
38 
39 /* ----------------------------------------------------------------
40  * SampleNext
41  *
42  * This is a workhorse for ExecSampleScan
43  * ----------------------------------------------------------------
44  */
45 static TupleTableSlot *
47 {
48  /*
49  * if this is first call within a scan, initialize
50  */
51  if (!node->begun)
52  tablesample_init(node);
53 
54  /*
55  * get the next tuple, and store it in our result slot
56  */
57  return tablesample_getnext(node);
58 }
59 
60 /*
61  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
62  */
63 static bool
65 {
66  /*
67  * No need to recheck for SampleScan, since like SeqScan we don't pass any
68  * checkable keys to heap_beginscan.
69  */
70  return true;
71 }
72 
73 /* ----------------------------------------------------------------
74  * ExecSampleScan(node)
75  *
76  * Scans the relation using the sampling method and returns
77  * the next qualifying tuple.
78  * We call the ExecScan() routine and pass it the appropriate
79  * access method functions.
80  * ----------------------------------------------------------------
81  */
82 static TupleTableSlot *
84 {
85  SampleScanState *node = castNode(SampleScanState, pstate);
86 
87  return ExecScan(&node->ss,
90 }
91 
92 /* ----------------------------------------------------------------
93  * ExecInitSampleScan
94  * ----------------------------------------------------------------
95  */
97 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
98 {
99  SampleScanState *scanstate;
100  TableSampleClause *tsc = node->tablesample;
101  TsmRoutine *tsm;
102 
103  Assert(outerPlan(node) == NULL);
104  Assert(innerPlan(node) == NULL);
105 
106  /*
107  * create state structure
108  */
109  scanstate = makeNode(SampleScanState);
110  scanstate->ss.ps.plan = (Plan *) node;
111  scanstate->ss.ps.state = estate;
112  scanstate->ss.ps.ExecProcNode = ExecSampleScan;
113 
114  /*
115  * Miscellaneous initialization
116  *
117  * create expression context for node
118  */
119  ExecAssignExprContext(estate, &scanstate->ss.ps);
120 
121  /*
122  * open the scan relation
123  */
124  scanstate->ss.ss_currentRelation =
125  ExecOpenScanRelation(estate,
126  node->scan.scanrelid,
127  eflags);
128 
129  /* we won't set up the HeapScanDesc till later */
130  scanstate->ss.ss_currentScanDesc = NULL;
131 
132  /* and create slot with appropriate rowtype */
133  ExecInitScanTupleSlot(estate, &scanstate->ss,
136 
137  /*
138  * Initialize result type and projection.
139  */
140  ExecInitResultTypeTL(&scanstate->ss.ps);
141  ExecAssignScanProjectionInfo(&scanstate->ss);
142 
143  /*
144  * initialize child expressions
145  */
146  scanstate->ss.ps.qual =
147  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
148 
149  scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
150  scanstate->repeatable =
151  ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
152 
153  /*
154  * If we don't have a REPEATABLE clause, select a random seed. We want to
155  * do this just once, since the seed shouldn't change over rescans.
156  */
157  if (tsc->repeatable == NULL)
159 
160  /*
161  * Finally, initialize the TABLESAMPLE method handler.
162  */
163  tsm = GetTsmRoutine(tsc->tsmhandler);
164  scanstate->tsmroutine = tsm;
165  scanstate->tsm_state = NULL;
166 
167  if (tsm->InitSampleScan)
168  tsm->InitSampleScan(scanstate, eflags);
169 
170  /* We'll do BeginSampleScan later; we can't evaluate params yet */
171  scanstate->begun = false;
172 
173  return scanstate;
174 }
175 
176 /* ----------------------------------------------------------------
177  * ExecEndSampleScan
178  *
179  * frees any storage allocated through C routines.
180  * ----------------------------------------------------------------
181  */
182 void
184 {
185  /*
186  * Tell sampling function that we finished the scan.
187  */
188  if (node->tsmroutine->EndSampleScan)
189  node->tsmroutine->EndSampleScan(node);
190 
191  /*
192  * Free the exprcontext
193  */
194  ExecFreeExprContext(&node->ss.ps);
195 
196  /*
197  * clean out the tuple table
198  */
199  if (node->ss.ps.ps_ResultTupleSlot)
202 
203  /*
204  * close heap scan
205  */
206  if (node->ss.ss_currentScanDesc)
208 }
209 
210 /* ----------------------------------------------------------------
211  * ExecReScanSampleScan
212  *
213  * Rescans the relation.
214  *
215  * ----------------------------------------------------------------
216  */
217 void
219 {
220  /* Remember we need to do BeginSampleScan again (if we did it at all) */
221  node->begun = false;
222  node->done = false;
223  node->haveblock = false;
224  node->donetuples = 0;
225 
226  ExecScanReScan(&node->ss);
227 }
228 
229 
230 /*
231  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
232  */
233 static void
235 {
236  TsmRoutine *tsm = scanstate->tsmroutine;
237  ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
238  Datum *params;
239  Datum datum;
240  bool isnull;
241  uint32 seed;
242  bool allow_sync;
243  int i;
244  ListCell *arg;
245 
246  scanstate->donetuples = 0;
247  params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
248 
249  i = 0;
250  foreach(arg, scanstate->args)
251  {
252  ExprState *argstate = (ExprState *) lfirst(arg);
253 
254  params[i] = ExecEvalExprSwitchContext(argstate,
255  econtext,
256  &isnull);
257  if (isnull)
258  ereport(ERROR,
259  (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
260  errmsg("TABLESAMPLE parameter cannot be null")));
261  i++;
262  }
263 
264  if (scanstate->repeatable)
265  {
266  datum = ExecEvalExprSwitchContext(scanstate->repeatable,
267  econtext,
268  &isnull);
269  if (isnull)
270  ereport(ERROR,
271  (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
272  errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
273 
274  /*
275  * The REPEATABLE parameter has been coerced to float8 by the parser.
276  * The reason for using float8 at the SQL level is that it will
277  * produce unsurprising results both for users used to databases that
278  * accept only integers in the REPEATABLE clause and for those who
279  * might expect that REPEATABLE works like setseed() (a float in the
280  * range from -1 to 1).
281  *
282  * We use hashfloat8() to convert the supplied value into a suitable
283  * seed. For regression-testing purposes, that has the convenient
284  * property that REPEATABLE(0) gives a machine-independent result.
285  */
287  }
288  else
289  {
290  /* Use the seed selected by ExecInitSampleScan */
291  seed = scanstate->seed;
292  }
293 
294  /* Set default values for params that BeginSampleScan can adjust */
295  scanstate->use_bulkread = true;
296  scanstate->use_pagemode = true;
297 
298  /* Let tablesample method do its thing */
299  tsm->BeginSampleScan(scanstate,
300  params,
301  list_length(scanstate->args),
302  seed);
303 
304  /* We'll use syncscan if there's no NextSampleBlock function */
305  allow_sync = (tsm->NextSampleBlock == NULL);
306 
307  /* Now we can create or reset the HeapScanDesc */
308  if (scanstate->ss.ss_currentScanDesc == NULL)
309  {
310  scanstate->ss.ss_currentScanDesc =
312  scanstate->ss.ps.state->es_snapshot,
313  0, NULL,
314  scanstate->use_bulkread,
315  allow_sync,
316  scanstate->use_pagemode);
317  }
318  else
319  {
321  scanstate->use_bulkread,
322  allow_sync,
323  scanstate->use_pagemode);
324  }
325 
326  pfree(params);
327 
328  /* And we're initialized. */
329  scanstate->begun = true;
330 }
331 
332 /*
333  * Get next tuple from TABLESAMPLE method.
334  */
335 static TupleTableSlot *
337 {
338  TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
339  TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
340 
341  ExecClearTuple(slot);
342 
343  if (scanstate->done)
344  return NULL;
345 
346  for (;;)
347  {
348  if (!scanstate->haveblock)
349  {
350  if (!table_scan_sample_next_block(scan, scanstate))
351  {
352  scanstate->haveblock = false;
353  scanstate->done = true;
354 
355  /* exhausted relation */
356  return NULL;
357  }
358 
359  scanstate->haveblock = true;
360  }
361 
362  if (!table_scan_sample_next_tuple(scan, scanstate, slot))
363  {
364  /*
365  * If we get here, it means we've exhausted the items on this page
366  * and it's time to move to the next.
367  */
368  scanstate->haveblock = false;
369  continue;
370  }
371 
372  /* Found visible tuple, return it. */
373  break;
374  }
375 
376  scanstate->donetuples++;
377 
378  return slot;
379 }
unsigned int uint32
Definition: c.h:490
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
List * ExecInitExprList(List *nodes, PlanState *parent)
Definition: execExpr.c:322
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:213
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:127
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:157
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:271
void ExecScanReScan(ScanState *node)
Definition: execScan.c:298
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1812
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1756
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:488
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:728
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:658
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:472
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:473
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:347
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:195
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
static void tablesample_init(SampleScanState *scanstate)
SampleScanState * ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
void ExecReScanSampleScan(SampleScanState *node)
void ExecEndSampleScan(SampleScanState *node)
static TupleTableSlot * ExecSampleScan(PlanState *pstate)
static TupleTableSlot * SampleNext(SampleScanState *node)
static bool SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
static TupleTableSlot * tablesample_getnext(SampleScanState *scanstate)
#define makeNode(_type_)
Definition: nodes.h:176
#define castNode(_type_, nodeptr)
Definition: nodes.h:197
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:191
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define innerPlan(node)
Definition: plannodes.h:182
#define outerPlan(node)
Definition: plannodes.h:183
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
uintptr_t Datum
Definition: postgres.h:64
#define RelationGetDescr(relation)
Definition: rel.h:530
Snapshot es_snapshot
Definition: execnodes.h:616
ExprState * qual
Definition: execnodes.h:1058
Plan * plan
Definition: execnodes.h:1037
EState * state
Definition: execnodes.h:1039
ExprContext * ps_ExprContext
Definition: execnodes.h:1076
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1075
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1043
ExprState * repeatable
Definition: execnodes.h:1498
void * tsm_state
Definition: execnodes.h:1501
ScanState ss
Definition: execnodes.h:1496
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1500
struct TableSampleClause * tablesample
Definition: plannodes.h:407
Scan scan
Definition: plannodes.h:405
Relation ss_currentRelation
Definition: execnodes.h:1475
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1477
PlanState ps
Definition: execnodes.h:1474
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1476
Index scanrelid
Definition: plannodes.h:387
EndSampleScan_function EndSampleScan
Definition: tsmapi.h:75
BeginSampleScan_function BeginSampleScan
Definition: tsmapi.h:72
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
InitSampleScan_function InitSampleScan
Definition: tsmapi.h:71
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1009
static TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:962
static void table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:1033
static bool table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:2009
static bool table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:2031
TsmRoutine * GetTsmRoutine(Oid tsmhandler)
Definition: tablesample.c:27
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433