PostgreSQL Source Code  git master
nodeSamplescan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  * Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/relscan.h"
18 #include "access/tableam.h"
19 #include "access/tsmapi.h"
20 #include "common/pg_prng.h"
21 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/bufmgr.h"
26 #include "storage/predicate.h"
27 #include "utils/builtins.h"
28 #include "utils/rel.h"
29 
31 static void tablesample_init(SampleScanState *scanstate);
33 
34 /* ----------------------------------------------------------------
35  * Scan Support
36  * ----------------------------------------------------------------
37  */
38 
39 /* ----------------------------------------------------------------
40  * SampleNext
41  *
42  * This is a workhorse for ExecSampleScan
43  * ----------------------------------------------------------------
44  */
45 static TupleTableSlot *
47 {
48  /*
49  * if this is first call within a scan, initialize
50  */
51  if (!node->begun)
52  tablesample_init(node);
53 
54  /*
55  * get the next tuple, and store it in our result slot
56  */
57  return tablesample_getnext(node);
58 }
59 
60 /*
61  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
62  */
63 static bool
65 {
66  /*
67  * No need to recheck for SampleScan, since like SeqScan we don't pass any
68  * checkable keys to heap_beginscan.
69  */
70  return true;
71 }
72 
73 /* ----------------------------------------------------------------
74  * ExecSampleScan(node)
75  *
76  * Scans the relation using the sampling method and returns
77  * the next qualifying tuple.
78  * We call the ExecScan() routine and pass it the appropriate
79  * access method functions.
80  * ----------------------------------------------------------------
81  */
82 static TupleTableSlot *
84 {
85  SampleScanState *node = castNode(SampleScanState, pstate);
86 
87  return ExecScan(&node->ss,
90 }
91 
92 /* ----------------------------------------------------------------
93  * ExecInitSampleScan
94  * ----------------------------------------------------------------
95  */
97 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
98 {
99  SampleScanState *scanstate;
100  TableSampleClause *tsc = node->tablesample;
101  TsmRoutine *tsm;
102 
103  Assert(outerPlan(node) == NULL);
104  Assert(innerPlan(node) == NULL);
105 
106  /*
107  * create state structure
108  */
109  scanstate = makeNode(SampleScanState);
110  scanstate->ss.ps.plan = (Plan *) node;
111  scanstate->ss.ps.state = estate;
112  scanstate->ss.ps.ExecProcNode = ExecSampleScan;
113 
114  /*
115  * Miscellaneous initialization
116  *
117  * create expression context for node
118  */
119  ExecAssignExprContext(estate, &scanstate->ss.ps);
120 
121  /*
122  * open the scan relation
123  */
124  scanstate->ss.ss_currentRelation =
125  ExecOpenScanRelation(estate,
126  node->scan.scanrelid,
127  eflags);
128 
129  /* we won't set up the HeapScanDesc till later */
130  scanstate->ss.ss_currentScanDesc = NULL;
131 
132  /* and create slot with appropriate rowtype */
133  ExecInitScanTupleSlot(estate, &scanstate->ss,
136 
137  /*
138  * Initialize result type and projection.
139  */
140  ExecInitResultTypeTL(&scanstate->ss.ps);
141  ExecAssignScanProjectionInfo(&scanstate->ss);
142 
143  /*
144  * initialize child expressions
145  */
146  scanstate->ss.ps.qual =
147  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
148 
149  scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
150  scanstate->repeatable =
151  ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
152 
153  /*
154  * If we don't have a REPEATABLE clause, select a random seed. We want to
155  * do this just once, since the seed shouldn't change over rescans.
156  */
157  if (tsc->repeatable == NULL)
159 
160  /*
161  * Finally, initialize the TABLESAMPLE method handler.
162  */
163  tsm = GetTsmRoutine(tsc->tsmhandler);
164  scanstate->tsmroutine = tsm;
165  scanstate->tsm_state = NULL;
166 
167  if (tsm->InitSampleScan)
168  tsm->InitSampleScan(scanstate, eflags);
169 
170  /* We'll do BeginSampleScan later; we can't evaluate params yet */
171  scanstate->begun = false;
172 
173  return scanstate;
174 }
175 
176 /* ----------------------------------------------------------------
177  * ExecEndSampleScan
178  *
179  * frees any storage allocated through C routines.
180  * ----------------------------------------------------------------
181  */
182 void
184 {
185  /*
186  * Tell sampling function that we finished the scan.
187  */
188  if (node->tsmroutine->EndSampleScan)
189  node->tsmroutine->EndSampleScan(node);
190 
191  /*
192  * close heap scan
193  */
194  if (node->ss.ss_currentScanDesc)
196 }
197 
198 /* ----------------------------------------------------------------
199  * ExecReScanSampleScan
200  *
201  * Rescans the relation.
202  *
203  * ----------------------------------------------------------------
204  */
205 void
207 {
208  /* Remember we need to do BeginSampleScan again (if we did it at all) */
209  node->begun = false;
210  node->done = false;
211  node->haveblock = false;
212  node->donetuples = 0;
213 
214  ExecScanReScan(&node->ss);
215 }
216 
217 
218 /*
219  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
220  */
221 static void
223 {
224  TsmRoutine *tsm = scanstate->tsmroutine;
225  ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
226  Datum *params;
227  Datum datum;
228  bool isnull;
229  uint32 seed;
230  bool allow_sync;
231  int i;
232  ListCell *arg;
233 
234  scanstate->donetuples = 0;
235  params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
236 
237  i = 0;
238  foreach(arg, scanstate->args)
239  {
240  ExprState *argstate = (ExprState *) lfirst(arg);
241 
242  params[i] = ExecEvalExprSwitchContext(argstate,
243  econtext,
244  &isnull);
245  if (isnull)
246  ereport(ERROR,
247  (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
248  errmsg("TABLESAMPLE parameter cannot be null")));
249  i++;
250  }
251 
252  if (scanstate->repeatable)
253  {
254  datum = ExecEvalExprSwitchContext(scanstate->repeatable,
255  econtext,
256  &isnull);
257  if (isnull)
258  ereport(ERROR,
259  (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
260  errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
261 
262  /*
263  * The REPEATABLE parameter has been coerced to float8 by the parser.
264  * The reason for using float8 at the SQL level is that it will
265  * produce unsurprising results both for users used to databases that
266  * accept only integers in the REPEATABLE clause and for those who
267  * might expect that REPEATABLE works like setseed() (a float in the
268  * range from -1 to 1).
269  *
270  * We use hashfloat8() to convert the supplied value into a suitable
271  * seed. For regression-testing purposes, that has the convenient
272  * property that REPEATABLE(0) gives a machine-independent result.
273  */
275  }
276  else
277  {
278  /* Use the seed selected by ExecInitSampleScan */
279  seed = scanstate->seed;
280  }
281 
282  /* Set default values for params that BeginSampleScan can adjust */
283  scanstate->use_bulkread = true;
284  scanstate->use_pagemode = true;
285 
286  /* Let tablesample method do its thing */
287  tsm->BeginSampleScan(scanstate,
288  params,
289  list_length(scanstate->args),
290  seed);
291 
292  /* We'll use syncscan if there's no NextSampleBlock function */
293  allow_sync = (tsm->NextSampleBlock == NULL);
294 
295  /* Now we can create or reset the HeapScanDesc */
296  if (scanstate->ss.ss_currentScanDesc == NULL)
297  {
298  scanstate->ss.ss_currentScanDesc =
300  scanstate->ss.ps.state->es_snapshot,
301  0, NULL,
302  scanstate->use_bulkread,
303  allow_sync,
304  scanstate->use_pagemode);
305  }
306  else
307  {
309  scanstate->use_bulkread,
310  allow_sync,
311  scanstate->use_pagemode);
312  }
313 
314  pfree(params);
315 
316  /* And we're initialized. */
317  scanstate->begun = true;
318 }
319 
320 /*
321  * Get next tuple from TABLESAMPLE method.
322  */
323 static TupleTableSlot *
325 {
326  TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
327  TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
328 
329  ExecClearTuple(slot);
330 
331  if (scanstate->done)
332  return NULL;
333 
334  for (;;)
335  {
336  if (!scanstate->haveblock)
337  {
338  if (!table_scan_sample_next_block(scan, scanstate))
339  {
340  scanstate->haveblock = false;
341  scanstate->done = true;
342 
343  /* exhausted relation */
344  return NULL;
345  }
346 
347  scanstate->haveblock = true;
348  }
349 
350  if (!table_scan_sample_next_tuple(scan, scanstate, slot))
351  {
352  /*
353  * If we get here, it means we've exhausted the items on this page
354  * and it's time to move to the next.
355  */
356  scanstate->haveblock = false;
357  continue;
358  }
359 
360  /* Found visible tuple, return it. */
361  break;
362  }
363 
364  scanstate->donetuples++;
365 
366  return slot;
367 }
unsigned int uint32
Definition: c.h:495
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
List * ExecInitExprList(List *nodes, PlanState *parent)
Definition: execExpr.c:323
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:214
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:128
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:157
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:271
void ExecScanReScan(ScanState *node)
Definition: execScan.c:298
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1810
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1754
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:488
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:702
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:472
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:473
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:347
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:195
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
static void tablesample_init(SampleScanState *scanstate)
SampleScanState * ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
void ExecReScanSampleScan(SampleScanState *node)
void ExecEndSampleScan(SampleScanState *node)
static TupleTableSlot * ExecSampleScan(PlanState *pstate)
static TupleTableSlot * SampleNext(SampleScanState *node)
static bool SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
static TupleTableSlot * tablesample_getnext(SampleScanState *scanstate)
#define makeNode(_type_)
Definition: nodes.h:176
#define castNode(_type_, nodeptr)
Definition: nodes.h:197
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:191
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define innerPlan(node)
Definition: plannodes.h:181
#define outerPlan(node)
Definition: plannodes.h:182
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
uintptr_t Datum
Definition: postgres.h:64
#define RelationGetDescr(relation)
Definition: rel.h:530
Snapshot es_snapshot
Definition: execnodes.h:615
ExprState * qual
Definition: execnodes.h:1057
Plan * plan
Definition: execnodes.h:1036
EState * state
Definition: execnodes.h:1038
ExprContext * ps_ExprContext
Definition: execnodes.h:1075
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1042
ExprState * repeatable
Definition: execnodes.h:1497
void * tsm_state
Definition: execnodes.h:1500
ScanState ss
Definition: execnodes.h:1495
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1499
struct TableSampleClause * tablesample
Definition: plannodes.h:407
Scan scan
Definition: plannodes.h:405
Relation ss_currentRelation
Definition: execnodes.h:1474
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1476
PlanState ps
Definition: execnodes.h:1473
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1475
Index scanrelid
Definition: plannodes.h:387
EndSampleScan_function EndSampleScan
Definition: tsmapi.h:75
BeginSampleScan_function BeginSampleScan
Definition: tsmapi.h:72
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
InitSampleScan_function InitSampleScan
Definition: tsmapi.h:71
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1009
static TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:962
static void table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:1033
static bool table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:2009
static bool table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:2031
TsmRoutine * GetTsmRoutine(Oid tsmhandler)
Definition: tablesample.c:27
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433