PostgreSQL Source Code  git master
nodeSamplescan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  * Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/relscan.h"
18 #include "access/tableam.h"
19 #include "access/tsmapi.h"
20 #include "common/pg_prng.h"
21 #include "executor/executor.h"
23 #include "utils/fmgrprotos.h"
24 #include "utils/rel.h"
25 
27 static void tablesample_init(SampleScanState *scanstate);
29 
30 /* ----------------------------------------------------------------
31  * Scan Support
32  * ----------------------------------------------------------------
33  */
34 
35 /* ----------------------------------------------------------------
36  * SampleNext
37  *
38  * This is a workhorse for ExecSampleScan
39  * ----------------------------------------------------------------
40  */
41 static TupleTableSlot *
43 {
44  /*
45  * if this is first call within a scan, initialize
46  */
47  if (!node->begun)
48  tablesample_init(node);
49 
50  /*
51  * get the next tuple, and store it in our result slot
52  */
53  return tablesample_getnext(node);
54 }
55 
56 /*
57  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
58  */
59 static bool
61 {
62  /*
63  * No need to recheck for SampleScan, since like SeqScan we don't pass any
64  * checkable keys to heap_beginscan.
65  */
66  return true;
67 }
68 
69 /* ----------------------------------------------------------------
70  * ExecSampleScan(node)
71  *
72  * Scans the relation using the sampling method and returns
73  * the next qualifying tuple.
74  * We call the ExecScan() routine and pass it the appropriate
75  * access method functions.
76  * ----------------------------------------------------------------
77  */
78 static TupleTableSlot *
80 {
81  SampleScanState *node = castNode(SampleScanState, pstate);
82 
83  return ExecScan(&node->ss,
86 }
87 
88 /* ----------------------------------------------------------------
89  * ExecInitSampleScan
90  * ----------------------------------------------------------------
91  */
93 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
94 {
95  SampleScanState *scanstate;
96  TableSampleClause *tsc = node->tablesample;
97  TsmRoutine *tsm;
98 
99  Assert(outerPlan(node) == NULL);
100  Assert(innerPlan(node) == NULL);
101 
102  /*
103  * create state structure
104  */
105  scanstate = makeNode(SampleScanState);
106  scanstate->ss.ps.plan = (Plan *) node;
107  scanstate->ss.ps.state = estate;
108  scanstate->ss.ps.ExecProcNode = ExecSampleScan;
109 
110  /*
111  * Miscellaneous initialization
112  *
113  * create expression context for node
114  */
115  ExecAssignExprContext(estate, &scanstate->ss.ps);
116 
117  /*
118  * open the scan relation
119  */
120  scanstate->ss.ss_currentRelation =
121  ExecOpenScanRelation(estate,
122  node->scan.scanrelid,
123  eflags);
124 
125  /* we won't set up the HeapScanDesc till later */
126  scanstate->ss.ss_currentScanDesc = NULL;
127 
128  /* and create slot with appropriate rowtype */
129  ExecInitScanTupleSlot(estate, &scanstate->ss,
132 
133  /*
134  * Initialize result type and projection.
135  */
136  ExecInitResultTypeTL(&scanstate->ss.ps);
137  ExecAssignScanProjectionInfo(&scanstate->ss);
138 
139  /*
140  * initialize child expressions
141  */
142  scanstate->ss.ps.qual =
143  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
144 
145  scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
146  scanstate->repeatable =
147  ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
148 
149  /*
150  * If we don't have a REPEATABLE clause, select a random seed. We want to
151  * do this just once, since the seed shouldn't change over rescans.
152  */
153  if (tsc->repeatable == NULL)
155 
156  /*
157  * Finally, initialize the TABLESAMPLE method handler.
158  */
159  tsm = GetTsmRoutine(tsc->tsmhandler);
160  scanstate->tsmroutine = tsm;
161  scanstate->tsm_state = NULL;
162 
163  if (tsm->InitSampleScan)
164  tsm->InitSampleScan(scanstate, eflags);
165 
166  /* We'll do BeginSampleScan later; we can't evaluate params yet */
167  scanstate->begun = false;
168 
169  return scanstate;
170 }
171 
172 /* ----------------------------------------------------------------
173  * ExecEndSampleScan
174  *
175  * frees any storage allocated through C routines.
176  * ----------------------------------------------------------------
177  */
178 void
180 {
181  /*
182  * Tell sampling function that we finished the scan.
183  */
184  if (node->tsmroutine->EndSampleScan)
185  node->tsmroutine->EndSampleScan(node);
186 
187  /*
188  * close heap scan
189  */
190  if (node->ss.ss_currentScanDesc)
192 }
193 
194 /* ----------------------------------------------------------------
195  * ExecReScanSampleScan
196  *
197  * Rescans the relation.
198  *
199  * ----------------------------------------------------------------
200  */
201 void
203 {
204  /* Remember we need to do BeginSampleScan again (if we did it at all) */
205  node->begun = false;
206  node->done = false;
207  node->haveblock = false;
208  node->donetuples = 0;
209 
210  ExecScanReScan(&node->ss);
211 }
212 
213 
214 /*
215  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
216  */
217 static void
219 {
220  TsmRoutine *tsm = scanstate->tsmroutine;
221  ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
222  Datum *params;
223  Datum datum;
224  bool isnull;
225  uint32 seed;
226  bool allow_sync;
227  int i;
228  ListCell *arg;
229 
230  scanstate->donetuples = 0;
231  params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
232 
233  i = 0;
234  foreach(arg, scanstate->args)
235  {
236  ExprState *argstate = (ExprState *) lfirst(arg);
237 
238  params[i] = ExecEvalExprSwitchContext(argstate,
239  econtext,
240  &isnull);
241  if (isnull)
242  ereport(ERROR,
243  (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
244  errmsg("TABLESAMPLE parameter cannot be null")));
245  i++;
246  }
247 
248  if (scanstate->repeatable)
249  {
250  datum = ExecEvalExprSwitchContext(scanstate->repeatable,
251  econtext,
252  &isnull);
253  if (isnull)
254  ereport(ERROR,
255  (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
256  errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
257 
258  /*
259  * The REPEATABLE parameter has been coerced to float8 by the parser.
260  * The reason for using float8 at the SQL level is that it will
261  * produce unsurprising results both for users used to databases that
262  * accept only integers in the REPEATABLE clause and for those who
263  * might expect that REPEATABLE works like setseed() (a float in the
264  * range from -1 to 1).
265  *
266  * We use hashfloat8() to convert the supplied value into a suitable
267  * seed. For regression-testing purposes, that has the convenient
268  * property that REPEATABLE(0) gives a machine-independent result.
269  */
271  }
272  else
273  {
274  /* Use the seed selected by ExecInitSampleScan */
275  seed = scanstate->seed;
276  }
277 
278  /* Set default values for params that BeginSampleScan can adjust */
279  scanstate->use_bulkread = true;
280  scanstate->use_pagemode = true;
281 
282  /* Let tablesample method do its thing */
283  tsm->BeginSampleScan(scanstate,
284  params,
285  list_length(scanstate->args),
286  seed);
287 
288  /* We'll use syncscan if there's no NextSampleBlock function */
289  allow_sync = (tsm->NextSampleBlock == NULL);
290 
291  /* Now we can create or reset the HeapScanDesc */
292  if (scanstate->ss.ss_currentScanDesc == NULL)
293  {
294  scanstate->ss.ss_currentScanDesc =
296  scanstate->ss.ps.state->es_snapshot,
297  0, NULL,
298  scanstate->use_bulkread,
299  allow_sync,
300  scanstate->use_pagemode);
301  }
302  else
303  {
305  scanstate->use_bulkread,
306  allow_sync,
307  scanstate->use_pagemode);
308  }
309 
310  pfree(params);
311 
312  /* And we're initialized. */
313  scanstate->begun = true;
314 }
315 
316 /*
317  * Get next tuple from TABLESAMPLE method.
318  */
319 static TupleTableSlot *
321 {
322  TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
323  TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
324 
325  ExecClearTuple(slot);
326 
327  if (scanstate->done)
328  return NULL;
329 
330  for (;;)
331  {
332  if (!scanstate->haveblock)
333  {
334  if (!table_scan_sample_next_block(scan, scanstate))
335  {
336  scanstate->haveblock = false;
337  scanstate->done = true;
338 
339  /* exhausted relation */
340  return NULL;
341  }
342 
343  scanstate->haveblock = true;
344  }
345 
346  if (!table_scan_sample_next_tuple(scan, scanstate, slot))
347  {
348  /*
349  * If we get here, it means we've exhausted the items on this page
350  * and it's time to move to the next.
351  */
352  scanstate->haveblock = false;
353  continue;
354  }
355 
356  /* Found visible tuple, return it. */
357  break;
358  }
359 
360  scanstate->donetuples++;
361 
362  return slot;
363 }
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
List * ExecInitExprList(List *nodes, PlanState *parent)
Definition: execExpr.c:330
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:224
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:138
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:156
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:270
void ExecScanReScan(ScanState *node)
Definition: execScan.c:297
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1898
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1842
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:483
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:697
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:484
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:485
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:359
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:193
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
static void tablesample_init(SampleScanState *scanstate)
SampleScanState * ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
void ExecReScanSampleScan(SampleScanState *node)
void ExecEndSampleScan(SampleScanState *node)
static TupleTableSlot * ExecSampleScan(PlanState *pstate)
static TupleTableSlot * SampleNext(SampleScanState *node)
static bool SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
static TupleTableSlot * tablesample_getnext(SampleScanState *scanstate)
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define innerPlan(node)
Definition: plannodes.h:182
#define outerPlan(node)
Definition: plannodes.h:183
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
uintptr_t Datum
Definition: postgres.h:64
#define RelationGetDescr(relation)
Definition: rel.h:531
Snapshot es_snapshot
Definition: execnodes.h:629
ExprState * qual
Definition: execnodes.h:1141
Plan * plan
Definition: execnodes.h:1120
EState * state
Definition: execnodes.h:1122
ExprContext * ps_ExprContext
Definition: execnodes.h:1159
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1126
ExprState * repeatable
Definition: execnodes.h:1591
void * tsm_state
Definition: execnodes.h:1594
ScanState ss
Definition: execnodes.h:1589
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1593
struct TableSampleClause * tablesample
Definition: plannodes.h:410
Scan scan
Definition: plannodes.h:408
Relation ss_currentRelation
Definition: execnodes.h:1568
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1570
PlanState ps
Definition: execnodes.h:1567
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1569
Index scanrelid
Definition: plannodes.h:390
EndSampleScan_function EndSampleScan
Definition: tsmapi.h:75
BeginSampleScan_function BeginSampleScan
Definition: tsmapi.h:72
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
InitSampleScan_function InitSampleScan
Definition: tsmapi.h:71
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1019
static TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:972
static void table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:1043
static bool table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:2012
static bool table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:2034
TsmRoutine * GetTsmRoutine(Oid tsmhandler)
Definition: tablesample.c:27
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454