PostgreSQL Source Code  git master
parse_cte.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parse_cte.c
4  * handle CTEs (common table expressions) in parser
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/parser/parse_cte.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "catalog/pg_collation.h"
18 #include "catalog/pg_type.h"
19 #include "nodes/nodeFuncs.h"
20 #include "parser/analyze.h"
21 #include "parser/parse_coerce.h"
22 #include "parser/parse_collate.h"
23 #include "parser/parse_cte.h"
24 #include "parser/parse_expr.h"
25 #include "utils/builtins.h"
26 #include "utils/lsyscache.h"
27 #include "utils/typcache.h"
28 
29 
/*
 * Enumeration of contexts in which a self-reference is disallowed.
 *
 * RECURSION_OK is the one context in which a self-reference is accepted;
 * every other value names a reason for rejecting it.  The enumerators are
 * listed in the same order as the entries of recursion_errormsgs[], so the
 * two must be kept in step.
 */
typedef enum
{
	RECURSION_OK,				/* self-reference is allowed here */
	RECURSION_NONRECURSIVETERM, /* inside the left-hand term */
	RECURSION_SUBLINK,			/* inside a sublink */
	RECURSION_OUTERJOIN,		/* inside nullable side of an outer join */
	RECURSION_INTERSECT,		/* underneath INTERSECT (ALL) */
	RECURSION_EXCEPT			/* underneath EXCEPT (ALL) */
} RecursionContext;
40 
41 /* Associated error messages --- each must have one %s for CTE name */
42 static const char *const recursion_errormsgs[] = {
43  /* RECURSION_OK */
44  NULL,
45  /* RECURSION_NONRECURSIVETERM */
46  gettext_noop("recursive reference to query \"%s\" must not appear within its non-recursive term"),
47  /* RECURSION_SUBLINK */
48  gettext_noop("recursive reference to query \"%s\" must not appear within a subquery"),
49  /* RECURSION_OUTERJOIN */
50  gettext_noop("recursive reference to query \"%s\" must not appear within an outer join"),
51  /* RECURSION_INTERSECT */
52  gettext_noop("recursive reference to query \"%s\" must not appear within INTERSECT"),
53  /* RECURSION_EXCEPT */
54  gettext_noop("recursive reference to query \"%s\" must not appear within EXCEPT")
55 };
56 
57 /*
58  * For WITH RECURSIVE, we have to find an ordering of the clause members
59  * with no forward references, and determine which members are recursive
60  * (i.e., self-referential). It is convenient to do this with an array
61  * of CteItems instead of a list of CommonTableExprs.
62  */
63 typedef struct CteItem
64 {
65  CommonTableExpr *cte; /* One CTE to examine */
66  int id; /* Its ID number for dependencies */
67  Bitmapset *depends_on; /* CTEs depended on (not including self) */
68 } CteItem;
69 
70 /* CteState is what we need to pass around in the tree walkers */
71 typedef struct CteState
72 {
73  /* global state: */
74  ParseState *pstate; /* global parse state */
75  CteItem *items; /* array of CTEs and extra data */
76  int numitems; /* number of CTEs */
77  /* working state during a tree walk: */
78  int curitem; /* index of item currently being examined */
79  List *innerwiths; /* list of lists of CommonTableExpr */
80  /* working state for checkWellFormedRecursion walk only: */
81  int selfrefcount; /* number of self-references detected */
82  RecursionContext context; /* context to allow or disallow self-ref */
83 } CteState;
84 
85 
86 static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte);
87 
88 /* Dependency processing functions */
89 static void makeDependencyGraph(CteState *cstate);
90 static bool makeDependencyGraphWalker(Node *node, CteState *cstate);
91 static void TopologicalSort(ParseState *pstate, CteItem *items, int numitems);
92 
93 /* Recursion validity checker functions */
94 static void checkWellFormedRecursion(CteState *cstate);
95 static bool checkWellFormedRecursionWalker(Node *node, CteState *cstate);
96 static void checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate);
97 
98 
99 /*
100  * transformWithClause -
101  * Transform the list of WITH clause "common table expressions" into
102  * Query nodes.
103  *
104  * The result is the list of transformed CTEs to be put into the output
105  * Query. (This is in fact the same as the ending value of p_ctenamespace,
106  * but it seems cleaner to not expose that in the function's API.)
107  */
108 List *
110 {
111  ListCell *lc;
112 
113  /* Only one WITH clause per query level */
114  Assert(pstate->p_ctenamespace == NIL);
115  Assert(pstate->p_future_ctes == NIL);
116 
117  /*
118  * For either type of WITH, there must not be duplicate CTE names in the
119  * list. Check this right away so we needn't worry later.
120  *
121  * Also, tentatively mark each CTE as non-recursive, and initialize its
122  * reference count to zero, and set pstate->p_hasModifyingCTE if needed.
123  */
124  foreach(lc, withClause->ctes)
125  {
127  ListCell *rest;
128 
129  for_each_cell(rest, withClause->ctes, lnext(withClause->ctes, lc))
130  {
131  CommonTableExpr *cte2 = (CommonTableExpr *) lfirst(rest);
132 
133  if (strcmp(cte->ctename, cte2->ctename) == 0)
134  ereport(ERROR,
135  (errcode(ERRCODE_DUPLICATE_ALIAS),
136  errmsg("WITH query name \"%s\" specified more than once",
137  cte2->ctename),
138  parser_errposition(pstate, cte2->location)));
139  }
140 
141  cte->cterecursive = false;
142  cte->cterefcount = 0;
143 
144  if (!IsA(cte->ctequery, SelectStmt))
145  {
146  /* must be a data-modifying statement */
147  Assert(IsA(cte->ctequery, InsertStmt) ||
148  IsA(cte->ctequery, UpdateStmt) ||
149  IsA(cte->ctequery, DeleteStmt));
150 
151  pstate->p_hasModifyingCTE = true;
152  }
153  }
154 
155  if (withClause->recursive)
156  {
157  /*
158  * For WITH RECURSIVE, we rearrange the list elements if needed to
159  * eliminate forward references. First, build a work array and set up
160  * the data structure needed by the tree walkers.
161  */
162  CteState cstate;
163  int i;
164 
165  cstate.pstate = pstate;
166  cstate.numitems = list_length(withClause->ctes);
167  cstate.items = (CteItem *) palloc0(cstate.numitems * sizeof(CteItem));
168  i = 0;
169  foreach(lc, withClause->ctes)
170  {
171  cstate.items[i].cte = (CommonTableExpr *) lfirst(lc);
172  cstate.items[i].id = i;
173  i++;
174  }
175 
176  /*
177  * Find all the dependencies and sort the CteItems into a safe
178  * processing order. Also, mark CTEs that contain self-references.
179  */
180  makeDependencyGraph(&cstate);
181 
182  /*
183  * Check that recursive queries are well-formed.
184  */
185  checkWellFormedRecursion(&cstate);
186 
187  /*
188  * Set up the ctenamespace for parse analysis. Per spec, all the WITH
189  * items are visible to all others, so stuff them all in before parse
190  * analysis. We build the list in safe processing order so that the
191  * planner can process the queries in sequence.
192  */
193  for (i = 0; i < cstate.numitems; i++)
194  {
195  CommonTableExpr *cte = cstate.items[i].cte;
196 
197  pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte);
198  }
199 
200  /*
201  * Do parse analysis in the order determined by the topological sort.
202  */
203  for (i = 0; i < cstate.numitems; i++)
204  {
205  CommonTableExpr *cte = cstate.items[i].cte;
206 
207  analyzeCTE(pstate, cte);
208  }
209  }
210  else
211  {
212  /*
213  * For non-recursive WITH, just analyze each CTE in sequence and then
214  * add it to the ctenamespace. This corresponds to the spec's
215  * definition of the scope of each WITH name. However, to allow error
216  * reports to be aware of the possibility of an erroneous reference,
217  * we maintain a list in p_future_ctes of the not-yet-visible CTEs.
218  */
219  pstate->p_future_ctes = list_copy(withClause->ctes);
220 
221  foreach(lc, withClause->ctes)
222  {
224 
225  analyzeCTE(pstate, cte);
226  pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte);
227  pstate->p_future_ctes = list_delete_first(pstate->p_future_ctes);
228  }
229  }
230 
231  return pstate->p_ctenamespace;
232 }
233 
234 
235 /*
236  * Perform the actual parse analysis transformation of one CTE. All
237  * CTEs it depends on have already been loaded into pstate->p_ctenamespace,
238  * and have been marked with the correct output column names/types.
239  */
240 static void
242 {
243  Query *query;
244 
245  /* Analysis not done already */
246  Assert(!IsA(cte->ctequery, Query));
247 
248  query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true);
249  cte->ctequery = (Node *) query;
250 
251  /*
252  * Check that we got something reasonable. These first two cases should
253  * be prevented by the grammar.
254  */
255  if (!IsA(query, Query))
256  elog(ERROR, "unexpected non-Query statement in WITH");
257  if (query->utilityStmt != NULL)
258  elog(ERROR, "unexpected utility statement in WITH");
259 
260  /*
261  * We disallow data-modifying WITH except at the top level of a query,
262  * because it's not clear when such a modification should be executed.
263  */
264  if (query->commandType != CMD_SELECT &&
265  pstate->parentParseState != NULL)
266  ereport(ERROR,
267  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
268  errmsg("WITH clause containing a data-modifying statement must be at the top level"),
269  parser_errposition(pstate, cte->location)));
270 
271  /*
272  * CTE queries are always marked not canSetTag. (Currently this only
273  * matters for data-modifying statements, for which the flag will be
274  * propagated to the ModifyTable plan node.)
275  */
276  query->canSetTag = false;
277 
278  if (!cte->cterecursive)
279  {
280  /* Compute the output column names/types if not done yet */
281  analyzeCTETargetList(pstate, cte, GetCTETargetList(cte));
282  }
283  else
284  {
285  /*
286  * Verify that the previously determined output column types and
287  * collations match what the query really produced. We have to check
288  * this because the recursive term could have overridden the
289  * non-recursive term, and we don't have any easy way to fix that.
290  */
291  ListCell *lctlist,
292  *lctyp,
293  *lctypmod,
294  *lccoll;
295  int varattno;
296 
297  lctyp = list_head(cte->ctecoltypes);
298  lctypmod = list_head(cte->ctecoltypmods);
299  lccoll = list_head(cte->ctecolcollations);
300  varattno = 0;
301  foreach(lctlist, GetCTETargetList(cte))
302  {
303  TargetEntry *te = (TargetEntry *) lfirst(lctlist);
304  Node *texpr;
305 
306  if (te->resjunk)
307  continue;
308  varattno++;
309  Assert(varattno == te->resno);
310  if (lctyp == NULL || lctypmod == NULL || lccoll == NULL) /* shouldn't happen */
311  elog(ERROR, "wrong number of output columns in WITH");
312  texpr = (Node *) te->expr;
313  if (exprType(texpr) != lfirst_oid(lctyp) ||
314  exprTypmod(texpr) != lfirst_int(lctypmod))
315  ereport(ERROR,
316  (errcode(ERRCODE_DATATYPE_MISMATCH),
317  errmsg("recursive query \"%s\" column %d has type %s in non-recursive term but type %s overall",
318  cte->ctename, varattno,
320  lfirst_int(lctypmod)),
322  exprTypmod(texpr))),
323  errhint("Cast the output of the non-recursive term to the correct type."),
324  parser_errposition(pstate, exprLocation(texpr))));
325  if (exprCollation(texpr) != lfirst_oid(lccoll))
326  ereport(ERROR,
327  (errcode(ERRCODE_COLLATION_MISMATCH),
328  errmsg("recursive query \"%s\" column %d has collation \"%s\" in non-recursive term but collation \"%s\" overall",
329  cte->ctename, varattno,
332  errhint("Use the COLLATE clause to set the collation of the non-recursive term."),
333  parser_errposition(pstate, exprLocation(texpr))));
334  lctyp = lnext(cte->ctecoltypes, lctyp);
335  lctypmod = lnext(cte->ctecoltypmods, lctypmod);
336  lccoll = lnext(cte->ctecolcollations, lccoll);
337  }
338  if (lctyp != NULL || lctypmod != NULL || lccoll != NULL) /* shouldn't happen */
339  elog(ERROR, "wrong number of output columns in WITH");
340  }
341 
342  if (cte->search_clause || cte->cycle_clause)
343  {
344  Query *ctequery;
345  SetOperationStmt *sos;
346 
347  if (!cte->cterecursive)
348  ereport(ERROR,
349  (errcode(ERRCODE_SYNTAX_ERROR),
350  errmsg("WITH query is not recursive"),
351  parser_errposition(pstate, cte->location)));
352 
353  /*
354  * SQL requires a WITH list element (CTE) to be "expandable" in order
355  * to allow a search or cycle clause. That is a stronger requirement
356  * than just being recursive. It basically means the query expression
357  * looks like
358  *
359  * non-recursive query UNION [ALL] recursive query
360  *
361  * and that the recursive query is not itself a set operation.
362  *
363  * As of this writing, most of these criteria are already satisfied by
364  * all recursive CTEs allowed by PostgreSQL. In the future, if
365  * further variants of recursive CTEs are accepted, there might be
366  * further checks required here to determine what is "expandable".
367  */
368 
369  ctequery = castNode(Query, cte->ctequery);
370  Assert(ctequery->setOperations);
371  sos = castNode(SetOperationStmt, ctequery->setOperations);
372 
373  /*
374  * This left side check is not required for expandability, but
375  * rewriteSearchAndCycle() doesn't currently have support for it, so
376  * we catch it here.
377  */
378  if (!IsA(sos->larg, RangeTblRef))
379  ereport(ERROR,
380  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
381  errmsg("with a SEARCH or CYCLE clause, the left side of the UNION must be a SELECT")));
382 
383  if (!IsA(sos->rarg, RangeTblRef))
384  ereport(ERROR,
385  (errcode(ERRCODE_SYNTAX_ERROR),
386  errmsg("with a SEARCH or CYCLE clause, the right side of the UNION must be a SELECT")));
387  }
388 
389  if (cte->search_clause)
390  {
391  ListCell *lc;
392  List *seen = NIL;
393 
394  foreach(lc, cte->search_clause->search_col_list)
395  {
396  String *colname = lfirst_node(String, lc);
397 
398  if (!list_member(cte->ctecolnames, colname))
399  ereport(ERROR,
400  (errcode(ERRCODE_SYNTAX_ERROR),
401  errmsg("search column \"%s\" not in WITH query column list",
402  strVal(colname)),
403  parser_errposition(pstate, cte->search_clause->location)));
404 
405  if (list_member(seen, colname))
406  ereport(ERROR,
407  (errcode(ERRCODE_DUPLICATE_COLUMN),
408  errmsg("search column \"%s\" specified more than once",
409  strVal(colname)),
410  parser_errposition(pstate, cte->search_clause->location)));
411  seen = lappend(seen, colname);
412  }
413 
415  ereport(ERROR,
416  errcode(ERRCODE_SYNTAX_ERROR),
417  errmsg("search sequence column name \"%s\" already used in WITH query column list",
419  parser_errposition(pstate, cte->search_clause->location));
420  }
421 
422  if (cte->cycle_clause)
423  {
424  ListCell *lc;
425  List *seen = NIL;
426  TypeCacheEntry *typentry;
427  Oid op;
428 
429  foreach(lc, cte->cycle_clause->cycle_col_list)
430  {
431  String *colname = lfirst_node(String, lc);
432 
433  if (!list_member(cte->ctecolnames, colname))
434  ereport(ERROR,
435  (errcode(ERRCODE_SYNTAX_ERROR),
436  errmsg("cycle column \"%s\" not in WITH query column list",
437  strVal(colname)),
438  parser_errposition(pstate, cte->cycle_clause->location)));
439 
440  if (list_member(seen, colname))
441  ereport(ERROR,
442  (errcode(ERRCODE_DUPLICATE_COLUMN),
443  errmsg("cycle column \"%s\" specified more than once",
444  strVal(colname)),
445  parser_errposition(pstate, cte->cycle_clause->location)));
446  seen = lappend(seen, colname);
447  }
448 
450  ereport(ERROR,
451  errcode(ERRCODE_SYNTAX_ERROR),
452  errmsg("cycle mark column name \"%s\" already used in WITH query column list",
454  parser_errposition(pstate, cte->cycle_clause->location));
455 
460 
462  ereport(ERROR,
463  errcode(ERRCODE_SYNTAX_ERROR),
464  errmsg("cycle path column name \"%s\" already used in WITH query column list",
466  parser_errposition(pstate, cte->cycle_clause->location));
467 
468  if (strcmp(cte->cycle_clause->cycle_mark_column,
469  cte->cycle_clause->cycle_path_column) == 0)
470  ereport(ERROR,
471  errcode(ERRCODE_SYNTAX_ERROR),
472  errmsg("cycle mark column name and cycle path column name are the same"),
473  parser_errposition(pstate, cte->cycle_clause->location));
474 
478  "CYCLE", NULL);
482  "CYCLE/SET/TO");
486  "CYCLE/SET/DEFAULT");
487 
492 
496  true);
497 
499  if (!typentry->eq_opr)
500  ereport(ERROR,
501  errcode(ERRCODE_UNDEFINED_FUNCTION),
502  errmsg("could not identify an equality operator for type %s",
504  op = get_negator(typentry->eq_opr);
505  if (!op)
506  ereport(ERROR,
507  errcode(ERRCODE_UNDEFINED_FUNCTION),
508  errmsg("could not identify an inequality operator for type %s",
510 
511  cte->cycle_clause->cycle_mark_neop = op;
512  }
513 
514  if (cte->search_clause && cte->cycle_clause)
515  {
516  if (strcmp(cte->search_clause->search_seq_column,
517  cte->cycle_clause->cycle_mark_column) == 0)
518  ereport(ERROR,
519  errcode(ERRCODE_SYNTAX_ERROR),
520  errmsg("search sequence column name and cycle mark column name are the same"),
521  parser_errposition(pstate, cte->search_clause->location));
522 
523  if (strcmp(cte->search_clause->search_seq_column,
524  cte->cycle_clause->cycle_path_column) == 0)
525  ereport(ERROR,
526  errcode(ERRCODE_SYNTAX_ERROR),
527  errmsg("search sequence column name and cycle path column name are the same"),
528  parser_errposition(pstate, cte->search_clause->location));
529  }
530 }
531 
532 /*
533  * Compute derived fields of a CTE, given the transformed output targetlist
534  *
535  * For a nonrecursive CTE, this is called after transforming the CTE's query.
536  * For a recursive CTE, we call it after transforming the non-recursive term,
537  * and pass the targetlist emitted by the non-recursive term only.
538  *
539  * Note: in the recursive case, the passed pstate is actually the one being
540  * used to analyze the CTE's query, so it is one level lower down than in
541  * the nonrecursive case. This doesn't matter since we only use it for
542  * error message context anyway.
543  */
544 void
546 {
547  int numaliases;
548  int varattno;
549  ListCell *tlistitem;
550 
551  /* Not done already ... */
552  Assert(cte->ctecolnames == NIL);
553 
554  /*
555  * We need to determine column names, types, and collations. The alias
556  * column names override anything coming from the query itself. (Note:
557  * the SQL spec says that the alias list must be empty or exactly as long
558  * as the output column set; but we allow it to be shorter for consistency
559  * with Alias handling.)
560  */
561  cte->ctecolnames = copyObject(cte->aliascolnames);
562  cte->ctecoltypes = cte->ctecoltypmods = cte->ctecolcollations = NIL;
563  numaliases = list_length(cte->aliascolnames);
564  varattno = 0;
565  foreach(tlistitem, tlist)
566  {
567  TargetEntry *te = (TargetEntry *) lfirst(tlistitem);
568  Oid coltype;
569  int32 coltypmod;
570  Oid colcoll;
571 
572  if (te->resjunk)
573  continue;
574  varattno++;
575  Assert(varattno == te->resno);
576  if (varattno > numaliases)
577  {
578  char *attrname;
579 
580  attrname = pstrdup(te->resname);
581  cte->ctecolnames = lappend(cte->ctecolnames, makeString(attrname));
582  }
583  coltype = exprType((Node *) te->expr);
584  coltypmod = exprTypmod((Node *) te->expr);
585  colcoll = exprCollation((Node *) te->expr);
586 
587  /*
588  * If the CTE is recursive, force the exposed column type of any
589  * "unknown" column to "text". We must deal with this here because
590  * we're called on the non-recursive term before there's been any
591  * attempt to force unknown output columns to some other type. We
592  * have to resolve unknowns before looking at the recursive term.
593  *
594  * The column might contain 'foo' COLLATE "bar", so don't override
595  * collation if it's already set.
596  */
597  if (cte->cterecursive && coltype == UNKNOWNOID)
598  {
599  coltype = TEXTOID;
600  coltypmod = -1; /* should be -1 already, but be sure */
601  if (!OidIsValid(colcoll))
602  colcoll = DEFAULT_COLLATION_OID;
603  }
604  cte->ctecoltypes = lappend_oid(cte->ctecoltypes, coltype);
605  cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, coltypmod);
606  cte->ctecolcollations = lappend_oid(cte->ctecolcollations, colcoll);
607  }
608  if (varattno < numaliases)
609  ereport(ERROR,
610  (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
611  errmsg("WITH query \"%s\" has %d columns available but %d columns specified",
612  cte->ctename, varattno, numaliases),
613  parser_errposition(pstate, cte->location)));
614 }
615 
616 
617 /*
618  * Identify the cross-references of a list of WITH RECURSIVE items,
619  * and sort into an order that has no forward references.
620  */
621 static void
623 {
624  int i;
625 
626  for (i = 0; i < cstate->numitems; i++)
627  {
628  CommonTableExpr *cte = cstate->items[i].cte;
629 
630  cstate->curitem = i;
631  cstate->innerwiths = NIL;
632  makeDependencyGraphWalker((Node *) cte->ctequery, cstate);
633  Assert(cstate->innerwiths == NIL);
634  }
635 
636  TopologicalSort(cstate->pstate, cstate->items, cstate->numitems);
637 }
638 
639 /*
640  * Tree walker function to detect cross-references and self-references of the
641  * CTEs in a WITH RECURSIVE list.
642  */
643 static bool
645 {
646  if (node == NULL)
647  return false;
648  if (IsA(node, RangeVar))
649  {
650  RangeVar *rv = (RangeVar *) node;
651 
652  /* If unqualified name, might be a CTE reference */
653  if (!rv->schemaname)
654  {
655  ListCell *lc;
656  int i;
657 
658  /* ... but first see if it's captured by an inner WITH */
659  foreach(lc, cstate->innerwiths)
660  {
661  List *withlist = (List *) lfirst(lc);
662  ListCell *lc2;
663 
664  foreach(lc2, withlist)
665  {
667 
668  if (strcmp(rv->relname, cte->ctename) == 0)
669  return false; /* yes, so bail out */
670  }
671  }
672 
673  /* No, could be a reference to the query level we are working on */
674  for (i = 0; i < cstate->numitems; i++)
675  {
676  CommonTableExpr *cte = cstate->items[i].cte;
677 
678  if (strcmp(rv->relname, cte->ctename) == 0)
679  {
680  int myindex = cstate->curitem;
681 
682  if (i != myindex)
683  {
684  /* Add cross-item dependency */
685  cstate->items[myindex].depends_on =
686  bms_add_member(cstate->items[myindex].depends_on,
687  cstate->items[i].id);
688  }
689  else
690  {
691  /* Found out this one is self-referential */
692  cte->cterecursive = true;
693  }
694  break;
695  }
696  }
697  }
698  return false;
699  }
700  if (IsA(node, SelectStmt))
701  {
702  SelectStmt *stmt = (SelectStmt *) node;
703  ListCell *lc;
704 
705  if (stmt->withClause)
706  {
707  if (stmt->withClause->recursive)
708  {
709  /*
710  * In the RECURSIVE case, all query names of the WITH are
711  * visible to all WITH items as well as the main query. So
712  * push them all on, process, pop them all off.
713  */
714  cstate->innerwiths = lcons(stmt->withClause->ctes,
715  cstate->innerwiths);
716  foreach(lc, stmt->withClause->ctes)
717  {
719 
720  (void) makeDependencyGraphWalker(cte->ctequery, cstate);
721  }
722  (void) raw_expression_tree_walker(node,
724  (void *) cstate);
725  cstate->innerwiths = list_delete_first(cstate->innerwiths);
726  }
727  else
728  {
729  /*
730  * In the non-RECURSIVE case, query names are visible to the
731  * WITH items after them and to the main query.
732  */
733  cstate->innerwiths = lcons(NIL, cstate->innerwiths);
734  foreach(lc, stmt->withClause->ctes)
735  {
737  ListCell *cell1;
738 
739  (void) makeDependencyGraphWalker(cte->ctequery, cstate);
740  /* note that recursion could mutate innerwiths list */
741  cell1 = list_head(cstate->innerwiths);
742  lfirst(cell1) = lappend((List *) lfirst(cell1), cte);
743  }
744  (void) raw_expression_tree_walker(node,
746  (void *) cstate);
747  cstate->innerwiths = list_delete_first(cstate->innerwiths);
748  }
749  /* We're done examining the SelectStmt */
750  return false;
751  }
752  /* if no WITH clause, just fall through for normal processing */
753  }
754  if (IsA(node, WithClause))
755  {
756  /*
757  * Prevent raw_expression_tree_walker from recursing directly into a
758  * WITH clause. We need that to happen only under the control of the
759  * code above.
760  */
761  return false;
762  }
763  return raw_expression_tree_walker(node,
765  (void *) cstate);
766 }
767 
768 /*
769  * Sort by dependencies, using a standard topological sort operation
770  */
771 static void
772 TopologicalSort(ParseState *pstate, CteItem *items, int numitems)
773 {
774  int i,
775  j;
776 
777  /* for each position in sequence ... */
778  for (i = 0; i < numitems; i++)
779  {
780  /* ... scan the remaining items to find one that has no dependencies */
781  for (j = i; j < numitems; j++)
782  {
783  if (bms_is_empty(items[j].depends_on))
784  break;
785  }
786 
787  /* if we didn't find one, the dependency graph has a cycle */
788  if (j >= numitems)
789  ereport(ERROR,
790  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
791  errmsg("mutual recursion between WITH items is not implemented"),
792  parser_errposition(pstate, items[i].cte->location)));
793 
794  /*
795  * Found one. Move it to front and remove it from every other item's
796  * dependencies.
797  */
798  if (i != j)
799  {
800  CteItem tmp;
801 
802  tmp = items[i];
803  items[i] = items[j];
804  items[j] = tmp;
805  }
806 
807  /*
808  * Items up through i are known to have no dependencies left, so we
809  * can skip them in this loop.
810  */
811  for (j = i + 1; j < numitems; j++)
812  {
813  items[j].depends_on = bms_del_member(items[j].depends_on,
814  items[i].id);
815  }
816  }
817 }
818 
819 
820 /*
821  * Check that recursive queries are well-formed.
822  */
823 static void
825 {
826  int i;
827 
828  for (i = 0; i < cstate->numitems; i++)
829  {
830  CommonTableExpr *cte = cstate->items[i].cte;
831  SelectStmt *stmt = (SelectStmt *) cte->ctequery;
832 
833  Assert(!IsA(stmt, Query)); /* not analyzed yet */
834 
835  /* Ignore items that weren't found to be recursive */
836  if (!cte->cterecursive)
837  continue;
838 
839  /* Must be a SELECT statement */
840  if (!IsA(stmt, SelectStmt))
841  ereport(ERROR,
842  (errcode(ERRCODE_INVALID_RECURSION),
843  errmsg("recursive query \"%s\" must not contain data-modifying statements",
844  cte->ctename),
845  parser_errposition(cstate->pstate, cte->location)));
846 
847  /* Must have top-level UNION */
848  if (stmt->op != SETOP_UNION)
849  ereport(ERROR,
850  (errcode(ERRCODE_INVALID_RECURSION),
851  errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term",
852  cte->ctename),
853  parser_errposition(cstate->pstate, cte->location)));
854 
855  /* The left-hand operand mustn't contain self-reference at all */
856  cstate->curitem = i;
857  cstate->innerwiths = NIL;
858  cstate->selfrefcount = 0;
860  checkWellFormedRecursionWalker((Node *) stmt->larg, cstate);
861  Assert(cstate->innerwiths == NIL);
862 
863  /* Right-hand operand should contain one reference in a valid place */
864  cstate->curitem = i;
865  cstate->innerwiths = NIL;
866  cstate->selfrefcount = 0;
867  cstate->context = RECURSION_OK;
868  checkWellFormedRecursionWalker((Node *) stmt->rarg, cstate);
869  Assert(cstate->innerwiths == NIL);
870  if (cstate->selfrefcount != 1) /* shouldn't happen */
871  elog(ERROR, "missing recursive reference");
872 
873  /* WITH mustn't contain self-reference, either */
874  if (stmt->withClause)
875  {
876  cstate->curitem = i;
877  cstate->innerwiths = NIL;
878  cstate->selfrefcount = 0;
879  cstate->context = RECURSION_SUBLINK;
881  cstate);
882  Assert(cstate->innerwiths == NIL);
883  }
884 
885  /*
886  * Disallow ORDER BY and similar decoration atop the UNION. These
887  * don't make sense because it's impossible to figure out what they
888  * mean when we have only part of the recursive query's results. (If
889  * we did allow them, we'd have to check for recursive references
890  * inside these subtrees.)
891  */
892  if (stmt->sortClause)
893  ereport(ERROR,
894  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
895  errmsg("ORDER BY in a recursive query is not implemented"),
896  parser_errposition(cstate->pstate,
897  exprLocation((Node *) stmt->sortClause))));
898  if (stmt->limitOffset)
899  ereport(ERROR,
900  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
901  errmsg("OFFSET in a recursive query is not implemented"),
902  parser_errposition(cstate->pstate,
903  exprLocation(stmt->limitOffset))));
904  if (stmt->limitCount)
905  ereport(ERROR,
906  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
907  errmsg("LIMIT in a recursive query is not implemented"),
908  parser_errposition(cstate->pstate,
909  exprLocation(stmt->limitCount))));
910  if (stmt->lockingClause)
911  ereport(ERROR,
912  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
913  errmsg("FOR UPDATE/SHARE in a recursive query is not implemented"),
914  parser_errposition(cstate->pstate,
915  exprLocation((Node *) stmt->lockingClause))));
916  }
917 }
918 
919 /*
920  * Tree walker function to detect invalid self-references in a recursive query.
921  */
922 static bool
924 {
925  RecursionContext save_context = cstate->context;
926 
927  if (node == NULL)
928  return false;
929  if (IsA(node, RangeVar))
930  {
931  RangeVar *rv = (RangeVar *) node;
932 
933  /* If unqualified name, might be a CTE reference */
934  if (!rv->schemaname)
935  {
936  ListCell *lc;
937  CommonTableExpr *mycte;
938 
939  /* ... but first see if it's captured by an inner WITH */
940  foreach(lc, cstate->innerwiths)
941  {
942  List *withlist = (List *) lfirst(lc);
943  ListCell *lc2;
944 
945  foreach(lc2, withlist)
946  {
948 
949  if (strcmp(rv->relname, cte->ctename) == 0)
950  return false; /* yes, so bail out */
951  }
952  }
953 
954  /* No, could be a reference to the query level we are working on */
955  mycte = cstate->items[cstate->curitem].cte;
956  if (strcmp(rv->relname, mycte->ctename) == 0)
957  {
958  /* Found a recursive reference to the active query */
959  if (cstate->context != RECURSION_OK)
960  ereport(ERROR,
961  (errcode(ERRCODE_INVALID_RECURSION),
963  mycte->ctename),
964  parser_errposition(cstate->pstate,
965  rv->location)));
966  /* Count references */
967  if (++(cstate->selfrefcount) > 1)
968  ereport(ERROR,
969  (errcode(ERRCODE_INVALID_RECURSION),
970  errmsg("recursive reference to query \"%s\" must not appear more than once",
971  mycte->ctename),
972  parser_errposition(cstate->pstate,
973  rv->location)));
974  }
975  }
976  return false;
977  }
978  if (IsA(node, SelectStmt))
979  {
980  SelectStmt *stmt = (SelectStmt *) node;
981  ListCell *lc;
982 
983  if (stmt->withClause)
984  {
985  if (stmt->withClause->recursive)
986  {
987  /*
988  * In the RECURSIVE case, all query names of the WITH are
989  * visible to all WITH items as well as the main query. So
990  * push them all on, process, pop them all off.
991  */
992  cstate->innerwiths = lcons(stmt->withClause->ctes,
993  cstate->innerwiths);
994  foreach(lc, stmt->withClause->ctes)
995  {
997 
998  (void) checkWellFormedRecursionWalker(cte->ctequery, cstate);
999  }
1000  checkWellFormedSelectStmt(stmt, cstate);
1001  cstate->innerwiths = list_delete_first(cstate->innerwiths);
1002  }
1003  else
1004  {
1005  /*
1006  * In the non-RECURSIVE case, query names are visible to the
1007  * WITH items after them and to the main query.
1008  */
1009  cstate->innerwiths = lcons(NIL, cstate->innerwiths);
1010  foreach(lc, stmt->withClause->ctes)
1011  {
1013  ListCell *cell1;
1014 
1015  (void) checkWellFormedRecursionWalker(cte->ctequery, cstate);
1016  /* note that recursion could mutate innerwiths list */
1017  cell1 = list_head(cstate->innerwiths);
1018  lfirst(cell1) = lappend((List *) lfirst(cell1), cte);
1019  }
1020  checkWellFormedSelectStmt(stmt, cstate);
1021  cstate->innerwiths = list_delete_first(cstate->innerwiths);
1022  }
1023  }
1024  else
1025  checkWellFormedSelectStmt(stmt, cstate);
1026  /* We're done examining the SelectStmt */
1027  return false;
1028  }
1029  if (IsA(node, WithClause))
1030  {
1031  /*
1032  * Prevent raw_expression_tree_walker from recursing directly into a
1033  * WITH clause. We need that to happen only under the control of the
1034  * code above.
1035  */
1036  return false;
1037  }
1038  if (IsA(node, JoinExpr))
1039  {
1040  JoinExpr *j = (JoinExpr *) node;
1041 
1042  switch (j->jointype)
1043  {
1044  case JOIN_INNER:
1048  break;
1049  case JOIN_LEFT:
1051  if (save_context == RECURSION_OK)
1052  cstate->context = RECURSION_OUTERJOIN;
1054  cstate->context = save_context;
1056  break;
1057  case JOIN_FULL:
1058  if (save_context == RECURSION_OK)
1059  cstate->context = RECURSION_OUTERJOIN;
1062  cstate->context = save_context;
1064  break;
1065  case JOIN_RIGHT:
1066  if (save_context == RECURSION_OK)
1067  cstate->context = RECURSION_OUTERJOIN;
1069  cstate->context = save_context;
1072  break;
1073  default:
1074  elog(ERROR, "unrecognized join type: %d",
1075  (int) j->jointype);
1076  }
1077  return false;
1078  }
1079  if (IsA(node, SubLink))
1080  {
1081  SubLink *sl = (SubLink *) node;
1082 
1083  /*
1084  * we intentionally override outer context, since subquery is
1085  * independent
1086  */
1087  cstate->context = RECURSION_SUBLINK;
1089  cstate->context = save_context;
1091  return false;
1092  }
1093  return raw_expression_tree_walker(node,
1095  (void *) cstate);
1096 }
1097 
1098 /*
1099  * subroutine for checkWellFormedRecursionWalker: process a SelectStmt
1100  * without worrying about its WITH clause
1101  */
1102 static void
1104 {
1105  RecursionContext save_context = cstate->context;
1106 
1107  if (save_context != RECURSION_OK)
1108  {
1109  /* just recurse without changing state */
1112  (void *) cstate);
1113  }
1114  else
1115  {
1116  switch (stmt->op)
1117  {
1118  case SETOP_NONE:
1119  case SETOP_UNION:
1122  (void *) cstate);
1123  break;
1124  case SETOP_INTERSECT:
1125  if (stmt->all)
1126  cstate->context = RECURSION_INTERSECT;
1127  checkWellFormedRecursionWalker((Node *) stmt->larg,
1128  cstate);
1129  checkWellFormedRecursionWalker((Node *) stmt->rarg,
1130  cstate);
1131  cstate->context = save_context;
1132  checkWellFormedRecursionWalker((Node *) stmt->sortClause,
1133  cstate);
1134  checkWellFormedRecursionWalker((Node *) stmt->limitOffset,
1135  cstate);
1136  checkWellFormedRecursionWalker((Node *) stmt->limitCount,
1137  cstate);
1138  checkWellFormedRecursionWalker((Node *) stmt->lockingClause,
1139  cstate);
1140  /* stmt->withClause is intentionally ignored here */
1141  break;
1142  case SETOP_EXCEPT:
1143  if (stmt->all)
1144  cstate->context = RECURSION_EXCEPT;
1145  checkWellFormedRecursionWalker((Node *) stmt->larg,
1146  cstate);
1147  cstate->context = RECURSION_EXCEPT;
1148  checkWellFormedRecursionWalker((Node *) stmt->rarg,
1149  cstate);
1150  cstate->context = save_context;
1151  checkWellFormedRecursionWalker((Node *) stmt->sortClause,
1152  cstate);
1153  checkWellFormedRecursionWalker((Node *) stmt->limitOffset,
1154  cstate);
1155  checkWellFormedRecursionWalker((Node *) stmt->limitCount,
1156  cstate);
1157  checkWellFormedRecursionWalker((Node *) stmt->lockingClause,
1158  cstate);
1159  /* stmt->withClause is intentionally ignored here */
1160  break;
1161  default:
1162  elog(ERROR, "unrecognized set op: %d",
1163  (int) stmt->op);
1164  }
1165  }
1166 }
#define list_make2(x1, x2)
Definition: pg_list.h:208
#define NIL
Definition: pg_list.h:65
struct SelectStmt * larg
Definition: parsenodes.h:1695
#define IsA(nodeptr, _type_)
Definition: nodes.h:587
static bool makeDependencyGraphWalker(Node *node, CteState *cstate)
Definition: parse_cte.c:644
int errhint(const char *fmt,...)
Definition: elog.c:1156
int exprLocation(const Node *expr)
Definition: nodeFuncs.c:1250
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:322
int selfrefcount
Definition: parse_cte.c:81
#define castNode(_type_, nodeptr)
Definition: nodes.h:605
static void makeDependencyGraph(CteState *cstate)
Definition: parse_cte.c:622
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:267
void analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
Definition: parse_cte.c:545
char * search_seq_column
Definition: parsenodes.h:1489
Node * limitOffset
Definition: parsenodes.h:1684
#define for_each_cell(cell, lst, initcell)
Definition: pg_list.h:417
int numitems
Definition: parse_cte.c:76
char * get_collation_name(Oid colloid)
Definition: lsyscache.c:1060
int32 select_common_typmod(ParseState *pstate, List *exprs, Oid common_type)
char * pstrdup(const char *in)
Definition: mcxt.c:1299
static void checkWellFormedRecursion(CteState *cstate)
Definition: parse_cte.c:824
ParseState * parentParseState
Definition: parse_node.h:180
Node * transformExpr(ParseState *pstate, Node *expr, ParseExprKind exprKind)
Definition: parse_expr.c:94
#define gettext_noop(x)
Definition: c.h:1197
List * list_copy(const List *oldlist)
Definition: list.c:1418
Definition: nodes.h:536
#define TYPECACHE_EQ_OPR
Definition: typcache.h:136
#define strVal(v)
Definition: value.h:65
int errcode(int sqlerrcode)
Definition: elog.c:698
Definition: value.h:51
char * format_type_be(Oid type_oid)
Definition: format_type.c:339
String * makeString(char *str)
Definition: value.c:51
RecursionContext
Definition: parse_cte.c:31
struct CteState CteState
unsigned int Oid
Definition: postgres_ext.h:31
char * resname
Definition: primnodes.h:1457
Node * utilityStmt
Definition: parsenodes.h:128
List * lappend_oid(List *list, Oid datum)
Definition: list.c:372
#define OidIsValid(objectId)
Definition: c.h:710
signed int int32
Definition: c.h:429
char * schemaname
Definition: primnodes.h:67
Node * larg
Definition: primnodes.h:1543
int location
Definition: primnodes.h:73
char * relname
Definition: primnodes.h:68
bool resjunk
Definition: primnodes.h:1462
#define ERROR
Definition: elog.h:46
List * innerwiths
Definition: parse_cte.c:79
bool list_member(const List *list, const void *datum)
Definition: list.c:628
CTESearchClause * search_clause
Definition: parsenodes.h:1517
bool raw_expression_tree_walker(Node *node, bool(*walker)(), void *context)
Definition: nodeFuncs.c:3515
#define lfirst_int(lc)
Definition: pg_list.h:170
#define lfirst_node(type, lc)
Definition: pg_list.h:172
CommonTableExpr * cte
Definition: parse_cte.c:65
static void checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate)
Definition: parse_cte.c:1103
List * sortClause
Definition: parsenodes.h:1683
List * cycle_col_list
Definition: parsenodes.h:1496
AttrNumber resno
Definition: primnodes.h:1456
Oid select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
bool recursive
Definition: parsenodes.h:1437
List * ctecoltypmods
Definition: parsenodes.h:1526
List * lockingClause
Definition: parsenodes.h:1687
bool p_hasModifyingCTE
Definition: parse_node.h:212
List * lappend_int(List *list, int datum)
Definition: list.c:354
Node * coerce_to_common_type(ParseState *pstate, Node *node, Oid targetTypeId, const char *context)
List * lappend(List *list, void *datum)
Definition: list.c:336
static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
Definition: parse_cte.c:241
Bitmapset * depends_on
Definition: parse_cte.c:67
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:701
int curitem
Definition: parse_cte.c:78
List * search_col_list
Definition: parsenodes.h:1487
List * ctecolnames
Definition: parsenodes.h:1524
char * format_type_with_typemod(Oid type_oid, int32 typemod)
Definition: format_type.c:358
List * p_future_ctes
Definition: parse_node.h:190
void * palloc0(Size size)
Definition: mcxt.c:1093
Node * quals
Definition: primnodes.h:1547
Oid cycle_mark_collation
Definition: parsenodes.h:1505
SetOperation op
Definition: parsenodes.h:1693
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
Query * parse_sub_analyze(Node *parseTree, ParseState *parentParseState, CommonTableExpr *parentCTE, bool locked_from_parent, bool resolve_unknowns)
Definition: analyze.c:186
#define ereport(elevel,...)
Definition: elog.h:157
static bool checkWellFormedRecursionWalker(Node *node, CteState *cstate)
Definition: parse_cte.c:923
RecursionContext context
Definition: parse_cte.c:82
int id
Definition: parse_cte.c:66
CmdType commandType
Definition: parsenodes.h:120
List * lcons(void *datum, List *list)
Definition: list.c:468
Node * rarg
Definition: primnodes.h:1544
JoinType jointype
Definition: primnodes.h:1541
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
Expr * expr
Definition: primnodes.h:1455
bool canSetTag
Definition: parsenodes.h:126
struct SelectStmt * rarg
Definition: parsenodes.h:1696
List * transformWithClause(ParseState *pstate, WithClause *withClause)
Definition: parse_cte.c:109
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:149
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:111
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:759
Node * cycle_mark_value
Definition: parsenodes.h:1498
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
Node * cycle_mark_default
Definition: parsenodes.h:1499
List * ctecoltypes
Definition: parsenodes.h:1525
char * cycle_path_column
Definition: parsenodes.h:1500
CTECycleClause * cycle_clause
Definition: parsenodes.h:1518
Node * setOperations
Definition: parsenodes.h:177
struct CteItem CteItem
int errmsg(const char *fmt,...)
Definition: elog.c:909
char * cycle_mark_column
Definition: parsenodes.h:1497
#define elog(elevel,...)
Definition: elog.h:232
int i
List * p_ctenamespace
Definition: parse_node.h:189
Oid select_common_type(ParseState *pstate, List *exprs, const char *context, Node **which_expr)
CteItem * items
Definition: parse_cte.c:75
WithClause * withClause
Definition: parsenodes.h:1688
static void TopologicalSort(ParseState *pstate, CteItem *items, int numitems)
Definition: parse_cte.c:772
ParseState * pstate
Definition: parse_cte.c:74
Oid get_negator(Oid opno)
Definition: lsyscache.c:1504
List * ctecolcollations
Definition: parsenodes.h:1527
#define copyObject(obj)
Definition: nodes.h:652
static const char *const recursion_errormsgs[]
Definition: parse_cte.c:42
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:773
Definition: pg_list.h:50
#define GetCTETargetList(cte)
Definition: parsenodes.h:1531
List * aliascolnames
Definition: parsenodes.h:1513
Node * limitCount
Definition: parsenodes.h:1685
#define lfirst_oid(lc)
Definition: pg_list.h:171
List * list_delete_first(List *list)
Definition: list.c:875
List * ctes
Definition: parsenodes.h:1436