PostgreSQL Source Code  git master
parse_collate.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parse_collate.c
4  * Routines for assigning collation information.
5  *
6  * We choose to handle collation analysis in a post-pass over the output
7  * of expression parse analysis. This is because we need more state to
8  * perform this processing than is needed in the finished tree. If we
9  * did it on-the-fly while building the tree, all that state would have
10  * to be kept in expression node trees permanently. This way, the extra
11  * storage is just local variables in this recursive routine.
12  *
13  * The info that is actually saved in the finished tree is:
14  * 1. The output collation of each expression node, or InvalidOid if it
15  * returns a noncollatable data type. This can also be InvalidOid if the
16  * result type is collatable but the collation is indeterminate.
17  * 2. The collation to be used in executing each function. InvalidOid means
18  * that there are no collatable inputs or their collation is indeterminate.
19  * This value is only stored in node types that might call collation-using
20  * functions.
21  *
22  * You might think we could get away with storing only one collation per
23  * node, but the two concepts really need to be kept distinct. Otherwise
24  * it's too confusing when a function produces a collatable output type but
25  * has no collatable inputs or produces noncollatable output from collatable
26  * inputs.
27  *
28  * Cases with indeterminate collation might result in an error being thrown
29  * at runtime. If we knew exactly which functions require collation
30  * information, we could throw those errors at parse time instead.
31  *
32  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
33  * Portions Copyright (c) 1994, Regents of the University of California
34  *
35  *
36  * IDENTIFICATION
37  * src/backend/parser/parse_collate.c
38  *
39  *-------------------------------------------------------------------------
40  */
41 #include "postgres.h"
42 
43 #include "catalog/pg_aggregate.h"
44 #include "catalog/pg_collation.h"
45 #include "nodes/makefuncs.h"
46 #include "nodes/nodeFuncs.h"
47 #include "parser/parse_collate.h"
48 #include "utils/lsyscache.h"
49 
50 
/*
 * Collation strength (the SQL standard calls this "derivation").  Order is
 * chosen to allow comparisons to work usefully: merge_collation_state()
 * compares strengths with ">" and "==", so a later enumerator dominates an
 * earlier one.  Note: the standard doesn't seem to distinguish between NONE
 * and CONFLICT.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT			/* collation was derived explicitly */
} CollateStrength;
63 
64 typedef struct
65 {
66  ParseState *pstate; /* parse state (for error reporting) */
67  Oid collation; /* OID of current collation, if any */
68  CollateStrength strength; /* strength of current collation choice */
69  int location; /* location of expr that set collation */
70  /* Remaining fields are only valid when strength == COLLATE_CONFLICT */
71  Oid collation2; /* OID of conflicting collation */
72  int location2; /* location of expr that set collation2 */
74 
75 static bool assign_query_collations_walker(Node *node, ParseState *pstate);
76 static bool assign_collations_walker(Node *node,
77  assign_collations_context *context);
78 static void merge_collation_state(Oid collation,
79  CollateStrength strength,
80  int location,
81  Oid collation2,
82  int location2,
83  assign_collations_context *context);
84 static void assign_aggregate_collations(Aggref *aggref,
85  assign_collations_context *loccontext);
86 static void assign_ordered_set_collations(Aggref *aggref,
87  assign_collations_context *loccontext);
88 static void assign_hypothetical_collations(Aggref *aggref,
89  assign_collations_context *loccontext);
90 
91 
92 /*
93  * assign_query_collations()
94  * Mark all expressions in the given Query with collation information.
95  *
96  * This should be applied to each Query after completion of parse analysis
97  * for expressions. Note that we do not recurse into sub-Queries, since
98  * those should have been processed when built.
99  */
100 void
102 {
103  /*
104  * We just use query_tree_walker() to visit all the contained expressions.
105  * We can skip the rangetable and CTE subqueries, though, since RTEs and
106  * subqueries had better have been processed already (else Vars referring
107  * to them would not get created with the right collation).
108  */
109  (void) query_tree_walker(query,
111  (void *) pstate,
114 }
115 
116 /*
117  * Walker for assign_query_collations
118  *
119  * Each expression found by query_tree_walker is processed independently.
120  * Note that query_tree_walker may pass us a whole List, such as the
121  * targetlist, in which case each subexpression must be processed
122  * independently --- we don't want to bleat if two different targetentries
123  * have different collations.
124  */
125 static bool
127 {
128  /* Need do nothing for empty subexpressions */
129  if (node == NULL)
130  return false;
131 
132  /*
133  * We don't want to recurse into a set-operations tree; it's already been
134  * fully processed in transformSetOperationStmt.
135  */
136  if (IsA(node, SetOperationStmt))
137  return false;
138 
139  if (IsA(node, List))
140  assign_list_collations(pstate, (List *) node);
141  else
142  assign_expr_collations(pstate, node);
143 
144  return false;
145 }
146 
147 /*
148  * assign_list_collations()
149  * Mark all nodes in the list of expressions with collation information.
150  *
151  * The list member expressions are processed independently; they do not have
152  * to share a common collation.
153  */
154 void
156 {
157  ListCell *lc;
158 
159  foreach(lc, exprs)
160  {
161  Node *node = (Node *) lfirst(lc);
162 
163  assign_expr_collations(pstate, node);
164  }
165 }
166 
167 /*
168  * assign_expr_collations()
169  * Mark all nodes in the given expression tree with collation information.
170  *
171  * This is exported for the benefit of various utility commands that process
172  * expressions without building a complete Query. It should be applied after
173  * calling transformExpr() plus any expression-modifying operations such as
174  * coerce_to_boolean().
175  */
176 void
178 {
180 
181  /* initialize context for tree walk */
182  context.pstate = pstate;
183  context.collation = InvalidOid;
184  context.strength = COLLATE_NONE;
185  context.location = -1;
186 
187  /* and away we go */
188  (void) assign_collations_walker(expr, &context);
189 }
190 
191 /*
192  * select_common_collation()
193  * Identify a common collation for a list of expressions.
194  *
195  * The expressions should all return the same datatype, else this is not
196  * terribly meaningful.
197  *
198  * none_ok means that it is permitted to return InvalidOid, indicating that
199  * no common collation could be identified, even for collatable datatypes.
200  * Otherwise, an error is thrown for conflict of implicit collations.
201  *
202  * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203  * of data type combinations", none_ok = false reflects the rules of clause
204  * "Collation determination" (in some cases invoked via "Grouping
205  * operations").
206  */
207 Oid
208 select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209 {
211 
212  /* initialize context for tree walk */
213  context.pstate = pstate;
214  context.collation = InvalidOid;
215  context.strength = COLLATE_NONE;
216  context.location = -1;
217 
218  /* and away we go */
219  (void) assign_collations_walker((Node *) exprs, &context);
220 
221  /* deal with collation conflict */
222  if (context.strength == COLLATE_CONFLICT)
223  {
224  if (none_ok)
225  return InvalidOid;
226  ereport(ERROR,
227  (errcode(ERRCODE_COLLATION_MISMATCH),
228  errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229  get_collation_name(context.collation),
230  get_collation_name(context.collation2)),
231  errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232  parser_errposition(context.pstate, context.location2)));
233  }
234 
235  /*
236  * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237  * that's okay because it must mean none of the expressions returned
238  * collatable datatypes.
239  */
240  return context.collation;
241 }
242 
243 /*
244  * assign_collations_walker()
245  * Recursive guts of collation processing.
246  *
247  * Nodes with no children (eg, Vars, Consts, Params) must have been marked
248  * when built. All upper-level nodes are marked here.
249  *
250  * Note: if this is invoked directly on a List, it will attempt to infer a
251  * common collation for all the list members. In particular, it will throw
252  * error if there are conflicting explicit collations for different members.
253  */
254 static bool
256 {
257  assign_collations_context loccontext;
258  Oid collation;
259  CollateStrength strength;
260  int location;
261 
262  /* Need do nothing for empty subexpressions */
263  if (node == NULL)
264  return false;
265 
266  /*
267  * Prepare for recursion. For most node types, though not all, the first
268  * thing we do is recurse to process all nodes below this one. Each level
269  * of the tree has its own local context.
270  */
271  loccontext.pstate = context->pstate;
272  loccontext.collation = InvalidOid;
273  loccontext.strength = COLLATE_NONE;
274  loccontext.location = -1;
275  /* Set these fields just to suppress uninitialized-value warnings: */
276  loccontext.collation2 = InvalidOid;
277  loccontext.location2 = -1;
278 
279  /*
280  * Recurse if appropriate, then determine the collation for this node.
281  *
282  * Note: the general cases are at the bottom of the switch, after various
283  * special cases.
284  */
285  switch (nodeTag(node))
286  {
287  case T_CollateExpr:
288  {
289  /*
290  * COLLATE sets an explicitly derived collation, regardless of
291  * what the child state is. But we must recurse to set up
292  * collation info below here.
293  */
294  CollateExpr *expr = (CollateExpr *) node;
295 
296  (void) expression_tree_walker(node,
298  (void *) &loccontext);
299 
300  collation = expr->collOid;
301  Assert(OidIsValid(collation));
302  strength = COLLATE_EXPLICIT;
303  location = expr->location;
304  }
305  break;
306  case T_FieldSelect:
307  {
308  /*
309  * For FieldSelect, the result has the field's declared
310  * collation, independently of what happened in the arguments.
311  * (The immediate argument must be composite and thus not
312  * collatable, anyhow.) The field's collation was already
313  * looked up and saved in the node.
314  */
315  FieldSelect *expr = (FieldSelect *) node;
316 
317  /* ... but first, recurse */
318  (void) expression_tree_walker(node,
320  (void *) &loccontext);
321 
322  if (OidIsValid(expr->resultcollid))
323  {
324  /* Node's result type is collatable. */
325  /* Pass up field's collation as an implicit choice. */
326  collation = expr->resultcollid;
327  strength = COLLATE_IMPLICIT;
328  location = exprLocation(node);
329  }
330  else
331  {
332  /* Node's result type isn't collatable. */
333  collation = InvalidOid;
334  strength = COLLATE_NONE;
335  location = -1; /* won't be used */
336  }
337  }
338  break;
339  case T_RowExpr:
340  {
341  /*
342  * RowExpr is a special case because the subexpressions are
343  * independent: we don't want to complain if some of them have
344  * incompatible explicit collations.
345  */
346  RowExpr *expr = (RowExpr *) node;
347 
348  assign_list_collations(context->pstate, expr->args);
349 
350  /*
351  * Since the result is always composite and therefore never
352  * has a collation, we can just stop here: this node has no
353  * impact on the collation of its parent.
354  */
355  return false; /* done */
356  }
357  case T_RowCompareExpr:
358  {
359  /*
360  * For RowCompare, we have to find the common collation of
361  * each pair of input columns and build a list. If we can't
362  * find a common collation, we just put InvalidOid into the
363  * list, which may or may not cause an error at runtime.
364  */
365  RowCompareExpr *expr = (RowCompareExpr *) node;
366  List *colls = NIL;
367  ListCell *l;
368  ListCell *r;
369 
370  forboth(l, expr->largs, r, expr->rargs)
371  {
372  Node *le = (Node *) lfirst(l);
373  Node *re = (Node *) lfirst(r);
374  Oid coll;
375 
376  coll = select_common_collation(context->pstate,
377  list_make2(le, re),
378  true);
379  colls = lappend_oid(colls, coll);
380  }
381  expr->inputcollids = colls;
382 
383  /*
384  * Since the result is always boolean and therefore never has
385  * a collation, we can just stop here: this node has no impact
386  * on the collation of its parent.
387  */
388  return false; /* done */
389  }
390  case T_CoerceToDomain:
391  {
392  /*
393  * If the domain declaration included a non-default COLLATE
394  * spec, then use that collation as the output collation of
395  * the coercion. Otherwise allow the input collation to
396  * bubble up. (The input should be of the domain's base type,
397  * therefore we don't need to worry about it not being
398  * collatable when the domain is.)
399  */
400  CoerceToDomain *expr = (CoerceToDomain *) node;
401  Oid typcollation = get_typcollation(expr->resulttype);
402 
403  /* ... but first, recurse */
404  (void) expression_tree_walker(node,
406  (void *) &loccontext);
407 
408  if (OidIsValid(typcollation))
409  {
410  /* Node's result type is collatable. */
411  if (typcollation == DEFAULT_COLLATION_OID)
412  {
413  /* Collation state bubbles up from child. */
414  collation = loccontext.collation;
415  strength = loccontext.strength;
416  location = loccontext.location;
417  }
418  else
419  {
420  /* Use domain's collation as an implicit choice. */
421  collation = typcollation;
422  strength = COLLATE_IMPLICIT;
423  location = exprLocation(node);
424  }
425  }
426  else
427  {
428  /* Node's result type isn't collatable. */
429  collation = InvalidOid;
430  strength = COLLATE_NONE;
431  location = -1; /* won't be used */
432  }
433 
434  /*
435  * Save the state into the expression node. We know it
436  * doesn't care about input collation.
437  */
438  if (strength == COLLATE_CONFLICT)
440  else
441  exprSetCollation(node, collation);
442  }
443  break;
444  case T_TargetEntry:
445  (void) expression_tree_walker(node,
447  (void *) &loccontext);
448 
449  /*
450  * TargetEntry can have only one child, and should bubble that
451  * state up to its parent. We can't use the general-case code
452  * below because exprType and friends don't work on TargetEntry.
453  */
454  collation = loccontext.collation;
455  strength = loccontext.strength;
456  location = loccontext.location;
457 
458  /*
459  * Throw error if the collation is indeterminate for a TargetEntry
460  * that is a sort/group target. We prefer to do this now, instead
461  * of leaving the comparison functions to fail at runtime, because
462  * we can give a syntax error pointer to help locate the problem.
463  * There are some cases where there might not be a failure, for
464  * example if the planner chooses to use hash aggregation instead
465  * of sorting for grouping; but it seems better to predictably
466  * throw an error. (Compare transformSetOperationTree, which will
467  * throw error for indeterminate collation of set-op columns, even
468  * though the planner might be able to implement the set-op
469  * without sorting.)
470  */
471  if (strength == COLLATE_CONFLICT &&
472  ((TargetEntry *) node)->ressortgroupref != 0)
473  ereport(ERROR,
474  (errcode(ERRCODE_COLLATION_MISMATCH),
475  errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476  get_collation_name(loccontext.collation),
477  get_collation_name(loccontext.collation2)),
478  errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479  parser_errposition(context->pstate,
480  loccontext.location2)));
481  break;
482  case T_InferenceElem:
483  case T_RangeTblRef:
484  case T_JoinExpr:
485  case T_FromExpr:
486  case T_OnConflictExpr:
487  case T_SortGroupClause:
488  case T_MergeAction:
489  (void) expression_tree_walker(node,
491  (void *) &loccontext);
492 
493  /*
494  * When we're invoked on a query's jointree, we don't need to do
495  * anything with join nodes except recurse through them to process
496  * WHERE/ON expressions. So just stop here. Likewise, we don't
497  * need to do anything when invoked on sort/group lists.
498  */
499  return false;
500  case T_Query:
501  {
502  /*
503  * We get here when we're invoked on the Query belonging to a
504  * SubLink. Act as though the Query returns its first output
505  * column, which indeed is what it does for EXPR_SUBLINK and
506  * ARRAY_SUBLINK cases. In the cases where the SubLink
507  * returns boolean, this info will be ignored. Special case:
508  * in EXISTS, the Query might return no columns, in which case
509  * we need do nothing.
510  *
511  * We needn't recurse, since the Query is already processed.
512  */
513  Query *qtree = (Query *) node;
514  TargetEntry *tent;
515 
516  if (qtree->targetList == NIL)
517  return false;
518  tent = linitial_node(TargetEntry, qtree->targetList);
519  if (tent->resjunk)
520  return false;
521 
522  collation = exprCollation((Node *) tent->expr);
523  /* collation doesn't change if it's converted to array */
524  strength = COLLATE_IMPLICIT;
525  location = exprLocation((Node *) tent->expr);
526  }
527  break;
528  case T_List:
529  (void) expression_tree_walker(node,
531  (void *) &loccontext);
532 
533  /*
534  * When processing a list, collation state just bubbles up from
535  * the list elements.
536  */
537  collation = loccontext.collation;
538  strength = loccontext.strength;
539  location = loccontext.location;
540  break;
541 
542  case T_Var:
543  case T_Const:
544  case T_Param:
546  case T_CaseTestExpr:
547  case T_SetToDefault:
548  case T_CurrentOfExpr:
549 
550  /*
551  * General case for childless expression nodes. These should
552  * already have a collation assigned; it is not this function's
553  * responsibility to look into the catalogs for base-case
554  * information.
555  */
556  collation = exprCollation(node);
557 
558  /*
559  * Note: in most cases, there will be an assigned collation
560  * whenever type_is_collatable(exprType(node)); but an exception
561  * occurs for a Var referencing a subquery output column for which
562  * a unique collation was not determinable. That may lead to a
563  * runtime failure if a collation-sensitive function is applied to
564  * the Var.
565  */
566 
567  if (OidIsValid(collation))
568  strength = COLLATE_IMPLICIT;
569  else
570  strength = COLLATE_NONE;
571  location = exprLocation(node);
572  break;
573 
574  default:
575  {
576  /*
577  * General case for most expression nodes with children. First
578  * recurse, then figure out what to assign to this node.
579  */
580  Oid typcollation;
581 
582  /*
583  * For most node types, we want to treat all the child
584  * expressions alike; but there are a few exceptions, hence
585  * this inner switch.
586  */
587  switch (nodeTag(node))
588  {
589  case T_Aggref:
590  {
591  /*
592  * Aggref is messy enough that we give it its own
593  * function, in fact three of them. The FILTER
594  * clause is independent of the rest of the
595  * aggregate, however, so it can be processed
596  * separately.
597  */
598  Aggref *aggref = (Aggref *) node;
599 
600  switch (aggref->aggkind)
601  {
602  case AGGKIND_NORMAL:
604  &loccontext);
605  break;
606  case AGGKIND_ORDERED_SET:
608  &loccontext);
609  break;
610  case AGGKIND_HYPOTHETICAL:
612  &loccontext);
613  break;
614  default:
615  elog(ERROR, "unrecognized aggkind: %d",
616  (int) aggref->aggkind);
617  }
618 
620  (Node *) aggref->aggfilter);
621  }
622  break;
623  case T_WindowFunc:
624  {
625  /*
626  * WindowFunc requires special processing only for
627  * its aggfilter clause, as for aggregates.
628  */
629  WindowFunc *wfunc = (WindowFunc *) node;
630 
631  (void) assign_collations_walker((Node *) wfunc->args,
632  &loccontext);
633 
635  (Node *) wfunc->aggfilter);
636  }
637  break;
638  case T_CaseExpr:
639  {
640  /*
641  * CaseExpr is a special case because we do not
642  * want to recurse into the test expression (if
643  * any). It was already marked with collations
644  * during transformCaseExpr, and furthermore its
645  * collation is not relevant to the result of the
646  * CASE --- only the output expressions are.
647  */
648  CaseExpr *expr = (CaseExpr *) node;
649  ListCell *lc;
650 
651  foreach(lc, expr->args)
652  {
654 
655  /*
656  * The condition expressions mustn't affect
657  * the CASE's result collation either; but
658  * since they are known to yield boolean, it's
659  * safe to recurse directly on them --- they
660  * won't change loccontext.
661  */
662  (void) assign_collations_walker((Node *) when->expr,
663  &loccontext);
664  (void) assign_collations_walker((Node *) when->result,
665  &loccontext);
666  }
667  (void) assign_collations_walker((Node *) expr->defresult,
668  &loccontext);
669  }
670  break;
671  case T_SubscriptingRef:
672  {
673  /*
674  * The subscripts are treated as independent
675  * expressions not contributing to the node's
676  * collation. Only the container, and the source
677  * expression if any, contribute. (This models
678  * the old behavior, in which the subscripts could
679  * be counted on to be integers and thus not
680  * contribute anything.)
681  */
682  SubscriptingRef *sbsref = (SubscriptingRef *) node;
683 
685  (Node *) sbsref->refupperindexpr);
687  (Node *) sbsref->reflowerindexpr);
688  (void) assign_collations_walker((Node *) sbsref->refexpr,
689  &loccontext);
690  (void) assign_collations_walker((Node *) sbsref->refassgnexpr,
691  &loccontext);
692  }
693  break;
694  case T_JsonExpr:
695 
696  /*
697  * Context item and PASSING arguments are already
698  * marked with collations in parse_expr.c.
699  */
700  break;
701  default:
702 
703  /*
704  * Normal case: all child expressions contribute
705  * equally to loccontext.
706  */
707  (void) expression_tree_walker(node,
709  (void *) &loccontext);
710  break;
711  }
712 
713  /*
714  * Now figure out what collation to assign to this node.
715  */
716  typcollation = get_typcollation(exprType(node));
717  if (OidIsValid(typcollation))
718  {
719  /* Node's result is collatable; what about its input? */
720  if (loccontext.strength > COLLATE_NONE)
721  {
722  /* Collation state bubbles up from children. */
723  collation = loccontext.collation;
724  strength = loccontext.strength;
725  location = loccontext.location;
726  }
727  else
728  {
729  /*
730  * Collatable output produced without any collatable
731  * input. Use the type's collation (which is usually
732  * DEFAULT_COLLATION_OID, but might be different for a
733  * domain).
734  */
735  collation = typcollation;
736  strength = COLLATE_IMPLICIT;
737  location = exprLocation(node);
738  }
739  }
740  else
741  {
742  /* Node's result type isn't collatable. */
743  collation = InvalidOid;
744  strength = COLLATE_NONE;
745  location = -1; /* won't be used */
746  }
747 
748  /*
749  * Save the result collation into the expression node. If the
750  * state is COLLATE_CONFLICT, we'll set the collation to
751  * InvalidOid, which might result in an error at runtime.
752  */
753  if (strength == COLLATE_CONFLICT)
755  else
756  exprSetCollation(node, collation);
757 
758  /*
759  * Likewise save the input collation, which is the one that
760  * any function called by this node should use.
761  */
762  if (loccontext.strength == COLLATE_CONFLICT)
764  else
765  exprSetInputCollation(node, loccontext.collation);
766  }
767  break;
768  }
769 
770  /*
771  * Now, merge my information into my parent's state.
772  */
773  merge_collation_state(collation,
774  strength,
775  location,
776  loccontext.collation2,
777  loccontext.location2,
778  context);
779 
780  return false;
781 }
782 
783 /*
784  * Merge collation state of a subexpression into the context for its parent.
785  */
786 static void
788  CollateStrength strength,
789  int location,
790  Oid collation2,
791  int location2,
792  assign_collations_context *context)
793 {
794  /*
795  * If the collation strength for this node is different from what's
796  * already in *context, then this node either dominates or is dominated by
797  * earlier siblings.
798  */
799  if (strength > context->strength)
800  {
801  /* Override previous parent state */
802  context->collation = collation;
803  context->strength = strength;
804  context->location = location;
805  /* Bubble up error info if applicable */
806  if (strength == COLLATE_CONFLICT)
807  {
808  context->collation2 = collation2;
809  context->location2 = location2;
810  }
811  }
812  else if (strength == context->strength)
813  {
814  /* Merge, or detect error if there's a collation conflict */
815  switch (strength)
816  {
817  case COLLATE_NONE:
818  /* Nothing + nothing is still nothing */
819  break;
820  case COLLATE_IMPLICIT:
821  if (collation != context->collation)
822  {
823  /*
824  * Non-default implicit collation always beats default.
825  */
826  if (context->collation == DEFAULT_COLLATION_OID)
827  {
828  /* Override previous parent state */
829  context->collation = collation;
830  context->strength = strength;
831  context->location = location;
832  }
833  else if (collation != DEFAULT_COLLATION_OID)
834  {
835  /*
836  * Oops, we have a conflict. We cannot throw error
837  * here, since the conflict could be resolved by a
838  * later sibling CollateExpr, or the parent might not
839  * care about collation anyway. Return enough info to
840  * throw the error later, if needed.
841  */
842  context->strength = COLLATE_CONFLICT;
843  context->collation2 = collation;
844  context->location2 = location;
845  }
846  }
847  break;
848  case COLLATE_CONFLICT:
849  /* We're still conflicted ... */
850  break;
851  case COLLATE_EXPLICIT:
852  if (collation != context->collation)
853  {
854  /*
855  * Oops, we have a conflict of explicit COLLATE clauses.
856  * Here we choose to throw error immediately; that is what
857  * the SQL standard says to do, and there's no good reason
858  * to be less strict.
859  */
860  ereport(ERROR,
861  (errcode(ERRCODE_COLLATION_MISMATCH),
862  errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
863  get_collation_name(context->collation),
864  get_collation_name(collation)),
865  parser_errposition(context->pstate, location)));
866  }
867  break;
868  }
869  }
870 }
871 
872 /*
873  * Aggref is a special case because expressions used only for ordering
874  * shouldn't be taken to conflict with each other or with regular args,
875  * indeed shouldn't affect the aggregate's result collation at all.
876  * We handle this by applying assign_expr_collations() to them rather than
877  * passing down our loccontext.
878  *
879  * Note that we recurse to each TargetEntry, not directly to its contained
880  * expression, so that the case above for T_TargetEntry will complain if we
881  * can't resolve a collation for an ORDER BY item (whether or not it is also
882  * a normal aggregate arg).
883  *
884  * We need not recurse into the aggorder or aggdistinct lists, because those
885  * contain only SortGroupClause nodes which we need not process.
886  */
887 static void
889  assign_collations_context *loccontext)
890 {
891  ListCell *lc;
892 
893  /* Plain aggregates have no direct args */
894  Assert(aggref->aggdirectargs == NIL);
895 
896  /* Process aggregated args, holding resjunk ones at arm's length */
897  foreach(lc, aggref->args)
898  {
899  TargetEntry *tle = lfirst_node(TargetEntry, lc);
900 
901  if (tle->resjunk)
902  assign_expr_collations(loccontext->pstate, (Node *) tle);
903  else
904  (void) assign_collations_walker((Node *) tle, loccontext);
905  }
906 }
907 
908 /*
909  * For ordered-set aggregates, it's somewhat unclear how best to proceed.
910  * The spec-defined inverse distribution functions have only one sort column
911  * and don't return collatable types, but this is clearly too restrictive in
912  * the general case. Our solution is to consider that the aggregate's direct
913  * arguments contribute normally to determination of the aggregate's own
914  * collation, while aggregated arguments contribute only when the aggregate
915  * is designed to have exactly one aggregated argument (i.e., it has a single
916  * aggregated argument and is non-variadic). If it can have more than one
917  * aggregated argument, we process the aggregated arguments as independent
918  * sort columns. This avoids throwing error for something like
919  * agg(...) within group (order by x collate "foo", y collate "bar")
920  * while also guaranteeing that variadic aggregates don't change in behavior
921  * depending on how many sort columns a particular call happens to have.
922  *
923  * Otherwise this is much like the plain-aggregate case.
924  */
925 static void
927  assign_collations_context *loccontext)
928 {
929  bool merge_sort_collations;
930  ListCell *lc;
931 
932  /* Merge sort collations to parent only if there can be only one */
933  merge_sort_collations = (list_length(aggref->args) == 1 &&
935 
936  /* Direct args, if any, are normal children of the Aggref node */
937  (void) assign_collations_walker((Node *) aggref->aggdirectargs,
938  loccontext);
939 
940  /* Process aggregated args appropriately */
941  foreach(lc, aggref->args)
942  {
943  TargetEntry *tle = lfirst_node(TargetEntry, lc);
944 
945  if (merge_sort_collations)
946  (void) assign_collations_walker((Node *) tle, loccontext);
947  else
948  assign_expr_collations(loccontext->pstate, (Node *) tle);
949  }
950 }
951 
952 /*
953  * Hypothetical-set aggregates are even more special: per spec, we need to
954  * unify the collations of each pair of hypothetical and aggregated args.
955  * And we need to force the choice of collation down into the sort column
956  * to ensure that the sort happens with the chosen collation. Other than
957  * that, the behavior is like regular ordered-set aggregates. Note that
958  * hypothetical direct arguments contribute to the aggregate collation
959  * only when their partner aggregated arguments do.
960  */
961 static void
963  assign_collations_context *loccontext)
964 {
965  ListCell *h_cell = list_head(aggref->aggdirectargs);
966  ListCell *s_cell = list_head(aggref->args);
967  bool merge_sort_collations;
968  int extra_args;
969 
970  /* Merge sort collations to parent only if there can be only one */
971  merge_sort_collations = (list_length(aggref->args) == 1 &&
973 
974  /* Process any non-hypothetical direct args */
975  extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
976  Assert(extra_args >= 0);
977  while (extra_args-- > 0)
978  {
979  (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
980  h_cell = lnext(aggref->aggdirectargs, h_cell);
981  }
982 
983  /* Scan hypothetical args and aggregated args in parallel */
984  while (h_cell && s_cell)
985  {
986  Node *h_arg = (Node *) lfirst(h_cell);
987  TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
988  assign_collations_context paircontext;
989 
990  /*
991  * Assign collations internally in this pair of expressions, then
992  * choose a common collation for them. This should match
993  * select_common_collation(), but we can't use that function as-is
994  * because we need access to the whole collation state so we can
995  * bubble it up to the aggregate function's level.
996  */
997  paircontext.pstate = loccontext->pstate;
998  paircontext.collation = InvalidOid;
999  paircontext.strength = COLLATE_NONE;
1000  paircontext.location = -1;
1001  /* Set these fields just to suppress uninitialized-value warnings: */
1002  paircontext.collation2 = InvalidOid;
1003  paircontext.location2 = -1;
1004 
1005  (void) assign_collations_walker(h_arg, &paircontext);
1006  (void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
1007 
1008  /* deal with collation conflict */
1009  if (paircontext.strength == COLLATE_CONFLICT)
1010  ereport(ERROR,
1011  (errcode(ERRCODE_COLLATION_MISMATCH),
1012  errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
1013  get_collation_name(paircontext.collation),
1014  get_collation_name(paircontext.collation2)),
1015  errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
1016  parser_errposition(paircontext.pstate,
1017  paircontext.location2)));
1018 
1019  /*
1020  * At this point paircontext.collation can be InvalidOid only if the
1021  * type is not collatable; no need to do anything in that case. If we
1022  * do have to change the sort column's collation, do it by inserting a
1023  * RelabelType node into the sort column TLE.
1024  *
1025  * XXX This is pretty grotty for a couple of reasons:
1026  * assign_collations_walker isn't supposed to be changing the
1027  * expression structure like this, and a parse-time change of
1028  * collation ought to be signaled by a CollateExpr not a RelabelType
1029  * (the use of RelabelType for collation marking is supposed to be a
1030  * planner/executor thing only). But we have no better alternative.
1031  * In particular, injecting a CollateExpr could result in the
1032  * expression being interpreted differently after dump/reload, since
1033  * we might be effectively promoting an implicit collation to
1034  * explicit. This kluge is relying on ruleutils.c not printing a
1035  * COLLATE clause for a RelabelType, and probably on some other
1036  * fragile behaviors.
1037  */
1038  if (OidIsValid(paircontext.collation) &&
1039  paircontext.collation != exprCollation((Node *) s_tle->expr))
1040  {
1041  s_tle->expr = (Expr *)
1042  makeRelabelType(s_tle->expr,
1043  exprType((Node *) s_tle->expr),
1044  exprTypmod((Node *) s_tle->expr),
1045  paircontext.collation,
1047  }
1048 
1049  /*
1050  * If appropriate, merge this column's collation state up to the
1051  * aggregate function.
1052  */
1053  if (merge_sort_collations)
1054  merge_collation_state(paircontext.collation,
1055  paircontext.strength,
1056  paircontext.location,
1057  paircontext.collation2,
1058  paircontext.location2,
1059  loccontext);
1060 
1061  h_cell = lnext(aggref->aggdirectargs, h_cell);
1062  s_cell = lnext(aggref->args, s_cell);
1063  }
1064  Assert(h_cell == NULL && s_cell == NULL);
1065 }
#define OidIsValid(objectId)
Definition: c.h:710
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend_oid(List *list, Oid datum)
Definition: list.c:372
Oid get_func_variadictype(Oid funcid)
Definition: lsyscache.c:1704
Oid get_typcollation(Oid typid)
Definition: lsyscache.c:3013
char * get_collation_name(Oid colloid)
Definition: lsyscache.c:1060
RelabelType * makeRelabelType(Expr *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat)
Definition: makefuncs.c:403
bool query_tree_walker(Query *query, bool(*walker)(), void *context, int flags)
Definition: nodeFuncs.c:2570
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
void exprSetCollation(Node *expr, Oid collation)
Definition: nodeFuncs.c:1082
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:286
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:788
void exprSetInputCollation(Node *expr, Oid inputcollation)
Definition: nodeFuncs.c:1279
bool expression_tree_walker(Node *node, bool(*walker)(), void *context)
Definition: nodeFuncs.c:2015
int exprLocation(const Node *expr)
Definition: nodeFuncs.c:1343
#define QTW_IGNORE_CTE_SUBQUERIES
Definition: nodeFuncs.h:21
#define QTW_IGNORE_RANGE_TABLE
Definition: nodeFuncs.h:24
#define IsA(nodeptr, _type_)
Definition: nodes.h:624
#define nodeTag(nodeptr)
Definition: nodes.h:578
@ T_List
Definition: nodes.h:317
@ T_OnConflictExpr
Definition: nodes.h:203
@ T_RangeTblRef
Definition: nodes.h:200
@ T_InferenceElem
Definition: nodes.h:198
@ T_SortGroupClause
Definition: nodes.h:483
@ T_CoerceToDomainValue
Definition: nodes.h:194
@ T_WindowFunc
Definition: nodes.h:162
@ T_FieldSelect
Definition: nodes.h:174
@ T_CollateExpr
Definition: nodes.h:180
@ T_JoinExpr
Definition: nodes.h:201
@ T_CaseExpr
Definition: nodes.h:181
@ T_Query
Definition: nodes.h:330
@ T_FromExpr
Definition: nodes.h:202
@ T_RowExpr
Definition: nodes.h:185
@ T_TargetEntry
Definition: nodes.h:199
@ T_CurrentOfExpr
Definition: nodes.h:196
@ T_Aggref
Definition: nodes.h:160
@ T_MergeAction
Definition: nodes.h:296
@ T_Const
Definition: nodes.h:158
@ T_JsonExpr
Definition: nodes.h:212
@ T_Param
Definition: nodes.h:159
@ T_CoerceToDomain
Definition: nodes.h:193
@ T_Var
Definition: nodes.h:157
@ T_RowCompareExpr
Definition: nodes.h:186
@ T_CaseTestExpr
Definition: nodes.h:183
@ T_SubscriptingRef
Definition: nodes.h:163
@ T_SetToDefault
Definition: nodes.h:195
void assign_list_collations(ParseState *pstate, List *exprs)
Oid select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
static bool assign_collations_walker(Node *node, assign_collations_context *context)
static void assign_aggregate_collations(Aggref *aggref, assign_collations_context *loccontext)
static bool assign_query_collations_walker(Node *node, ParseState *pstate)
void assign_query_collations(ParseState *pstate, Query *query)
static void merge_collation_state(Oid collation, CollateStrength strength, int location, Oid collation2, int location2, assign_collations_context *context)
static void assign_ordered_set_collations(Aggref *aggref, assign_collations_context *loccontext)
void assign_expr_collations(ParseState *pstate, Node *expr)
CollateStrength
Definition: parse_collate.c:57
@ COLLATE_IMPLICIT
Definition: parse_collate.c:59
@ COLLATE_NONE
Definition: parse_collate.c:58
@ COLLATE_CONFLICT
Definition: parse_collate.c:60
@ COLLATE_EXPLICIT
Definition: parse_collate.c:61
static void assign_hypothetical_collations(Aggref *aggref, assign_collations_context *loccontext)
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:110
#define lfirst(lc)
Definition: pg_list.h:169
#define lfirst_node(type, lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:149
#define linitial_node(type, l)
Definition: pg_list.h:177
#define NIL
Definition: pg_list.h:65
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:446
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:322
#define list_make2(x1, x2)
Definition: pg_list.h:208
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:494
Oid aggfnoid
Definition: primnodes.h:332
List * aggdirectargs
Definition: primnodes.h:338
char aggkind
Definition: primnodes.h:346
List * args
Definition: primnodes.h:339
Expr * aggfilter
Definition: primnodes.h:342
Expr * defresult
Definition: primnodes.h:989
List * args
Definition: primnodes.h:988
int location
Definition: primnodes.h:957
Oid resultcollid
Definition: primnodes.h:822
Definition: pg_list.h:51
Definition: nodes.h:574
List * targetList
Definition: parsenodes.h:155
List * inputcollids
Definition: primnodes.h:1126
List * args
Definition: primnodes.h:1075
Expr * refassgnexpr
Definition: primnodes.h:460
List * refupperindexpr
Definition: primnodes.h:453
Expr * refexpr
Definition: primnodes.h:458
List * reflowerindexpr
Definition: primnodes.h:455
Expr * expr
Definition: primnodes.h:1716
bool resjunk
Definition: primnodes.h:1723
List * args
Definition: primnodes.h:399
Expr * aggfilter
Definition: primnodes.h:400
CollateStrength strength
Definition: parse_collate.c:68
Definition: type.h:83