PostgreSQL Source Code git master
parse_collate.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * parse_collate.c
 *		Routines for assigning collation information.
 *
 * We choose to handle collation analysis in a post-pass over the output
 * of expression parse analysis.  This is because we need more state to
 * perform this processing than is needed in the finished tree.  If we
 * did it on-the-fly while building the tree, all that state would have
 * to be kept in expression node trees permanently.  This way, the extra
 * storage is just local variables in this recursive routine.
 *
 * The info that is actually saved in the finished tree is:
 * 1. The output collation of each expression node, or InvalidOid if it
 * returns a noncollatable data type.  This can also be InvalidOid if the
 * result type is collatable but the collation is indeterminate.
 * 2. The collation to be used in executing each function.  InvalidOid means
 * that there are no collatable inputs or their collation is indeterminate.
 * This value is only stored in node types that might call collation-using
 * functions.
 *
 * You might think we could get away with storing only one collation per
 * node, but the two concepts really need to be kept distinct.  Otherwise
 * it's too confusing when a function produces a collatable output type but
 * has no collatable inputs or produces noncollatable output from collatable
 * inputs.
 *
 * Cases with indeterminate collation might result in an error being thrown
 * at runtime.  If we knew exactly which functions require collation
 * information, we could throw those errors at parse time instead.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/parse_collate.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/pg_aggregate.h"
#include "catalog/pg_collation.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_collate.h"
#include "utils/lsyscache.h"


/*
 * Collation strength (the SQL standard calls this "derivation").  Order is
 * chosen to allow comparisons to work usefully.  Note: the standard doesn't
 * seem to distinguish between NONE and CONFLICT.
 *
 * The numeric ordering is significant: merge_collation_state() compares
 * strengths with ">" to decide whether a child's collation state overrides
 * what has been accumulated so far, so members must stay in increasing
 * order of dominance.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT,			/* collation was derived explicitly */
} CollateStrength;

64typedef struct
65{
66 ParseState *pstate; /* parse state (for error reporting) */
67 Oid collation; /* OID of current collation, if any */
68 CollateStrength strength; /* strength of current collation choice */
69 int location; /* location of expr that set collation */
70 /* Remaining fields are only valid when strength == COLLATE_CONFLICT */
71 Oid collation2; /* OID of conflicting collation */
72 int location2; /* location of expr that set collation2 */
74
75static bool assign_query_collations_walker(Node *node, ParseState *pstate);
76static bool assign_collations_walker(Node *node,
78static void merge_collation_state(Oid collation,
79 CollateStrength strength,
80 int location,
81 Oid collation2,
82 int location2,
84static void assign_aggregate_collations(Aggref *aggref,
85 assign_collations_context *loccontext);
86static void assign_ordered_set_collations(Aggref *aggref,
87 assign_collations_context *loccontext);
88static void assign_hypothetical_collations(Aggref *aggref,
89 assign_collations_context *loccontext);
90
91
92/*
93 * assign_query_collations()
94 * Mark all expressions in the given Query with collation information.
95 *
96 * This should be applied to each Query after completion of parse analysis
97 * for expressions. Note that we do not recurse into sub-Queries, since
98 * those should have been processed when built.
99 */
100void
102{
103 /*
104 * We just use query_tree_walker() to visit all the contained expressions.
105 * We can skip the rangetable and CTE subqueries, though, since RTEs and
106 * subqueries had better have been processed already (else Vars referring
107 * to them would not get created with the right collation).
108 */
109 (void) query_tree_walker(query,
111 pstate,
114}
115
116/*
117 * Walker for assign_query_collations
118 *
119 * Each expression found by query_tree_walker is processed independently.
120 * Note that query_tree_walker may pass us a whole List, such as the
121 * targetlist, in which case each subexpression must be processed
122 * independently --- we don't want to bleat if two different targetentries
123 * have different collations.
124 */
125static bool
127{
128 /* Need do nothing for empty subexpressions */
129 if (node == NULL)
130 return false;
131
132 /*
133 * We don't want to recurse into a set-operations tree; it's already been
134 * fully processed in transformSetOperationStmt.
135 */
136 if (IsA(node, SetOperationStmt))
137 return false;
138
139 if (IsA(node, List))
140 assign_list_collations(pstate, (List *) node);
141 else
142 assign_expr_collations(pstate, node);
143
144 return false;
145}
146
147/*
148 * assign_list_collations()
149 * Mark all nodes in the list of expressions with collation information.
150 *
151 * The list member expressions are processed independently; they do not have
152 * to share a common collation.
153 */
154void
156{
157 ListCell *lc;
158
159 foreach(lc, exprs)
160 {
161 Node *node = (Node *) lfirst(lc);
162
163 assign_expr_collations(pstate, node);
164 }
165}
166
167/*
168 * assign_expr_collations()
169 * Mark all nodes in the given expression tree with collation information.
170 *
171 * This is exported for the benefit of various utility commands that process
172 * expressions without building a complete Query. It should be applied after
173 * calling transformExpr() plus any expression-modifying operations such as
174 * coerce_to_boolean().
175 */
176void
178{
180
181 /* initialize context for tree walk */
182 context.pstate = pstate;
183 context.collation = InvalidOid;
184 context.strength = COLLATE_NONE;
185 context.location = -1;
186
187 /* and away we go */
188 (void) assign_collations_walker(expr, &context);
189}
190
191/*
192 * select_common_collation()
193 * Identify a common collation for a list of expressions.
194 *
195 * The expressions should all return the same datatype, else this is not
196 * terribly meaningful.
197 *
198 * none_ok means that it is permitted to return InvalidOid, indicating that
199 * no common collation could be identified, even for collatable datatypes.
200 * Otherwise, an error is thrown for conflict of implicit collations.
201 *
202 * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203 * of data type combinations", none_ok = false reflects the rules of clause
204 * "Collation determination" (in some cases invoked via "Grouping
205 * operations").
206 */
207Oid
208select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209{
211
212 /* initialize context for tree walk */
213 context.pstate = pstate;
214 context.collation = InvalidOid;
215 context.strength = COLLATE_NONE;
216 context.location = -1;
217
218 /* and away we go */
219 (void) assign_collations_walker((Node *) exprs, &context);
220
221 /* deal with collation conflict */
222 if (context.strength == COLLATE_CONFLICT)
223 {
224 if (none_ok)
225 return InvalidOid;
227 (errcode(ERRCODE_COLLATION_MISMATCH),
228 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
231 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232 parser_errposition(context.pstate, context.location2)));
233 }
234
235 /*
236 * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237 * that's okay because it must mean none of the expressions returned
238 * collatable datatypes.
239 */
240 return context.collation;
241}
242
243/*
244 * assign_collations_walker()
245 * Recursive guts of collation processing.
246 *
247 * Nodes with no children (eg, Vars, Consts, Params) must have been marked
248 * when built. All upper-level nodes are marked here.
249 *
250 * Note: if this is invoked directly on a List, it will attempt to infer a
251 * common collation for all the list members. In particular, it will throw
252 * error if there are conflicting explicit collations for different members.
253 */
254static bool
256{
257 assign_collations_context loccontext;
258 Oid collation;
259 CollateStrength strength;
260 int location;
261
262 /* Need do nothing for empty subexpressions */
263 if (node == NULL)
264 return false;
265
266 /*
267 * Prepare for recursion. For most node types, though not all, the first
268 * thing we do is recurse to process all nodes below this one. Each level
269 * of the tree has its own local context.
270 */
271 loccontext.pstate = context->pstate;
272 loccontext.collation = InvalidOid;
273 loccontext.strength = COLLATE_NONE;
274 loccontext.location = -1;
275 /* Set these fields just to suppress uninitialized-value warnings: */
276 loccontext.collation2 = InvalidOid;
277 loccontext.location2 = -1;
278
279 /*
280 * Recurse if appropriate, then determine the collation for this node.
281 *
282 * Note: the general cases are at the bottom of the switch, after various
283 * special cases.
284 */
285 switch (nodeTag(node))
286 {
287 case T_CollateExpr:
288 {
289 /*
290 * COLLATE sets an explicitly derived collation, regardless of
291 * what the child state is. But we must recurse to set up
292 * collation info below here.
293 */
294 CollateExpr *expr = (CollateExpr *) node;
295
296 (void) expression_tree_walker(node,
298 &loccontext);
299
300 collation = expr->collOid;
301 Assert(OidIsValid(collation));
302 strength = COLLATE_EXPLICIT;
303 location = expr->location;
304 }
305 break;
306 case T_FieldSelect:
307 {
308 /*
309 * For FieldSelect, the result has the field's declared
310 * collation, independently of what happened in the arguments.
311 * (The immediate argument must be composite and thus not
312 * collatable, anyhow.) The field's collation was already
313 * looked up and saved in the node.
314 */
315 FieldSelect *expr = (FieldSelect *) node;
316
317 /* ... but first, recurse */
318 (void) expression_tree_walker(node,
320 &loccontext);
321
322 if (OidIsValid(expr->resultcollid))
323 {
324 /* Node's result type is collatable. */
325 /* Pass up field's collation as an implicit choice. */
326 collation = expr->resultcollid;
327 strength = COLLATE_IMPLICIT;
328 location = exprLocation(node);
329 }
330 else
331 {
332 /* Node's result type isn't collatable. */
333 collation = InvalidOid;
334 strength = COLLATE_NONE;
335 location = -1; /* won't be used */
336 }
337 }
338 break;
339 case T_RowExpr:
340 {
341 /*
342 * RowExpr is a special case because the subexpressions are
343 * independent: we don't want to complain if some of them have
344 * incompatible explicit collations.
345 */
346 RowExpr *expr = (RowExpr *) node;
347
348 assign_list_collations(context->pstate, expr->args);
349
350 /*
351 * Since the result is always composite and therefore never
352 * has a collation, we can just stop here: this node has no
353 * impact on the collation of its parent.
354 */
355 return false; /* done */
356 }
357 case T_RowCompareExpr:
358 {
359 /*
360 * For RowCompare, we have to find the common collation of
361 * each pair of input columns and build a list. If we can't
362 * find a common collation, we just put InvalidOid into the
363 * list, which may or may not cause an error at runtime.
364 */
365 RowCompareExpr *expr = (RowCompareExpr *) node;
366 List *colls = NIL;
367 ListCell *l;
368 ListCell *r;
369
370 forboth(l, expr->largs, r, expr->rargs)
371 {
372 Node *le = (Node *) lfirst(l);
373 Node *re = (Node *) lfirst(r);
374 Oid coll;
375
376 coll = select_common_collation(context->pstate,
377 list_make2(le, re),
378 true);
379 colls = lappend_oid(colls, coll);
380 }
381 expr->inputcollids = colls;
382
383 /*
384 * Since the result is always boolean and therefore never has
385 * a collation, we can just stop here: this node has no impact
386 * on the collation of its parent.
387 */
388 return false; /* done */
389 }
390 case T_CoerceToDomain:
391 {
392 /*
393 * If the domain declaration included a non-default COLLATE
394 * spec, then use that collation as the output collation of
395 * the coercion. Otherwise allow the input collation to
396 * bubble up. (The input should be of the domain's base type,
397 * therefore we don't need to worry about it not being
398 * collatable when the domain is.)
399 */
400 CoerceToDomain *expr = (CoerceToDomain *) node;
401 Oid typcollation = get_typcollation(expr->resulttype);
402
403 /* ... but first, recurse */
404 (void) expression_tree_walker(node,
406 &loccontext);
407
408 if (OidIsValid(typcollation))
409 {
410 /* Node's result type is collatable. */
411 if (typcollation == DEFAULT_COLLATION_OID)
412 {
413 /* Collation state bubbles up from child. */
414 collation = loccontext.collation;
415 strength = loccontext.strength;
416 location = loccontext.location;
417 }
418 else
419 {
420 /* Use domain's collation as an implicit choice. */
421 collation = typcollation;
422 strength = COLLATE_IMPLICIT;
423 location = exprLocation(node);
424 }
425 }
426 else
427 {
428 /* Node's result type isn't collatable. */
429 collation = InvalidOid;
430 strength = COLLATE_NONE;
431 location = -1; /* won't be used */
432 }
433
434 /*
435 * Save the state into the expression node. We know it
436 * doesn't care about input collation.
437 */
438 if (strength == COLLATE_CONFLICT)
440 else
441 exprSetCollation(node, collation);
442 }
443 break;
444 case T_TargetEntry:
445 (void) expression_tree_walker(node,
447 &loccontext);
448
449 /*
450 * TargetEntry can have only one child, and should bubble that
451 * state up to its parent. We can't use the general-case code
452 * below because exprType and friends don't work on TargetEntry.
453 */
454 collation = loccontext.collation;
455 strength = loccontext.strength;
456 location = loccontext.location;
457
458 /*
459 * Throw error if the collation is indeterminate for a TargetEntry
460 * that is a sort/group target. We prefer to do this now, instead
461 * of leaving the comparison functions to fail at runtime, because
462 * we can give a syntax error pointer to help locate the problem.
463 * There are some cases where there might not be a failure, for
464 * example if the planner chooses to use hash aggregation instead
465 * of sorting for grouping; but it seems better to predictably
466 * throw an error. (Compare transformSetOperationTree, which will
467 * throw error for indeterminate collation of set-op columns, even
468 * though the planner might be able to implement the set-op
469 * without sorting.)
470 */
471 if (strength == COLLATE_CONFLICT &&
472 ((TargetEntry *) node)->ressortgroupref != 0)
474 (errcode(ERRCODE_COLLATION_MISMATCH),
475 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476 get_collation_name(loccontext.collation),
477 get_collation_name(loccontext.collation2)),
478 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479 parser_errposition(context->pstate,
480 loccontext.location2)));
481 break;
482 case T_InferenceElem:
483 case T_RangeTblRef:
484 case T_JoinExpr:
485 case T_FromExpr:
486 case T_OnConflictExpr:
487 case T_SortGroupClause:
488 case T_MergeAction:
489 (void) expression_tree_walker(node,
491 &loccontext);
492
493 /*
494 * When we're invoked on a query's jointree, we don't need to do
495 * anything with join nodes except recurse through them to process
496 * WHERE/ON expressions. So just stop here. Likewise, we don't
497 * need to do anything when invoked on sort/group lists.
498 */
499 return false;
500 case T_Query:
501 {
502 /*
503 * We get here when we're invoked on the Query belonging to a
504 * SubLink. Act as though the Query returns its first output
505 * column, which indeed is what it does for EXPR_SUBLINK and
506 * ARRAY_SUBLINK cases. In the cases where the SubLink
507 * returns boolean, this info will be ignored. Special case:
508 * in EXISTS, the Query might return no columns, in which case
509 * we need do nothing.
510 *
511 * We needn't recurse, since the Query is already processed.
512 */
513 Query *qtree = (Query *) node;
514 TargetEntry *tent;
515
516 if (qtree->targetList == NIL)
517 return false;
518 tent = linitial_node(TargetEntry, qtree->targetList);
519 if (tent->resjunk)
520 return false;
521
522 collation = exprCollation((Node *) tent->expr);
523 /* collation doesn't change if it's converted to array */
524 strength = COLLATE_IMPLICIT;
525 location = exprLocation((Node *) tent->expr);
526 }
527 break;
528 case T_List:
529 (void) expression_tree_walker(node,
531 &loccontext);
532
533 /*
534 * When processing a list, collation state just bubbles up from
535 * the list elements.
536 */
537 collation = loccontext.collation;
538 strength = loccontext.strength;
539 location = loccontext.location;
540 break;
541
542 case T_Var:
543 case T_Const:
544 case T_Param:
545 case T_CoerceToDomainValue:
546 case T_CaseTestExpr:
547 case T_SetToDefault:
548 case T_CurrentOfExpr:
549
550 /*
551 * General case for childless expression nodes. These should
552 * already have a collation assigned; it is not this function's
553 * responsibility to look into the catalogs for base-case
554 * information.
555 */
556 collation = exprCollation(node);
557
558 /*
559 * Note: in most cases, there will be an assigned collation
560 * whenever type_is_collatable(exprType(node)); but an exception
561 * occurs for a Var referencing a subquery output column for which
562 * a unique collation was not determinable. That may lead to a
563 * runtime failure if a collation-sensitive function is applied to
564 * the Var.
565 */
566
567 if (OidIsValid(collation))
568 strength = COLLATE_IMPLICIT;
569 else
570 strength = COLLATE_NONE;
571 location = exprLocation(node);
572 break;
573
574 default:
575 {
576 /*
577 * General case for most expression nodes with children. First
578 * recurse, then figure out what to assign to this node.
579 */
580 Oid typcollation;
581
582 /*
583 * For most node types, we want to treat all the child
584 * expressions alike; but there are a few exceptions, hence
585 * this inner switch.
586 */
587 switch (nodeTag(node))
588 {
589 case T_Aggref:
590 {
591 /*
592 * Aggref is messy enough that we give it its own
593 * function, in fact three of them. The FILTER
594 * clause is independent of the rest of the
595 * aggregate, however, so it can be processed
596 * separately.
597 */
598 Aggref *aggref = (Aggref *) node;
599
600 switch (aggref->aggkind)
601 {
602 case AGGKIND_NORMAL:
604 &loccontext);
605 break;
606 case AGGKIND_ORDERED_SET:
608 &loccontext);
609 break;
610 case AGGKIND_HYPOTHETICAL:
612 &loccontext);
613 break;
614 default:
615 elog(ERROR, "unrecognized aggkind: %d",
616 (int) aggref->aggkind);
617 }
618
620 (Node *) aggref->aggfilter);
621 }
622 break;
623 case T_WindowFunc:
624 {
625 /*
626 * WindowFunc requires special processing only for
627 * its aggfilter clause, as for aggregates.
628 */
629 WindowFunc *wfunc = (WindowFunc *) node;
630
631 (void) assign_collations_walker((Node *) wfunc->args,
632 &loccontext);
633
635 (Node *) wfunc->aggfilter);
636 }
637 break;
638 case T_CaseExpr:
639 {
640 /*
641 * CaseExpr is a special case because we do not
642 * want to recurse into the test expression (if
643 * any). It was already marked with collations
644 * during transformCaseExpr, and furthermore its
645 * collation is not relevant to the result of the
646 * CASE --- only the output expressions are.
647 */
648 CaseExpr *expr = (CaseExpr *) node;
649 ListCell *lc;
650
651 foreach(lc, expr->args)
652 {
654
655 /*
656 * The condition expressions mustn't affect
657 * the CASE's result collation either; but
658 * since they are known to yield boolean, it's
659 * safe to recurse directly on them --- they
660 * won't change loccontext.
661 */
662 (void) assign_collations_walker((Node *) when->expr,
663 &loccontext);
664 (void) assign_collations_walker((Node *) when->result,
665 &loccontext);
666 }
667 (void) assign_collations_walker((Node *) expr->defresult,
668 &loccontext);
669 }
670 break;
671 case T_SubscriptingRef:
672 {
673 /*
674 * The subscripts are treated as independent
675 * expressions not contributing to the node's
676 * collation. Only the container, and the source
677 * expression if any, contribute. (This models
678 * the old behavior, in which the subscripts could
679 * be counted on to be integers and thus not
680 * contribute anything.)
681 */
682 SubscriptingRef *sbsref = (SubscriptingRef *) node;
683
685 (Node *) sbsref->refupperindexpr);
687 (Node *) sbsref->reflowerindexpr);
688 (void) assign_collations_walker((Node *) sbsref->refexpr,
689 &loccontext);
690 (void) assign_collations_walker((Node *) sbsref->refassgnexpr,
691 &loccontext);
692 }
693 break;
694 default:
695
696 /*
697 * Normal case: all child expressions contribute
698 * equally to loccontext.
699 */
700 (void) expression_tree_walker(node,
702 &loccontext);
703 break;
704 }
705
706 /*
707 * Now figure out what collation to assign to this node.
708 */
709 typcollation = get_typcollation(exprType(node));
710 if (OidIsValid(typcollation))
711 {
712 /* Node's result is collatable; what about its input? */
713 if (loccontext.strength > COLLATE_NONE)
714 {
715 /* Collation state bubbles up from children. */
716 collation = loccontext.collation;
717 strength = loccontext.strength;
718 location = loccontext.location;
719 }
720 else
721 {
722 /*
723 * Collatable output produced without any collatable
724 * input. Use the type's collation (which is usually
725 * DEFAULT_COLLATION_OID, but might be different for a
726 * domain).
727 */
728 collation = typcollation;
729 strength = COLLATE_IMPLICIT;
730 location = exprLocation(node);
731 }
732 }
733 else
734 {
735 /* Node's result type isn't collatable. */
736 collation = InvalidOid;
737 strength = COLLATE_NONE;
738 location = -1; /* won't be used */
739 }
740
741 /*
742 * Save the result collation into the expression node. If the
743 * state is COLLATE_CONFLICT, we'll set the collation to
744 * InvalidOid, which might result in an error at runtime.
745 */
746 if (strength == COLLATE_CONFLICT)
748 else
749 exprSetCollation(node, collation);
750
751 /*
752 * Likewise save the input collation, which is the one that
753 * any function called by this node should use.
754 */
755 if (loccontext.strength == COLLATE_CONFLICT)
757 else
758 exprSetInputCollation(node, loccontext.collation);
759 }
760 break;
761 }
762
763 /*
764 * Now, merge my information into my parent's state.
765 */
766 merge_collation_state(collation,
767 strength,
768 location,
769 loccontext.collation2,
770 loccontext.location2,
771 context);
772
773 return false;
774}
775
776/*
777 * Merge collation state of a subexpression into the context for its parent.
778 */
779static void
781 CollateStrength strength,
782 int location,
783 Oid collation2,
784 int location2,
786{
787 /*
788 * If the collation strength for this node is different from what's
789 * already in *context, then this node either dominates or is dominated by
790 * earlier siblings.
791 */
792 if (strength > context->strength)
793 {
794 /* Override previous parent state */
795 context->collation = collation;
796 context->strength = strength;
797 context->location = location;
798 /* Bubble up error info if applicable */
799 if (strength == COLLATE_CONFLICT)
800 {
801 context->collation2 = collation2;
802 context->location2 = location2;
803 }
804 }
805 else if (strength == context->strength)
806 {
807 /* Merge, or detect error if there's a collation conflict */
808 switch (strength)
809 {
810 case COLLATE_NONE:
811 /* Nothing + nothing is still nothing */
812 break;
813 case COLLATE_IMPLICIT:
814 if (collation != context->collation)
815 {
816 /*
817 * Non-default implicit collation always beats default.
818 */
819 if (context->collation == DEFAULT_COLLATION_OID)
820 {
821 /* Override previous parent state */
822 context->collation = collation;
823 context->strength = strength;
824 context->location = location;
825 }
826 else if (collation != DEFAULT_COLLATION_OID)
827 {
828 /*
829 * Oops, we have a conflict. We cannot throw error
830 * here, since the conflict could be resolved by a
831 * later sibling CollateExpr, or the parent might not
832 * care about collation anyway. Return enough info to
833 * throw the error later, if needed.
834 */
835 context->strength = COLLATE_CONFLICT;
836 context->collation2 = collation;
837 context->location2 = location;
838 }
839 }
840 break;
841 case COLLATE_CONFLICT:
842 /* We're still conflicted ... */
843 break;
844 case COLLATE_EXPLICIT:
845 if (collation != context->collation)
846 {
847 /*
848 * Oops, we have a conflict of explicit COLLATE clauses.
849 * Here we choose to throw error immediately; that is what
850 * the SQL standard says to do, and there's no good reason
851 * to be less strict.
852 */
854 (errcode(ERRCODE_COLLATION_MISMATCH),
855 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
857 get_collation_name(collation)),
858 parser_errposition(context->pstate, location)));
859 }
860 break;
861 }
862 }
863}
864
865/*
866 * Aggref is a special case because expressions used only for ordering
867 * shouldn't be taken to conflict with each other or with regular args,
868 * indeed shouldn't affect the aggregate's result collation at all.
869 * We handle this by applying assign_expr_collations() to them rather than
870 * passing down our loccontext.
871 *
872 * Note that we recurse to each TargetEntry, not directly to its contained
873 * expression, so that the case above for T_TargetEntry will complain if we
874 * can't resolve a collation for an ORDER BY item (whether or not it is also
875 * a normal aggregate arg).
876 *
877 * We need not recurse into the aggorder or aggdistinct lists, because those
878 * contain only SortGroupClause nodes which we need not process.
879 */
880static void
882 assign_collations_context *loccontext)
883{
884 ListCell *lc;
885
886 /* Plain aggregates have no direct args */
887 Assert(aggref->aggdirectargs == NIL);
888
889 /* Process aggregated args, holding resjunk ones at arm's length */
890 foreach(lc, aggref->args)
891 {
893
894 if (tle->resjunk)
895 assign_expr_collations(loccontext->pstate, (Node *) tle);
896 else
897 (void) assign_collations_walker((Node *) tle, loccontext);
898 }
899}
900
901/*
902 * For ordered-set aggregates, it's somewhat unclear how best to proceed.
903 * The spec-defined inverse distribution functions have only one sort column
904 * and don't return collatable types, but this is clearly too restrictive in
905 * the general case. Our solution is to consider that the aggregate's direct
906 * arguments contribute normally to determination of the aggregate's own
907 * collation, while aggregated arguments contribute only when the aggregate
908 * is designed to have exactly one aggregated argument (i.e., it has a single
909 * aggregated argument and is non-variadic). If it can have more than one
910 * aggregated argument, we process the aggregated arguments as independent
911 * sort columns. This avoids throwing error for something like
912 * agg(...) within group (order by x collate "foo", y collate "bar")
913 * while also guaranteeing that variadic aggregates don't change in behavior
914 * depending on how many sort columns a particular call happens to have.
915 *
916 * Otherwise this is much like the plain-aggregate case.
917 */
918static void
920 assign_collations_context *loccontext)
921{
922 bool merge_sort_collations;
923 ListCell *lc;
924
925 /* Merge sort collations to parent only if there can be only one */
926 merge_sort_collations = (list_length(aggref->args) == 1 &&
928
929 /* Direct args, if any, are normal children of the Aggref node */
931 loccontext);
932
933 /* Process aggregated args appropriately */
934 foreach(lc, aggref->args)
935 {
937
938 if (merge_sort_collations)
939 (void) assign_collations_walker((Node *) tle, loccontext);
940 else
941 assign_expr_collations(loccontext->pstate, (Node *) tle);
942 }
943}
944
945/*
946 * Hypothetical-set aggregates are even more special: per spec, we need to
947 * unify the collations of each pair of hypothetical and aggregated args.
948 * And we need to force the choice of collation down into the sort column
949 * to ensure that the sort happens with the chosen collation. Other than
950 * that, the behavior is like regular ordered-set aggregates. Note that
951 * hypothetical direct arguments contribute to the aggregate collation
952 * only when their partner aggregated arguments do.
953 */
954static void
956 assign_collations_context *loccontext)
957{
958 ListCell *h_cell = list_head(aggref->aggdirectargs);
959 ListCell *s_cell = list_head(aggref->args);
960 bool merge_sort_collations;
961 int extra_args;
962
963 /* Merge sort collations to parent only if there can be only one */
964 merge_sort_collations = (list_length(aggref->args) == 1 &&
966
967 /* Process any non-hypothetical direct args */
968 extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
969 Assert(extra_args >= 0);
970 while (extra_args-- > 0)
971 {
972 (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
973 h_cell = lnext(aggref->aggdirectargs, h_cell);
974 }
975
976 /* Scan hypothetical args and aggregated args in parallel */
977 while (h_cell && s_cell)
978 {
979 Node *h_arg = (Node *) lfirst(h_cell);
980 TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
981 assign_collations_context paircontext;
982
983 /*
984 * Assign collations internally in this pair of expressions, then
985 * choose a common collation for them. This should match
986 * select_common_collation(), but we can't use that function as-is
987 * because we need access to the whole collation state so we can
988 * bubble it up to the aggregate function's level.
989 */
990 paircontext.pstate = loccontext->pstate;
991 paircontext.collation = InvalidOid;
992 paircontext.strength = COLLATE_NONE;
993 paircontext.location = -1;
994 /* Set these fields just to suppress uninitialized-value warnings: */
995 paircontext.collation2 = InvalidOid;
996 paircontext.location2 = -1;
997
998 (void) assign_collations_walker(h_arg, &paircontext);
999 (void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
1000
1001 /* deal with collation conflict */
1002 if (paircontext.strength == COLLATE_CONFLICT)
1003 ereport(ERROR,
1004 (errcode(ERRCODE_COLLATION_MISMATCH),
1005 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
1006 get_collation_name(paircontext.collation),
1007 get_collation_name(paircontext.collation2)),
1008 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
1009 parser_errposition(paircontext.pstate,
1010 paircontext.location2)));
1011
1012 /*
1013 * At this point paircontext.collation can be InvalidOid only if the
1014 * type is not collatable; no need to do anything in that case. If we
1015 * do have to change the sort column's collation, do it by inserting a
1016 * RelabelType node into the sort column TLE.
1017 *
1018 * XXX This is pretty grotty for a couple of reasons:
1019 * assign_collations_walker isn't supposed to be changing the
1020 * expression structure like this, and a parse-time change of
1021 * collation ought to be signaled by a CollateExpr not a RelabelType
1022 * (the use of RelabelType for collation marking is supposed to be a
1023 * planner/executor thing only). But we have no better alternative.
1024 * In particular, injecting a CollateExpr could result in the
1025 * expression being interpreted differently after dump/reload, since
1026 * we might be effectively promoting an implicit collation to
1027 * explicit. This kluge is relying on ruleutils.c not printing a
1028 * COLLATE clause for a RelabelType, and probably on some other
1029 * fragile behaviors.
1030 */
1031 if (OidIsValid(paircontext.collation) &&
1032 paircontext.collation != exprCollation((Node *) s_tle->expr))
1033 {
1034 s_tle->expr = (Expr *)
1035 makeRelabelType(s_tle->expr,
1036 exprType((Node *) s_tle->expr),
1037 exprTypmod((Node *) s_tle->expr),
1038 paircontext.collation,
1040 }
1041
1042 /*
1043 * If appropriate, merge this column's collation state up to the
1044 * aggregate function.
1045 */
1046 if (merge_sort_collations)
1047 merge_collation_state(paircontext.collation,
1048 paircontext.strength,
1049 paircontext.location,
1050 paircontext.collation2,
1051 paircontext.location2,
1052 loccontext);
1053
1054 h_cell = lnext(aggref->aggdirectargs, h_cell);
1055 s_cell = lnext(aggref->args, s_cell);
1056 }
1057 Assert(h_cell == NULL && s_cell == NULL);
1058}
#define Assert(condition)
Definition: c.h:815
#define OidIsValid(objectId)
Definition: c.h:732
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
List * lappend_oid(List *list, Oid datum)
Definition: list.c:375
Oid get_func_variadictype(Oid funcid)
Definition: lsyscache.c:1750
Oid get_typcollation(Oid typid)
Definition: lsyscache.c:3083
char * get_collation_name(Oid colloid)
Definition: lsyscache.c:1036
RelabelType * makeRelabelType(Expr *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat)
Definition: makefuncs.c:406
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
void exprSetCollation(Node *expr, Oid collation)
Definition: nodeFuncs.c:1124
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:301
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:821
void exprSetInputCollation(Node *expr, Oid inputcollation)
Definition: nodeFuncs.c:1324
int exprLocation(const Node *expr)
Definition: nodeFuncs.c:1388
#define QTW_IGNORE_CTE_SUBQUERIES
Definition: nodeFuncs.h:23
#define query_tree_walker(q, w, c, f)
Definition: nodeFuncs.h:158
#define expression_tree_walker(n, w, c)
Definition: nodeFuncs.h:153
#define QTW_IGNORE_RANGE_TABLE
Definition: nodeFuncs.h:26
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define nodeTag(nodeptr)
Definition: nodes.h:133
void assign_list_collations(ParseState *pstate, List *exprs)
Oid select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
static bool assign_collations_walker(Node *node, assign_collations_context *context)
static void assign_aggregate_collations(Aggref *aggref, assign_collations_context *loccontext)
static bool assign_query_collations_walker(Node *node, ParseState *pstate)
void assign_query_collations(ParseState *pstate, Query *query)
static void merge_collation_state(Oid collation, CollateStrength strength, int location, Oid collation2, int location2, assign_collations_context *context)
static void assign_ordered_set_collations(Aggref *aggref, assign_collations_context *loccontext)
void assign_expr_collations(ParseState *pstate, Node *expr)
CollateStrength
Definition: parse_collate.c:57
@ COLLATE_IMPLICIT
Definition: parse_collate.c:59
@ COLLATE_NONE
Definition: parse_collate.c:58
@ COLLATE_CONFLICT
Definition: parse_collate.c:60
@ COLLATE_EXPLICIT
Definition: parse_collate.c:61
static void assign_hypothetical_collations(Aggref *aggref, assign_collations_context *loccontext)
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:106
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial_node(type, l)
Definition: pg_list.h:181
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define list_make2(x1, x2)
Definition: pg_list.h:214
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:753
Oid aggfnoid
Definition: primnodes.h:461
List * aggdirectargs
Definition: primnodes.h:482
List * args
Definition: primnodes.h:485
Expr * aggfilter
Definition: primnodes.h:494
Expr * defresult
Definition: primnodes.h:1332
List * args
Definition: primnodes.h:1331
ParseLoc location
Definition: primnodes.h:1298
Definition: pg_list.h:54
Definition: nodes.h:129
List * targetList
Definition: parsenodes.h:193
List * args
Definition: primnodes.h:1428
Expr * refassgnexpr
Definition: primnodes.h:720
List * refupperindexpr
Definition: primnodes.h:710
Expr * refexpr
Definition: primnodes.h:718
List * reflowerindexpr
Definition: primnodes.h:716
Expr * expr
Definition: primnodes.h:2219
List * args
Definition: primnodes.h:592
Expr * aggfilter
Definition: primnodes.h:594
CollateStrength strength
Definition: parse_collate.c:68
Definition: type.h:89