PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgpa_identifier.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pgpa_identifier.c
4 * create appropriate identifiers for range table entries
5 *
6 * The goal of this module is to be able to produce identifiers for range
7 * table entries that are unique, understandable to human beings, and
8 * able to be reconstructed during future planning cycles. As an
9 * exception, we do not care about, or want to produce, identifiers for
10 * RTE_JOIN entries. This is because (1) we would end up with a ton of
11 * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
12 * RTEs; and (3) we intend to refer to joins by their constituent members
13 * rather than by reference to the join RTE.
14 *
15 * In general, we construct identifiers of the following form:
16 *
17 * alias_name#occurrence_number/child_table_name@subquery_name
18 *
19 * However, occurrence_number is omitted when it is the first occurrence
20 * within the same subquery, child_table_name is omitted for relations that
21 * are not child tables, and subquery_name is omitted for the topmost
22 * query level. Whenever an item is omitted, the preceding punctuation mark
23 * is also omitted. Identifier-style escaping is applied to alias_name and
24 * subquery_name. In generated advice, child table names are always
25 * schema-qualified, but users can supply advice where the schema name is
26 * not mentioned. Identifier-style escaping is applied to the schema and to
27 * the relation name separately.
28 *
29 * The upshot of all of these rules is that in simple cases, the relation
30 * identifier is textually identical to the alias name, making life easier
31 * for users. However, even in complex cases, every relation identifier
32 * for a given query will be unique (or at least we hope so: if not, this
33 * code is buggy and the identifier format might need to be rethought).
34 *
35 * A key goal of this system is that we want to be able to reconstruct the
36 * same identifiers during a future planning cycle for the same query, so
37 * that if a certain behavior is specified for a certain identifier, we can
38 * properly identify the RTI for which that behavior is mandated. In order
39 * for this to work, subquery names must be unique and known before the
40 * subquery is planned, and the remainder of the identifier must not depend
41 * on any part of the query outside of the current subquery level. In
42 * particular, occurrence_number must be calculated relative to the range
43 * table for the relevant subquery, not the final flattened range table.
44 *
45 * NB: All of this code must use rt_fetch(), not planner_rt_fetch()!
46 * Join removal and self-join elimination remove rels from the arrays
47 * that planner_rt_fetch() uses; using rt_fetch() is necessary to get
48 * stable results.
49 *
50 * Copyright (c) 2016-2026, PostgreSQL Global Development Group
51 *
52 * contrib/pg_plan_advice/pgpa_identifier.c
53 *
54 *-------------------------------------------------------------------------
55 */
56
57#include "postgres.h"
58
59#include "pgpa_identifier.h"
60
61#include "parser/parsetree.h"
62#include "utils/builtins.h"
63#include "utils/lsyscache.h"
64
66 List *appinfos);
67static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
69
70/*
71 * Create a range table identifier from scratch.
72 *
73 * This function leaves the caller to do all the heavy lifting, so it's
74 * generally better to use one of the functions below instead.
75 *
76 * See the file header comments for more details on the format of an
77 * identifier.
78 */
79const char *
81{
82 const char *result;
83
84 Assert(rid->alias_name != NULL);
85 result = quote_identifier(rid->alias_name);
86
87 Assert(rid->occurrence >= 0);
88 if (rid->occurrence > 1)
89 result = psprintf("%s#%d", result, rid->occurrence);
90
91 if (rid->partrel != NULL)
92 {
93 if (rid->partnsp == NULL)
94 result = psprintf("%s/%s", result,
96 else
97 result = psprintf("%s/%s.%s", result,
100 }
101
102 if (rid->plan_name != NULL)
103 result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
104
105 return result;
106}
107
108/*
109 * Compute a relation identifier for a particular RTI.
110 *
111 * The caller provides root and rti, and gets the necessary details back via
112 * the remaining parameters.
113 */
114void
116 pgpa_identifier *rid)
117{
118 Index top_rti = rti;
119 int occurrence = 1;
122 char *partnsp = NULL;
123 char *partrel = NULL;
124
125 /*
126 * If this is a child RTE, find the topmost parent that is still of type
127 * RTE_RELATION. We do this because we identify children of partitioned
128 * tables by the name of the child table, but subqueries can also have
129 * child rels and we don't care about those here.
130 */
131 for (;;)
132 {
135
136 /* append_rel_array can be NULL if there are no children */
137 if (root->append_rel_array == NULL ||
138 (appinfo = root->append_rel_array[top_rti]) == NULL)
139 break;
140
141 parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
142 if (parent_rte->rtekind != RTE_RELATION)
143 break;
144
145 top_rti = appinfo->parent_relid;
146 }
147
148 /* Get the range table entries for the RTI and top RTI. */
149 rte = rt_fetch(rti, root->parse->rtable);
150 top_rte = rt_fetch(top_rti, root->parse->rtable);
151 Assert(rte->rtekind != RTE_JOIN);
152 Assert(top_rte->rtekind != RTE_JOIN);
153
154 /* Work out the correct occurrence number. */
156 {
159
160 /*
161 * If this is a child rel of a parent that is a relation, skip it.
162 *
163 * Such range table entries are disambiguated by mentioning the schema
164 * and name of the table, not by counting them as separate occurrences
165 * of the same table.
166 *
167 * NB: append_rel_array can be NULL if there are no children
168 */
169 if (root->append_rel_array != NULL &&
170 (appinfo = root->append_rel_array[prior_rti]) != NULL)
171 {
173
174 parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
175 if (parent_rte->rtekind == RTE_RELATION)
176 continue;
177 }
178
179 /* Skip NULL entries and joins. */
180 prior_rte = rt_fetch(prior_rti, root->parse->rtable);
181 if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
182 continue;
183
184 /* Skip if the alias name differs. */
185 if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
186 continue;
187
188 /* Looks like a true duplicate. */
189 ++occurrence;
190 }
191
192 /* If this is a child table, get the schema and relation names. */
193 if (rti != top_rti)
194 {
196 partrel = get_rel_name(rte->relid);
197 }
198
199 /* OK, we have all the answers we need. Return them to the caller. */
200 rid->alias_name = top_rte->eref->aliasname;
201 rid->occurrence = occurrence;
202 rid->partnsp = partnsp;
203 rid->partrel = partrel;
204 rid->plan_name = root->plan_name;
205}
206
207/*
208 * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN
209 * RTIs that may be present.
210 *
211 * RTE_JOIN entries are excluded because they cannot be mentioned by plan
212 * advice.
213 *
214 * The caller is responsible for making sure that the tkeys array is large
215 * enough to store the results.
216 *
217 * The return value is the number of identifiers computed.
218 */
219int
221 pgpa_identifier *rids)
222{
223 int count = 0;
224 int rti = -1;
225
226 while ((rti = bms_next_member(relids, rti)) >= 0)
227 {
228 RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
229
230 if (rte->rtekind == RTE_JOIN)
231 continue;
232 pgpa_compute_identifier_by_rti(root, rti, &rids[count++]);
233 }
234
235 Assert(count > 0);
236 return count;
237}
238
239/*
240 * Create an array of range table identifiers for all the non-NULL,
241 * non-RTE_JOIN entries in the PlannedStmt's range table.
242 */
245{
249 int rtinfoindex = 0;
252
253 /*
254 * Account for relations added by inheritance expansion of partitioned
255 * tables.
256 */
258 pstmt->appendRelations);
259
260 /*
261 * When we begin iterating, we're processing the portion of the range
262 * table that originated from the top-level PlannerInfo, so subrtinfo is
263 * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose
264 * portion of the range table we are processing. nextrtinfo is always the
265 * SubPlanRTInfo that follows the current one, if any, so when we're
266 * processing the top-level query's portion of the range table, the next
267 * SubPlanRTInfo is the very first one.
268 */
269 if (pstmt->subrtinfos != NULL)
271
272 /* Main loop over the range table. */
273 for (Index rti = 1; rti <= rtable_length; rti++)
274 {
275 const char *plan_name;
279 char *partnsp = NULL;
280 char *partrel = NULL;
281 int occurrence;
282 pgpa_identifier *rid;
283
284 /*
285 * Advance to the next SubPlanRTInfo, if it's time to do that.
286 *
287 * This loop probably shouldn't ever iterate more than once, because
288 * that would imply that a subquery was planned but added nothing to
289 * the range table; but let's be defensive and assume it can happen.
290 */
291 while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
292 {
294 if (++rtinfoindex >= list_length(pstmt->subrtinfos))
296 else
298 }
299
300 /* Fetch the range table entry, if any. */
301 rte = rt_fetch(rti, pstmt->rtable);
302
303 /*
304 * We can't and don't need to identify null entries, and we don't want
305 * to identify join entries.
306 */
307 if (rte == NULL || rte->rtekind == RTE_JOIN)
308 continue;
309
310 /*
311 * If this is not a relation added by partitioned table expansion,
312 * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
313 * we need the information for the top RTI/RTE, and must also fetch
314 * the partition schema and name.
315 */
316 top_rti = top_rti_map[rti - 1];
317 if (rti == top_rti)
318 top_rte = rte;
319 else
320 {
321 top_rte = rt_fetch(top_rti, pstmt->rtable);
322 partnsp =
324 partrel = get_rel_name(rte->relid);
325 }
326
327 /* Compute the correct occurrence number. */
328 occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
329 rtinfo, top_rti);
330
331 /* Get the name of the current plan (NULL for toplevel query). */
332 plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
333
334 /* Save all the details we've derived. */
335 rid = &result[rti - 1];
336 rid->alias_name = top_rte->eref->aliasname;
337 rid->occurrence = occurrence;
338 rid->partnsp = partnsp;
339 rid->partrel = partrel;
340 rid->plan_name = plan_name;
341 }
342
343 return result;
344}
345
346/*
347 * Search for a pgpa_identifier in the array of identifiers computed for the
348 * range table. If exactly one match is found, return the matching RTI; else
349 * return 0.
350 */
351Index
354 pgpa_identifier *rid)
355{
356 Index result = 0;
357
358 for (Index rti = 1; rti <= rtable_length; ++rti)
359 {
361
362 /* If there's no identifier for this RTI, skip it. */
363 if (rti_rid->alias_name == NULL)
364 continue;
365
366 /*
367 * If it matches, return this RTI. As usual, an omitted partition
368 * schema matches anything, but partition and plan names must either
369 * match exactly or be omitted on both sides.
370 */
371 if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 &&
372 rid->occurrence == rti_rid->occurrence &&
373 (rid->partnsp == NULL || rti_rid->partnsp == NULL ||
374 strcmp(rid->partnsp, rti_rid->partnsp) == 0) &&
377 {
378 if (result != 0)
379 {
380 /* Multiple matches were found. */
381 return 0;
382 }
383 result = rti;
384 }
385 }
386
387 return result;
388}
389
390/*
391 * Build a mapping from each RTI to the RTI whose alias_name will be used to
392 * construct the range table identifier.
393 *
394 * For child relations, this is the topmost parent that is still of type
395 * RTE_RELATION. For other relations, it's just the original RTI.
396 *
397 * Since we're eventually going to need this information for every RTI in
398 * the range table, it's best to compute all the answers in a single pass over
399 * the AppendRelInfo list. Otherwise, we might end up searching through that
400 * list repeatedly for entries of interest.
401 *
402 * Note that the returned array is uses zero-based indexing, while RTIs use
403 * 1-based indexing, so subtract 1 from the RTI before looking it up in the
404 * array.
405 */
406static Index *
408{
410
411 /* Initially, make every RTI point to itself. */
412 for (Index rti = 1; rti <= rtable_length; ++rti)
413 top_rti_map[rti - 1] = rti;
414
415 /* Update the map for each AppendRelInfo object. */
417 {
418 Index parent_rti = appinfo->parent_relid;
420
421 /* If the parent is not RTE_RELATION, ignore this entry. */
422 if (parent_rte->rtekind != RTE_RELATION)
423 continue;
424
425 /*
426 * Map the child to wherever we mapped the parent. Parents always
427 * precede their children in the AppendRelInfo list, so this should
428 * work out.
429 */
430 top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
431 }
432
433 return top_rti_map;
434}
435
436/*
437 * Find the occurrence number of a certain relation within a certain subquery.
438 *
439 * The same alias name can occur multiple times within a subquery, but we want
440 * to disambiguate by giving different occurrences different integer indexes.
441 * However, child tables are disambiguated by including the table name rather
442 * than by incrementing the occurrence number; and joins are not named and so
443 * shouldn't increment the occurrence number either.
444 */
445static int
448{
449 Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
450 int occurrence = 1;
451 RangeTblEntry *rte = rt_fetch(rti, rtable);
452
453 for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
454 {
456
457 /*
458 * If this is a child rel of a parent that is a relation, skip it.
459 *
460 * Such range table entries are disambiguated by mentioning the schema
461 * and name of the table, not by counting them as separate occurrences
462 * of the same table.
463 */
464 if (top_rti_map[prior_rti - 1] != prior_rti)
465 continue;
466
467 /* Skip joins. */
468 prior_rte = rt_fetch(prior_rti, rtable);
469 if (prior_rte->rtekind == RTE_JOIN)
470 continue;
471
472 /* Skip if the alias name differs. */
473 if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
474 continue;
475
476 /* Looks like a true duplicate. */
477 ++occurrence;
478 }
479
480 return occurrence;
481}
int bms_next_member(const Bitmapset *a, int prevbit)
Definition bitmapset.c:1290
#define Assert(condition)
Definition c.h:915
unsigned int Index
Definition c.h:670
#define palloc0_array(type, count)
Definition fe_memutils.h:77
char * get_rel_name(Oid relid)
Definition lsyscache.c:2146
Oid get_rel_namespace(Oid relid)
Definition lsyscache.c:2170
char * get_namespace_name_or_temp(Oid nspid)
Definition lsyscache.c:3610
@ RTE_JOIN
@ RTE_RELATION
#define rt_fetch(rangetable_index, rangetable)
Definition parsetree.h:31
static int list_length(const List *l)
Definition pg_list.h:152
static void * list_nth(const List *list, int n)
Definition pg_list.h:299
#define linitial(l)
Definition pg_list.h:178
#define foreach_node(type, var, lst)
Definition pg_list.h:496
static int pgpa_occurrence_number(List *rtable, Index *top_rti_map, SubPlanRTInfo *rtinfo, Index rti)
static Index * pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti, pgpa_identifier *rid)
Index pgpa_compute_rti_from_identifier(int rtable_length, pgpa_identifier *rt_identifiers, pgpa_identifier *rid)
pgpa_identifier * pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
int pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids, pgpa_identifier *rids)
const char * pgpa_identifier_string(const pgpa_identifier *rid)
static bool strings_equal_or_both_null(const char *a, const char *b)
static int fb(int x)
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
tree ctl root
Definition radixtree.h:1857
const char * quote_identifier(const char *ident)
Definition pg_list.h:54
List * appendRelations
Definition plannodes.h:127
List * subrtinfos
Definition plannodes.h:135
List * rtable
Definition plannodes.h:109
const char * alias_name
const char * partnsp
const char * partrel
const char * plan_name