PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgpa_join.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pgpa_join.c
4 * analysis of joins in Plan trees
5 *
6 * Copyright (c) 2016-2026, PostgreSQL Global Development Group
7 *
8 * contrib/pg_plan_advice/pgpa_join.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13#include "postgres.h"
14
15#include "pgpa_join.h"
16#include "pgpa_scan.h"
17#include "pgpa_walker.h"
18
19#include "nodes/pathnodes.h"
20#include "nodes/print.h"
21#include "parser/parsetree.h"
22
23/*
24 * Temporary object used when unrolling a join tree.
25 */
39
41 Plan *plan,
50 bool *found_any_gather);
51static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
52 ElidedNode **elided_node);
53
55static bool is_sorting_plan(Plan *plan);
56
57/*
58 * Create an initially-empty object for unrolling joins.
59 *
60 * This function creates a helper object that can later be used to create a
61 * pgpa_unrolled_join, after first calling pgpa_unroll_join one or more times.
62 */
65{
67
69 join_unroller->nallocated = 4;
70 join_unroller->strategy =
72 join_unroller->inner_subplans =
73 palloc_array(Plan *, join_unroller->nallocated);
74 join_unroller->inner_elided_nodes =
76 join_unroller->inner_unrollers =
78 join_unroller->inner_beneath_any_gather =
79 palloc_array(bool, join_unroller->nallocated);
80
81 return join_unroller;
82}
83
84/*
85 * Unroll one level of an unrollable join tree.
86 *
87 * Our basic goal here is to unroll join trees as they occur in the Plan
88 * tree into a simpler and more regular structure that we can more easily
89 * use for further processing. Unrolling is outer-deep, so if the plan tree
90 * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be
91 * used for Join1 and Join2, but a different one will be needed for Join3,
92 * since that involves a join within the *inner* side of another join.
93 *
94 * pgpa_plan_walker creates a "top level" join unroller object when it
95 * encounters a join in a portion of the plan tree in which no join unroller
96 * is already active. From there, this function is responsible for determining
97 * to what portion of the plan tree that join unroller applies, and for
98 * creating any subordinate join unroller objects that are needed as a result
99 * of non-outer-deep join trees. We do this by returning the join unroller
100 * objects that should be used for further traversal of the outer and inner
101 * subtrees of the current plan node via *outer_join_unroller and
102 * *inner_join_unroller, respectively.
103 */
104void
110{
111 pgpa_join_strategy strategy;
113 *realouter;
116 int n;
117 bool found_any_outer_gather = false;
118 bool found_any_inner_gather = false;
119
121
122 /*
123 * We need to pass the join_unroller object down through certain types of
124 * plan nodes -- anything that's considered part of the join strategy, and
125 * any other nodes that can occur in a join tree despite not being scans
126 * or joins.
127 *
128 * This includes:
129 *
130 * (1) Materialize, Memoize, and Hash nodes, which are part of the join
131 * strategy,
132 *
133 * (2) Gather and Gather Merge nodes, which can occur at any point in the
134 * join tree where the planner decided to initiate parallelism,
135 *
136 * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
137 * or GatherMerge,
138 *
139 * (4) Agg and Unique nodes, which can occur when we decide to make the
140 * nullable side of a semijoin unique and then join the result, and
141 *
142 * (5) Result nodes with children, which can be added either to project to
143 * enforce a one-time filter (but Result nodes without children are
144 * degenerate scans or joins).
145 */
146 if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash)
150 {
152 return;
153 }
154
155 /*
156 * Since we've already handled nodes that require pass-through treatment,
157 * this should be an unrollable join.
158 */
159 strategy = pgpa_decompose_join(walker, plan,
164
165 /* If our workspace is full, expand it. */
166 if (join_unroller->nused >= join_unroller->nallocated)
167 {
168 join_unroller->nallocated *= 2;
169 join_unroller->strategy =
172 join_unroller->nallocated);
173 join_unroller->inner_subplans =
174 repalloc_array(join_unroller->inner_subplans,
175 Plan *,
176 join_unroller->nallocated);
177 join_unroller->inner_elided_nodes =
178 repalloc_array(join_unroller->inner_elided_nodes,
179 ElidedNode *,
180 join_unroller->nallocated);
181 join_unroller->inner_beneath_any_gather =
182 repalloc_array(join_unroller->inner_beneath_any_gather,
183 bool,
184 join_unroller->nallocated);
185 join_unroller->inner_unrollers =
186 repalloc_array(join_unroller->inner_unrollers,
189 }
190
191 /*
192 * Since we're flattening outer-deep join trees, it follows that if the
193 * outer side is still an unrollable join, it should be unrolled into this
194 * same object. Otherwise, we've reached the limit of what we can unroll
195 * into this object and must remember the outer side as the final outer
196 * subplan.
197 */
200 else
201 {
202 join_unroller->outer_subplan = realouter;
203 join_unroller->outer_elided_node = elidedouter;
204 join_unroller->outer_beneath_any_gather =
206 }
207
208 /*
209 * Store the inner subplan. If it's an unrollable join, it needs to be
210 * flattened in turn, but into a new unroller object, not this one.
211 */
212 n = join_unroller->nused++;
213 join_unroller->strategy[n] = strategy;
214 join_unroller->inner_subplans[n] = realinner;
215 join_unroller->inner_elided_nodes[n] = elidedinner;
216 join_unroller->inner_beneath_any_gather[n] =
220 else
222 join_unroller->inner_unrollers[n] = *inner_join_unroller;
223}
224
225/*
226 * Use the data we've accumulated in a pgpa_join_unroller object to construct
227 * a pgpa_unrolled_join.
228 */
232{
234 int i;
235
236 /*
237 * We shouldn't have gone even so far as to create a join unroller unless
238 * we found at least one unrollable join.
239 */
240 Assert(join_unroller->nused > 0);
241
242 /* Allocate result structures. */
244 ujoin->ninner = join_unroller->nused;
247
248 /* Handle the outermost join. */
249 ujoin->outer.plan = join_unroller->outer_subplan;
250 ujoin->outer.elided_node = join_unroller->outer_elided_node;
251 ujoin->outer.scan =
252 pgpa_build_scan(walker, ujoin->outer.plan,
253 ujoin->outer.elided_node,
254 join_unroller->outer_beneath_any_gather,
255 true);
256
257 /*
258 * We want the joins from the deepest part of the plan tree to appear
259 * first in the result object, but the join unroller adds them in exactly
260 * the reverse of that order, so we need to flip the order of the arrays
261 * when constructing the final result.
262 */
263 for (i = 0; i < join_unroller->nused; ++i)
264 {
265 int k = join_unroller->nused - i - 1;
266
267 /* Copy strategy, Plan, and ElidedNode. */
268 ujoin->strategy[i] = join_unroller->strategy[k];
269 ujoin->inner[i].plan = join_unroller->inner_subplans[k];
270 ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
271
272 /*
273 * Fill in remaining details, using either the nested join unroller,
274 * or by deriving them from the plan and elided nodes.
275 */
276 if (join_unroller->inner_unrollers[k] != NULL)
277 ujoin->inner[i].unrolled_join =
279 join_unroller->inner_unrollers[k]);
280 else
281 ujoin->inner[i].scan =
282 pgpa_build_scan(walker, ujoin->inner[i].plan,
283 ujoin->inner[i].elided_node,
284 join_unroller->inner_beneath_any_gather[k],
285 true);
286 }
287
288 return ujoin;
289}
290
291/*
292 * Free memory allocated for pgpa_join_unroller.
293 */
294void
296{
297 pfree(join_unroller->strategy);
298 pfree(join_unroller->inner_subplans);
299 pfree(join_unroller->inner_elided_nodes);
300 pfree(join_unroller->inner_unrollers);
301 pfree(join_unroller->inner_beneath_any_gather);
303}
304
305/*
306 * Identify the join strategy used by a join and the "real" inner and outer
307 * plans.
308 *
309 * For example, a Hash Join always has a Hash node on the inner side, but
310 * for all intents and purposes the real inner input is the Hash node's child,
311 * not the Hash node itself.
312 *
313 * Likewise, a Merge Join may have Sort node on the inner or outer side; if
314 * it does, the real input to the join is the Sort node's child, not the
315 * Sort node itself.
316 *
317 * In addition, with a Merge Join or a Nested Loop, the join planning code
318 * may add additional nodes such as Materialize or Memoize. We regard these
319 * as an aspect of the join strategy. As in the previous cases, the true input
320 * to the join is the underlying node.
321 *
322 * However, if any involved child node previously had a now-elided node stacked
323 * on top, then we can't "look through" that node -- indeed, what's going to be
324 * relevant for our purposes is the ElidedNode on top of that plan node, rather
325 * than the plan node itself.
326 *
327 * If there are multiple elided nodes, we want that one that would have been
328 * uppermost in the plan tree prior to setrefs processing; we expect to find
329 * that one last in the list of elided nodes.
330 *
331 * On return *realouter and *realinner will have been set to the real inner
332 * and real outer plans that we identified, and *elidedrealouter and
333 * *elidedrealinner to the last of any corresponding elided nodes.
334 * Additionally, *found_any_outer_gather and *found_any_inner_gather will
335 * be set to true if we looked through a Gather or Gather Merge node on
336 * that side of the join, and false otherwise.
337 */
343{
344 PlannedStmt *pstmt = walker->pstmt;
345 JoinType jointype = ((Join *) plan)->jointype;
350 pgpa_join_strategy strategy;
351 bool uniqueouter;
352 bool uniqueinner;
353
356 *found_any_outer_gather = false;
357 *found_any_inner_gather = false;
358
359 switch (nodeTag(plan))
360 {
361 case T_MergeJoin:
362
363 /*
364 * The planner may have chosen to place a Material node on the
365 * inner side of the MergeJoin; if this is present, we record it
366 * as part of the join strategy. (However, scan-level Materialize
367 * nodes are an exception.)
368 */
369 if (elidedinner == NULL && IsA(innerplan, Material) &&
371 {
374 }
375 else
376 strategy = JSTRAT_MERGE_JOIN_PLAIN;
377
378 /*
379 * For a MergeJoin, either the outer or the inner subplan, or
380 * both, may have needed to be sorted; we must disregard any Sort
381 * or IncrementalSort node to find the real inner or outer
382 * subplan.
383 */
388 break;
389
390 case T_NestLoop:
391
392 /*
393 * The planner may have chosen to place a Material or Memoize node
394 * on the inner side of the NestLoop; if this is present, we
395 * record it as part of the join strategy. (However, scan-level
396 * Materialize nodes are an exception.)
397 */
398 if (elidedinner == NULL && IsA(innerplan, Material) &&
400 {
403 }
404 else if (elidedinner == NULL && IsA(innerplan, Memoize))
405 {
408 }
409 else
410 strategy = JSTRAT_NESTED_LOOP_PLAIN;
411 break;
412
413 case T_HashJoin:
414
415 /*
416 * The inner subplan of a HashJoin is always a Hash node; the real
417 * inner subplan is the Hash node's child.
418 */
422 strategy = JSTRAT_HASH_JOIN;
423 break;
424
425 default:
426 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
427 }
428
429 /*
430 * The planner may have decided to implement a semijoin by first making
431 * the nullable side of the plan unique, and then performing a normal join
432 * against the result. Therefore, we might need to descend through a
433 * unique node on either side of the plan.
434 */
437
438 /*
439 * Can we see a Result node here, to project above a Gather? So far I've
440 * found no example that behaves that way; rather, the Gather or Gather
441 * Merge is made to project. Hence, don't test is_result_node_with_child()
442 * at this point.
443 */
444
445 /*
446 * The planner may have decided to parallelize part of the join tree, so
447 * we could find a Gather or Gather Merge node here. Note that, if
448 * present, this will appear below nodes we considered as part of the join
449 * strategy, but we could find another uniqueness-enforcing node below the
450 * Gather or Gather Merge, if present.
451 */
452 if (elidedouter == NULL)
453 {
458 uniqueouter = true;
459 }
460 if (elidedinner == NULL)
461 {
466 uniqueinner = true;
467 }
468
469 /*
470 * It's possible that a Result node has been inserted either to project a
471 * target list or to implement a one-time filter. If so, we can descend
472 * through it. Note that a Result node without a child would be a
473 * degenerate scan or join, and not something we could descend through.
474 */
479
480 /*
481 * If this is a semijoin that was converted to an inner join by making one
482 * side or the other unique, make a note that the inner or outer subplan,
483 * as appropriate, should be treated as a query plan feature when the main
484 * tree traversal reaches it.
485 *
486 * Conversely, if the planner could have made one side of the join unique
487 * and thereby converted it to an inner join, and chose not to do so, that
488 * is also worth noting.
489 *
490 * NB: This code could appear slightly higher up in this function, but
491 * none of the nodes through which we just descended should have
492 * associated RTIs.
493 *
494 * NB: This seems like a somewhat hacky way of passing information up to
495 * the main tree walk, but I don't currently have a better idea.
496 */
497 if (uniqueouter)
499 else if (jointype == JOIN_RIGHT_SEMI)
501 if (uniqueinner)
503 else if (jointype == JOIN_SEMI)
505
506 /* Set output parameters. */
511 return strategy;
512}
513
514/*
515 * Descend through a Plan node in a join tree that the caller has determined
516 * to be irrelevant.
517 *
518 * Updates *plan, and returns the last of any elided nodes pertaining to the
519 * new plan node.
520 */
521static ElidedNode *
523{
524 *plan = (*plan)->lefttree;
525 return pgpa_last_elided_node(pstmt, *plan);
526}
527
528/*
529 * Descend through a Gather or Gather Merge node, if present, and any Sort
530 * or IncrementalSort node occurring under a Gather Merge.
531 *
532 * Caller should have verified that there is no ElidedNode pertaining to
533 * the initial value of *plan.
534 *
535 * Updates *plan, and returns the last of any elided nodes pertaining to the
536 * new plan node. Sets *found_any_gather = true if either Gather or
537 * Gather Merge was found, and otherwise leaves it unchanged.
538 */
539static ElidedNode *
541 bool *found_any_gather)
542{
543 if (IsA(*plan, Gather))
544 {
545 *found_any_gather = true;
546 return pgpa_descend_node(pstmt, plan);
547 }
548
549 if (IsA(*plan, GatherMerge))
550 {
552
553 if (elided == NULL && is_sorting_plan(*plan))
554 elided = pgpa_descend_node(pstmt, plan);
555
556 *found_any_gather = true;
557 return elided;
558 }
559
560 return NULL;
561}
562
563/*
564 * If *plan is an Agg or Unique node, we want to descend through it, unless
565 * it has a corresponding elided node. If its immediate child is a Sort or
566 * IncrementalSort, we also want to descend through that, unless it has a
567 * corresponding elided node.
568 *
569 * On entry, *elided_node must be the last of any elided nodes corresponding
570 * to *plan; on exit, this will still be true, but *plan may have been updated.
571 *
572 * The reason we don't want to descend through elided nodes is that a single
573 * join tree can't cross through any sort of elided node: subqueries are
574 * planned separately, and planning inside an Append or MergeAppend is
575 * separate from planning outside of it.
576 *
577 * The return value is true if we descend through a node that we believe is
578 * making one side of a semijoin unique, and otherwise false.
579 */
580static bool
582 ElidedNode **elided_node)
583{
584 bool descend = false;
585 bool sjunique = false;
586
587 if (*elided_node != NULL)
588 return sjunique;
589
590 if (IsA(*plan, Unique))
591 {
592 descend = true;
593 sjunique = true;
594 }
595 else if (IsA(*plan, Agg))
596 {
597 /*
598 * If this is a simple Agg node, then assume it's here to implement
599 * semijoin uniqueness. Otherwise, assume it's completing an eager
600 * aggregation or partitionwise aggregation operation that began at a
601 * higher level of the plan tree.
602 *
603 * (Note that when we're using an Agg node for uniqueness, there's no
604 * need for any case other than AGGSPLIT_SIMPLE, because there's no
605 * aggregated column being computed. However, the fact that
606 * AGGSPLIT_SIMPLE is in use doesn't prove that this Agg is here for
607 * the semijoin uniqueness. Maybe we should adjust an Agg node to
608 * carry a "purpose" field so that code like this can be more certain
609 * of its analysis.)
610 */
611 descend = true;
612 sjunique = (((Agg *) *plan)->aggsplit == AGGSPLIT_SIMPLE);
613 }
614
615 if (descend)
616 {
617 *elided_node = pgpa_descend_node(pstmt, plan);
618
619 if (*elided_node == NULL && is_sorting_plan(*plan))
620 *elided_node = pgpa_descend_node(pstmt, plan);
621 }
622
623 return sjunique;
624}
625
626/*
627 * Is this a Result node that has a child?
628 */
629static bool
631{
632 return IsA(plan, Result) && plan->lefttree != NULL;
633}
634
635/*
636 * Is this a Plan node whose purpose is to put the data in a certain order?
637 */
638static bool
640{
641 return IsA(plan, Sort) || IsA(plan, IncrementalSort);
642}
#define Assert(condition)
Definition c.h:943
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define nodeTag(nodeptr)
Definition nodes.h:139
@ AGGSPLIT_SIMPLE
Definition nodes.h:387
JoinType
Definition nodes.h:298
@ JOIN_SEMI
Definition nodes.h:317
@ JOIN_RIGHT_SEMI
Definition nodes.h:319
#define plan(x)
Definition pg_regress.c:164
pgpa_unrolled_join * pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, pgpa_join_unroller *join_unroller)
Definition pgpa_join.c:230
static ElidedNode * pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
Definition pgpa_join.c:522
void pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan, bool beneath_any_gather, pgpa_join_unroller *join_unroller, pgpa_join_unroller **outer_join_unroller, pgpa_join_unroller **inner_join_unroller)
Definition pgpa_join.c:105
static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan, Plan **realouter, Plan **realinner, ElidedNode **elidedrealouter, ElidedNode **elidedrealinner, bool *found_any_outer_gather, bool *found_any_inner_gather)
Definition pgpa_join.c:339
static ElidedNode * pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, bool *found_any_gather)
Definition pgpa_join.c:540
static bool is_sorting_plan(Plan *plan)
Definition pgpa_join.c:639
void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
Definition pgpa_join.c:295
static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, ElidedNode **elided_node)
Definition pgpa_join.c:581
pgpa_join_unroller * pgpa_create_join_unroller(void)
Definition pgpa_join.c:64
static bool is_result_node_with_child(Plan *plan)
Definition pgpa_join.c:630
pgpa_join_strategy
Definition pgpa_join.h:28
@ JSTRAT_MERGE_JOIN_PLAIN
Definition pgpa_join.h:29
@ JSTRAT_NESTED_LOOP_MATERIALIZE
Definition pgpa_join.h:32
@ JSTRAT_NESTED_LOOP_MEMOIZE
Definition pgpa_join.h:33
@ JSTRAT_HASH_JOIN
Definition pgpa_join.h:34
@ JSTRAT_NESTED_LOOP_PLAIN
Definition pgpa_join.h:31
@ JSTRAT_MERGE_JOIN_MATERIALIZE
Definition pgpa_join.h:30
static bool pgpa_is_join(Plan *plan)
Definition pgpa_join.h:90
pgpa_scan * pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, ElidedNode *elided_node, bool beneath_any_gather, bool within_join_problem)
Definition pgpa_scan.c:44
ElidedNode * pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
bool pgpa_is_scan_level_materialize(Plan *plan)
void pgpa_add_future_feature(pgpa_plan_walker_context *walker, pgpa_qf_type type, Plan *plan)
@ PGPAQF_SEMIJOIN_UNIQUE
Definition pgpa_walker.h:48
@ PGPAQF_SEMIJOIN_NON_UNIQUE
Definition pgpa_walker.h:47
static int fb(int x)
struct Plan * lefttree
Definition plannodes.h:239
struct Plan * righttree
Definition plannodes.h:240
Plan ** inner_subplans
Definition pgpa_join.c:34
unsigned nallocated
Definition pgpa_join.c:28
bool outer_beneath_any_gather
Definition pgpa_join.c:32
bool * inner_beneath_any_gather
Definition pgpa_join.c:37
ElidedNode * outer_elided_node
Definition pgpa_join.c:31
ElidedNode ** inner_elided_nodes
Definition pgpa_join.c:35
pgpa_join_unroller ** inner_unrollers
Definition pgpa_join.c:36
pgpa_join_strategy * strategy
Definition pgpa_join.c:33