PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgpa_join.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pgpa_join.c
4 * analysis of joins in Plan trees
5 *
6 * Copyright (c) 2016-2026, PostgreSQL Global Development Group
7 *
8 * contrib/pg_plan_advice/pgpa_join.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13#include "postgres.h"
14
15#include "pgpa_join.h"
16#include "pgpa_scan.h"
17#include "pgpa_walker.h"
18
19#include "nodes/pathnodes.h"
20#include "nodes/print.h"
21#include "parser/parsetree.h"
22
23/*
24 * Temporary object used when unrolling a join tree.
25 */
39
41 Plan *plan,
50 bool *found_any_gather);
51static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
52 ElidedNode **elided_node);
53
55static bool is_sorting_plan(Plan *plan);
56
57/*
58 * Create an initially-empty object for unrolling joins.
59 *
60 * This function creates a helper object that can later be used to create a
61 * pgpa_unrolled_join, after first calling pgpa_unroll_join one or more times.
62 */
65{
67
69 join_unroller->nallocated = 4;
70 join_unroller->strategy =
72 join_unroller->inner_subplans =
73 palloc_array(Plan *, join_unroller->nallocated);
74 join_unroller->inner_elided_nodes =
76 join_unroller->inner_unrollers =
78 join_unroller->inner_beneath_any_gather =
79 palloc_array(bool, join_unroller->nallocated);
80
81 return join_unroller;
82}
83
84/*
85 * Unroll one level of an unrollable join tree.
86 *
87 * Our basic goal here is to unroll join trees as they occur in the Plan
88 * tree into a simpler and more regular structure that we can more easily
89 * use for further processing. Unrolling is outer-deep, so if the plan tree
90 * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be
91 * used for Join1 and Join2, but a different one will be needed for Join3,
92 * since that involves a join within the *inner* side of another join.
93 *
94 * pgpa_plan_walker creates a "top level" join unroller object when it
95 * encounters a join in a portion of the plan tree in which no join unroller
96 * is already active. From there, this function is responsible for determing
97 * to what portion of the plan tree that join unroller applies, and for
98 * creating any subordinate join unroller objects that are needed as a result
99 * of non-outer-deep join trees. We do this by returning the join unroller
100 * objects that should be used for further traversal of the outer and inner
101 * subtrees of the current plan node via *outer_join_unroller and
102 * *inner_join_unroller, respectively.
103 */
104void
110{
111 pgpa_join_strategy strategy;
113 *realouter;
116 int n;
117 bool found_any_outer_gather = false;
118 bool found_any_inner_gather = false;
119
121
122 /*
123 * We need to pass the join_unroller object down through certain types of
124 * plan nodes -- anything that's considered part of the join strategy, and
125 * any other nodes that can occur in a join tree despite not being scans
126 * or joins.
127 *
128 * This includes:
129 *
130 * (1) Materialize, Memoize, and Hash nodes, which are part of the join
131 * strategy,
132 *
133 * (2) Gather and Gather Merge nodes, which can occur at any point in the
134 * join tree where the planner decided to initiate parallelism,
135 *
136 * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
137 * or GatherMerge,
138 *
139 * (4) Agg and Unique nodes, which can occur when we decide to make the
140 * nullable side of a semijoin unique and then join the result, and
141 *
142 * (5) Result nodes with children, which can be added either to project to
143 * enforce a one-time filter (but Result nodes without children are
144 * degenerate scans or joins).
145 */
146 if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash)
150 {
152 return;
153 }
154
155 /*
156 * Since we've already handled nodes that require pass-through treatment,
157 * this should be an unrollable join.
158 */
159 strategy = pgpa_decompose_join(walker, plan,
164
165 /* If our workspace is full, expand it. */
166 if (join_unroller->nused >= join_unroller->nallocated)
167 {
168 join_unroller->nallocated *= 2;
169 join_unroller->strategy =
172 join_unroller->nallocated);
173 join_unroller->inner_subplans =
174 repalloc_array(join_unroller->inner_subplans,
175 Plan *,
176 join_unroller->nallocated);
177 join_unroller->inner_elided_nodes =
178 repalloc_array(join_unroller->inner_elided_nodes,
179 ElidedNode *,
180 join_unroller->nallocated);
181 join_unroller->inner_beneath_any_gather =
182 repalloc_array(join_unroller->inner_beneath_any_gather,
183 bool,
184 join_unroller->nallocated);
185 join_unroller->inner_unrollers =
186 repalloc_array(join_unroller->inner_unrollers,
189 }
190
191 /*
192 * Since we're flattening outer-deep join trees, it follows that if the
193 * outer side is still an unrollable join, it should be unrolled into this
194 * same object. Otherwise, we've reached the limit of what we can unroll
195 * into this object and must remember the outer side as the final outer
196 * subplan.
197 */
200 else
201 {
202 join_unroller->outer_subplan = realouter;
203 join_unroller->outer_elided_node = elidedouter;
204 join_unroller->outer_beneath_any_gather =
206 }
207
208 /*
209 * Store the inner subplan. If it's an unrollable join, it needs to be
210 * flattened in turn, but into a new unroller object, not this one.
211 */
212 n = join_unroller->nused++;
213 join_unroller->strategy[n] = strategy;
214 join_unroller->inner_subplans[n] = realinner;
215 join_unroller->inner_elided_nodes[n] = elidedinner;
216 join_unroller->inner_beneath_any_gather[n] =
220 else
222 join_unroller->inner_unrollers[n] = *inner_join_unroller;
223}
224
225/*
226 * Use the data we've accumulated in a pgpa_join_unroller object to construct
227 * a pgpa_unrolled_join.
228 */
232{
234 int i;
235
236 /*
237 * We shouldn't have gone even so far as to create a join unroller unless
238 * we found at least one unrollable join.
239 */
240 Assert(join_unroller->nused > 0);
241
242 /* Allocate result structures. */
244 ujoin->ninner = join_unroller->nused;
247
248 /* Handle the outermost join. */
249 ujoin->outer.plan = join_unroller->outer_subplan;
250 ujoin->outer.elided_node = join_unroller->outer_elided_node;
251 ujoin->outer.scan =
252 pgpa_build_scan(walker, ujoin->outer.plan,
253 ujoin->outer.elided_node,
254 join_unroller->outer_beneath_any_gather,
255 true);
256
257 /*
258 * We want the joins from the deepest part of the plan tree to appear
259 * first in the result object, but the join unroller adds them in exactly
260 * the reverse of that order, so we need to flip the order of the arrays
261 * when constructing the final result.
262 */
263 for (i = 0; i < join_unroller->nused; ++i)
264 {
265 int k = join_unroller->nused - i - 1;
266
267 /* Copy strategy, Plan, and ElidedNode. */
268 ujoin->strategy[i] = join_unroller->strategy[k];
269 ujoin->inner[i].plan = join_unroller->inner_subplans[k];
270 ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
271
272 /*
273 * Fill in remaining details, using either the nested join unroller,
274 * or by deriving them from the plan and elided nodes.
275 */
276 if (join_unroller->inner_unrollers[k] != NULL)
277 ujoin->inner[i].unrolled_join =
279 join_unroller->inner_unrollers[k]);
280 else
281 ujoin->inner[i].scan =
282 pgpa_build_scan(walker, ujoin->inner[i].plan,
283 ujoin->inner[i].elided_node,
284 join_unroller->inner_beneath_any_gather[k],
285 true);
286 }
287
288 return ujoin;
289}
290
291/*
292 * Free memory allocated for pgpa_join_unroller.
293 */
294void
296{
297 pfree(join_unroller->strategy);
298 pfree(join_unroller->inner_subplans);
299 pfree(join_unroller->inner_elided_nodes);
300 pfree(join_unroller->inner_unrollers);
301 pfree(join_unroller->inner_beneath_any_gather);
303}
304
305/*
306 * Identify the join strategy used by a join and the "real" inner and outer
307 * plans.
308 *
309 * For example, a Hash Join always has a Hash node on the inner side, but
310 * for all intents and purposes the real inner input is the Hash node's child,
311 * not the Hash node itself.
312 *
313 * Likewise, a Merge Join may have Sort node on the inner or outer side; if
314 * it does, the real input to the join is the Sort node's child, not the
315 * Sort node itself.
316 *
317 * In addition, with a Merge Join or a Nested Loop, the join planning code
318 * may add additional nodes such as Materialize or Memoize. We regard these
319 * as an aspect of the join strategy. As in the previous cases, the true input
320 * to the join is the underlying node.
321 *
322 * However, if any involved child node previously had a now-elided node stacked
323 * on top, then we can't "look through" that node -- indeed, what's going to be
324 * relevant for our purposes is the ElidedNode on top of that plan node, rather
325 * than the plan node itself.
326 *
327 * If there are multiple elided nodes, we want that one that would have been
328 * uppermost in the plan tree prior to setrefs processing; we expect to find
329 * that one last in the list of elided nodes.
330 *
331 * On return *realouter and *realinner will have been set to the real inner
332 * and real outer plans that we identified, and *elidedrealouter and
333 * *elidedrealinner to the last of any corresponding elided nodes.
334 * Additionally, *found_any_outer_gather and *found_any_inner_gather will
335 * be set to true if we looked through a Gather or Gather Merge node on
336 * that side of the join, and false otherwise.
337 */
343{
344 PlannedStmt *pstmt = walker->pstmt;
345 JoinType jointype = ((Join *) plan)->jointype;
350 pgpa_join_strategy strategy;
351 bool uniqueouter;
352 bool uniqueinner;
353
356 *found_any_outer_gather = false;
357 *found_any_inner_gather = false;
358
359 switch (nodeTag(plan))
360 {
361 case T_MergeJoin:
362
363 /*
364 * The planner may have chosen to place a Material node on the
365 * inner side of the MergeJoin; if this is present, we record it
366 * as part of the join strategy.
367 */
369 {
372 }
373 else
374 strategy = JSTRAT_MERGE_JOIN_PLAIN;
375
376 /*
377 * For a MergeJoin, either the outer or the inner subplan, or
378 * both, may have needed to be sorted; we must disregard any Sort
379 * or IncrementalSort node to find the real inner or outer
380 * subplan.
381 */
386 break;
387
388 case T_NestLoop:
389
390 /*
391 * The planner may have chosen to place a Material or Memoize node
392 * on the inner side of the NestLoop; if this is present, we
393 * record it as part of the join strategy.
394 */
396 {
399 }
400 else if (elidedinner == NULL && IsA(innerplan, Memoize))
401 {
404 }
405 else
406 strategy = JSTRAT_NESTED_LOOP_PLAIN;
407 break;
408
409 case T_HashJoin:
410
411 /*
412 * The inner subplan of a HashJoin is always a Hash node; the real
413 * inner subplan is the Hash node's child.
414 */
418 strategy = JSTRAT_HASH_JOIN;
419 break;
420
421 default:
422 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
423 }
424
425 /*
426 * The planner may have decided to implement a semijoin by first making
427 * the nullable side of the plan unique, and then performing a normal join
428 * against the result. Therefore, we might need to descend through a
429 * unique node on either side of the plan.
430 */
433
434 /*
435 * Can we see a Result node here, to project above a Gather? So far I've
436 * found no example that behaves that way; rather, the Gather or Gather
437 * Merge is made to project. Hence, don't test is_result_node_with_child()
438 * at this point.
439 */
440
441 /*
442 * The planner may have decided to parallelize part of the join tree, so
443 * we could find a Gather or Gather Merge node here. Note that, if
444 * present, this will appear below nodes we considered as part of the join
445 * strategy, but we could find another uniqueness-enforcing node below the
446 * Gather or Gather Merge, if present.
447 */
448 if (elidedouter == NULL)
449 {
454 uniqueouter = true;
455 }
456 if (elidedinner == NULL)
457 {
462 uniqueinner = true;
463 }
464
465 /*
466 * It's possible that a Result node has been inserted either to project a
467 * target list or to implement a one-time filter. If so, we can descend
468 * through it. Note that a Result node without a child would be a
469 * degenerate scan or join, and not something we could descend through.
470 */
475
476 /*
477 * If this is a semijoin that was converted to an inner join by making one
478 * side or the other unique, make a note that the inner or outer subplan,
479 * as appropriate, should be treated as a query plan feature when the main
480 * tree traversal reaches it.
481 *
482 * Conversely, if the planner could have made one side of the join unique
483 * and thereby converted it to an inner join, and chose not to do so, that
484 * is also worth noting.
485 *
486 * NB: This code could appear slightly higher up in this function, but
487 * none of the nodes through which we just descended should have
488 * associated RTIs.
489 *
490 * NB: This seems like a somewhat hacky way of passing information up to
491 * the main tree walk, but I don't currently have a better idea.
492 */
493 if (uniqueouter)
495 else if (jointype == JOIN_RIGHT_SEMI)
497 if (uniqueinner)
499 else if (jointype == JOIN_SEMI)
501
502 /* Set output parameters. */
507 return strategy;
508}
509
510/*
511 * Descend through a Plan node in a join tree that the caller has determined
512 * to be irrelevant.
513 *
514 * Updates *plan, and returns the last of any elided nodes pertaining to the
515 * new plan node.
516 */
517static ElidedNode *
519{
520 *plan = (*plan)->lefttree;
521 return pgpa_last_elided_node(pstmt, *plan);
522}
523
524/*
525 * Descend through a Gather or Gather Merge node, if present, and any Sort
526 * or IncrementalSort node occurring under a Gather Merge.
527 *
528 * Caller should have verified that there is no ElidedNode pertaining to
529 * the initial value of *plan.
530 *
531 * Updates *plan, and returns the last of any elided nodes pertaining to the
532 * new plan node. Sets *found_any_gather = true if either Gather or
533 * Gather Merge was found, and otherwise leaves it unchanged.
534 */
535static ElidedNode *
537 bool *found_any_gather)
538{
539 if (IsA(*plan, Gather))
540 {
541 *found_any_gather = true;
542 return pgpa_descend_node(pstmt, plan);
543 }
544
545 if (IsA(*plan, GatherMerge))
546 {
548
549 if (elided == NULL && is_sorting_plan(*plan))
550 elided = pgpa_descend_node(pstmt, plan);
551
552 *found_any_gather = true;
553 return elided;
554 }
555
556 return NULL;
557}
558
559/*
560 * If *plan is an Agg or Unique node, we want to descend through it, unless
561 * it has a corresponding elided node. If its immediate child is a Sort or
562 * IncrementalSort, we also want to descend through that, unless it has a
563 * corresponding elided node.
564 *
565 * On entry, *elided_node must be the last of any elided nodes corresponding
566 * to *plan; on exit, this will still be true, but *plan may have been updated.
567 *
568 * The reason we don't want to descend through elided nodes is that a single
569 * join tree can't cross through any sort of elided node: subqueries are
570 * planned separately, and planning inside an Append or MergeAppend is
571 * separate from planning outside of it.
572 *
573 * The return value is true if we descend through a node that we believe is
574 * making one side of a semijoin unique, and otherwise false.
575 */
576static bool
578 ElidedNode **elided_node)
579{
580 bool descend = false;
581 bool sjunique = false;
582
583 if (*elided_node != NULL)
584 return sjunique;
585
586 if (IsA(*plan, Unique))
587 {
588 descend = true;
589 sjunique = true;
590 }
591 else if (IsA(*plan, Agg))
592 {
593 /*
594 * If this is a simple Agg node, then assume it's here to implement
595 * semijoin uniqueness. Otherwise, assume it's completing an eager
596 * aggregation or partitionwise aggregation operation that began at a
597 * higher level of the plan tree.
598 *
599 * (Note that when we're using an Agg node for uniqueness, there's no
600 * need for any case other than AGGSPLIT_SIMPLE, because there's no
601 * aggregated column being computed. However, the fact that
602 * AGGSPLIT_SIMPLE is in use doesn't prove that this Agg is here for
603 * the semijoin uniqueness. Maybe we should adjust an Agg node to
604 * carry a "purpose" field so that code like this can be more certain
605 * of its analysis.)
606 */
607 descend = true;
608 sjunique = (((Agg *) *plan)->aggsplit == AGGSPLIT_SIMPLE);
609 }
610
611 if (descend)
612 {
613 *elided_node = pgpa_descend_node(pstmt, plan);
614
615 if (*elided_node == NULL && is_sorting_plan(*plan))
616 *elided_node = pgpa_descend_node(pstmt, plan);
617 }
618
619 return sjunique;
620}
621
622/*
623 * Is this a Result node that has a child?
624 */
625static bool
627{
628 return IsA(plan, Result) && plan->lefttree != NULL;
629}
630
631/*
632 * Is this a Plan node whose purpose is to put the data in a certain order?
633 */
634static bool
636{
637 return IsA(plan, Sort) || IsA(plan, IncrementalSort);
638}
#define Assert(condition)
Definition c.h:927
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define nodeTag(nodeptr)
Definition nodes.h:139
@ AGGSPLIT_SIMPLE
Definition nodes.h:387
JoinType
Definition nodes.h:298
@ JOIN_SEMI
Definition nodes.h:317
@ JOIN_RIGHT_SEMI
Definition nodes.h:319
#define plan(x)
Definition pg_regress.c:161
pgpa_unrolled_join * pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, pgpa_join_unroller *join_unroller)
Definition pgpa_join.c:230
static ElidedNode * pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
Definition pgpa_join.c:518
void pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan, bool beneath_any_gather, pgpa_join_unroller *join_unroller, pgpa_join_unroller **outer_join_unroller, pgpa_join_unroller **inner_join_unroller)
Definition pgpa_join.c:105
static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan, Plan **realouter, Plan **realinner, ElidedNode **elidedrealouter, ElidedNode **elidedrealinner, bool *found_any_outer_gather, bool *found_any_inner_gather)
Definition pgpa_join.c:339
static ElidedNode * pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, bool *found_any_gather)
Definition pgpa_join.c:536
static bool is_sorting_plan(Plan *plan)
Definition pgpa_join.c:635
void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
Definition pgpa_join.c:295
static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, ElidedNode **elided_node)
Definition pgpa_join.c:577
pgpa_join_unroller * pgpa_create_join_unroller(void)
Definition pgpa_join.c:64
static bool is_result_node_with_child(Plan *plan)
Definition pgpa_join.c:626
pgpa_join_strategy
Definition pgpa_join.h:28
@ JSTRAT_MERGE_JOIN_PLAIN
Definition pgpa_join.h:29
@ JSTRAT_NESTED_LOOP_MATERIALIZE
Definition pgpa_join.h:32
@ JSTRAT_NESTED_LOOP_MEMOIZE
Definition pgpa_join.h:33
@ JSTRAT_HASH_JOIN
Definition pgpa_join.h:34
@ JSTRAT_NESTED_LOOP_PLAIN
Definition pgpa_join.h:31
@ JSTRAT_MERGE_JOIN_MATERIALIZE
Definition pgpa_join.h:30
static bool pgpa_is_join(Plan *plan)
Definition pgpa_join.h:90
pgpa_scan * pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, ElidedNode *elided_node, bool beneath_any_gather, bool within_join_problem)
Definition pgpa_scan.c:44
ElidedNode * pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
void pgpa_add_future_feature(pgpa_plan_walker_context *walker, pgpa_qf_type type, Plan *plan)
@ PGPAQF_SEMIJOIN_UNIQUE
Definition pgpa_walker.h:66
@ PGPAQF_SEMIJOIN_NON_UNIQUE
Definition pgpa_walker.h:65
static int fb(int x)
struct Plan * lefttree
Definition plannodes.h:239
struct Plan * righttree
Definition plannodes.h:240
Plan ** inner_subplans
Definition pgpa_join.c:34
unsigned nallocated
Definition pgpa_join.c:28
bool outer_beneath_any_gather
Definition pgpa_join.c:32
bool * inner_beneath_any_gather
Definition pgpa_join.c:37
ElidedNode * outer_elided_node
Definition pgpa_join.c:31
ElidedNode ** inner_elided_nodes
Definition pgpa_join.c:35
pgpa_join_unroller ** inner_unrollers
Definition pgpa_join.c:36
pgpa_join_strategy * strategy
Definition pgpa_join.c:33