PostgreSQL Source Code  git master
partbounds.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * partbounds.c
4  * Support routines for manipulating partition bounds
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/partitioning/partbounds.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/relation.h"
18 #include "access/table.h"
19 #include "access/tableam.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "catalog/pg_type.h"
23 #include "commands/tablecmds.h"
24 #include "common/hashfn.h"
25 #include "executor/executor.h"
26 #include "miscadmin.h"
27 #include "nodes/makefuncs.h"
28 #include "nodes/nodeFuncs.h"
29 #include "nodes/pathnodes.h"
30 #include "parser/parse_coerce.h"
32 #include "partitioning/partdesc.h"
33 #include "utils/array.h"
34 #include "utils/builtins.h"
35 #include "utils/datum.h"
36 #include "utils/fmgroids.h"
37 #include "utils/lsyscache.h"
38 #include "utils/partcache.h"
39 #include "utils/ruleutils.h"
40 #include "utils/snapmgr.h"
41 #include "utils/syscache.h"
42 
43 /*
44  * When qsort'ing partition bounds after reading from the catalog, each bound
45  * is represented with one of the following structs.
46  */
47 
48 /* One bound of a hash partition */
49 typedef struct PartitionHashBound
50 {
51  int modulus;
52  int remainder;
53  int index;
55 
56 /* One value coming from some (index'th) list partition */
57 typedef struct PartitionListValue
58 {
59  int index;
62 
63 /* One bound of a range partition */
64 typedef struct PartitionRangeBound
65 {
66  int index;
67  Datum *datums; /* range bound datums */
68  PartitionRangeDatumKind *kind; /* the kind of each datum */
69  bool lower; /* this is the lower (vs upper) bound */
71 
72 /*
73  * Mapping from partitions of a joining relation to partitions of a join
74  * relation being computed (a.k.a merged partitions)
75  */
76 typedef struct PartitionMap
77 {
78  int nparts; /* number of partitions */
79  int *merged_indexes; /* indexes of merged partitions */
80  bool *merged; /* flags to indicate whether partitions are
81  * merged with non-dummy partitions */
82  bool did_remapping; /* did we re-map partitions? */
83  int *old_indexes; /* old indexes of merged partitions if
84  * did_remapping */
86 
87 /* Macro for comparing two range bounds */
88 #define compare_range_bounds(partnatts, partsupfunc, partcollations, \
89  bound1, bound2) \
90  (partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
91  (bound1)->datums, (bound1)->kind, (bound1)->lower, \
92  bound2))
93 
94 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
95 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
96  void *arg);
97 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
98  void *arg);
100  int nparts, PartitionKey key, int **mapping);
102  int nparts, PartitionKey key, int **mapping);
104  int nparts, PartitionKey key, int **mapping);
105 static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc,
106  Oid *partcollation,
107  RelOptInfo *outer_rel,
108  RelOptInfo *inner_rel,
109  JoinType jointype,
110  List **outer_parts,
111  List **inner_parts);
112 static PartitionBoundInfo merge_range_bounds(int partnatts,
113  FmgrInfo *partsupfuncs,
114  Oid *partcollations,
115  RelOptInfo *outer_rel,
116  RelOptInfo *inner_rel,
117  JoinType jointype,
118  List **outer_parts,
119  List **inner_parts);
120 static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
121 static void free_partition_map(PartitionMap *map);
122 static bool is_dummy_partition(RelOptInfo *rel, int part_index);
123 static int merge_matching_partitions(PartitionMap *outer_map,
124  PartitionMap *inner_map,
125  int outer_index,
126  int inner_index,
127  int *next_index);
128 static int process_outer_partition(PartitionMap *outer_map,
129  PartitionMap *inner_map,
130  bool outer_has_default,
131  bool inner_has_default,
132  int outer_index,
133  int inner_default,
134  JoinType jointype,
135  int *next_index,
136  int *default_index);
137 static int process_inner_partition(PartitionMap *outer_map,
138  PartitionMap *inner_map,
139  bool outer_has_default,
140  bool inner_has_default,
141  int inner_index,
142  int outer_default,
143  JoinType jointype,
144  int *next_index,
145  int *default_index);
146 static void merge_null_partitions(PartitionMap *outer_map,
147  PartitionMap *inner_map,
148  bool outer_has_null,
149  bool inner_has_null,
150  int outer_null,
151  int inner_null,
152  JoinType jointype,
153  int *next_index,
154  int *null_index);
155 static void merge_default_partitions(PartitionMap *outer_map,
156  PartitionMap *inner_map,
157  bool outer_has_default,
158  bool inner_has_default,
159  int outer_default,
160  int inner_default,
161  JoinType jointype,
162  int *next_index,
163  int *default_index);
164 static int merge_partition_with_dummy(PartitionMap *map, int index,
165  int *next_index);
166 static void fix_merged_indexes(PartitionMap *outer_map,
167  PartitionMap *inner_map,
168  int nmerged, List *merged_indexes);
169 static void generate_matching_part_pairs(RelOptInfo *outer_rel,
170  RelOptInfo *inner_rel,
171  PartitionMap *outer_map,
172  PartitionMap *inner_map,
173  int nmerged,
174  List **outer_parts,
175  List **inner_parts);
177  List *merged_datums,
178  List *merged_kinds,
179  List *merged_indexes,
180  int null_index,
181  int default_index);
182 static int get_range_partition(RelOptInfo *rel,
184  int *lb_pos,
186  PartitionRangeBound *ub);
188  int *lb_pos,
190  PartitionRangeBound *ub);
191 static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
192  Oid *partcollations,
193  PartitionRangeBound *outer_lb,
194  PartitionRangeBound *outer_ub,
195  PartitionRangeBound *inner_lb,
196  PartitionRangeBound *inner_ub,
197  int *lb_cmpval, int *ub_cmpval);
198 static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
199  Oid *partcollations, JoinType jointype,
200  PartitionRangeBound *outer_lb,
201  PartitionRangeBound *outer_ub,
202  PartitionRangeBound *inner_lb,
203  PartitionRangeBound *inner_ub,
204  int lb_cmpval, int ub_cmpval,
205  PartitionRangeBound *merged_lb,
206  PartitionRangeBound *merged_ub);
207 static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
208  Oid *partcollations,
209  PartitionRangeBound *merged_lb,
210  PartitionRangeBound *merged_ub,
211  int merged_index,
212  List **merged_datums,
213  List **merged_kinds,
214  List **merged_indexes);
216  List *datums, bool lower);
217 static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
218  int remainder2);
219 static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
220  Oid *partcollation, Datum *datums1,
221  PartitionRangeDatumKind *kind1, bool lower1,
222  PartitionRangeBound *b2);
223 static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224  Oid *partcollation,
225  PartitionBoundInfo boundinfo,
226  PartitionRangeBound *probe, int32 *cmpval);
227 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
228  uint16 strategy, Expr *arg1, Expr *arg2);
230  StrategyNumber strategy, bool *need_relabel);
231 static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
232 static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
233 static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
234  bool for_default);
235 static void get_range_key_properties(PartitionKey key, int keynum,
236  PartitionRangeDatum *ldatum,
237  PartitionRangeDatum *udatum,
238  ListCell **partexprs_item,
239  Expr **keyCol,
240  Const **lower_val, Const **upper_val);
242 
243 /*
244  * get_qual_from_partbound
245  * Given a parser node for partition bound, return the list of executable
246  * expressions as partition constraint
247  */
248 List *
250 {
252  List *my_qual = NIL;
253 
254  Assert(key != NULL);
255 
256  switch (key->strategy)
257  {
260  my_qual = get_qual_for_hash(parent, spec);
261  break;
262 
265  my_qual = get_qual_for_list(parent, spec);
266  break;
267 
270  my_qual = get_qual_for_range(parent, spec, false);
271  break;
272  }
273 
274  return my_qual;
275 }
276 
277 /*
278  * partition_bounds_create
279  * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
280  * nodes
281  *
282  * This function creates a PartitionBoundInfo and fills the values of its
283  * various members based on the input list. Importantly, 'datums' array will
284  * contain Datum representation of individual bounds (possibly after
285  * de-duplication as in case of range bounds), sorted in a canonical order
286  * defined by qsort_partition_* functions of respective partitioning methods.
287  * 'indexes' array will contain as many elements as there are bounds (specific
288  * exceptions to this rule are listed in the function body), which represent
289  * the 0-based canonical positions of partitions.
290  *
291  * Upon return from this function, *mapping is set to an array of
292  * list_length(boundspecs) elements, each of which maps the original index of
293  * a partition to its canonical index.
294  *
295  * Note: The objects returned by this function are wholly allocated in the
296  * current memory context.
297  */
300  PartitionKey key, int **mapping)
301 {
302  int i;
303 
304  Assert(nparts > 0);
305 
306  /*
307  * For each partitioning method, we first convert the partition bounds
308  * from their parser node representation to the internal representation,
309  * along with any additional preprocessing (such as de-duplicating range
310  * bounds). Resulting bound datums are then added to the 'datums' array
311  * in PartitionBoundInfo. For each datum added, an integer indicating the
312  * canonical partition index is added to the 'indexes' array.
313  *
314  * For each bound, we remember its partition's position (0-based) in the
315  * original list to later map it to the canonical index.
316  */
317 
318  /*
319  * Initialize mapping array with invalid values, this is filled within
320  * each sub-routine below depending on the bound type.
321  */
322  *mapping = (int *) palloc(sizeof(int) * nparts);
323  for (i = 0; i < nparts; i++)
324  (*mapping)[i] = -1;
325 
326  switch (key->strategy)
327  {
329  return create_hash_bounds(boundspecs, nparts, key, mapping);
330 
332  return create_list_bounds(boundspecs, nparts, key, mapping);
333 
335  return create_range_bounds(boundspecs, nparts, key, mapping);
336  }
337 
338  Assert(false);
339  return NULL; /* keep compiler quiet */
340 }
341 
342 /*
343  * create_hash_bounds
344  * Create a PartitionBoundInfo for a hash partitioned table
345  */
346 static PartitionBoundInfo
347 create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
348  PartitionKey key, int **mapping)
349 {
350  PartitionBoundInfo boundinfo;
351  PartitionHashBound *hbounds;
352  int i;
353  int greatest_modulus;
354  Datum *boundDatums;
355 
356  boundinfo = (PartitionBoundInfoData *)
358  boundinfo->strategy = key->strategy;
359  /* No special hash partitions. */
360  boundinfo->null_index = -1;
361  boundinfo->default_index = -1;
362 
363  hbounds = (PartitionHashBound *)
364  palloc(nparts * sizeof(PartitionHashBound));
365 
366  /* Convert from node to the internal representation */
367  for (i = 0; i < nparts; i++)
368  {
369  PartitionBoundSpec *spec = boundspecs[i];
370 
371  if (spec->strategy != PARTITION_STRATEGY_HASH)
372  elog(ERROR, "invalid strategy in partition bound spec");
373 
374  hbounds[i].modulus = spec->modulus;
375  hbounds[i].remainder = spec->remainder;
376  hbounds[i].index = i;
377  }
378 
379  /* Sort all the bounds in ascending order */
380  qsort(hbounds, nparts, sizeof(PartitionHashBound),
382 
383  /* After sorting, moduli are now stored in ascending order. */
384  greatest_modulus = hbounds[nparts - 1].modulus;
385 
386  boundinfo->ndatums = nparts;
387  boundinfo->datums = (Datum **) palloc0(nparts * sizeof(Datum *));
388  boundinfo->kind = NULL;
389  boundinfo->interleaved_parts = NULL;
390  boundinfo->nindexes = greatest_modulus;
391  boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
392  for (i = 0; i < greatest_modulus; i++)
393  boundinfo->indexes[i] = -1;
394 
395  /*
396  * In the loop below, to save from allocating a series of small datum
397  * arrays, here we just allocate a single array and below we'll just
398  * assign a portion of this array per partition.
399  */
400  boundDatums = (Datum *) palloc(nparts * 2 * sizeof(Datum));
401 
402  /*
403  * For hash partitioning, there are as many datums (modulus and remainder
404  * pairs) as there are partitions. Indexes are simply values ranging from
405  * 0 to (nparts - 1).
406  */
407  for (i = 0; i < nparts; i++)
408  {
409  int modulus = hbounds[i].modulus;
410  int remainder = hbounds[i].remainder;
411 
412  boundinfo->datums[i] = &boundDatums[i * 2];
413  boundinfo->datums[i][0] = Int32GetDatum(modulus);
414  boundinfo->datums[i][1] = Int32GetDatum(remainder);
415 
416  while (remainder < greatest_modulus)
417  {
418  /* overlap? */
419  Assert(boundinfo->indexes[remainder] == -1);
420  boundinfo->indexes[remainder] = i;
421  remainder += modulus;
422  }
423 
424  (*mapping)[hbounds[i].index] = i;
425  }
426  pfree(hbounds);
427 
428  return boundinfo;
429 }
430 
431 /*
432  * get_non_null_list_datum_count
433  * Counts the number of non-null Datums in each partition.
434  */
435 static int
437 {
438  int i;
439  int count = 0;
440 
441  for (i = 0; i < nparts; i++)
442  {
443  ListCell *lc;
444 
445  foreach(lc, boundspecs[i]->listdatums)
446  {
447  Const *val = lfirst_node(Const, lc);
448 
449  if (!val->constisnull)
450  count++;
451  }
452  }
453 
454  return count;
455 }
456 
457 /*
458  * create_list_bounds
459  * Create a PartitionBoundInfo for a list partitioned table
460  */
461 static PartitionBoundInfo
462 create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
463  PartitionKey key, int **mapping)
464 {
465  PartitionBoundInfo boundinfo;
466  PartitionListValue *all_values;
467  int i;
468  int j;
469  int ndatums;
470  int next_index = 0;
471  int default_index = -1;
472  int null_index = -1;
473  Datum *boundDatums;
474 
475  boundinfo = (PartitionBoundInfoData *)
477  boundinfo->strategy = key->strategy;
478  /* Will be set correctly below. */
479  boundinfo->null_index = -1;
480  boundinfo->default_index = -1;
481 
482  ndatums = get_non_null_list_datum_count(boundspecs, nparts);
483  all_values = (PartitionListValue *)
484  palloc(ndatums * sizeof(PartitionListValue));
485 
486  /* Create a unified list of non-null values across all partitions. */
487  for (j = 0, i = 0; i < nparts; i++)
488  {
489  PartitionBoundSpec *spec = boundspecs[i];
490  ListCell *c;
491 
492  if (spec->strategy != PARTITION_STRATEGY_LIST)
493  elog(ERROR, "invalid strategy in partition bound spec");
494 
495  /*
496  * Note the index of the partition bound spec for the default
497  * partition. There's no datum to add to the list on non-null datums
498  * for this partition.
499  */
500  if (spec->is_default)
501  {
502  default_index = i;
503  continue;
504  }
505 
506  foreach(c, spec->listdatums)
507  {
508  Const *val = lfirst_node(Const, c);
509 
510  if (!val->constisnull)
511  {
512  all_values[j].index = i;
513  all_values[j].value = val->constvalue;
514  j++;
515  }
516  else
517  {
518  /*
519  * Never put a null into the values array; save the index of
520  * the partition that stores nulls, instead.
521  */
522  if (null_index != -1)
523  elog(ERROR, "found null more than once");
524  null_index = i;
525  }
526  }
527  }
528 
529  /* ensure we found a Datum for every slot in the all_values array */
530  Assert(j == ndatums);
531 
532  qsort_arg(all_values, ndatums, sizeof(PartitionListValue),
534 
535  boundinfo->ndatums = ndatums;
536  boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
537  boundinfo->kind = NULL;
538  boundinfo->interleaved_parts = NULL;
539  boundinfo->nindexes = ndatums;
540  boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
541 
542  /*
543  * In the loop below, to save from allocating a series of small datum
544  * arrays, here we just allocate a single array and below we'll just
545  * assign a portion of this array per datum.
546  */
547  boundDatums = (Datum *) palloc(ndatums * sizeof(Datum));
548 
549  /*
550  * Copy values. Canonical indexes are values ranging from 0 to (nparts -
551  * 1) assigned to each partition such that all datums of a given partition
552  * receive the same value. The value for a given partition is the index of
553  * that partition's smallest datum in the all_values[] array.
554  */
555  for (i = 0; i < ndatums; i++)
556  {
557  int orig_index = all_values[i].index;
558 
559  boundinfo->datums[i] = &boundDatums[i];
560  boundinfo->datums[i][0] = datumCopy(all_values[i].value,
561  key->parttypbyval[0],
562  key->parttyplen[0]);
563 
564  /* If the old index has no mapping, assign one */
565  if ((*mapping)[orig_index] == -1)
566  (*mapping)[orig_index] = next_index++;
567 
568  boundinfo->indexes[i] = (*mapping)[orig_index];
569  }
570 
571  pfree(all_values);
572 
573  /*
574  * Set the canonical value for null_index, if any.
575  *
576  * It is possible that the null-accepting partition has not been assigned
577  * an index yet, which could happen if such partition accepts only null
578  * and hence not handled in the above loop which only looked at non-null
579  * values.
580  */
581  if (null_index != -1)
582  {
583  Assert(null_index >= 0);
584  if ((*mapping)[null_index] == -1)
585  (*mapping)[null_index] = next_index++;
586  boundinfo->null_index = (*mapping)[null_index];
587  }
588 
589  /* Set the canonical value for default_index, if any. */
590  if (default_index != -1)
591  {
592  /*
593  * The default partition accepts any value not specified in the lists
594  * of other partitions, hence it should not get mapped index while
595  * assigning those for non-null datums.
596  */
597  Assert(default_index >= 0);
598  Assert((*mapping)[default_index] == -1);
599  (*mapping)[default_index] = next_index++;
600  boundinfo->default_index = (*mapping)[default_index];
601  }
602 
603  /*
604  * Calculate interleaved partitions. Here we look for partitions which
605  * might be interleaved with other partitions and set a bit in
606  * interleaved_parts for any partitions which may be interleaved with
607  * another partition.
608  */
609 
610  /*
611  * There must be multiple partitions to have any interleaved partitions,
612  * otherwise there's nothing to interleave with.
613  */
614  if (nparts > 1)
615  {
616  /*
617  * Short-circuit check to see if only 1 Datum is allowed per
618  * partition. When this is true there's no need to do the more
619  * expensive checks to look for interleaved values.
620  */
621  if (boundinfo->ndatums +
622  partition_bound_accepts_nulls(boundinfo) +
623  partition_bound_has_default(boundinfo) != nparts)
624  {
625  int last_index = -1;
626 
627  /*
628  * Since the indexes array is sorted in Datum order, if any
629  * partitions are interleaved then it will show up by the
630  * partition indexes not being in ascending order. Here we check
631  * for that and record all partitions that are out of order.
632  */
633  for (i = 0; i < boundinfo->nindexes; i++)
634  {
635  int index = boundinfo->indexes[i];
636 
637  if (index < last_index)
638  boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
639  index);
640 
641  /*
642  * Otherwise, if the null_index exists in the indexes array,
643  * then the NULL partition must also allow some other Datum,
644  * therefore it's "interleaved".
645  */
646  else if (partition_bound_accepts_nulls(boundinfo) &&
647  index == boundinfo->null_index)
648  boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
649  index);
650 
651  last_index = index;
652  }
653  }
654 
655  /*
656  * The DEFAULT partition is the "catch-all" partition that can contain
657  * anything that does not belong to any other partition. If there are
658  * any other partitions then the DEFAULT partition must be marked as
659  * interleaved.
660  */
661  if (partition_bound_has_default(boundinfo))
662  boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
663  boundinfo->default_index);
664  }
665 
666 
667  /* All partitions must now have been assigned canonical indexes. */
668  Assert(next_index == nparts);
669  return boundinfo;
670 }
671 
672 /*
673  * create_range_bounds
674  * Create a PartitionBoundInfo for a range partitioned table
675  */
676 static PartitionBoundInfo
677 create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
678  PartitionKey key, int **mapping)
679 {
680  PartitionBoundInfo boundinfo;
681  PartitionRangeBound **rbounds = NULL;
682  PartitionRangeBound **all_bounds,
683  *prev;
684  int i,
685  k,
686  partnatts;
687  int ndatums = 0;
688  int default_index = -1;
689  int next_index = 0;
690  Datum *boundDatums;
691  PartitionRangeDatumKind *boundKinds;
692 
693  boundinfo = (PartitionBoundInfoData *)
695  boundinfo->strategy = key->strategy;
696  /* There is no special null-accepting range partition. */
697  boundinfo->null_index = -1;
698  /* Will be set correctly below. */
699  boundinfo->default_index = -1;
700 
701  all_bounds = (PartitionRangeBound **)
702  palloc0(2 * nparts * sizeof(PartitionRangeBound *));
703 
704  /* Create a unified list of range bounds across all the partitions. */
705  ndatums = 0;
706  for (i = 0; i < nparts; i++)
707  {
708  PartitionBoundSpec *spec = boundspecs[i];
710  *upper;
711 
712  if (spec->strategy != PARTITION_STRATEGY_RANGE)
713  elog(ERROR, "invalid strategy in partition bound spec");
714 
715  /*
716  * Note the index of the partition bound spec for the default
717  * partition. There's no datum to add to the all_bounds array for
718  * this partition.
719  */
720  if (spec->is_default)
721  {
722  default_index = i;
723  continue;
724  }
725 
727  upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
728  all_bounds[ndatums++] = lower;
729  all_bounds[ndatums++] = upper;
730  }
731 
732  Assert(ndatums == nparts * 2 ||
733  (default_index != -1 && ndatums == (nparts - 1) * 2));
734 
735  /* Sort all the bounds in ascending order */
736  qsort_arg(all_bounds, ndatums,
737  sizeof(PartitionRangeBound *),
739  key);
740 
741  /* Save distinct bounds from all_bounds into rbounds. */
742  rbounds = (PartitionRangeBound **)
743  palloc(ndatums * sizeof(PartitionRangeBound *));
744  k = 0;
745  prev = NULL;
746  for (i = 0; i < ndatums; i++)
747  {
748  PartitionRangeBound *cur = all_bounds[i];
749  bool is_distinct = false;
750  int j;
751 
752  /* Is the current bound distinct from the previous one? */
753  for (j = 0; j < key->partnatts; j++)
754  {
755  Datum cmpval;
756 
757  if (prev == NULL || cur->kind[j] != prev->kind[j])
758  {
759  is_distinct = true;
760  break;
761  }
762 
763  /*
764  * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
765  * them as equal, since any values after this point must be
766  * ignored.
767  */
768  if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
769  break;
770 
771  cmpval = FunctionCall2Coll(&key->partsupfunc[j],
772  key->partcollation[j],
773  cur->datums[j],
774  prev->datums[j]);
775  if (DatumGetInt32(cmpval) != 0)
776  {
777  is_distinct = true;
778  break;
779  }
780  }
781 
782  /*
783  * Only if the bound is distinct save it into a temporary array, i.e,
784  * rbounds which is later copied into boundinfo datums array.
785  */
786  if (is_distinct)
787  rbounds[k++] = all_bounds[i];
788 
789  prev = cur;
790  }
791 
792  pfree(all_bounds);
793 
794  /* Update ndatums to hold the count of distinct datums. */
795  ndatums = k;
796 
797  /*
798  * Add datums to boundinfo. Canonical indexes are values ranging from 0
799  * to nparts - 1, assigned in that order to each partition's upper bound.
800  * For 'datums' elements that are lower bounds, there is -1 in the
801  * 'indexes' array to signify that no partition exists for the values less
802  * than such a bound and greater than or equal to the previous upper
803  * bound.
804  */
805  boundinfo->ndatums = ndatums;
806  boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
807  boundinfo->kind = (PartitionRangeDatumKind **)
808  palloc(ndatums *
809  sizeof(PartitionRangeDatumKind *));
810  boundinfo->interleaved_parts = NULL;
811 
812  /*
813  * For range partitioning, an additional value of -1 is stored as the last
814  * element of the indexes[] array.
815  */
816  boundinfo->nindexes = ndatums + 1;
817  boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
818 
819  /*
820  * In the loop below, to save from allocating a series of small arrays,
821  * here we just allocate a single array for Datums and another for
822  * PartitionRangeDatumKinds, below we'll just assign a portion of these
823  * arrays in each loop.
824  */
825  partnatts = key->partnatts;
826  boundDatums = (Datum *) palloc(ndatums * partnatts * sizeof(Datum));
827  boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
828  sizeof(PartitionRangeDatumKind));
829 
830  for (i = 0; i < ndatums; i++)
831  {
832  int j;
833 
834  boundinfo->datums[i] = &boundDatums[i * partnatts];
835  boundinfo->kind[i] = &boundKinds[i * partnatts];
836  for (j = 0; j < partnatts; j++)
837  {
838  if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
839  boundinfo->datums[i][j] =
840  datumCopy(rbounds[i]->datums[j],
841  key->parttypbyval[j],
842  key->parttyplen[j]);
843  boundinfo->kind[i][j] = rbounds[i]->kind[j];
844  }
845 
846  /*
847  * There is no mapping for invalid indexes.
848  *
849  * Any lower bounds in the rbounds array have invalid indexes
850  * assigned, because the values between the previous bound (if there
851  * is one) and this (lower) bound are not part of the range of any
852  * existing partition.
853  */
854  if (rbounds[i]->lower)
855  boundinfo->indexes[i] = -1;
856  else
857  {
858  int orig_index = rbounds[i]->index;
859 
860  /* If the old index has no mapping, assign one */
861  if ((*mapping)[orig_index] == -1)
862  (*mapping)[orig_index] = next_index++;
863 
864  boundinfo->indexes[i] = (*mapping)[orig_index];
865  }
866  }
867 
868  pfree(rbounds);
869 
870  /* Set the canonical value for default_index, if any. */
871  if (default_index != -1)
872  {
873  Assert(default_index >= 0 && (*mapping)[default_index] == -1);
874  (*mapping)[default_index] = next_index++;
875  boundinfo->default_index = (*mapping)[default_index];
876  }
877 
878  /* The extra -1 element. */
879  Assert(i == ndatums);
880  boundinfo->indexes[i] = -1;
881 
882  /* All partitions must now have been assigned canonical indexes. */
883  Assert(next_index == nparts);
884  return boundinfo;
885 }
886 
887 /*
888  * Are two partition bound collections logically equal?
889  *
890  * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
891  * This is also useful when b1 and b2 are bound collections of two separate
892  * relations, respectively, because PartitionBoundInfo is a canonical
893  * representation of partition bounds.
894  */
895 bool
896 partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
898 {
899  int i;
900 
901  if (b1->strategy != b2->strategy)
902  return false;
903 
904  if (b1->ndatums != b2->ndatums)
905  return false;
906 
907  if (b1->nindexes != b2->nindexes)
908  return false;
909 
910  if (b1->null_index != b2->null_index)
911  return false;
912 
913  if (b1->default_index != b2->default_index)
914  return false;
915 
916  /* For all partition strategies, the indexes[] arrays have to match */
917  for (i = 0; i < b1->nindexes; i++)
918  {
919  if (b1->indexes[i] != b2->indexes[i])
920  return false;
921  }
922 
923  /* Finally, compare the datums[] arrays */
925  {
926  /*
927  * We arrange the partitions in the ascending order of their moduli
928  * and remainders. Also every modulus is factor of next larger
929  * modulus. Therefore we can safely store index of a given partition
930  * in indexes array at remainder of that partition. Also entries at
931  * (remainder + N * modulus) positions in indexes array are all same
932  * for (modulus, remainder) specification for any partition. Thus the
933  * datums arrays from the given bounds are the same, if and only if
934  * their indexes arrays are the same. So, it suffices to compare the
935  * indexes arrays.
936  *
937  * Nonetheless make sure that the bounds are indeed the same when the
938  * indexes match. Hash partition bound stores modulus and remainder
939  * at b1->datums[i][0] and b1->datums[i][1] position respectively.
940  */
941 #ifdef USE_ASSERT_CHECKING
942  for (i = 0; i < b1->ndatums; i++)
943  Assert((b1->datums[i][0] == b2->datums[i][0] &&
944  b1->datums[i][1] == b2->datums[i][1]));
945 #endif
946  }
947  else
948  {
949  for (i = 0; i < b1->ndatums; i++)
950  {
951  int j;
952 
953  for (j = 0; j < partnatts; j++)
954  {
955  /* For range partitions, the bounds might not be finite. */
956  if (b1->kind != NULL)
957  {
958  /* The different kinds of bound all differ from each other */
959  if (b1->kind[i][j] != b2->kind[i][j])
960  return false;
961 
962  /*
963  * Non-finite bounds are equal without further
964  * examination.
965  */
966  if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
967  continue;
968  }
969 
970  /*
971  * Compare the actual values. Note that it would be both
972  * incorrect and unsafe to invoke the comparison operator
973  * derived from the partitioning specification here. It would
974  * be incorrect because we want the relcache entry to be
975  * updated for ANY change to the partition bounds, not just
976  * those that the partitioning operator thinks are
977  * significant. It would be unsafe because we might reach
978  * this code in the context of an aborted transaction, and an
979  * arbitrary partitioning operator might not be safe in that
980  * context. datumIsEqual() should be simple enough to be
981  * safe.
982  */
983  if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
984  parttypbyval[j], parttyplen[j]))
985  return false;
986  }
987  }
988  }
989  return true;
990 }
991 
992 /*
993  * Return a copy of given PartitionBoundInfo structure. The data types of bounds
994  * are described by given partition key specification.
995  *
996  * Note: it's important that this function and its callees not do any catalog
997  * access, nor anything else that would result in allocating memory other than
998  * the returned data structure. Since this is called in a long-lived context,
999  * that would result in unwanted memory leaks.
1000  */
1003  PartitionKey key)
1004 {
1006  int i;
1007  int ndatums;
1008  int nindexes;
1009  int partnatts;
1010  bool hash_part;
1011  int natts;
1012  Datum *boundDatums;
1013 
1015 
1016  dest->strategy = src->strategy;
1017  ndatums = dest->ndatums = src->ndatums;
1018  nindexes = dest->nindexes = src->nindexes;
1019  partnatts = key->partnatts;
1020 
1021  /* List partitioned tables have only a single partition key. */
1022  Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
1023 
1024  dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
1025 
1026  if (src->kind != NULL)
1027  {
1028  PartitionRangeDatumKind *boundKinds;
1029 
1030  /* only RANGE partition should have a non-NULL kind */
1031  Assert(key->strategy == PARTITION_STRATEGY_RANGE);
1032 
1033  dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
1034  sizeof(PartitionRangeDatumKind *));
1035 
1036  /*
1037  * In the loop below, to save from allocating a series of small arrays
1038  * for storing the PartitionRangeDatumKind, we allocate a single chunk
1039  * here and use a smaller portion of it for each datum.
1040  */
1041  boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
1042  sizeof(PartitionRangeDatumKind));
1043 
1044  for (i = 0; i < ndatums; i++)
1045  {
1046  dest->kind[i] = &boundKinds[i * partnatts];
1047  memcpy(dest->kind[i], src->kind[i],
1048  sizeof(PartitionRangeDatumKind) * partnatts);
1049  }
1050  }
1051  else
1052  dest->kind = NULL;
1053 
1054  /* copy interleaved partitions for LIST partitioned tables */
1055  dest->interleaved_parts = bms_copy(src->interleaved_parts);
1056 
1057  /*
1058  * For hash partitioning, datums array will have two elements - modulus
1059  * and remainder.
1060  */
1061  hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
1062  natts = hash_part ? 2 : partnatts;
1063  boundDatums = palloc(ndatums * natts * sizeof(Datum));
1064 
1065  for (i = 0; i < ndatums; i++)
1066  {
1067  int j;
1068 
1069  dest->datums[i] = &boundDatums[i * natts];
1070 
1071  for (j = 0; j < natts; j++)
1072  {
1073  bool byval;
1074  int typlen;
1075 
1076  if (hash_part)
1077  {
1078  typlen = sizeof(int32); /* Always int4 */
1079  byval = true; /* int4 is pass-by-value */
1080  }
1081  else
1082  {
1083  byval = key->parttypbyval[j];
1084  typlen = key->parttyplen[j];
1085  }
1086 
1087  if (dest->kind == NULL ||
1088  dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
1089  dest->datums[i][j] = datumCopy(src->datums[i][j],
1090  byval, typlen);
1091  }
1092  }
1093 
1094  dest->indexes = (int *) palloc(sizeof(int) * nindexes);
1095  memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
1096 
1097  dest->null_index = src->null_index;
1098  dest->default_index = src->default_index;
1099 
1100  return dest;
1101 }
1102 
1103 /*
1104  * partition_bounds_merge
1105  * Check to see whether every partition of 'outer_rel' matches/overlaps
1106  * one partition of 'inner_rel' at most, and vice versa; and if so, build
1107  * and return the partition bounds for a join relation between the rels,
1108  * generating two lists of the matching/overlapping partitions, which are
1109  * returned to *outer_parts and *inner_parts respectively.
1110  *
1111  * The lists contain the same number of partitions, and the partitions at the
1112  * same positions in the lists indicate join pairs used for partitioned join.
1113  * If a partition on one side matches/overlaps multiple partitions on the other
1114  * side, this function returns NULL, setting *outer_parts and *inner_parts to
1115  * NIL.
1116  */
1119  FmgrInfo *partsupfunc, Oid *partcollation,
1120  RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1121  JoinType jointype,
1122  List **outer_parts, List **inner_parts)
1123 {
1124  /*
1125  * Currently, this function is called only from try_partitionwise_join(),
1126  * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
1127  */
1128  Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
1129  jointype == JOIN_FULL || jointype == JOIN_SEMI ||
1130  jointype == JOIN_ANTI);
1131 
1132  /* The partitioning strategies should be the same. */
1133  Assert(outer_rel->boundinfo->strategy == inner_rel->boundinfo->strategy);
1134 
1135  *outer_parts = *inner_parts = NIL;
1136  switch (outer_rel->boundinfo->strategy)
1137  {
1139 
1140  /*
1141  * For hash partitioned tables, we currently support partitioned
1142  * join only when they have exactly the same partition bounds.
1143  *
1144  * XXX: it might be possible to relax the restriction to support
1145  * cases where hash partitioned tables have missing partitions
1146  * and/or different moduli, but it's not clear if it would be
1147  * useful to support the former case since it's unusual to have
1148  * missing partitions. On the other hand, it would be useful to
1149  * support the latter case, but in that case, there is a high
1150  * probability that a partition on one side will match multiple
1151  * partitions on the other side, which is the scenario the current
1152  * implementation of partitioned join can't handle.
1153  */
1154  return NULL;
1155 
1157  return merge_list_bounds(partsupfunc,
1158  partcollation,
1159  outer_rel,
1160  inner_rel,
1161  jointype,
1162  outer_parts,
1163  inner_parts);
1164 
1166  return merge_range_bounds(partnatts,
1167  partsupfunc,
1168  partcollation,
1169  outer_rel,
1170  inner_rel,
1171  jointype,
1172  outer_parts,
1173  inner_parts);
1174  }
1175 
1176  return NULL;
1177 }
1178 
1179 /*
1180  * merge_list_bounds
1181  * Create the partition bounds for a join relation between list
1182  * partitioned tables, if possible
1183  *
1184  * In this function we try to find sets of matching partitions from both sides
1185  * by comparing list values stored in their partition bounds. Since the list
1186  * values appear in the ascending order, an algorithm similar to merge join is
1187  * used for that. If a partition on one side doesn't have a matching
1188  * partition on the other side, the algorithm tries to match it with the
1189  * default partition on the other side if any; if not, the algorithm tries to
1190  * match it with a dummy partition on the other side if it's on the
1191  * non-nullable side of an outer join. Also, if both sides have the default
1192  * partitions, the algorithm tries to match them with each other. We give up
1193  * if the algorithm finds a partition matching multiple partitions on the
1194  * other side, which is the scenario the current implementation of partitioned
1195  * join can't handle.
1196  */
1197 static PartitionBoundInfo
1198 merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
1199  RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1200  JoinType jointype,
1201  List **outer_parts, List **inner_parts)
1202 {
1203  PartitionBoundInfo merged_bounds = NULL;
1204  PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1205  PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1206  bool outer_has_default = partition_bound_has_default(outer_bi);
1207  bool inner_has_default = partition_bound_has_default(inner_bi);
1208  int outer_default = outer_bi->default_index;
1209  int inner_default = inner_bi->default_index;
1210  bool outer_has_null = partition_bound_accepts_nulls(outer_bi);
1211  bool inner_has_null = partition_bound_accepts_nulls(inner_bi);
1212  PartitionMap outer_map;
1213  PartitionMap inner_map;
1214  int outer_pos;
1215  int inner_pos;
1216  int next_index = 0;
1217  int null_index = -1;
1218  int default_index = -1;
1219  List *merged_datums = NIL;
1220  List *merged_indexes = NIL;
1221 
1222  Assert(*outer_parts == NIL);
1223  Assert(*inner_parts == NIL);
1224  Assert(outer_bi->strategy == inner_bi->strategy &&
1225  outer_bi->strategy == PARTITION_STRATEGY_LIST);
1226  /* List partitioning doesn't require kinds. */
1227  Assert(!outer_bi->kind && !inner_bi->kind);
1228 
1229  init_partition_map(outer_rel, &outer_map);
1230  init_partition_map(inner_rel, &inner_map);
1231 
1232  /*
1233  * If the default partitions (if any) have been proven empty, deem them
1234  * non-existent.
1235  */
1236  if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1237  outer_has_default = false;
1238  if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1239  inner_has_default = false;
1240 
1241  /*
1242  * Merge partitions from both sides. In each iteration we compare a pair
1243  * of list values, one from each side, and decide whether the
1244  * corresponding partitions match or not. If the two values match
1245  * exactly, move to the next pair of list values, otherwise move to the
1246  * next list value on the side with a smaller list value.
1247  */
1248  outer_pos = inner_pos = 0;
1249  while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
1250  {
1251  int outer_index = -1;
1252  int inner_index = -1;
1253  Datum *outer_datums;
1254  Datum *inner_datums;
1255  int cmpval;
1256  Datum *merged_datum = NULL;
1257  int merged_index = -1;
1258 
1259  if (outer_pos < outer_bi->ndatums)
1260  {
1261  /*
1262  * If the partition on the outer side has been proven empty,
1263  * ignore it and move to the next datum on the outer side.
1264  */
1265  outer_index = outer_bi->indexes[outer_pos];
1266  if (is_dummy_partition(outer_rel, outer_index))
1267  {
1268  outer_pos++;
1269  continue;
1270  }
1271  }
1272  if (inner_pos < inner_bi->ndatums)
1273  {
1274  /*
1275  * If the partition on the inner side has been proven empty,
1276  * ignore it and move to the next datum on the inner side.
1277  */
1278  inner_index = inner_bi->indexes[inner_pos];
1279  if (is_dummy_partition(inner_rel, inner_index))
1280  {
1281  inner_pos++;
1282  continue;
1283  }
1284  }
1285 
1286  /* Get the list values. */
1287  outer_datums = outer_pos < outer_bi->ndatums ?
1288  outer_bi->datums[outer_pos] : NULL;
1289  inner_datums = inner_pos < inner_bi->ndatums ?
1290  inner_bi->datums[inner_pos] : NULL;
1291 
1292  /*
1293  * We run this loop till both sides finish. This allows us to avoid
1294  * duplicating code to handle the remaining values on the side which
1295  * finishes later. For that we set the comparison parameter cmpval in
1296  * such a way that it appears as if the side which finishes earlier
1297  * has an extra value higher than any other value on the unfinished
1298  * side. That way we advance the values on the unfinished side till
1299  * all of its values are exhausted.
1300  */
1301  if (outer_pos >= outer_bi->ndatums)
1302  cmpval = 1;
1303  else if (inner_pos >= inner_bi->ndatums)
1304  cmpval = -1;
1305  else
1306  {
1307  Assert(outer_datums != NULL && inner_datums != NULL);
1308  cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1309  partcollation[0],
1310  outer_datums[0],
1311  inner_datums[0]));
1312  }
1313 
1314  if (cmpval == 0)
1315  {
1316  /* Two list values match exactly. */
1317  Assert(outer_pos < outer_bi->ndatums);
1318  Assert(inner_pos < inner_bi->ndatums);
1319  Assert(outer_index >= 0);
1320  Assert(inner_index >= 0);
1321 
1322  /*
1323  * Try merging both partitions. If successful, add the list value
1324  * and index of the merged partition below.
1325  */
1326  merged_index = merge_matching_partitions(&outer_map, &inner_map,
1327  outer_index, inner_index,
1328  &next_index);
1329  if (merged_index == -1)
1330  goto cleanup;
1331 
1332  merged_datum = outer_datums;
1333 
1334  /* Move to the next pair of list values. */
1335  outer_pos++;
1336  inner_pos++;
1337  }
1338  else if (cmpval < 0)
1339  {
1340  /* A list value missing from the inner side. */
1341  Assert(outer_pos < outer_bi->ndatums);
1342 
1343  /*
1344  * If the inner side has the default partition, or this is an
1345  * outer join, try to assign a merged partition to the outer
1346  * partition (see process_outer_partition()). Otherwise, the
1347  * outer partition will not contribute to the result.
1348  */
1349  if (inner_has_default || IS_OUTER_JOIN(jointype))
1350  {
1351  /* Get the outer partition. */
1352  outer_index = outer_bi->indexes[outer_pos];
1353  Assert(outer_index >= 0);
1354  merged_index = process_outer_partition(&outer_map,
1355  &inner_map,
1356  outer_has_default,
1357  inner_has_default,
1358  outer_index,
1359  inner_default,
1360  jointype,
1361  &next_index,
1362  &default_index);
1363  if (merged_index == -1)
1364  goto cleanup;
1365  merged_datum = outer_datums;
1366  }
1367 
1368  /* Move to the next list value on the outer side. */
1369  outer_pos++;
1370  }
1371  else
1372  {
1373  /* A list value missing from the outer side. */
1374  Assert(cmpval > 0);
1375  Assert(inner_pos < inner_bi->ndatums);
1376 
1377  /*
1378  * If the outer side has the default partition, or this is a FULL
1379  * join, try to assign a merged partition to the inner partition
1380  * (see process_inner_partition()). Otherwise, the inner
1381  * partition will not contribute to the result.
1382  */
1383  if (outer_has_default || jointype == JOIN_FULL)
1384  {
1385  /* Get the inner partition. */
1386  inner_index = inner_bi->indexes[inner_pos];
1387  Assert(inner_index >= 0);
1388  merged_index = process_inner_partition(&outer_map,
1389  &inner_map,
1390  outer_has_default,
1391  inner_has_default,
1392  inner_index,
1393  outer_default,
1394  jointype,
1395  &next_index,
1396  &default_index);
1397  if (merged_index == -1)
1398  goto cleanup;
1399  merged_datum = inner_datums;
1400  }
1401 
1402  /* Move to the next list value on the inner side. */
1403  inner_pos++;
1404  }
1405 
1406  /*
1407  * If we assigned a merged partition, add the list value and index of
1408  * the merged partition if appropriate.
1409  */
1410  if (merged_index >= 0 && merged_index != default_index)
1411  {
1412  merged_datums = lappend(merged_datums, merged_datum);
1413  merged_indexes = lappend_int(merged_indexes, merged_index);
1414  }
1415  }
1416 
1417  /*
1418  * If the NULL partitions (if any) have been proven empty, deem them
1419  * non-existent.
1420  */
1421  if (outer_has_null &&
1422  is_dummy_partition(outer_rel, outer_bi->null_index))
1423  outer_has_null = false;
1424  if (inner_has_null &&
1425  is_dummy_partition(inner_rel, inner_bi->null_index))
1426  inner_has_null = false;
1427 
1428  /* Merge the NULL partitions if any. */
1429  if (outer_has_null || inner_has_null)
1430  merge_null_partitions(&outer_map, &inner_map,
1431  outer_has_null, inner_has_null,
1432  outer_bi->null_index, inner_bi->null_index,
1433  jointype, &next_index, &null_index);
1434  else
1435  Assert(null_index == -1);
1436 
1437  /* Merge the default partitions if any. */
1438  if (outer_has_default || inner_has_default)
1439  merge_default_partitions(&outer_map, &inner_map,
1440  outer_has_default, inner_has_default,
1441  outer_default, inner_default,
1442  jointype, &next_index, &default_index);
1443  else
1444  Assert(default_index == -1);
1445 
1446  /* If we have merged partitions, create the partition bounds. */
1447  if (next_index > 0)
1448  {
1449  /* Fix the merged_indexes list if necessary. */
1450  if (outer_map.did_remapping || inner_map.did_remapping)
1451  {
1452  Assert(jointype == JOIN_FULL);
1453  fix_merged_indexes(&outer_map, &inner_map,
1454  next_index, merged_indexes);
1455  }
1456 
1457  /* Use maps to match partitions from inputs. */
1458  generate_matching_part_pairs(outer_rel, inner_rel,
1459  &outer_map, &inner_map,
1460  next_index,
1461  outer_parts, inner_parts);
1462  Assert(*outer_parts != NIL);
1463  Assert(*inner_parts != NIL);
1464  Assert(list_length(*outer_parts) == list_length(*inner_parts));
1465  Assert(list_length(*outer_parts) <= next_index);
1466 
1467  /* Make a PartitionBoundInfo struct to return. */
1468  merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1469  merged_datums,
1470  NIL,
1471  merged_indexes,
1472  null_index,
1473  default_index);
1474  Assert(merged_bounds);
1475  }
1476 
1477 cleanup:
1478  /* Free local memory before returning. */
1479  list_free(merged_datums);
1480  list_free(merged_indexes);
1481  free_partition_map(&outer_map);
1482  free_partition_map(&inner_map);
1483 
1484  return merged_bounds;
1485 }
1486 
1487 /*
1488  * merge_range_bounds
1489  * Create the partition bounds for a join relation between range
1490  * partitioned tables, if possible
1491  *
1492  * In this function we try to find sets of overlapping partitions from both
1493  * sides by comparing ranges stored in their partition bounds. Since the
1494  * ranges appear in the ascending order, an algorithm similar to merge join is
1495  * used for that. If a partition on one side doesn't have an overlapping
1496  * partition on the other side, the algorithm tries to match it with the
1497  * default partition on the other side if any; if not, the algorithm tries to
1498  * match it with a dummy partition on the other side if it's on the
1499  * non-nullable side of an outer join. Also, if both sides have the default
1500  * partitions, the algorithm tries to match them with each other. We give up
1501  * if the algorithm finds a partition overlapping multiple partitions on the
1502  * other side, which is the scenario the current implementation of partitioned
1503  * join can't handle.
1504  */
1505 static PartitionBoundInfo
1506 merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
1507  Oid *partcollations,
1508  RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1509  JoinType jointype,
1510  List **outer_parts, List **inner_parts)
1511 {
1512  PartitionBoundInfo merged_bounds = NULL;
1513  PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1514  PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1515  bool outer_has_default = partition_bound_has_default(outer_bi);
1516  bool inner_has_default = partition_bound_has_default(inner_bi);
1517  int outer_default = outer_bi->default_index;
1518  int inner_default = inner_bi->default_index;
1519  PartitionMap outer_map;
1520  PartitionMap inner_map;
1521  int outer_index;
1522  int inner_index;
1523  int outer_lb_pos;
1524  int inner_lb_pos;
1525  PartitionRangeBound outer_lb;
1526  PartitionRangeBound outer_ub;
1527  PartitionRangeBound inner_lb;
1528  PartitionRangeBound inner_ub;
1529  int next_index = 0;
1530  int default_index = -1;
1531  List *merged_datums = NIL;
1532  List *merged_kinds = NIL;
1533  List *merged_indexes = NIL;
1534 
1535  Assert(*outer_parts == NIL);
1536  Assert(*inner_parts == NIL);
1537  Assert(outer_bi->strategy == inner_bi->strategy &&
1538  outer_bi->strategy == PARTITION_STRATEGY_RANGE);
1539 
1540  init_partition_map(outer_rel, &outer_map);
1541  init_partition_map(inner_rel, &inner_map);
1542 
1543  /*
1544  * If the default partitions (if any) have been proven empty, deem them
1545  * non-existent.
1546  */
1547  if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1548  outer_has_default = false;
1549  if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1550  inner_has_default = false;
1551 
1552  /*
1553  * Merge partitions from both sides. In each iteration we compare a pair
1554  * of ranges, one from each side, and decide whether the corresponding
1555  * partitions match or not. If the two ranges overlap, move to the next
1556  * pair of ranges, otherwise move to the next range on the side with a
1557  * lower range. outer_lb_pos/inner_lb_pos keep track of the positions of
1558  * lower bounds in the datums arrays in the outer/inner
1559  * PartitionBoundInfos respectively.
1560  */
1561  outer_lb_pos = inner_lb_pos = 0;
1562  outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1563  &outer_lb, &outer_ub);
1564  inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1565  &inner_lb, &inner_ub);
1566  while (outer_index >= 0 || inner_index >= 0)
1567  {
1568  bool overlap;
1569  int ub_cmpval;
1570  int lb_cmpval;
1571  PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
1572  PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
1573  int merged_index = -1;
1574 
1575  /*
1576  * We run this loop till both sides finish. This allows us to avoid
1577  * duplicating code to handle the remaining ranges on the side which
1578  * finishes later. For that we set the comparison parameter cmpval in
1579  * such a way that it appears as if the side which finishes earlier
1580  * has an extra range higher than any other range on the unfinished
1581  * side. That way we advance the ranges on the unfinished side till
1582  * all of its ranges are exhausted.
1583  */
1584  if (outer_index == -1)
1585  {
1586  overlap = false;
1587  lb_cmpval = 1;
1588  ub_cmpval = 1;
1589  }
1590  else if (inner_index == -1)
1591  {
1592  overlap = false;
1593  lb_cmpval = -1;
1594  ub_cmpval = -1;
1595  }
1596  else
1597  overlap = compare_range_partitions(partnatts, partsupfuncs,
1598  partcollations,
1599  &outer_lb, &outer_ub,
1600  &inner_lb, &inner_ub,
1601  &lb_cmpval, &ub_cmpval);
1602 
1603  if (overlap)
1604  {
1605  /* Two ranges overlap; form a join pair. */
1606 
1607  PartitionRangeBound save_outer_ub;
1608  PartitionRangeBound save_inner_ub;
1609 
1610  /* Both partitions should not have been merged yet. */
1611  Assert(outer_index >= 0);
1612  Assert(outer_map.merged_indexes[outer_index] == -1 &&
1613  outer_map.merged[outer_index] == false);
1614  Assert(inner_index >= 0);
1615  Assert(inner_map.merged_indexes[inner_index] == -1 &&
1616  inner_map.merged[inner_index] == false);
1617 
1618  /*
1619  * Get the index of the merged partition. Both partitions aren't
1620  * merged yet, so the partitions should be merged successfully.
1621  */
1622  merged_index = merge_matching_partitions(&outer_map, &inner_map,
1623  outer_index, inner_index,
1624  &next_index);
1625  Assert(merged_index >= 0);
1626 
1627  /* Get the range bounds of the merged partition. */
1628  get_merged_range_bounds(partnatts, partsupfuncs,
1629  partcollations, jointype,
1630  &outer_lb, &outer_ub,
1631  &inner_lb, &inner_ub,
1632  lb_cmpval, ub_cmpval,
1633  &merged_lb, &merged_ub);
1634 
1635  /* Save the upper bounds of both partitions for use below. */
1636  save_outer_ub = outer_ub;
1637  save_inner_ub = inner_ub;
1638 
1639  /* Move to the next pair of ranges. */
1640  outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1641  &outer_lb, &outer_ub);
1642  inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1643  &inner_lb, &inner_ub);
1644 
1645  /*
1646  * If the range of a partition on one side overlaps the range of
1647  * the next partition on the other side, that will cause the
1648  * partition on one side to match at least two partitions on the
1649  * other side, which is the case that we currently don't support
1650  * partitioned join for; give up.
1651  */
1652  if (ub_cmpval > 0 && inner_index >= 0 &&
1653  compare_range_bounds(partnatts, partsupfuncs, partcollations,
1654  &save_outer_ub, &inner_lb) > 0)
1655  goto cleanup;
1656  if (ub_cmpval < 0 && outer_index >= 0 &&
1657  compare_range_bounds(partnatts, partsupfuncs, partcollations,
1658  &outer_lb, &save_inner_ub) < 0)
1659  goto cleanup;
1660 
1661  /*
1662  * A row from a non-overlapping portion (if any) of a partition on
1663  * one side might find its join partner in the default partition
1664  * (if any) on the other side, causing the same situation as
1665  * above; give up in that case.
1666  */
1667  if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
1668  (inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
1669  goto cleanup;
1670  }
1671  else if (ub_cmpval < 0)
1672  {
1673  /* A non-overlapping outer range. */
1674 
1675  /* The outer partition should not have been merged yet. */
1676  Assert(outer_index >= 0);
1677  Assert(outer_map.merged_indexes[outer_index] == -1 &&
1678  outer_map.merged[outer_index] == false);
1679 
1680  /*
1681  * If the inner side has the default partition, or this is an
1682  * outer join, try to assign a merged partition to the outer
1683  * partition (see process_outer_partition()). Otherwise, the
1684  * outer partition will not contribute to the result.
1685  */
1686  if (inner_has_default || IS_OUTER_JOIN(jointype))
1687  {
1688  merged_index = process_outer_partition(&outer_map,
1689  &inner_map,
1690  outer_has_default,
1691  inner_has_default,
1692  outer_index,
1693  inner_default,
1694  jointype,
1695  &next_index,
1696  &default_index);
1697  if (merged_index == -1)
1698  goto cleanup;
1699  merged_lb = outer_lb;
1700  merged_ub = outer_ub;
1701  }
1702 
1703  /* Move to the next range on the outer side. */
1704  outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1705  &outer_lb, &outer_ub);
1706  }
1707  else
1708  {
1709  /* A non-overlapping inner range. */
1710  Assert(ub_cmpval > 0);
1711 
1712  /* The inner partition should not have been merged yet. */
1713  Assert(inner_index >= 0);
1714  Assert(inner_map.merged_indexes[inner_index] == -1 &&
1715  inner_map.merged[inner_index] == false);
1716 
1717  /*
1718  * If the outer side has the default partition, or this is a FULL
1719  * join, try to assign a merged partition to the inner partition
1720  * (see process_inner_partition()). Otherwise, the inner
1721  * partition will not contribute to the result.
1722  */
1723  if (outer_has_default || jointype == JOIN_FULL)
1724  {
1725  merged_index = process_inner_partition(&outer_map,
1726  &inner_map,
1727  outer_has_default,
1728  inner_has_default,
1729  inner_index,
1730  outer_default,
1731  jointype,
1732  &next_index,
1733  &default_index);
1734  if (merged_index == -1)
1735  goto cleanup;
1736  merged_lb = inner_lb;
1737  merged_ub = inner_ub;
1738  }
1739 
1740  /* Move to the next range on the inner side. */
1741  inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1742  &inner_lb, &inner_ub);
1743  }
1744 
1745  /*
1746  * If we assigned a merged partition, add the range bounds and index
1747  * of the merged partition if appropriate.
1748  */
1749  if (merged_index >= 0 && merged_index != default_index)
1750  add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
1751  &merged_lb, &merged_ub, merged_index,
1752  &merged_datums, &merged_kinds,
1753  &merged_indexes);
1754  }
1755 
1756  /* Merge the default partitions if any. */
1757  if (outer_has_default || inner_has_default)
1758  merge_default_partitions(&outer_map, &inner_map,
1759  outer_has_default, inner_has_default,
1760  outer_default, inner_default,
1761  jointype, &next_index, &default_index);
1762  else
1763  Assert(default_index == -1);
1764 
1765  /* If we have merged partitions, create the partition bounds. */
1766  if (next_index > 0)
1767  {
1768  /*
1769  * Unlike the case of list partitioning, we wouldn't have re-merged
1770  * partitions, so did_remapping should be left alone.
1771  */
1772  Assert(!outer_map.did_remapping);
1773  Assert(!inner_map.did_remapping);
1774 
1775  /* Use maps to match partitions from inputs. */
1776  generate_matching_part_pairs(outer_rel, inner_rel,
1777  &outer_map, &inner_map,
1778  next_index,
1779  outer_parts, inner_parts);
1780  Assert(*outer_parts != NIL);
1781  Assert(*inner_parts != NIL);
1782  Assert(list_length(*outer_parts) == list_length(*inner_parts));
1783  Assert(list_length(*outer_parts) == next_index);
1784 
1785  /* Make a PartitionBoundInfo struct to return. */
1786  merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1787  merged_datums,
1788  merged_kinds,
1789  merged_indexes,
1790  -1,
1791  default_index);
1792  Assert(merged_bounds);
1793  }
1794 
1795 cleanup:
1796  /* Free local memory before returning. */
1797  list_free(merged_datums);
1798  list_free(merged_kinds);
1799  list_free(merged_indexes);
1800  free_partition_map(&outer_map);
1801  free_partition_map(&inner_map);
1802 
1803  return merged_bounds;
1804 }
1805 
1806 /*
1807  * init_partition_map
1808  * Initialize a PartitionMap struct for given relation
1809  */
1810 static void
1812 {
1813  int nparts = rel->nparts;
1814  int i;
1815 
1816  map->nparts = nparts;
1817  map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
1818  map->merged = (bool *) palloc(sizeof(bool) * nparts);
1819  map->did_remapping = false;
1820  map->old_indexes = (int *) palloc(sizeof(int) * nparts);
1821  for (i = 0; i < nparts; i++)
1822  {
1823  map->merged_indexes[i] = map->old_indexes[i] = -1;
1824  map->merged[i] = false;
1825  }
1826 }
1827 
1828 /*
1829  * free_partition_map
1830  */
1831 static void
1833 {
1834  pfree(map->merged_indexes);
1835  pfree(map->merged);
1836  pfree(map->old_indexes);
1837 }
1838 
1839 /*
1840  * is_dummy_partition --- has partition been proven empty?
1841  */
1842 static bool
1843 is_dummy_partition(RelOptInfo *rel, int part_index)
1844 {
1845  RelOptInfo *part_rel;
1846 
1847  Assert(part_index >= 0);
1848  part_rel = rel->part_rels[part_index];
1849  if (part_rel == NULL || IS_DUMMY_REL(part_rel))
1850  return true;
1851  return false;
1852 }
1853 
1854 /*
1855  * merge_matching_partitions
1856  * Try to merge given outer/inner partitions, and return the index of a
1857  * merged partition produced from them if successful, -1 otherwise
1858  *
1859  * If the merged partition is newly created, *next_index is incremented.
1860  */
1861 static int
1863  int outer_index, int inner_index, int *next_index)
1864 {
1865  int outer_merged_index;
1866  int inner_merged_index;
1867  bool outer_merged;
1868  bool inner_merged;
1869 
1870  Assert(outer_index >= 0 && outer_index < outer_map->nparts);
1871  outer_merged_index = outer_map->merged_indexes[outer_index];
1872  outer_merged = outer_map->merged[outer_index];
1873  Assert(inner_index >= 0 && inner_index < inner_map->nparts);
1874  inner_merged_index = inner_map->merged_indexes[inner_index];
1875  inner_merged = inner_map->merged[inner_index];
1876 
1877  /*
1878  * Handle cases where we have already assigned a merged partition to each
1879  * of the given partitions.
1880  */
1881  if (outer_merged_index >= 0 && inner_merged_index >= 0)
1882  {
1883  /*
1884  * If the merged partitions are the same, no need to do anything;
1885  * return the index of the merged partitions. Otherwise, if each of
1886  * the given partitions has been merged with a dummy partition on the
1887  * other side, re-map them to either of the two merged partitions.
1888  * Otherwise, they can't be merged, so return -1.
1889  */
1890  if (outer_merged_index == inner_merged_index)
1891  {
1892  Assert(outer_merged);
1893  Assert(inner_merged);
1894  return outer_merged_index;
1895  }
1896  if (!outer_merged && !inner_merged)
1897  {
1898  /*
1899  * This can only happen for a list-partitioning case. We re-map
1900  * them to the merged partition with the smaller of the two merged
1901  * indexes to preserve the property that the canonical order of
1902  * list partitions is determined by the indexes assigned to the
1903  * smallest list value of each partition.
1904  */
1905  if (outer_merged_index < inner_merged_index)
1906  {
1907  outer_map->merged[outer_index] = true;
1908  inner_map->merged_indexes[inner_index] = outer_merged_index;
1909  inner_map->merged[inner_index] = true;
1910  inner_map->did_remapping = true;
1911  inner_map->old_indexes[inner_index] = inner_merged_index;
1912  return outer_merged_index;
1913  }
1914  else
1915  {
1916  inner_map->merged[inner_index] = true;
1917  outer_map->merged_indexes[outer_index] = inner_merged_index;
1918  outer_map->merged[outer_index] = true;
1919  outer_map->did_remapping = true;
1920  outer_map->old_indexes[outer_index] = outer_merged_index;
1921  return inner_merged_index;
1922  }
1923  }
1924  return -1;
1925  }
1926 
1927  /* At least one of the given partitions should not have yet been merged. */
1928  Assert(outer_merged_index == -1 || inner_merged_index == -1);
1929 
1930  /*
1931  * If neither of them has been merged, merge them. Otherwise, if one has
1932  * been merged with a dummy partition on the other side (and the other
1933  * hasn't yet been merged with anything), re-merge them. Otherwise, they
1934  * can't be merged, so return -1.
1935  */
1936  if (outer_merged_index == -1 && inner_merged_index == -1)
1937  {
1938  int merged_index = *next_index;
1939 
1940  Assert(!outer_merged);
1941  Assert(!inner_merged);
1942  outer_map->merged_indexes[outer_index] = merged_index;
1943  outer_map->merged[outer_index] = true;
1944  inner_map->merged_indexes[inner_index] = merged_index;
1945  inner_map->merged[inner_index] = true;
1946  *next_index = *next_index + 1;
1947  return merged_index;
1948  }
1949  if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
1950  {
1951  Assert(inner_merged_index == -1);
1952  Assert(!inner_merged);
1953  inner_map->merged_indexes[inner_index] = outer_merged_index;
1954  inner_map->merged[inner_index] = true;
1955  outer_map->merged[outer_index] = true;
1956  return outer_merged_index;
1957  }
1958  if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
1959  {
1960  Assert(outer_merged_index == -1);
1961  Assert(!outer_merged);
1962  outer_map->merged_indexes[outer_index] = inner_merged_index;
1963  outer_map->merged[outer_index] = true;
1964  inner_map->merged[inner_index] = true;
1965  return inner_merged_index;
1966  }
1967  return -1;
1968 }
1969 
1970 /*
1971  * process_outer_partition
1972  * Try to assign given outer partition a merged partition, and return the
1973  * index of the merged partition if successful, -1 otherwise
1974  *
1975  * If the partition is newly created, *next_index is incremented. Also, if it
1976  * is the default partition of the join relation, *default_index is set to the
1977  * index if not already done.
1978  */
1979 static int
1981  PartitionMap *inner_map,
1982  bool outer_has_default,
1983  bool inner_has_default,
1984  int outer_index,
1985  int inner_default,
1986  JoinType jointype,
1987  int *next_index,
1988  int *default_index)
1989 {
1990  int merged_index = -1;
1991 
1992  Assert(outer_index >= 0);
1993 
1994  /*
1995  * If the inner side has the default partition, a row from the outer
1996  * partition might find its join partner in the default partition; try
1997  * merging the outer partition with the default partition. Otherwise,
1998  * this should be an outer join, in which case the outer partition has to
1999  * be scanned all the way anyway; merge the outer partition with a dummy
2000  * partition on the other side.
2001  */
2002  if (inner_has_default)
2003  {
2004  Assert(inner_default >= 0);
2005 
2006  /*
2007  * If the outer side has the default partition as well, the default
2008  * partition on the inner side will have two matching partitions on
2009  * the other side: the outer partition and the default partition on
2010  * the outer side. Partitionwise join doesn't handle this scenario
2011  * yet.
2012  */
2013  if (outer_has_default)
2014  return -1;
2015 
2016  merged_index = merge_matching_partitions(outer_map, inner_map,
2017  outer_index, inner_default,
2018  next_index);
2019  if (merged_index == -1)
2020  return -1;
2021 
2022  /*
2023  * If this is a FULL join, the default partition on the inner side has
2024  * to be scanned all the way anyway, so the resulting partition will
2025  * contain all key values from the default partition, which any other
2026  * partition of the join relation will not contain. Thus the
2027  * resulting partition will act as the default partition of the join
2028  * relation; record the index in *default_index if not already done.
2029  */
2030  if (jointype == JOIN_FULL)
2031  {
2032  if (*default_index == -1)
2033  *default_index = merged_index;
2034  else
2035  Assert(*default_index == merged_index);
2036  }
2037  }
2038  else
2039  {
2040  Assert(IS_OUTER_JOIN(jointype));
2041  Assert(jointype != JOIN_RIGHT);
2042 
2043  /* If we have already assigned a partition, no need to do anything. */
2044  merged_index = outer_map->merged_indexes[outer_index];
2045  if (merged_index == -1)
2046  merged_index = merge_partition_with_dummy(outer_map, outer_index,
2047  next_index);
2048  }
2049  return merged_index;
2050 }
2051 
2052 /*
2053  * process_inner_partition
2054  * Try to assign given inner partition a merged partition, and return the
2055  * index of the merged partition if successful, -1 otherwise
2056  *
2057  * If the partition is newly created, *next_index is incremented. Also, if it
2058  * is the default partition of the join relation, *default_index is set to the
2059  * index if not already done.
2060  */
2061 static int
2063  PartitionMap *inner_map,
2064  bool outer_has_default,
2065  bool inner_has_default,
2066  int inner_index,
2067  int outer_default,
2068  JoinType jointype,
2069  int *next_index,
2070  int *default_index)
2071 {
2072  int merged_index = -1;
2073 
2074  Assert(inner_index >= 0);
2075 
2076  /*
2077  * If the outer side has the default partition, a row from the inner
2078  * partition might find its join partner in the default partition; try
2079  * merging the inner partition with the default partition. Otherwise,
2080  * this should be a FULL join, in which case the inner partition has to be
2081  * scanned all the way anyway; merge the inner partition with a dummy
2082  * partition on the other side.
2083  */
2084  if (outer_has_default)
2085  {
2086  Assert(outer_default >= 0);
2087 
2088  /*
2089  * If the inner side has the default partition as well, the default
2090  * partition on the outer side will have two matching partitions on
2091  * the other side: the inner partition and the default partition on
2092  * the inner side. Partitionwise join doesn't handle this scenario
2093  * yet.
2094  */
2095  if (inner_has_default)
2096  return -1;
2097 
2098  merged_index = merge_matching_partitions(outer_map, inner_map,
2099  outer_default, inner_index,
2100  next_index);
2101  if (merged_index == -1)
2102  return -1;
2103 
2104  /*
2105  * If this is an outer join, the default partition on the outer side
2106  * has to be scanned all the way anyway, so the resulting partition
2107  * will contain all key values from the default partition, which any
2108  * other partition of the join relation will not contain. Thus the
2109  * resulting partition will act as the default partition of the join
2110  * relation; record the index in *default_index if not already done.
2111  */
2112  if (IS_OUTER_JOIN(jointype))
2113  {
2114  Assert(jointype != JOIN_RIGHT);
2115  if (*default_index == -1)
2116  *default_index = merged_index;
2117  else
2118  Assert(*default_index == merged_index);
2119  }
2120  }
2121  else
2122  {
2123  Assert(jointype == JOIN_FULL);
2124 
2125  /* If we have already assigned a partition, no need to do anything. */
2126  merged_index = inner_map->merged_indexes[inner_index];
2127  if (merged_index == -1)
2128  merged_index = merge_partition_with_dummy(inner_map, inner_index,
2129  next_index);
2130  }
2131  return merged_index;
2132 }
2133 
2134 /*
2135  * merge_null_partitions
2136  * Merge the NULL partitions from a join's outer and inner sides.
2137  *
2138  * If the merged partition produced from them is the NULL partition of the join
2139  * relation, *null_index is set to the index of the merged partition.
2140  *
2141  * Note: We assume here that the join clause for a partitioned join is strict
2142  * because have_partkey_equi_join() requires that the corresponding operator
2143  * be mergejoinable, and we currently assume that mergejoinable operators are
2144  * strict (see MJEvalOuterValues()/MJEvalInnerValues()).
2145  */
2146 static void
2148  PartitionMap *inner_map,
2149  bool outer_has_null,
2150  bool inner_has_null,
2151  int outer_null,
2152  int inner_null,
2153  JoinType jointype,
2154  int *next_index,
2155  int *null_index)
2156 {
2157  bool consider_outer_null = false;
2158  bool consider_inner_null = false;
2159 
2160  Assert(outer_has_null || inner_has_null);
2161  Assert(*null_index == -1);
2162 
2163  /*
2164  * Check whether the NULL partitions have already been merged and if so,
2165  * set the consider_outer_null/consider_inner_null flags.
2166  */
2167  if (outer_has_null)
2168  {
2169  Assert(outer_null >= 0 && outer_null < outer_map->nparts);
2170  if (outer_map->merged_indexes[outer_null] == -1)
2171  consider_outer_null = true;
2172  }
2173  if (inner_has_null)
2174  {
2175  Assert(inner_null >= 0 && inner_null < inner_map->nparts);
2176  if (inner_map->merged_indexes[inner_null] == -1)
2177  consider_inner_null = true;
2178  }
2179 
2180  /* If both flags are set false, we don't need to do anything. */
2181  if (!consider_outer_null && !consider_inner_null)
2182  return;
2183 
2184  if (consider_outer_null && !consider_inner_null)
2185  {
2186  Assert(outer_has_null);
2187 
2188  /*
2189  * If this is an outer join, the NULL partition on the outer side has
2190  * to be scanned all the way anyway; merge the NULL partition with a
2191  * dummy partition on the other side. In that case
2192  * consider_outer_null means that the NULL partition only contains
2193  * NULL values as the key values, so the merged partition will do so;
2194  * treat it as the NULL partition of the join relation.
2195  */
2196  if (IS_OUTER_JOIN(jointype))
2197  {
2198  Assert(jointype != JOIN_RIGHT);
2199  *null_index = merge_partition_with_dummy(outer_map, outer_null,
2200  next_index);
2201  }
2202  }
2203  else if (!consider_outer_null && consider_inner_null)
2204  {
2205  Assert(inner_has_null);
2206 
2207  /*
2208  * If this is a FULL join, the NULL partition on the inner side has to
2209  * be scanned all the way anyway; merge the NULL partition with a
2210  * dummy partition on the other side. In that case
2211  * consider_inner_null means that the NULL partition only contains
2212  * NULL values as the key values, so the merged partition will do so;
2213  * treat it as the NULL partition of the join relation.
2214  */
2215  if (jointype == JOIN_FULL)
2216  *null_index = merge_partition_with_dummy(inner_map, inner_null,
2217  next_index);
2218  }
2219  else
2220  {
2221  Assert(consider_outer_null && consider_inner_null);
2222  Assert(outer_has_null);
2223  Assert(inner_has_null);
2224 
2225  /*
2226  * If this is an outer join, the NULL partition on the outer side (and
2227  * that on the inner side if this is a FULL join) have to be scanned
2228  * all the way anyway, so merge them. Note that each of the NULL
2229  * partitions isn't merged yet, so they should be merged successfully.
2230  * Like the above, each of the NULL partitions only contains NULL
2231  * values as the key values, so the merged partition will do so; treat
2232  * it as the NULL partition of the join relation.
2233  *
2234  * Note: if this an INNER/SEMI join, the join clause will never be
2235  * satisfied by two NULL values (see comments above), so both the NULL
2236  * partitions can be eliminated.
2237  */
2238  if (IS_OUTER_JOIN(jointype))
2239  {
2240  Assert(jointype != JOIN_RIGHT);
2241  *null_index = merge_matching_partitions(outer_map, inner_map,
2242  outer_null, inner_null,
2243  next_index);
2244  Assert(*null_index >= 0);
2245  }
2246  }
2247 }
2248 
2249 /*
2250  * merge_default_partitions
2251  * Merge the default partitions from a join's outer and inner sides.
2252  *
2253  * If the merged partition produced from them is the default partition of the
2254  * join relation, *default_index is set to the index of the merged partition.
2255  */
2256 static void
2258  PartitionMap *inner_map,
2259  bool outer_has_default,
2260  bool inner_has_default,
2261  int outer_default,
2262  int inner_default,
2263  JoinType jointype,
2264  int *next_index,
2265  int *default_index)
2266 {
2267  int outer_merged_index = -1;
2268  int inner_merged_index = -1;
2269 
2270  Assert(outer_has_default || inner_has_default);
2271 
2272  /* Get the merged partition indexes for the default partitions. */
2273  if (outer_has_default)
2274  {
2275  Assert(outer_default >= 0 && outer_default < outer_map->nparts);
2276  outer_merged_index = outer_map->merged_indexes[outer_default];
2277  }
2278  if (inner_has_default)
2279  {
2280  Assert(inner_default >= 0 && inner_default < inner_map->nparts);
2281  inner_merged_index = inner_map->merged_indexes[inner_default];
2282  }
2283 
2284  if (outer_has_default && !inner_has_default)
2285  {
2286  /*
2287  * If this is an outer join, the default partition on the outer side
2288  * has to be scanned all the way anyway; if we have not yet assigned a
2289  * partition, merge the default partition with a dummy partition on
2290  * the other side. The merged partition will act as the default
2291  * partition of the join relation (see comments in
2292  * process_inner_partition()).
2293  */
2294  if (IS_OUTER_JOIN(jointype))
2295  {
2296  Assert(jointype != JOIN_RIGHT);
2297  if (outer_merged_index == -1)
2298  {
2299  Assert(*default_index == -1);
2300  *default_index = merge_partition_with_dummy(outer_map,
2301  outer_default,
2302  next_index);
2303  }
2304  else
2305  Assert(*default_index == outer_merged_index);
2306  }
2307  else
2308  Assert(*default_index == -1);
2309  }
2310  else if (!outer_has_default && inner_has_default)
2311  {
2312  /*
2313  * If this is a FULL join, the default partition on the inner side has
2314  * to be scanned all the way anyway; if we have not yet assigned a
2315  * partition, merge the default partition with a dummy partition on
2316  * the other side. The merged partition will act as the default
2317  * partition of the join relation (see comments in
2318  * process_outer_partition()).
2319  */
2320  if (jointype == JOIN_FULL)
2321  {
2322  if (inner_merged_index == -1)
2323  {
2324  Assert(*default_index == -1);
2325  *default_index = merge_partition_with_dummy(inner_map,
2326  inner_default,
2327  next_index);
2328  }
2329  else
2330  Assert(*default_index == inner_merged_index);
2331  }
2332  else
2333  Assert(*default_index == -1);
2334  }
2335  else
2336  {
2337  Assert(outer_has_default && inner_has_default);
2338 
2339  /*
2340  * The default partitions have to be joined with each other, so merge
2341  * them. Note that each of the default partitions isn't merged yet
2342  * (see, process_outer_partition()/process_inner_partition()), so they
2343  * should be merged successfully. The merged partition will act as
2344  * the default partition of the join relation.
2345  */
2346  Assert(outer_merged_index == -1);
2347  Assert(inner_merged_index == -1);
2348  Assert(*default_index == -1);
2349  *default_index = merge_matching_partitions(outer_map,
2350  inner_map,
2351  outer_default,
2352  inner_default,
2353  next_index);
2354  Assert(*default_index >= 0);
2355  }
2356 }
2357 
2358 /*
2359  * merge_partition_with_dummy
2360  * Assign given partition a new partition of a join relation
2361  *
2362  * Note: The caller assumes that the given partition doesn't have a non-dummy
2363  * matching partition on the other side, but if the given partition finds the
2364  * matching partition later, we will adjust the assignment.
2365  */
2366 static int
2367 merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
2368 {
2369  int merged_index = *next_index;
2370 
2371  Assert(index >= 0 && index < map->nparts);
2372  Assert(map->merged_indexes[index] == -1);
2373  Assert(!map->merged[index]);
2374  map->merged_indexes[index] = merged_index;
2375  /* Leave the merged flag alone! */
2376  *next_index = *next_index + 1;
2377  return merged_index;
2378 }
2379 
2380 /*
2381  * fix_merged_indexes
2382  * Adjust merged indexes of re-merged partitions
2383  */
2384 static void
2386  int nmerged, List *merged_indexes)
2387 {
2388  int *new_indexes;
2389  int merged_index;
2390  int i;
2391  ListCell *lc;
2392 
2393  Assert(nmerged > 0);
2394 
2395  new_indexes = (int *) palloc(sizeof(int) * nmerged);
2396  for (i = 0; i < nmerged; i++)
2397  new_indexes[i] = -1;
2398 
2399  /* Build the mapping of old merged indexes to new merged indexes. */
2400  if (outer_map->did_remapping)
2401  {
2402  for (i = 0; i < outer_map->nparts; i++)
2403  {
2404  merged_index = outer_map->old_indexes[i];
2405  if (merged_index >= 0)
2406  new_indexes[merged_index] = outer_map->merged_indexes[i];
2407  }
2408  }
2409  if (inner_map->did_remapping)
2410  {
2411  for (i = 0; i < inner_map->nparts; i++)
2412  {
2413  merged_index = inner_map->old_indexes[i];
2414  if (merged_index >= 0)
2415  new_indexes[merged_index] = inner_map->merged_indexes[i];
2416  }
2417  }
2418 
2419  /* Fix the merged_indexes list using the mapping. */
2420  foreach(lc, merged_indexes)
2421  {
2422  merged_index = lfirst_int(lc);
2423  Assert(merged_index >= 0);
2424  if (new_indexes[merged_index] >= 0)
2425  lfirst_int(lc) = new_indexes[merged_index];
2426  }
2427 
2428  pfree(new_indexes);
2429 }
2430 
2431 /*
2432  * generate_matching_part_pairs
2433  * Generate a pair of lists of partitions that produce merged partitions
2434  *
2435  * The lists of partitions are built in the order of merged partition indexes,
2436  * and returned in *outer_parts and *inner_parts.
2437  */
2438 static void
2440  PartitionMap *outer_map, PartitionMap *inner_map,
2441  int nmerged,
2442  List **outer_parts, List **inner_parts)
2443 {
2444  int outer_nparts = outer_map->nparts;
2445  int inner_nparts = inner_map->nparts;
2446  int *outer_indexes;
2447  int *inner_indexes;
2448  int max_nparts;
2449  int i;
2450 
2451  Assert(nmerged > 0);
2452  Assert(*outer_parts == NIL);
2453  Assert(*inner_parts == NIL);
2454 
2455  outer_indexes = (int *) palloc(sizeof(int) * nmerged);
2456  inner_indexes = (int *) palloc(sizeof(int) * nmerged);
2457  for (i = 0; i < nmerged; i++)
2458  outer_indexes[i] = inner_indexes[i] = -1;
2459 
2460  /* Set pairs of matching partitions. */
2461  Assert(outer_nparts == outer_rel->nparts);
2462  Assert(inner_nparts == inner_rel->nparts);
2463  max_nparts = Max(outer_nparts, inner_nparts);
2464  for (i = 0; i < max_nparts; i++)
2465  {
2466  if (i < outer_nparts)
2467  {
2468  int merged_index = outer_map->merged_indexes[i];
2469 
2470  if (merged_index >= 0)
2471  {
2472  Assert(merged_index < nmerged);
2473  outer_indexes[merged_index] = i;
2474  }
2475  }
2476  if (i < inner_nparts)
2477  {
2478  int merged_index = inner_map->merged_indexes[i];
2479 
2480  if (merged_index >= 0)
2481  {
2482  Assert(merged_index < nmerged);
2483  inner_indexes[merged_index] = i;
2484  }
2485  }
2486  }
2487 
2488  /* Build the list pairs. */
2489  for (i = 0; i < nmerged; i++)
2490  {
2491  int outer_index = outer_indexes[i];
2492  int inner_index = inner_indexes[i];
2493 
2494  /*
2495  * If both partitions are dummy, it means the merged partition that
2496  * had been assigned to the outer/inner partition was removed when
2497  * re-merging the outer/inner partition in
2498  * merge_matching_partitions(); ignore the merged partition.
2499  */
2500  if (outer_index == -1 && inner_index == -1)
2501  continue;
2502 
2503  *outer_parts = lappend(*outer_parts, outer_index >= 0 ?
2504  outer_rel->part_rels[outer_index] : NULL);
2505  *inner_parts = lappend(*inner_parts, inner_index >= 0 ?
2506  inner_rel->part_rels[inner_index] : NULL);
2507  }
2508 
2509  pfree(outer_indexes);
2510  pfree(inner_indexes);
2511 }
2512 
2513 /*
2514  * build_merged_partition_bounds
2515  * Create a PartitionBoundInfo struct from merged partition bounds
2516  */
2517 static PartitionBoundInfo
2518 build_merged_partition_bounds(char strategy, List *merged_datums,
2519  List *merged_kinds, List *merged_indexes,
2520  int null_index, int default_index)
2521 {
2522  PartitionBoundInfo merged_bounds;
2523  int ndatums = list_length(merged_datums);
2524  int pos;
2525  ListCell *lc;
2526 
2527  merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
2528  merged_bounds->strategy = strategy;
2529  merged_bounds->ndatums = ndatums;
2530 
2531  merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
2532  pos = 0;
2533  foreach(lc, merged_datums)
2534  merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
2535 
2536  if (strategy == PARTITION_STRATEGY_RANGE)
2537  {
2538  Assert(list_length(merged_kinds) == ndatums);
2539  merged_bounds->kind = (PartitionRangeDatumKind **)
2540  palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
2541  pos = 0;
2542  foreach(lc, merged_kinds)
2543  merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
2544 
2545  /* There are ndatums+1 indexes in the case of range partitioning. */
2546  merged_indexes = lappend_int(merged_indexes, -1);
2547  ndatums++;
2548  }
2549  else
2550  {
2551  Assert(strategy == PARTITION_STRATEGY_LIST);
2552  Assert(merged_kinds == NIL);
2553  merged_bounds->kind = NULL;
2554  }
2555 
2556  /* interleaved_parts is always NULL for join relations. */
2557  merged_bounds->interleaved_parts = NULL;
2558 
2559  Assert(list_length(merged_indexes) == ndatums);
2560  merged_bounds->nindexes = ndatums;
2561  merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
2562  pos = 0;
2563  foreach(lc, merged_indexes)
2564  merged_bounds->indexes[pos++] = lfirst_int(lc);
2565 
2566  merged_bounds->null_index = null_index;
2567  merged_bounds->default_index = default_index;
2568 
2569  return merged_bounds;
2570 }
2571 
2572 /*
2573  * get_range_partition
2574  * Get the next non-dummy partition of a range-partitioned relation,
2575  * returning the index of that partition
2576  *
2577  * *lb and *ub are set to the lower and upper bounds of that partition
2578  * respectively, and *lb_pos is advanced to the next lower bound, if any.
2579  */
2580 static int
2582  PartitionBoundInfo bi,
2583  int *lb_pos,
2584  PartitionRangeBound *lb,
2585  PartitionRangeBound *ub)
2586 {
2587  int part_index;
2588 
2590 
2591  do
2592  {
2593  part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
2594  if (part_index == -1)
2595  return -1;
2596  } while (is_dummy_partition(rel, part_index));
2597 
2598  return part_index;
2599 }
2600 
2601 static int
2603  int *lb_pos,
2604  PartitionRangeBound *lb,
2605  PartitionRangeBound *ub)
2606 {
2607  /* Return the index as -1 if we've exhausted all lower bounds. */
2608  if (*lb_pos >= bi->ndatums)
2609  return -1;
2610 
2611  /* A lower bound should have at least one more bound after it. */
2612  Assert(*lb_pos + 1 < bi->ndatums);
2613 
2614  /* Set the lower bound. */
2615  lb->index = bi->indexes[*lb_pos];
2616  lb->datums = bi->datums[*lb_pos];
2617  lb->kind = bi->kind[*lb_pos];
2618  lb->lower = true;
2619  /* Set the upper bound. */
2620  ub->index = bi->indexes[*lb_pos + 1];
2621  ub->datums = bi->datums[*lb_pos + 1];
2622  ub->kind = bi->kind[*lb_pos + 1];
2623  ub->lower = false;
2624 
2625  /* The index assigned to an upper bound should be valid. */
2626  Assert(ub->index >= 0);
2627 
2628  /*
2629  * Advance the position to the next lower bound. If there are no bounds
2630  * left beyond the upper bound, we have reached the last lower bound.
2631  */
2632  if (*lb_pos + 2 >= bi->ndatums)
2633  *lb_pos = bi->ndatums;
2634  else
2635  {
2636  /*
2637  * If the index assigned to the bound next to the upper bound isn't
2638  * valid, that is the next lower bound; else, the upper bound is also
2639  * the lower bound of the next range partition.
2640  */
2641  if (bi->indexes[*lb_pos + 2] < 0)
2642  *lb_pos = *lb_pos + 2;
2643  else
2644  *lb_pos = *lb_pos + 1;
2645  }
2646 
2647  return ub->index;
2648 }
2649 
2650 /*
2651  * compare_range_partitions
2652  * Compare the bounds of two range partitions, and return true if the
2653  * two partitions overlap, false otherwise
2654  *
2655  * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
2656  * lower than, equal to, or higher than the inner partition's lower bound
2657  * respectively. Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
2658  * partition's upper bound is lower than, equal to, or higher than the inner
2659  * partition's upper bound respectively.
2660  */
2661 static bool
2662 compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
2663  Oid *partcollations,
2664  PartitionRangeBound *outer_lb,
2665  PartitionRangeBound *outer_ub,
2666  PartitionRangeBound *inner_lb,
2667  PartitionRangeBound *inner_ub,
2668  int *lb_cmpval, int *ub_cmpval)
2669 {
2670  /*
2671  * Check if the outer partition's upper bound is lower than the inner
2672  * partition's lower bound; if so the partitions aren't overlapping.
2673  */
2674  if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2675  outer_ub, inner_lb) < 0)
2676  {
2677  *lb_cmpval = -1;
2678  *ub_cmpval = -1;
2679  return false;
2680  }
2681 
2682  /*
2683  * Check if the outer partition's lower bound is higher than the inner
2684  * partition's upper bound; if so the partitions aren't overlapping.
2685  */
2686  if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2687  outer_lb, inner_ub) > 0)
2688  {
2689  *lb_cmpval = 1;
2690  *ub_cmpval = 1;
2691  return false;
2692  }
2693 
2694  /* All other cases indicate overlapping partitions. */
2695  *lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2696  outer_lb, inner_lb);
2697  *ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2698  outer_ub, inner_ub);
2699  return true;
2700 }
2701 
2702 /*
2703  * get_merged_range_bounds
2704  * Given the bounds of range partitions to be joined, determine the bounds
2705  * of a merged partition produced from the range partitions
2706  *
2707  * *merged_lb and *merged_ub are set to the lower and upper bounds of the
2708  * merged partition.
2709  */
2710 static void
2711 get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2712  Oid *partcollations, JoinType jointype,
2713  PartitionRangeBound *outer_lb,
2714  PartitionRangeBound *outer_ub,
2715  PartitionRangeBound *inner_lb,
2716  PartitionRangeBound *inner_ub,
2717  int lb_cmpval, int ub_cmpval,
2718  PartitionRangeBound *merged_lb,
2719  PartitionRangeBound *merged_ub)
2720 {
2721  Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2722  outer_lb, inner_lb) == lb_cmpval);
2723  Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2724  outer_ub, inner_ub) == ub_cmpval);
2725 
2726  switch (jointype)
2727  {
2728  case JOIN_INNER:
2729  case JOIN_SEMI:
2730 
2731  /*
2732  * An INNER/SEMI join will have the rows that fit both sides, so
2733  * the lower bound of the merged partition will be the higher of
2734  * the two lower bounds, and the upper bound of the merged
2735  * partition will be the lower of the two upper bounds.
2736  */
2737  *merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
2738  *merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
2739  break;
2740 
2741  case JOIN_LEFT:
2742  case JOIN_ANTI:
2743 
2744  /*
2745  * A LEFT/ANTI join will have all the rows from the outer side, so
2746  * the bounds of the merged partition will be the same as the
2747  * outer bounds.
2748  */
2749  *merged_lb = *outer_lb;
2750  *merged_ub = *outer_ub;
2751  break;
2752 
2753  case JOIN_FULL:
2754 
2755  /*
2756  * A FULL join will have all the rows from both sides, so the
2757  * lower bound of the merged partition will be the lower of the
2758  * two lower bounds, and the upper bound of the merged partition
2759  * will be the higher of the two upper bounds.
2760  */
2761  *merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
2762  *merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
2763  break;
2764 
2765  default:
2766  elog(ERROR, "unrecognized join type: %d", (int) jointype);
2767  }
2768 }
2769 
2770 /*
2771  * add_merged_range_bounds
2772  * Add the bounds of a merged partition to the lists of range bounds
2773  */
2774 static void
2775 add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2776  Oid *partcollations,
2777  PartitionRangeBound *merged_lb,
2778  PartitionRangeBound *merged_ub,
2779  int merged_index,
2780  List **merged_datums,
2781  List **merged_kinds,
2782  List **merged_indexes)
2783 {
2784  int cmpval;
2785 
2786  if (!*merged_datums)
2787  {
2788  /* First merged partition */
2789  Assert(!*merged_kinds);
2790  Assert(!*merged_indexes);
2791  cmpval = 1;
2792  }
2793  else
2794  {
2795  PartitionRangeBound prev_ub;
2796 
2797  Assert(*merged_datums);
2798  Assert(*merged_kinds);
2799  Assert(*merged_indexes);
2800 
2801  /* Get the last upper bound. */
2802  prev_ub.index = llast_int(*merged_indexes);
2803  prev_ub.datums = (Datum *) llast(*merged_datums);
2804  prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
2805  prev_ub.lower = false;
2806 
2807  /*
2808  * We pass lower1 = false to partition_rbound_cmp() to prevent it from
2809  * considering the last upper bound to be smaller than the lower bound
2810  * of the merged partition when the values of the two range bounds
2811  * compare equal.
2812  */
2813  cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
2814  merged_lb->datums, merged_lb->kind,
2815  false, &prev_ub);
2816  Assert(cmpval >= 0);
2817  }
2818 
2819  /*
2820  * If the lower bound is higher than the last upper bound, add the lower
2821  * bound with the index as -1 indicating that that is a lower bound; else,
2822  * the last upper bound will be reused as the lower bound of the merged
2823  * partition, so skip this.
2824  */
2825  if (cmpval > 0)
2826  {
2827  *merged_datums = lappend(*merged_datums, merged_lb->datums);
2828  *merged_kinds = lappend(*merged_kinds, merged_lb->kind);
2829  *merged_indexes = lappend_int(*merged_indexes, -1);
2830  }
2831 
2832  /* Add the upper bound and index of the merged partition. */
2833  *merged_datums = lappend(*merged_datums, merged_ub->datums);
2834  *merged_kinds = lappend(*merged_kinds, merged_ub->kind);
2835  *merged_indexes = lappend_int(*merged_indexes, merged_index);
2836 }
2837 
2838 /*
2839  * partitions_are_ordered
2840  * Determine whether the partitions described by 'boundinfo' are ordered,
2841  * that is partitions appearing earlier in the PartitionDesc sequence
2842  * contain partition keys strictly less than those appearing later.
2843  * Also, if NULL values are possible, they must come in the last
2844  * partition defined in the PartitionDesc. 'live_parts' marks which
2845  * partitions we should include when checking the ordering. Partitions
2846  * that do not appear in 'live_parts' are ignored.
2847  *
2848  * If out of order, or there is insufficient info to know the order,
2849  * then we return false.
2850  */
2851 bool
2853 {
2854  Assert(boundinfo != NULL);
2855 
2856  switch (boundinfo->strategy)
2857  {
2859 
2860  /*
2861  * RANGE-type partitioning guarantees that the partitions can be
2862  * scanned in the order that they're defined in the PartitionDesc
2863  * to provide sequential, non-overlapping ranges of tuples.
2864  * However, if a DEFAULT partition exists and it's contained
2865  * within live_parts, then the partitions are not ordered.
2866  */
2867  if (!partition_bound_has_default(boundinfo) ||
2868  !bms_is_member(boundinfo->default_index, live_parts))
2869  return true;
2870  break;
2871 
2873 
2874  /*
2875  * LIST partitioned are ordered providing none of live_parts
2876  * overlap with the partitioned table's interleaved partitions.
2877  */
2878  if (!bms_overlap(live_parts, boundinfo->interleaved_parts))
2879  return true;
2880 
2881  break;
2883  break;
2884  }
2885 
2886  return false;
2887 }
2888 
2889 /*
2890  * check_new_partition_bound
2891  *
2892  * Checks if the new partition's bound overlaps any of the existing partitions
2893  * of parent. Also performs additional checks as necessary per strategy.
2894  */
2895 void
2897  PartitionBoundSpec *spec, ParseState *pstate)
2898 {
2900  PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
2901  PartitionBoundInfo boundinfo = partdesc->boundinfo;
2902  int with = -1;
2903  bool overlap = false;
2904  int overlap_location = -1;
2905 
2906  if (spec->is_default)
2907  {
2908  /*
2909  * The default partition bound never conflicts with any other
2910  * partition's; if that's what we're attaching, the only possible
2911  * problem is that one already exists, so check for that and we're
2912  * done.
2913  */
2914  if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
2915  return;
2916 
2917  /* Default partition already exists, error out. */
2918  ereport(ERROR,
2919  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2920  errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
2921  relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
2922  parser_errposition(pstate, spec->location)));
2923  }
2924 
2925  switch (key->strategy)
2926  {
2928  {
2930  Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
2931 
2932  if (partdesc->nparts > 0)
2933  {
2934  int greatest_modulus;
2935  int remainder;
2936  int offset;
2937 
2938  /*
2939  * Check rule that every modulus must be a factor of the
2940  * next larger modulus. (For example, if you have a bunch
2941  * of partitions that all have modulus 5, you can add a
2942  * new partition with modulus 10 or a new partition with
2943  * modulus 15, but you cannot add both a partition with
2944  * modulus 10 and a partition with modulus 15, because 10
2945  * is not a factor of 15.) We need only check the next
2946  * smaller and next larger existing moduli, relying on
2947  * previous enforcement of this rule to be sure that the
2948  * rest are in line.
2949  */
2950 
2951  /*
2952  * Get the greatest (modulus, remainder) pair contained in
2953  * boundinfo->datums that is less than or equal to the
2954  * (spec->modulus, spec->remainder) pair.
2955  */
2956  offset = partition_hash_bsearch(boundinfo,
2957  spec->modulus,
2958  spec->remainder);
2959  if (offset < 0)
2960  {
2961  int next_modulus;
2962 
2963  /*
2964  * All existing moduli are greater or equal, so the
2965  * new one must be a factor of the smallest one, which
2966  * is first in the boundinfo.
2967  */
2968  next_modulus = DatumGetInt32(boundinfo->datums[0][0]);
2969  if (next_modulus % spec->modulus != 0)
2970  ereport(ERROR,
2971  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2972  errmsg("every hash partition modulus must be a factor of the next larger modulus"),
2973  errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
2974  spec->modulus, next_modulus,
2975  get_rel_name(partdesc->oids[0]))));
2976  }
2977  else
2978  {
2979  int prev_modulus;
2980 
2981  /*
2982  * We found the largest (modulus, remainder) pair less
2983  * than or equal to the new one. That modulus must be
2984  * a divisor of, or equal to, the new modulus.
2985  */
2986  prev_modulus = DatumGetInt32(boundinfo->datums[offset][0]);
2987 
2988  if (spec->modulus % prev_modulus != 0)
2989  ereport(ERROR,
2990  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2991  errmsg("every hash partition modulus must be a factor of the next larger modulus"),
2992  errdetail("The new modulus %d is not divisible by %d, the modulus of existing partition \"%s\".",
2993  spec->modulus,
2994  prev_modulus,
2995  get_rel_name(partdesc->oids[offset]))));
2996 
2997  if (offset + 1 < boundinfo->ndatums)
2998  {
2999  int next_modulus;
3000 
3001  /*
3002  * Look at the next higher (modulus, remainder)
3003  * pair. That could have the same modulus and a
3004  * larger remainder than the new pair, in which
3005  * case we're good. If it has a larger modulus,
3006  * the new modulus must divide that one.
3007  */
3008  next_modulus = DatumGetInt32(boundinfo->datums[offset + 1][0]);
3009 
3010  if (next_modulus % spec->modulus != 0)
3011  ereport(ERROR,
3012  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3013  errmsg("every hash partition modulus must be a factor of the next larger modulus"),
3014  errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
3015  spec->modulus, next_modulus,
3016  get_rel_name(partdesc->oids[offset + 1]))));
3017  }
3018  }
3019 
3020  greatest_modulus = boundinfo->nindexes;
3021  remainder = spec->remainder;
3022 
3023  /*
3024  * Normally, the lowest remainder that could conflict with
3025  * the new partition is equal to the remainder specified
3026  * for the new partition, but when the new partition has a
3027  * modulus higher than any used so far, we need to adjust.
3028  */
3029  if (remainder >= greatest_modulus)
3030  remainder = remainder % greatest_modulus;
3031 
3032  /* Check every potentially-conflicting remainder. */
3033  do
3034  {
3035  if (boundinfo->indexes[remainder] != -1)
3036  {
3037  overlap = true;
3038  overlap_location = spec->location;
3039  with = boundinfo->indexes[remainder];
3040  break;
3041  }
3042  remainder += spec->modulus;
3043  } while (remainder < greatest_modulus);
3044  }
3045 
3046  break;
3047  }
3048 
3050  {
3052 
3053  if (partdesc->nparts > 0)
3054  {
3055  ListCell *cell;
3056 
3057  Assert(boundinfo &&
3058  boundinfo->strategy == PARTITION_STRATEGY_LIST &&
3059  (boundinfo->ndatums > 0 ||
3060  partition_bound_accepts_nulls(boundinfo) ||
3061  partition_bound_has_default(boundinfo)));
3062 
3063  foreach(cell, spec->listdatums)
3064  {
3065  Const *val = lfirst_node(Const, cell);
3066 
3067  overlap_location = val->location;
3068  if (!val->constisnull)
3069  {
3070  int offset;
3071  bool equal;
3072 
3073  offset = partition_list_bsearch(&key->partsupfunc[0],
3074  key->partcollation,
3075  boundinfo,
3076  val->constvalue,
3077  &equal);
3078  if (offset >= 0 && equal)
3079  {
3080  overlap = true;
3081  with = boundinfo->indexes[offset];
3082  break;
3083  }
3084  }
3085  else if (partition_bound_accepts_nulls(boundinfo))
3086  {
3087  overlap = true;
3088  with = boundinfo->null_index;
3089  break;
3090  }
3091  }
3092  }
3093 
3094  break;
3095  }
3096 
3098  {
3100  *upper;
3101  int cmpval;
3102 
3104  lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
3105  upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
3106 
3107  /*
3108  * First check if the resulting range would be empty with
3109  * specified lower and upper bounds. partition_rbound_cmp
3110  * cannot return zero here, since the lower-bound flags are
3111  * different.
3112  */
3113  cmpval = partition_rbound_cmp(key->partnatts,
3114  key->partsupfunc,
3115  key->partcollation,
3116  lower->datums, lower->kind,
3117  true, upper);
3118  Assert(cmpval != 0);
3119  if (cmpval > 0)
3120  {
3121  /* Point to problematic key in the lower datums list. */
3122  PartitionRangeDatum *datum = list_nth(spec->lowerdatums,
3123  cmpval - 1);
3124 
3125  ereport(ERROR,
3126  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3127  errmsg("empty range bound specified for partition \"%s\"",
3128  relname),
3129  errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
3132  parser_errposition(pstate, datum->location)));
3133  }
3134 
3135  if (partdesc->nparts > 0)
3136  {
3137  int offset;
3138 
3139  Assert(boundinfo &&
3140  boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
3141  (boundinfo->ndatums > 0 ||
3142  partition_bound_has_default(boundinfo)));
3143 
3144  /*
3145  * Test whether the new lower bound (which is treated
3146  * inclusively as part of the new partition) lies inside
3147  * an existing partition, or in a gap.
3148  *
3149  * If it's inside an existing partition, the bound at
3150  * offset + 1 will be the upper bound of that partition,
3151  * and its index will be >= 0.
3152  *
3153  * If it's in a gap, the bound at offset + 1 will be the
3154  * lower bound of the next partition, and its index will
3155  * be -1. This is also true if there is no next partition,
3156  * since the index array is initialised with an extra -1
3157  * at the end.
3158  */
3159  offset = partition_range_bsearch(key->partnatts,
3160  key->partsupfunc,
3161  key->partcollation,
3162  boundinfo, lower,
3163  &cmpval);
3164 
3165  if (boundinfo->indexes[offset + 1] < 0)
3166  {
3167  /*
3168  * Check that the new partition will fit in the gap.
3169  * For it to fit, the new upper bound must be less
3170  * than or equal to the lower bound of the next
3171  * partition, if there is one.
3172  */
3173  if (offset + 1 < boundinfo->ndatums)
3174  {
3175  Datum *datums;
3177  bool is_lower;
3178 
3179  datums = boundinfo->datums[offset + 1];
3180  kind = boundinfo->kind[offset + 1];
3181  is_lower = (boundinfo->indexes[offset + 1] == -1);
3182 
3183  cmpval = partition_rbound_cmp(key->partnatts,
3184  key->partsupfunc,
3185  key->partcollation,
3186  datums, kind,
3187  is_lower, upper);
3188  if (cmpval < 0)
3189  {
3190  /*
3191  * Point to problematic key in the upper
3192  * datums list.
3193  */
3194  PartitionRangeDatum *datum =
3195  list_nth(spec->upperdatums, abs(cmpval) - 1);
3196 
3197  /*
3198  * The new partition overlaps with the
3199  * existing partition between offset + 1 and
3200  * offset + 2.
3201  */
3202  overlap = true;
3203  overlap_location = datum->location;
3204  with = boundinfo->indexes[offset + 2];
3205  }
3206  }
3207  }
3208  else
3209  {
3210  /*
3211  * The new partition overlaps with the existing
3212  * partition between offset and offset + 1.
3213  */
3214  PartitionRangeDatum *datum;
3215 
3216  /*
3217  * Point to problematic key in the list of lower
3218  * datums; if we have equality, point to the first
3219  * one.
3220  */
3221  datum = cmpval == 0 ? linitial(spec->lowerdatums) :
3222  list_nth(spec->lowerdatums, abs(cmpval) - 1);
3223  overlap = true;
3224  overlap_location = datum->location;
3225  with = boundinfo->indexes[offset + 1];
3226  }
3227  }
3228 
3229  break;
3230  }
3231  }
3232 
3233  if (overlap)
3234  {
3235  Assert(with >= 0);
3236  ereport(ERROR,
3237  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3238  errmsg("partition \"%s\" would overlap partition \"%s\"",
3239  relname, get_rel_name(partdesc->oids[with])),
3240  parser_errposition(pstate, overlap_location)));
3241  }
3242 }
3243 
3244 /*
3245  * check_default_partition_contents
3246  *
3247  * This function checks if there exists a row in the default partition that
3248  * would properly belong to the new partition being added. If it finds one,
3249  * it throws an error.
3250  */
3251 void
3253  PartitionBoundSpec *new_spec)
3254 {
3255  List *new_part_constraints;
3256  List *def_part_constraints;
3257  List *all_parts;
3258  ListCell *lc;
3259 
3260  new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
3261  ? get_qual_for_list(parent, new_spec)
3262  : get_qual_for_range(parent, new_spec, false);
3263  def_part_constraints =
3264  get_proposed_default_constraint(new_part_constraints);
3265 
3266  /*
3267  * Map the Vars in the constraint expression from parent's attnos to
3268  * default_rel's.
3269  */
3270  def_part_constraints =
3271  map_partition_varattnos(def_part_constraints, 1, default_rel,
3272  parent);
3273 
3274  /*
3275  * If the existing constraints on the default partition imply that it will
3276  * not contain any row that would belong to the new partition, we can
3277  * avoid scanning the default partition.
3278  */
3279  if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
3280  {
3281  ereport(DEBUG1,
3282  (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3283  RelationGetRelationName(default_rel))));
3284  return;
3285  }
3286 
3287  /*
3288  * Scan the default partition and its subpartitions, and check for rows
3289  * that do not satisfy the revised partition constraints.
3290  */
3291  if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3292  all_parts = find_all_inheritors(RelationGetRelid(default_rel),
3293  AccessExclusiveLock, NULL);
3294  else
3295  all_parts = list_make1_oid(RelationGetRelid(default_rel));
3296 
3297  foreach(lc, all_parts)
3298  {
3299  Oid part_relid = lfirst_oid(lc);
3300  Relation part_rel;
3301  Expr *partition_constraint;
3302  EState *estate;
3303  ExprState *partqualstate = NULL;
3304  Snapshot snapshot;
3305  ExprContext *econtext;
3306  TableScanDesc scan;
3307  MemoryContext oldCxt;
3308  TupleTableSlot *tupslot;
3309 
3310  /* Lock already taken above. */
3311  if (part_relid != RelationGetRelid(default_rel))
3312  {
3313  part_rel = table_open(part_relid, NoLock);
3314 
3315  /*
3316  * Map the Vars in the constraint expression from default_rel's
3317  * the sub-partition's.
3318  */
3319  partition_constraint = make_ands_explicit(def_part_constraints);
3320  partition_constraint = (Expr *)
3321  map_partition_varattnos((List *) partition_constraint, 1,
3322  part_rel, default_rel);
3323 
3324  /*
3325  * If the partition constraints on default partition child imply
3326  * that it will not contain any row that would belong to the new
3327  * partition, we can avoid scanning the child table.
3328  */
3330  def_part_constraints))
3331  {
3332  ereport(DEBUG1,
3333  (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3334  RelationGetRelationName(part_rel))));
3335 
3336  table_close(part_rel, NoLock);
3337  continue;
3338  }
3339  }
3340  else
3341  {
3342  part_rel = default_rel;
3343  partition_constraint = make_ands_explicit(def_part_constraints);
3344  }
3345 
3346  /*
3347  * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
3348  * scanned.
3349  */
3350  if (part_rel->rd_rel->relkind != RELKIND_RELATION)
3351  {
3352  if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
3353  ereport(WARNING,
3354  (errcode(ERRCODE_CHECK_VIOLATION),
3355  errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
3356  RelationGetRelationName(part_rel),
3357  RelationGetRelationName(default_rel))));
3358 
3359  if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3360  table_close(part_rel, NoLock);
3361 
3362  continue;
3363  }
3364 
3365  estate = CreateExecutorState();
3366 
3367  /* Build expression execution states for partition check quals */
3368  partqualstate = ExecPrepareExpr(partition_constraint, estate);
3369 
3370  econtext = GetPerTupleExprContext(estate);
3371  snapshot = RegisterSnapshot(GetLatestSnapshot());
3372  tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
3373  scan = table_beginscan(part_rel, snapshot, 0, NULL);
3374 
3375  /*
3376  * Switch to per-tuple memory context and reset it for each tuple
3377  * produced, so we don't leak memory.
3378  */
3380 
3381  while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
3382  {
3383  econtext->ecxt_scantuple = tupslot;
3384 
3385  if (!ExecCheck(partqualstate, econtext))
3386  ereport(ERROR,
3387  (errcode(ERRCODE_CHECK_VIOLATION),
3388  errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
3389  RelationGetRelationName(default_rel)),
3390  errtable(default_rel)));
3391 
3392  ResetExprContext(econtext);
3394  }
3395 
3396  MemoryContextSwitchTo(oldCxt);
3397  table_endscan(scan);
3398  UnregisterSnapshot(snapshot);
3400  FreeExecutorState(estate);
3401 
3402  if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3403  table_close(part_rel, NoLock); /* keep the lock until commit */
3404  }
3405 }
3406 
3407 /*
3408  * get_hash_partition_greatest_modulus
3409  *
3410  * Returns the greatest modulus of the hash partition bound.
3411  * This is no longer used in the core code, but we keep it around
3412  * in case external modules are using it.
3413  */
3414 int
3416 {
3417  Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
3418  return bound->nindexes;
3419 }
3420 
3421 /*
3422  * make_one_partition_rbound
3423  *
3424  * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
3425  * and a flag telling whether the bound is lower or not. Made into a function
3426  * because there are multiple sites that want to use this facility.
3427  */
3428 static PartitionRangeBound *
3430 {
3431  PartitionRangeBound *bound;
3432  ListCell *lc;
3433  int i;
3434 
3435  Assert(datums != NIL);
3436 
3437  bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
3438  bound->index = index;
3439  bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
3440  bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
3441  sizeof(PartitionRangeDatumKind));
3442  bound->lower = lower;
3443 
3444  i = 0;
3445  foreach(lc, datums)
3446  {
3448 
3449  /* What's contained in this range datum? */
3450  bound->kind[i] = datum->kind;
3451 
3452  if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
3453  {
3454  Const *val = castNode(Const, datum->value);
3455 
3456  if (val->constisnull)
3457  elog(ERROR, "invalid range bound datum");
3458  bound->datums[i] = val->constvalue;
3459  }
3460 
3461  i++;
3462  }
3463 
3464  return bound;
3465 }
3466 
3467 /*
3468  * partition_rbound_cmp
3469  *
3470  * For two range bounds this decides whether the 1st one (specified by
3471  * datums1, kind1, and lower1) is <, =, or > the bound specified in *b2.
3472  *
3473  * 0 is returned if they are equal, otherwise a non-zero integer whose sign
3474  * indicates the ordering, and whose absolute value gives the 1-based
3475  * partition key number of the first mismatching column.
3476  *
3477  * partnatts, partsupfunc and partcollation give the number of attributes in the
3478  * bounds to be compared, comparison function to be used and the collations of
3479  * attributes, respectively.
3480  *
3481  * Note that if the values of the two range bounds compare equal, then we take
3482  * into account whether they are upper or lower bounds, and an upper bound is
3483  * considered to be smaller than a lower bound. This is important to the way
3484  * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
3485  * structure, which only stores the upper bound of a common boundary between
3486  * two contiguous partitions.
3487  */
3488 static int32
3489 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
3490  Oid *partcollation,
3491  Datum *datums1, PartitionRangeDatumKind *kind1,
3492  bool lower1, PartitionRangeBound *b2)
3493 {
3494  int32 colnum = 0;
3495  int32 cmpval = 0; /* placate compiler */
3496  int i;
3497  Datum *datums2 = b2->datums;
3498  PartitionRangeDatumKind *kind2 = b2->kind;
3499  bool lower2 = b2->lower;
3500 
3501  for (i = 0; i < partnatts; i++)
3502  {
3503  /* Track column number in case we need it for result */
3504  colnum++;
3505 
3506  /*
3507  * First, handle cases where the column is unbounded, which should not
3508  * invoke the comparison procedure, and should not consider any later
3509  * columns. Note that the PartitionRangeDatumKind enum elements
3510  * compare the same way as the values they represent.
3511  */
3512  if (kind1[i] < kind2[i])
3513  return -colnum;
3514  else if (kind1[i] > kind2[i])
3515  return colnum;
3516  else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
3517  {
3518  /*
3519  * The column bounds are both MINVALUE or both MAXVALUE. No later
3520  * columns should be considered, but we still need to compare
3521  * whether they are upper or lower bounds.
3522  */
3523  break;
3524  }
3525 
3526  cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3527  partcollation[i],
3528  datums1[i],
3529  datums2[i]));
3530  if (cmpval != 0)
3531  break;
3532  }
3533 
3534  /*
3535  * If the comparison is anything other than equal, we're done. If they
3536  * compare equal though, we still have to consider whether the boundaries
3537  * are inclusive or exclusive. Exclusive one is considered smaller of the
3538  * two.
3539  */
3540  if (cmpval == 0 && lower1 != lower2)
3541  cmpval = lower1 ? 1 : -1;
3542 
3543  return cmpval == 0 ? 0 : (cmpval < 0 ? -colnum : colnum);
3544 }
3545 
3546 /*
3547  * partition_rbound_datum_cmp
3548  *
3549  * Return whether range bound (specified in rb_datums and rb_kind)
3550  * is <, =, or > partition key of tuple (tuple_datums)
3551  *
3552  * n_tuple_datums, partsupfunc and partcollation give number of attributes in
3553  * the bounds to be compared, comparison function to be used and the collations
3554  * of attributes resp.
3555  */
3556 int32
3557 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
3558  Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
3559  Datum *tuple_datums, int n_tuple_datums)
3560 {
3561  int i;
3562  int32 cmpval = -1;
3563 
3564  for (i = 0; i < n_tuple_datums; i++)
3565  {
3566  if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
3567  return -1;
3568  else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
3569  return 1;
3570 
3571  cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3572  partcollation[i],
3573  rb_datums[i],
3574  tuple_datums[i]));
3575  if (cmpval != 0)
3576  break;
3577  }
3578 
3579  return cmpval;
3580 }
3581 
3582 /*
3583  * partition_hbound_cmp
3584  *
3585  * Compares modulus first, then remainder if modulus is equal.
3586  */
3587 static int32
3588 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
3589 {
3590  if (modulus1 < modulus2)
3591  return -1;
3592  if (modulus1 > modulus2)
3593  return 1;
3594  if (modulus1 == modulus2 && remainder1 != remainder2)
3595  return (remainder1 > remainder2) ? 1 : -1;
3596  return 0;
3597 }
3598 
3599 /*
3600  * partition_list_bsearch
3601  * Returns the index of the greatest bound datum that is less than equal
3602  * to the given value or -1 if all of the bound datums are greater
3603  *
3604  * *is_equal is set to true if the bound datum at the returned index is equal
3605  * to the input value.
3606  */
3607 int
3608 partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3609  PartitionBoundInfo boundinfo,
3610  Datum value, bool *is_equal)
3611 {
3612  int lo,
3613  hi,
3614  mid;
3615 
3616  lo = -1;
3617  hi = boundinfo->ndatums - 1;
3618  while (lo < hi)
3619  {
3620  int32 cmpval;
3621 
3622  mid = (lo + hi + 1) / 2;
3623  cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
3624  partcollation[0],
3625  boundinfo->datums[mid][0],
3626  value));
3627  if (cmpval <= 0)
3628  {
3629  lo = mid;
3630  *is_equal = (cmpval == 0);
3631  if (*is_equal)
3632  break;
3633  }
3634  else
3635  hi = mid - 1;
3636  }
3637 
3638  return lo;
3639 }
3640 
3641 /*
3642  * partition_range_bsearch
3643  * Returns the index of the greatest range bound that is less than or
3644  * equal to the given range bound or -1 if all of the range bounds are
3645  * greater
3646  *
3647  * Upon return from this function, *cmpval is set to 0 if the bound at the
3648  * returned index matches the input range bound exactly, otherwise a
3649  * non-zero integer whose sign indicates the ordering, and whose absolute
3650  * value gives the 1-based partition key number of the first mismatching
3651  * column.
3652  */
3653 static int
3654 partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
3655  Oid *partcollation,
3656  PartitionBoundInfo boundinfo,
3657  PartitionRangeBound *probe, int32 *cmpval)
3658 {
3659  int lo,
3660  hi,
3661  mid;
3662 
3663  lo = -1;
3664  hi = boundinfo->ndatums - 1;
3665  while (lo < hi)
3666  {
3667  mid = (lo + hi + 1) / 2;
3668  *cmpval = partition_rbound_cmp(partnatts, partsupfunc,
3669  partcollation,
3670  boundinfo->datums[mid],
3671  boundinfo->kind[mid],
3672  (boundinfo->indexes[mid] == -1),
3673  probe);
3674  if (*cmpval <= 0)
3675  {
3676  lo = mid;
3677  if (*cmpval == 0)
3678  break;
3679  }
3680  else
3681  hi = mid - 1;
3682  }
3683 
3684  return lo;
3685 }
3686 
3687 /*
3688  * partition_range_datum_bsearch
3689  * Returns the index of the greatest range bound that is less than or
3690  * equal to the given tuple or -1 if all of the range bounds are greater
3691  *
3692  * *is_equal is set to true if the range bound at the returned index is equal
3693  * to the input tuple.
3694  */
3695 int
3696 partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3697  PartitionBoundInfo boundinfo,
3698  int nvalues, Datum *values, bool *is_equal)
3699 {
3700  int lo,
3701  hi,
3702  mid;
3703 
3704  lo = -1;
3705  hi = boundinfo->ndatums - 1;
3706  while (lo < hi)
3707  {
3708  int32 cmpval;
3709 
3710  mid = (lo + hi + 1) / 2;
3711  cmpval = partition_rbound_datum_cmp(partsupfunc,
3712  partcollation,
3713  boundinfo->datums[mid],
3714  boundinfo->kind[mid],
3715  values,
3716  nvalues);
3717  if (cmpval <= 0)
3718  {
3719  lo = mid;
3720  *is_equal = (cmpval == 0);
3721 
3722  if (*is_equal)
3723  break;
3724  }
3725  else
3726  hi = mid - 1;
3727  }
3728 
3729  return lo;
3730 }
3731 
3732 /*
3733  * partition_hash_bsearch
3734  * Returns the index of the greatest (modulus, remainder) pair that is
3735  * less than or equal to the given (modulus, remainder) pair or -1 if
3736  * all of them are greater
3737  */
3738 int
3740  int modulus, int remainder)
3741 {
3742  int lo,
3743  hi,
3744  mid;
3745 
3746  lo = -1;
3747  hi = boundinfo->ndatums - 1;
3748  while (lo < hi)
3749  {
3750  int32 cmpval,
3751  bound_modulus,
3752  bound_remainder;
3753 
3754  mid = (lo + hi + 1) / 2;
3755  bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
3756  bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
3757  cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
3758  modulus, remainder);
3759  if (cmpval <= 0)
3760  {
3761  lo = mid;
3762 
3763  if (cmpval == 0)
3764  break;
3765  }
3766  else
3767  hi = mid - 1;
3768  }
3769 
3770  return lo;
3771 }
3772 
3773 /*
3774  * qsort_partition_hbound_cmp
3775  *
3776  * Hash bounds are sorted by modulus, then by remainder.
3777  */
3778 static int32
3779 qsort_partition_hbound_cmp(const void *a, const void *b)
3780 {
3781  const PartitionHashBound *h1 = (const PartitionHashBound *) a;
3782  const PartitionHashBound *h2 = (const PartitionHashBound *) b;
3783 
3784  return partition_hbound_cmp(h1->modulus, h1->remainder,
3785  h2->modulus, h2->remainder);
3786 }
3787 
3788 /*
3789  * qsort_partition_list_value_cmp
3790  *
3791  * Compare two list partition bound datums.
3792  */
3793 static int32
3794 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
3795 {
3796  Datum val1 = ((const PartitionListValue *) a)->value,
3797  val2 = ((const PartitionListValue *) b)->value;
3799 
3800  return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
3801  key->partcollation[0],
3802  val1, val2));
3803 }
3804 
3805 /*
3806  * qsort_partition_rbound_cmp
3807  *
3808  * Used when sorting range bounds across all range partitions.
3809  */
3810 static int32
3811 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
3812 {
3813  PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
3814  PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
3816 
3817  return compare_range_bounds(key->partnatts, key->partsupfunc,
3818  key->partcollation,
3819  b1, b2);
3820 }
3821 
3822 /*
3823  * get_partition_operator
3824  *
3825  * Return oid of the operator of the given strategy for the given partition
3826  * key column. It is assumed that the partitioning key is of the same type as
3827  * the chosen partitioning opclass, or at least binary-compatible. In the
3828  * latter case, *need_relabel is set to true if the opclass is not of a
3829  * polymorphic type (indicating a RelabelType node needed on top), otherwise
3830  * false.
3831  */
3832 static Oid
3834  bool *need_relabel)
3835 {
3836  Oid operoid;
3837 
3838  /*
3839  * Get the operator in the partitioning opfamily using the opclass'
3840  * declared input type as both left- and righttype.
3841  */
3842  operoid = get_opfamily_member(key->partopfamily[col],
3843  key->partopcintype[col],
3844  key->partopcintype[col],
3845  strategy);
3846  if (!OidIsValid(operoid))
3847  elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
3848  strategy, key->partopcintype[col], key->partopcintype[col],
3849  key->partopfamily[col]);
3850 
3851  /*
3852  * If the partition key column is not of the same type as the operator
3853  * class and not polymorphic, tell caller to wrap the non-Const expression
3854  * in a RelabelType. This matches what parse_coerce.c does.
3855  */
3856  *need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
3857  key->partopcintype[col] != RECORDOID &&
3858  !IsPolymorphicType(key->partopcintype[col]));
3859 
3860  return operoid;
3861 }
3862 
3863 /*
3864  * make_partition_op_expr
3865  * Returns an Expr for the given partition key column with arg1 and
3866  * arg2 as its leftop and rightop, respectively
3867  */
3868 static Expr *
3870  uint16 strategy, Expr *arg1, Expr *arg2)
3871 {
3872  Oid operoid;
3873  bool need_relabel = false;
3874  Expr *result = NULL;
3875 
3876  /* Get the correct btree operator for this partitioning column */
3877  operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
3878 
3879  /*
3880  * Chosen operator may be such that the non-Const operand needs to be
3881  * coerced, so apply the same; see the comment in
3882  * get_partition_operator().
3883  */
3884  if (!IsA(arg1, Const) &&
3885  (need_relabel ||
3886  key->partcollation[keynum] != key->parttypcoll[keynum]))
3887  arg1 = (Expr *) makeRelabelType(arg1,
3888  key->partopcintype[keynum],
3889  -1,
3890  key->partcollation[keynum],
3892 
3893  /* Generate the actual expression */
3894  switch (key->strategy)
3895  {
3897  {
3898  List *elems = (List *) arg2;
3899  int nelems = list_length(elems);
3900 
3901  Assert(nelems >= 1);
3902  Assert(keynum == 0);
3903 
3904  if (nelems > 1 &&
3905  !type_is_array(key->parttypid[keynum]))
3906  {
3907  ArrayExpr *arrexpr;
3908  ScalarArrayOpExpr *saopexpr;
3909 
3910  /* Construct an ArrayExpr for the right-hand inputs */
3911  arrexpr = makeNode(ArrayExpr);
3912  arrexpr->array_typeid =
3913  get_array_type(key->parttypid[keynum]);
3914  arrexpr->array_collid = key->parttypcoll[keynum];
3915  arrexpr->element_typeid = key->parttypid[keynum];
3916  arrexpr->elements = elems;
3917  arrexpr->multidims = false;
3918  arrexpr->location = -1;
3919 
3920  /* Build leftop = ANY (rightop) */
3921  saopexpr = makeNode(ScalarArrayOpExpr);
3922  saopexpr->opno = operoid;
3923  saopexpr->opfuncid = get_opcode(operoid);
3924  saopexpr->hashfuncid = InvalidOid;
3925  saopexpr->negfuncid = InvalidOid;
3926  saopexpr->useOr = true;
3927  saopexpr->inputcollid = key->partcollation[keynum];
3928  saopexpr->args = list_make2(arg1, arrexpr);
3929  saopexpr->location = -1;
3930 
3931  result = (Expr *) saopexpr;
3932  }
3933  else
3934  {
3935  List *elemops = NIL;
3936  ListCell *lc;
3937 
3938  foreach(lc, elems)
3939  {
3940  Expr *elem = lfirst(lc),
3941  *elemop;
3942 
3943  elemop = make_opclause(operoid,
3944  BOOLOID,
3945  false,
3946  arg1, elem,
3947  InvalidOid,
3948  key->partcollation[keynum]);
3949  elemops = lappend(elemops, elemop);
3950  }
3951 
3952  result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
3953  }
3954  break;
3955  }
3956 
3958  result = make_opclause(operoid,
3959  BOOLOID,
3960  false,
3961  arg1, arg2,
3962  InvalidOid,
3963  key->partcollation[keynum]);
3964  break;
3965 
3967  Assert(false);
3968  break;
3969  }
3970 
3971  return result;
3972 }
3973 
3974 /*
3975  * get_qual_for_hash
3976  *
3977  * Returns a CHECK constraint expression to use as a hash partition's
3978  * constraint, given the parent relation and partition bound structure.
3979  *
3980  * The partition constraint for a hash partition is always a call to the
3981  * built-in function satisfies_hash_partition().
3982  */
3983 static List *
3985 {
3987  FuncExpr *fexpr;
3988  Node *relidConst;
3989  Node *modulusConst;
3990  Node *remainderConst;
3991  List *args;
3992  ListCell *partexprs_item;
3993  int i;
3994 
3995  /* Fixed arguments. */
3996  relidConst = (Node *) makeConst(OIDOID,
3997  -1,
3998  InvalidOid,
3999  sizeof(Oid),
4001  false,
4002  true);
4003 
4004  modulusConst = (Node *) makeConst(INT4OID,
4005  -1,
4006  InvalidOid,
4007  sizeof(int32),
4008  Int32GetDatum(spec->modulus),
4009  false,
4010  true);
4011 
4012  remainderConst = (Node *) makeConst(INT4OID,
4013  -1,
4014  InvalidOid,
4015  sizeof(int32),
4016  Int32GetDatum(spec->remainder),
4017  false,
4018  true);
4019 
4020  args = list_make3(relidConst, modulusConst, remainderConst);
4021  partexprs_item = list_head(key->partexprs);
4022 
4023  /* Add an argument for each key column. */
4024  for (i = 0; i < key->partnatts; i++)
4025  {
4026  Node *keyCol;
4027 
4028  /* Left operand */
4029  if (key->partattrs[i] != 0)
4030  {
4031  keyCol = (Node *) makeVar(1,
4032  key->partattrs[i],
4033  key->parttypid[i],
4034  key->parttypmod[i],
4035  key->parttypcoll[i],
4036  0);
4037  }
4038  else
4039  {
4040  keyCol = (Node *) copyObject(lfirst(partexprs_item));
4041  partexprs_item = lnext(key->partexprs, partexprs_item);
4042  }
4043 
4044  args = lappend(args, keyCol);
4045  }
4046 
4047  fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
4048  BOOLOID,
4049  args,
4050  InvalidOid,
4051  InvalidOid,
4053 
4054  return list_make1(fexpr);
4055 }
4056 
4057 /*
4058  * get_qual_for_list
4059  *
4060  * Returns an implicit-AND list of expressions to use as a list partition's
4061  * constraint, given the parent relation and partition bound structure.
4062  *
4063  * The function returns NIL for a default partition when it's the only
4064  * partition since in that case there is no constraint.
4065  */
4066 static List *
4068 {
4070  List *result;
4071  Expr *keyCol;
4072  Expr *opexpr;
4073  NullTest *nulltest;
4074  ListCell *cell;
4075  List *elems = NIL;
4076  bool list_has_null = false;
4077 
4078  /*
4079  * Only single-column list partitioning is supported, so we are worried
4080  * only about the partition key with index 0.
4081  */
4082  Assert(key->partnatts == 1);
4083 
4084  /* Construct Var or expression representing the partition column */
4085  if (key->partattrs[0] != 0)
4086  keyCol = (Expr *) makeVar(1,
4087  key->partattrs[0],
4088  key->parttypid[0],
4089  key->parttypmod[0],
4090  key->parttypcoll[0],
4091  0);
4092  else
4093  keyCol = (Expr *) copyObject(linitial(key->partexprs));
4094 
4095  /*
4096  * For default list partition, collect datums for all the partitions. The
4097  * default partition constraint should check that the partition key is
4098  * equal to none of those.
4099  */
4100  if (spec->is_default)
4101  {
4102  int i;
4103  int ndatums = 0;
4104  PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
4105  PartitionBoundInfo boundinfo = pdesc->boundinfo;
4106 
4107  if (boundinfo)
4108  {
4109  ndatums = boundinfo->ndatums;
4110 
4111  if (partition_bound_accepts_nulls(boundinfo))
4112  list_has_null = true;
4113  }
4114 
4115  /*
4116  * If default is the only partition, there need not be any partition
4117  * constraint on it.
4118  */
4119  if (ndatums == 0 && !list_has_null)
4120  return NIL;
4121 
4122  for (i = 0; i < ndatums; i++)
4123  {
4124  Const *val;
4125 
4126  /*
4127  * Construct Const from known-not-null datum. We must be careful
4128  * to copy the value, because our result has to be able to outlive
4129  * the relcache entry we're copying from.
4130  */
4131  val = makeConst(key->parttypid[0],
4132  key->parttypmod[0],
4133  key->parttypcoll[0],
4134  key->parttyplen[0],
4135  datumCopy(*boundinfo->datums[i],
4136  key->parttypbyval[0],
4137  key->parttyplen[0]),
4138  false, /* isnull */
4139  key->parttypbyval[0]);
4140 
4141  elems = lappend(elems, val);
4142  }
4143  }
4144  else
4145  {
4146  /*
4147  * Create list of Consts for the allowed values, excluding any nulls.
4148  */
4149  foreach(cell, spec->listdatums)
4150  {
4151  Const *val = lfirst_node(Const, cell);
4152 
4153  if (val->constisnull)
4154  list_has_null = true;
4155  else
4156  elems = lappend(elems, copyObject(val));
4157  }
4158  }
4159 
4160  if (elems)
4161  {
4162  /*
4163  * Generate the operator expression from the non-null partition
4164  * values.
4165  */
4167  keyCol, (Expr *) elems);
4168  }
4169  else
4170  {
4171  /*
4172  * If there are no partition values, we don't need an operator
4173  * expression.
4174  */
4175  opexpr = NULL;
4176  }
4177 
4178  if (!list_has_null)
4179  {
4180  /*
4181  * Gin up a "col IS NOT NULL" test that will be ANDed with the main
4182  * expression. This might seem redundant, but the partition routing
4183  * machinery needs it.
4184  */
4185  nulltest = makeNode(NullTest);
4186  nulltest->arg = keyCol;
4187  nulltest->nulltesttype = IS_NOT_NULL;
4188  nulltest->argisrow = false;
4189  nulltest->location = -1;
4190 
4191  result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
4192  }
4193  else
4194  {
4195  /*
4196  * Gin up a "col IS NULL" test that will be OR'd with the main
4197  * expression.
4198  */
4199  nulltest = makeNode(NullTest);
4200  nulltest->arg = keyCol;
4201  nulltest->nulltesttype = IS_NULL;
4202  nulltest->argisrow = false;
4203  nulltest->location = -1;
4204 
4205  if (opexpr)
4206  {
4207  Expr *or;
4208 
4209  or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
4210  result = list_make1(or);
4211  }
4212  else
4213  result = list_make1(nulltest);
4214  }
4215 
4216  /*
4217  * Note that, in general, applying NOT to a constraint expression doesn't
4218  * necessarily invert the set of rows it accepts, because NOT (NULL) is
4219  * NULL. However, the partition constraints we construct here never
4220  * evaluate to NULL, so applying NOT works as intended.
4221  */
4222  if (spec->is_default)
4223  {
4224  result = list_make1(make_ands_explicit(result));
4225  result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
4226  }
4227 
4228  return result;
4229 }
4230 
4231 /*
4232  * get_qual_for_range
4233  *
4234  * Returns an implicit-AND list of expressions to use as a range partition's
4235  * constraint, given the parent relation and partition bound structure.
4236  *
4237  * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
4238  * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
4239  * generate an expression tree of the following form:
4240  *
4241  * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4242  * AND
4243  * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
4244  * AND
4245  * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
4246  *
4247  * It is often the case that a prefix of lower and upper bound tuples contains
4248  * the same values, for example, (al = au), in which case, we will emit an
4249  * expression tree of the following form:
4250  *
4251  * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4252  * AND
4253  * (a = al)
4254  * AND
4255  * (b > bl OR (b = bl AND c >= cl))
4256  * AND
4257  * (b < bu OR (b = bu AND c < cu))
4258  *
4259  * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
4260  * simplified using the fact that any value is greater than MINVALUE and less
4261  * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
4262  * true, and we need not emit any expression for it, and the last line becomes
4263  *
4264  * (b < bu) OR (b = bu), which is simplified to (b <= bu)
4265  *
4266  * In most common cases with only one partition column, say a, the following
4267  * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
4268  *
4269  * For default partition, it returns the negation of the constraints of all
4270  * the other partitions.
4271  *
4272  * External callers should pass for_default as false; we set it to true only
4273  * when recursing.
4274  */
4275 static List *
4277  bool for_default)
4278 {
4279  List *result = NIL;
4280  ListCell *cell1,
4281  *cell2,
4282  *partexprs_item,
4283  *partexprs_item_saved;
4284  int i,
4285  j;
4286  PartitionRangeDatum *ldatum,
4287  *udatum;
4289  Expr *keyCol;
4290  Const *lower_val,
4291  *upper_val;
4292  List *lower_or_arms,
4293  *upper_or_arms;
4294  int num_or_arms,
4295  current_or_arm;
4296  ListCell *lower_or_start_datum,
4297  *upper_or_start_datum;
4298  bool need_next_lower_arm,
4299  need_next_upper_arm;
4300 
4301  if (spec->is_default)
4302  {
4303  List *or_expr_args = NIL;
4304  PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
4305  Oid *inhoids = pdesc->oids;
4306  int nparts = pdesc->nparts,
4307  k;
4308 
4309  for (k = 0; k < nparts; k++)
4310  {
4311  Oid inhrelid = inhoids[k];
4312  HeapTuple tuple;
4313  Datum datum;
4314  PartitionBoundSpec *bspec;
4315 
4316  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
4317  if (!HeapTupleIsValid(tuple))
4318  elog(ERROR, "cache lookup failed for relation %u", inhrelid);
4319 
4320  datum = SysCacheGetAttrNotNull(RELOID, tuple,
4321  Anum_pg_class_relpartbound);
4322  bspec = (PartitionBoundSpec *)
4324  if (!IsA(bspec, PartitionBoundSpec))
4325  elog(ERROR, "expected PartitionBoundSpec");
4326 
4327  if (!bspec->is_default)
4328  {
4329  List *part_qual;
4330 
4331  part_qual = get_qual_for_range(parent, bspec, true);
4332 
4333  /*
4334  * AND the constraints of the partition and add to
4335  * or_expr_args
4336  */
4337  or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
4338  ? makeBoolExpr(AND_EXPR, part_qual, -1)
4339  : linitial(part_qual));
4340  }
4341  ReleaseSysCache(tuple);
4342  }
4343 
4344  if (or_expr_args != NIL)
4345  {
4346  Expr *other_parts_constr;
4347 
4348  /*
4349  * Combine the constraints obtained for non-default partitions
4350  * using OR. As requested, each of the OR's args doesn't include
4351  * the NOT NULL test for partition keys (which is to avoid its
4352  * useless repetition). Add the same now.
4353  */
4354  other_parts_constr =
4357  list_length(or_expr_args) > 1
4358  ? makeBoolExpr(OR_EXPR, or_expr_args,
4359  -1)
4360  : linitial(or_expr_args)),
4361  -1);
4362 
4363  /*
4364  * Finally, the default partition contains everything *NOT*
4365  * contained in the non-default partitions.
4366  */
4367  result = list_make1(makeBoolExpr(NOT_EXPR,
4368  list_make1(other_parts_constr), -1));
4369  }
4370 
4371  return result;
4372  }
4373 
4374  /*
4375  * If it is the recursive call for default, we skip the get_range_nulltest
4376  * to avoid accumulating the NullTest on the same keys for each partition.
4377  */
4378  if (!for_default)
4379  result = get_range_nulltest(key);
4380 
4381  /*
4382  * Iterate over the key columns and check if the corresponding lower and
4383  * upper datums are equal using the btree equality operator for the
4384  * column's type. If equal, we emit single keyCol = common_value
4385  * expression. Starting from the first column for which the corresponding
4386  * lower and upper bound datums are not equal, we generate OR expressions
4387  * as shown in the function's header comment.
4388  */
4389  i = 0;
4390  partexprs_item = list_head(key->partexprs);
4391  partexprs_item_saved = partexprs_item; /* placate compiler */
4392  forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
4393  {
4394  EState *estate;
4395  MemoryContext oldcxt;
4396  Expr *test_expr;
4397  ExprState *test_exprstate;
4398  Datum test_result;
4399  bool isNull;
4400 
4401  ldatum = lfirst_node(PartitionRangeDatum, cell1);
4402  udatum = lfirst_node(PartitionRangeDatum, cell2);
4403 
4404  /*
4405  * Since get_range_key_properties() modifies partexprs_item, and we
4406  * might need to start over from the previous expression in the later
4407  * part of this function, save away the current value.
4408  */
4409  partexprs_item_saved = partexprs_item;
4410 
4411  get_range_key_properties(key, i, ldatum, udatum,
4412  &partexprs_item,
4413  &keyCol,
4414  &lower_val, &upper_val);
4415 
4416  /*
4417  * If either value is NULL, the corresponding partition bound is
4418  * either MINVALUE or MAXVALUE, and we treat them as unequal, because
4419  * even if they're the same, there is no common value to equate the
4420  * key column with.
4421  */
4422  if (!lower_val || !upper_val)
4423  break;
4424 
4425  /* Create the test expression */
4426  estate = CreateExecutorState();
4427  oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
4429  (Expr *) lower_val,
4430  (Expr *) upper_val);
4431  fix_opfuncids((Node *) test_expr);
4432  test_exprstate = ExecInitExpr(test_expr, NULL);
4433  test_result = ExecEvalExprSwitchContext(test_exprstate,
4434  GetPerTupleExprContext(estate),
4435  &isNull);
4436  MemoryContextSwitchTo(oldcxt);
4437  FreeExecutorState(estate);
4438 
4439  /* If not equal, go generate the OR expressions */
4440  if (!DatumGetBool(test_result))
4441  break;
4442 
4443  /*
4444  * The bounds for the last key column can't be equal, because such a
4445  * range partition would never be allowed to be defined (it would have
4446  * an empty range otherwise).
4447  */
4448  if (i == key->partnatts - 1)
4449  elog(ERROR, "invalid range bound specification");
4450 
4451  /* Equal, so generate keyCol = lower_val expression */
4452  result = lappend(result,
4454  keyCol, (Expr *) lower_val));
4455 
4456  i++;
4457  }
4458 
4459  /* First pair of lower_val and upper_val that are not equal. */
4460  lower_or_start_datum = cell1;
4461  upper_or_start_datum = cell2;
4462 
4463  /* OR will have as many arms as there are key columns left. */
4464  num_or_arms = key->partnatts - i;
4465  current_or_arm = 0;
4466  lower_or_arms = upper_or_arms = NIL;
4467  need_next_lower_arm = need_next_upper_arm = true;
4468  while (current_or_arm < num_or_arms)
4469  {
4470  List *lower_or_arm_args = NIL,
4471  *upper_or_arm_args = NIL;
4472 
4473  /* Restart scan of columns from the i'th one */
4474  j = i;
4475  partexprs_item = partexprs_item_saved;
4476 
4477  for_both_cell(cell1, spec->lowerdatums, lower_or_start_datum,
4478  cell2, spec->upperdatums, upper_or_start_datum)
4479  {
4480  PartitionRangeDatum *ldatum_next = NULL,
4481  *udatum_next = NULL;
4482 
4483  ldatum = lfirst_node(PartitionRangeDatum, cell1);
4484  if (lnext(spec->lowerdatums, cell1))
4485  ldatum_next = castNode(PartitionRangeDatum,
4486  lfirst(lnext(spec->lowerdatums, cell1)));
4487  udatum = lfirst_node(PartitionRangeDatum, cell2);
4488  if (lnext(spec->upperdatums, cell2))
4489  udatum_next = castNode(PartitionRangeDatum,
4490  lfirst(lnext(spec->upperdatums, cell2)));
4491  get_range_key_properties(key, j, ldatum, udatum,
4492  &partexprs_item,
4493  &keyCol,
4494  &lower_val, &upper_val);
4495 
4496  if (need_next_lower_arm && lower_val)
4497  {
4498  uint16 strategy;
4499 
4500  /*
4501  * For the non-last columns of this arm, use the EQ operator.
4502  * For the last column of this arm, use GT, unless this is the
4503  * last column of the whole bound check, or the next bound
4504  * datum is MINVALUE, in which case use GE.
4505  */
4506  if (j - i < current_or_arm)
4507  strategy = BTEqualStrategyNumber;
4508  else if (j == key->partnatts - 1 ||
4509  (ldatum_next &&
4510  ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
4511  strategy = BTGreaterEqualStrategyNumber;
4512  else
4513  strategy = BTGreaterStrategyNumber;
4514 
4515  lower_or_arm_args = lappend(lower_or_arm_args,
4517  strategy,
4518  keyCol,
4519  (Expr *) lower_val));
4520  }
4521 
4522  if (need_next_upper_arm && upper_val)
4523  {
4524  uint16 strategy;
4525 
4526  /*
4527  * For the non-last columns of this arm, use the EQ operator.
4528  * For the last column of this arm, use LT, unless the next
4529  * bound datum is MAXVALUE, in which case use LE.
4530  */
4531  if (j - i < current_or_arm)
4532  strategy = BTEqualStrategyNumber;
4533  else if (udatum_next &&
4534  udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
4535  strategy = BTLessEqualStrategyNumber;
4536  else
4537  strategy = BTLessStrategyNumber;
4538 
4539  upper_or_arm_args = lappend(upper_or_arm_args,
4541  strategy,
4542  keyCol,
4543  (Expr *) upper_val));
4544  }
4545 
4546  /*
4547  * Did we generate enough of OR's arguments? First arm considers
4548  * the first of the remaining columns, second arm considers first
4549  * two of the remaining columns, and so on.
4550  */
4551  ++j;
4552  if (j - i > current_or_arm)
4553  {
4554  /*
4555  * We must not emit any more arms if the new column that will
4556  * be considered is unbounded, or this one was.
4557  */
4558  if (!lower_val || !ldatum_next ||
4559  ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4560  need_next_lower_arm = false;
4561  if (!upper_val || !udatum_next ||
4562  udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4563  need_next_upper_arm = false;
4564  break;
4565  }
4566  }
4567 
4568  if (lower_or_arm_args != NIL)
4569  lower_or_arms = lappend(lower_or_arms,
4570  list_length(lower_or_arm_args) > 1
4571  ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
4572  : linitial(lower_or_arm_args));
4573 
4574  if (upper_or_arm_args != NIL)
4575  upper_or_arms = lappend(upper_or_arms,
4576  list_length(upper_or_arm_args) > 1
4577  ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
4578  : linitial(upper_or_arm_args));
4579 
4580  /* If no work to do in the next iteration, break away. */
4581  if (!need_next_lower_arm && !need_next_upper_arm)
4582  break;
4583 
4584  ++current_or_arm;
4585  }
4586 
4587  /*
4588  * Generate the OR expressions for each of lower and upper bounds (if
4589  * required), and append to the list of implicitly ANDed list of
4590  * expressions.
4591  */
4592  if (lower_or_arms != NIL)
4593  result = lappend(result,
4594  list_length(lower_or_arms) > 1
4595  ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
4596  : linitial(lower_or_arms));
4597  if (upper_or_arms != NIL)
4598  result = lappend(result,
4599  list_length(upper_or_arms) > 1
4600  ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
4601  : linitial(upper_or_arms));
4602 
4603  /*
4604  * As noted above, for non-default, we return list with constant TRUE. If
4605  * the result is NIL during the recursive call for default, it implies
4606  * this is the only other partition which can hold every value of the key
4607  * except NULL. Hence we return the NullTest result skipped earlier.
4608  */
4609  if (result == NIL)
4610  result = for_default
4612  : list_make1(makeBoolConst(true, false));
4613 
4614  return result;
4615 }
4616 
4617 /*
4618  * get_range_key_properties
4619  * Returns range partition key information for a given column
4620  *
4621  * This is a subroutine for get_qual_for_range, and its API is pretty
4622  * specialized to that caller.
4623  *
4624  * Constructs an Expr for the key column (returned in *keyCol) and Consts
4625  * for the lower and upper range limits (returned in *lower_val and
4626  * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
4627  * a Const. All of these structures are freshly palloc'd.
4628  *
4629  * *partexprs_item points to the cell containing the next expression in
4630  * the key->partexprs list, or NULL. It may be advanced upon return.
4631  */
4632 static void
4634  PartitionRangeDatum *ldatum,
4635  PartitionRangeDatum *udatum,
4636  ListCell **partexprs_item,
4637  Expr **keyCol,
4638  Const **lower_val, Const **upper_val)
4639 {
4640  /* Get partition key expression for this column */
4641  if (key->partattrs[keynum] != 0)
4642  {
4643  *keyCol = (Expr *) makeVar(1,
4644  key->partattrs[keynum],
4645  key->parttypid[keynum],
4646  key->parttypmod[keynum],
4647  key->parttypcoll[keynum],
4648  0);
4649  }
4650  else
4651  {
4652  if (*partexprs_item == NULL)
4653  elog(ERROR, "wrong number of partition key expressions");
4654  *keyCol = copyObject(lfirst(*partexprs_item));
4655  *partexprs_item = lnext(key->partexprs, *partexprs_item);
4656  }
4657 
4658  /* Get appropriate Const nodes for the bounds */
4659  if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
4660  *lower_val = castNode(Const, copyObject(ldatum->value));
4661  else
4662  *lower_val = NULL;
4663 
4664  if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
4665  *upper_val = castNode(Const, copyObject(udatum->value));
4666  else
4667  *upper_val = NULL;
4668 }
4669 
4670 /*
4671  * get_range_nulltest
4672  *
4673  * A non-default range partition table does not currently allow partition
4674  * keys to be null, so emit an IS NOT NULL expression for each key column.
4675  */
4676 static List *
4678 {
4679  List *result = NIL;
4680  NullTest *nulltest;
4681  ListCell *partexprs_item;
4682  int i;
4683 
4684  partexprs_item = list_head(key->partexprs);
4685  for (i = 0; i < key->partnatts; i++)
4686  {
4687  Expr *keyCol;
4688 
4689  if (key->partattrs[i] != 0)
4690  {
4691  keyCol = (Expr *) makeVar(1,
4692  key->partattrs[i],
4693  key->parttypid[i],
4694  key->parttypmod[i],
4695  key->parttypcoll[i],
4696  0);
4697  }
4698  else
4699  {
4700  if (partexprs_item == NULL)
4701  elog(ERROR, "wrong number of partition key expressions");
4702  keyCol = copyObject(lfirst(partexprs_item));
4703  partexprs_item = lnext(key->partexprs, partexprs_item);
4704  }
4705 
4706  nulltest = makeNode(NullTest);
4707  nulltest->arg = keyCol;
4708  nulltest->nulltesttype = IS_NOT_NULL;
4709  nulltest->argisrow = false;
4710  nulltest->location = -1;
4711  result = lappend(result, nulltest);
4712  }
4713 
4714  return result;
4715 }
4716 
4717 /*
4718  * compute_partition_hash_value
4719  *
4720  * Compute the hash value for given partition key values.
4721  */
4722 uint64
4723 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation,
4724  const Datum *values, const bool *isnull)
4725 {
4726  int i;
4727  uint64 rowHash = 0;
4729 
4730  for (i = 0; i < partnatts; i++)
4731  {
4732  /* Nulls are just ignored */
4733  if (!isnull[i])
4734  {
4735  Datum hash;
4736 
4737  Assert(OidIsValid(partsupfunc[i].fn_oid));
4738 
4739  /*
4740  * Compute hash for each datum value by calling respective
4741  * datatype-specific hash functions of each partition key
4742  * attribute.
4743  */
4744  hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
4745  values[i], seed);
4746 
4747  /* Form a single 64-bit hash value */
4748  rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4749  }
4750  }
4751 
4752  return rowHash;
4753 }
4754 
4755 /*
4756  * satisfies_hash_partition
4757  *
4758  * This is an SQL-callable function for use in hash partition constraints.
4759  * The first three arguments are the parent table OID, modulus, and remainder.
4760  * The remaining arguments are the value of the partitioning columns (or
4761  * expressions); these are hashed and the results are combined into a single
4762  * hash value by calling hash_combine64.
4763  *
4764  * Returns true if remainder produced when this computed single hash value is
4765  * divided by the given modulus is equal to given remainder, otherwise false.
4766  * NB: it's important that this never return null, as the constraint machinery
4767  * would consider that to be a "pass".
4768  *
4769  * See get_qual_for_hash() for usage.
4770  */
4771 Datum
4773 {
4774  typedef struct ColumnsHashData
4775  {
4776  Oid relid;
4777  int nkeys;
4778  Oid variadic_type;
4779  int16 variadic_typlen;
4780  bool variadic_typbyval;
4781  char variadic_typalign;
4782  Oid partcollid[PARTITION_MAX_KEYS];
4783  FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
4784  } ColumnsHashData;
4785  Oid parentId;
4786  int modulus;
4787  int remainder;
4789  ColumnsHashData *my_extra;
4790  uint64 rowHash = 0;
4791 
4792  /* Return false if the parent OID, modulus, or remainder is NULL. */
4793  if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
4794  PG_RETURN_BOOL(false);
4795  parentId = PG_GETARG_OID(0);
4796  modulus = PG_GETARG_INT32(1);
4797  remainder = PG_GETARG_INT32(2);
4798 
4799  /* Sanity check modulus and remainder. */
4800  if (modulus <= 0)
4801  ereport(ERROR,
4802  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4803  errmsg("modulus for hash partition must be an integer value greater than zero")));
4804  if (remainder < 0)
4805  ereport(ERROR,
4806  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4807  errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
4808  if (remainder >= modulus)
4809  ereport(ERROR,
4810  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4811  errmsg("remainder for hash partition must be less than modulus")));
4812 
4813  /*
4814  * Cache hash function information.
4815  */
4816  my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4817  if (my_extra == NULL || my_extra->relid != parentId)
4818  {
4819  Relation parent;
4820  PartitionKey key;
4821  int j;
4822 
4823  /* Open parent relation and fetch partition key info */
4824  parent = relation_open(parentId, AccessShareLock);
4825  key = RelationGetPartitionKey(parent);
4826 
4827  /* Reject parent table that is not hash-partitioned. */
4828  if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
4829  ereport(ERROR,
4830  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4831  errmsg("\"%s\" is not a hash partitioned table",
4832  get_rel_name(parentId))));
4833 
4834  if (!get_fn_expr_variadic(fcinfo->flinfo))
4835  {
4836  int nargs = PG_NARGS() - 3;
4837 
4838  /* complain if wrong number of column values */
4839  if (key->partnatts != nargs)
4840  ereport(ERROR,
4841  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4842  errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4843  key->partnatts, nargs)));
4844 
4845  /* allocate space for our cache */
4846  fcinfo->flinfo->fn_extra =
4847  MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4848  offsetof(ColumnsHashData, partsupfunc) +
4849  sizeof(FmgrInfo) * nargs);
4850  my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4851  my_extra->relid = parentId;
4852  my_extra->nkeys = key->partnatts;
4853  memcpy(my_extra->partcollid, key->partcollation,
4854  key->partnatts * sizeof(Oid));
4855 
4856  /* check argument types and save fmgr_infos */
4857  for (j = 0; j < key->partnatts; ++j)
4858  {
4859  Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
4860 
4861  if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
4862  ereport(ERROR,
4863  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4864  errmsg("column %d of the partition key has type %s, but supplied value is of type %s",
4865  j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
4866 
4867  fmgr_info_copy(&my_extra->partsupfunc[j],
4868  &key->partsupfunc[j],
4869  fcinfo->flinfo->fn_mcxt);
4870  }
4871  }
4872  else
4873  {
4874  ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4875 
4876  /* allocate space for our cache -- just one FmgrInfo in this case */
4877  fcinfo->flinfo->fn_extra =
4878  MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4879  offsetof(ColumnsHashData, partsupfunc) +
4880  sizeof(FmgrInfo));
4881  my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4882  my_extra->relid = parentId;
4883  my_extra->nkeys = key->partnatts;
4884  my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
4885  get_typlenbyvalalign(my_extra->variadic_type,
4886  &my_extra->variadic_typlen,
4887  &my_extra->variadic_typbyval,
4888  &my_extra->variadic_typalign);
4889  my_extra->partcollid[0] = key->partcollation[0];
4890 
4891  /* check argument types */
4892  for (j = 0; j < key->partnatts; ++j)
4893  if (key->parttypid[j] != my_extra->variadic_type)
4894  ereport(ERROR,
4895  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4896  errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4897  j + 1,
4898  format_type_be(key->parttypid[j]),
4899  format_type_be(my_extra->variadic_type))));
4900 
4901  fmgr_info_copy(&my_extra->partsupfunc[0],
4902  &key->partsupfunc[0],
4903  fcinfo->flinfo->fn_mcxt);
4904  }
4905 
4906  /* Hold lock until commit */
4907  relation_close(parent, NoLock);
4908  }
4909 
4910  if (!OidIsValid(my_extra->variadic_type))
4911  {
4912  int nkeys = my_extra->nkeys;
4913  int i;
4914 
4915  /*
4916  * For a non-variadic call, neither the number of arguments nor their
4917  * types can change across calls, so avoid the expense of rechecking
4918  * here.
4919  */
4920 
4921  for (i = 0; i < nkeys; i++)
4922  {
4923  Datum hash;
4924 
4925  /* keys start from fourth argument of function. */
4926  int argno = i + 3;
4927 
4928  if (PG_ARGISNULL(argno))
4929  continue;
4930 
4931  hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
4932  my_extra->partcollid[i],
4933  PG_GETARG_DATUM(argno),
4934  seed);
4935 
4936  /* Form a single 64-bit hash value */
4937  rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4938  }
4939  }
4940  else
4941  {
4942  ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4943  int i;
4944  int nelems;
4945  Datum *datum;
4946  bool *isnull;
4947 
4948  deconstruct_array(variadic_array,
4949  my_extra->variadic_type,
4950  my_extra->variadic_typlen,
4951  my_extra->variadic_typbyval,
4952  my_extra->variadic_typalign,
4953  &datum, &isnull, &nelems);
4954 
4955  /* complain if wrong number of column values */
4956  if (nelems != my_extra->nkeys)
4957  ereport(ERROR,
4958  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4959  errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4960  my_extra->nkeys, nelems)));
4961 
4962  for (i = 0; i < nelems; i++)
4963  {
4964  Datum hash;
4965 
4966  if (isnull[i])
4967  continue;
4968 
4969  hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
4970  my_extra->partcollid[0],
4971  datum[i],
4972  seed);
4973 
4974  /* Form a single 64-bit hash value */
4975  rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4976  }
4977  }
4978 
4979  PG_RETURN_BOOL(rowHash % modulus == remainder);
4980 }
4981 
4982 /*
4983  * check_two_partitions_bounds_range
4984  *
4985  * (function for BY RANGE partitioning)
4986  *
4987  * This is a helper function for check_partitions_for_split() and
4988  * calculate_partition_bound_for_merge().
4989  * This function compares upper bound of first_bound and lower bound of
4990  * second_bound. These bounds should be equal except when
4991  * "defaultPart == true" (this means that one of split partitions is DEFAULT).
4992  * In this case upper bound of first_bound can be less than lower bound of
4993  * second_bound because space between these bounds will be included in
4994  * DEFAULT partition.
4995  *
4996  * parent: partitioned table
4997  * first_name: name of first partition
4998  * first_bound: bound of first partition
4999  * second_name: name of second partition
5000  * second_bound: bound of second partition
5001  * defaultPart: true if one of split partitions is DEFAULT
5002  * pstate: pointer to ParseState struct for determining error position
5003  */
5004 static void
5006  RangeVar *first_name,
5007  PartitionBoundSpec *first_bound,
5008  RangeVar *second_name,
5009  PartitionBoundSpec *second_bound,
5010  bool defaultPart,
5011  ParseState *pstate)
5012 {
5014  PartitionRangeBound *first_upper;
5015  PartitionRangeBound *second_lower;
5016  int cmpval;
5017 
5018  Assert(key->strategy == PARTITION_STRATEGY_RANGE);
5019 
5020  first_upper = make_one_partition_rbound(key, -1, first_bound->upperdatums, false);
5021  second_lower = make_one_partition_rbound(key, -1, second_bound->lowerdatums, true);
5022 
5023  /*
5024  * lower1=false (the second to last argument) for correct comparison of
5025  * lower and upper bounds.
5026  */
5027  cmpval = partition_rbound_cmp(key->partnatts,
5028  key->partsupfunc,
5029  key->partcollation,
5030  second_lower->datums, second_lower->kind,
5031  false, first_upper);
5032  if ((!defaultPart && cmpval) || (defaultPart && cmpval < 0))
5033  {
5034  PartitionRangeDatum *datum = linitial(second_bound->lowerdatums);
5035 
5036  ereport(ERROR,
5037  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5038  errmsg("lower bound of partition \"%s\" conflicts with upper bound of previous partition \"%s\"",
5039  second_name->relname, first_name->relname),
5040  parser_errposition(pstate, datum->location)));
5041  }
5042 }
5043 
5044 /*
5045  * check_partitions_not_overlap_list
5046  *
5047  * (function for BY LIST partitioning)
5048  *
5049  * This is a helper function for check_partitions_for_split().
5050  * Checks that the values of the new partitions do not overlap.
5051  *
5052  * parent: partitioned table
5053  * parts: array of SinglePartitionSpec structs with info about split partitions
5054  * nparts: size of array "parts"
5055  */
5056 static void
5058  SinglePartitionSpec **parts,
5059  int nparts,
5060  ParseState *pstate)
5061 {
5063  int overlap_location = -1;
5064  int i,
5065  j;
5066  SinglePartitionSpec *sps1,
5067  *sps2;
5068  List *overlap;
5069 
5070  Assert(key->strategy == PARTITION_STRATEGY_LIST);
5071 
5072  for (i = 0; i < nparts; i++)
5073  {
5074  sps1 = parts[i];
5075 
5076  for (j = i + 1; j < nparts; j++)
5077  {
5078  sps2 = parts[j];
5079 
5080  /*
5081  * Calculate intersection between values of two partitions.
5082  */
5083  overlap = list_intersection(sps1->bound->listdatums,
5084  sps2->bound->listdatums);
5085  if (list_length(overlap) > 0)
5086  {
5087  Const *val = (Const *) lfirst(list_head(overlap));
5088 
5089  overlap_location = val->location;
5090  ereport(ERROR,
5091  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5092  errmsg("new partition \"%s\" would overlap with another new partition \"%s\"",
5093  sps1->name->relname, sps2->name->relname),
5094  parser_errposition(pstate, overlap_location)));
5095  }
5096  }
5097  }
5098 }
5099 
5100 /*
5101  * get_partition_bound_spec
5102  *
5103  * Returns description of partition with Oid "partOid" and name "name".
5104  *
5105  * partOid: partition Oid
5106  * name: partition name
5107  */
5108 static PartitionBoundSpec *
5110 {
5111  HeapTuple tuple;
5112  Datum datum;
5113  bool isnull;
5114  PartitionBoundSpec *boundspec = NULL;
5115 
5116  /* Try fetching the tuple from the catcache, for speed. */
5117  tuple = SearchSysCache1(RELOID, partOid);
5118  if (!HeapTupleIsValid(tuple))
5119  elog(ERROR, "cache lookup failed for relation \"%s\"",
5120  name->relname);
5121 
5122  datum = SysCacheGetAttr(RELOID, tuple,
5123  Anum_pg_class_relpartbound,
5124  &isnull);
5125  if (isnull)
5126  elog(ERROR, "partition bound for relation \"%s\" is null",
5127  name->relname);
5128 
5129  boundspec = stringToNode(TextDatumGetCString(datum));
5130 
5131  if (!IsA(boundspec, PartitionBoundSpec))
5132  elog(ERROR, "expected PartitionBoundSpec for relation \"%s\"",
5133  name->relname);
5134 
5135  ReleaseSysCache(tuple);
5136  return boundspec;
5137 }
5138 
5139 /*
5140  * check_partition_bounds_for_split_range
5141  *
5142  * (function for BY RANGE partitioning)
5143  *
5144  * Checks that bounds of new partition "spec" are inside bounds of split
5145  * partition (with Oid splitPartOid). If first=true (this means that "spec" is
5146  * the first of new partitions) then lower bound of "spec" should be equal (or
5147  * greater than or equal in case defaultPart=true) to lower bound of split
5148  * partition. If last=true (this means that "spec" is the last of new
5149  * partitions) then upper bound of "spec" should be equal (or less than or
5150  * equal in case defaultPart=true) to upper bound of split partition.
5151  *
5152  * parent: partitioned table
5153  * relname: name of the new partition
5154  * spec: bounds specification of the new partition
5155  * splitPartOid: split partition Oid
5156  * splitPartName: split partition name
5157  * first: true in case new partition "spec" is first of new partitions
5158  * last: true in case new partition "spec" is last of new partitions
5159  * defaultPart: true in case partitioned table has DEFAULT partition
5160  * pstate: pointer to ParseState struct for determine error position
5161  */
5162 static void
5164  char *relname,
5165  PartitionBoundSpec *spec,
5166  Oid splitPartOid,
5167  RangeVar *splitPartName,
5168  bool first,
5169  bool last,
5170  bool defaultPart,
5171  ParseState *pstate)
5172 {
5175  *upper;
5176  int cmpval;
5177 
5178  Assert(key->strategy == PARTITION_STRATEGY_RANGE);
5180 
5181  lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
5182  upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
5183 
5184  /*
5185  * First check if the resulting range would be empty with specified lower
5186  * and upper bounds. partition_rbound_cmp cannot return zero here, since
5187  * the lower-bound flags are different.
5188  */
5189  cmpval = partition_rbound_cmp(key->partnatts,
5190  key->partsupfunc,
5191  key->partcollation,
5192  lower->datums, lower->kind,
5193  true, upper);
5194  Assert(cmpval != 0);
5195  if (cmpval > 0)
5196  {
5197  /* Point to problematic key in the lower datums list. */
5198  PartitionRangeDatum *datum = list_nth(spec->lowerdatums, cmpval - 1);
5199 
5200  ereport(ERROR,
5201  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5202  errmsg("empty range bound specified for partition \"%s\"",
5203  relname),
5204  errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
5207  parser_errposition(pstate, datum->location)));
5208  }
5209 
5210  /* Need to check first and last partitions (from set of new partitions) */
5211  if (first || last)
5212  {
5213  PartitionBoundSpec *split_spec = get_partition_bound_spec(splitPartOid, splitPartName);
5214  bool overlap = false;
5215 
5216  if (first)
5217  {
5218  PartitionRangeBound *split_lower;
5219 
5220  split_lower = make_one_partition_rbound(key, -1, split_spec->lowerdatums, true);
5221 
5222  cmpval = partition_rbound_cmp(key->partnatts,
5223  key->partsupfunc,
5224  key->partcollation,
5225  lower->datums, lower->kind,
5226  true, split_lower);
5227 
5228  /*
5229  * Lower bound of "spec" should be equal (or greater than or equal
5230  * in case defaultPart=true) to lower bound of split partition.
5231  */
5232  if ((!defaultPart && cmpval) || (defaultPart && cmpval < 0))
5233  overlap = true;
5234  }
5235  else
5236  {
5237  PartitionRangeBound *split_upper;
5238 
5239  split_upper = make_one_partition_rbound(key, -1, split_spec->upperdatums, false);
5240 
5241  cmpval = partition_rbound_cmp(key->partnatts,
5242  key->partsupfunc,
5243  key->partcollation,
5244  upper->datums, upper->kind,
5245  false, split_upper);
5246 
5247  /*
5248  * Upper bound of "spec" should be equal (or less than or equal in
5249  * case defaultPart=true) to upper bound of split partition.
5250  */
5251  if ((!defaultPart && cmpval) || (defaultPart && cmpval > 0))
5252  overlap = true;
5253  }
5254 
5255  if (overlap)
5256  {
5257  PartitionRangeDatum *datum;
5258 
5259  datum = list_nth(first ? spec->lowerdatums : spec->upperdatums, abs(cmpval) - 1);
5260 
5261  ereport(ERROR,
5262  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5263  errmsg("%s bound of partition \"%s\" is %s %s bound of split partition",
5264  first ? "lower" : "upper",
5265  relname,
5266  defaultPart ? (first ? "less than" : "greater than") : "not equal to",
5267  first ? "lower" : "upper"),
5268  parser_errposition(pstate, datum->location)));
5269  }
5270  }
5271 }
5272 
5273 /*
5274  * check_partition_bounds_for_split_list
5275  *
5276  * (function for BY LIST partitioning)
5277  *
5278  * Checks that bounds of new partition are inside bounds of split partition
5279  * (with Oid splitPartOid).
5280  *
5281  * parent: partitioned table
5282  * relname: name of the new partition
5283  * spec: bounds specification of the new partition
5284  * splitPartOid: split partition Oid
5285  * pstate: pointer to ParseState struct for determine error position
5286  */
5287 static void
5289  PartitionBoundSpec *spec,
5290  Oid splitPartOid,
5291  ParseState *pstate)
5292 {
5294  PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
5295  PartitionBoundInfo boundinfo = partdesc->boundinfo;
5296  int with = -1;
5297  bool overlap = false;
5298  int overlap_location = -1;
5299  ListCell *cell;
5300 
5301  Assert(key->strategy == PARTITION_STRATEGY_LIST);
5303  Assert(boundinfo && boundinfo->strategy == PARTITION_STRATEGY_LIST);
5304 
5305  /*
5306  * Search each value of new partition "spec" in existing partitions. All
5307  * of them should be in split partition (with Oid splitPartOid).
5308  */
5309  foreach(cell, spec->listdatums)
5310  {
5311  Const *val = lfirst_node(Const, cell);
5312 
5313  overlap_location = val->location;
5314  if (!val->constisnull)
5315  {
5316  int offset;
5317  bool equal;
5318 
5319  offset = partition_list_bsearch(&key->partsupfunc[0],
5320  key->partcollation,
5321  boundinfo,
5322  val->constvalue,
5323  &equal);
5324  if (offset >= 0 && equal)
5325  {
5326  with = boundinfo->indexes[offset];
5327  if (partdesc->oids[with] != splitPartOid)
5328  {
5329  overlap = true;
5330  break;
5331  }
5332  }
5333  else
5334  ereport(ERROR,
5335  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5336  errmsg("new partition \"%s\" cannot have this value because split partition does not have",
5337  relname),
5338  parser_errposition(pstate, overlap_location)));
5339  }
5340  else if (partition_bound_accepts_nulls(boundinfo))
5341  {
5342  with = boundinfo->null_index;
5343  if (partdesc->oids[with] != splitPartOid)
5344  {
5345  overlap = true;
5346  break;
5347  }
5348  }
5349  else
5350  ereport(ERROR,
5351  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5352  errmsg("new partition \"%s\" cannot have NULL value because split partition does not have",
5353  relname),
5354  parser_errposition(pstate, overlap_location)));
5355  }
5356 
5357  if (overlap)
5358  {
5359  Assert(with >= 0);
5360  ereport(ERROR,
5361  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5362  errmsg("new partition \"%s\" would overlap with another (not split) partition \"%s\"",
5363  relname, get_rel_name(partdesc->oids[with])),
5364  parser_errposition(pstate, overlap_location)));
5365  }
5366 }
5367 
5368 /*
5369  * find_value_in_new_partitions_list
5370  *
5371  * (function for BY LIST partitioning)
5372  *
5373  * Function returns true in case any of new partitions contains value "value".
5374  *
5375  * partsupfunc: information about comparison function associated with the partition key
5376  * partcollation: partitioning collation
5377  * parts: pointer to array with new partitions descriptions
5378  * nparts: number of new partitions
5379  * value: the value that we are looking for
5380  * isnull: true if the value that we are looking for is NULL
5381  */
5382 static bool
5384  Oid *partcollation,
5385  SinglePartitionSpec **parts,
5386  int nparts,
5387  Datum value,
5388  bool isnull)
5389 {
5390  ListCell *valptr;
5391  int i;
5392 
5393  for (i = 0; i < nparts; i++)
5394  {
5395  SinglePartitionSpec *sps = parts[i];
5396 
5397  foreach(valptr, sps->bound->listdatums)
5398  {
5399  Const *val = lfirst_node(Const, valptr);
5400 
5401  if (isnull && val->constisnull)
5402  return true;
5403 
5404  if (!isnull && !val->constisnull)
5405  {
5406  if (DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
5407  partcollation[0],
5408  val->constvalue,
5409  value)) == 0)
5410  return true;
5411  }
5412  }
5413  }
5414  return false;
5415 }
5416 
5417 /*
5418  * check_parent_values_in_new_partitions
5419  *
5420  * (function for BY LIST partitioning)
5421  *
5422  * Checks that all values of split partition (with Oid partOid) contains in new
5423  * partitions.
5424  *
5425  * parent: partitioned table
5426  * partOid: split partition Oid
5427  * parts: pointer to array with new partitions descriptions
5428  * nparts: number of new partitions
5429  * pstate: pointer to ParseState struct for determine error position
5430  */
5431 static void
5433  Oid partOid,
5434  SinglePartitionSpec **parts,
5435  int nparts,
5436  ParseState *pstate)
5437 {
5439  PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
5440  PartitionBoundInfo boundinfo = partdesc->boundinfo;
5441  int i;
5442  bool found = true;
5443  bool searchNull = false;
5444  Datum datum = PointerGetDatum(NULL);
5445 
5446  Assert(key->strategy == PARTITION_STRATEGY_LIST);
5447 
5448  /*
5449  * Special processing for NULL value. Search NULL value if the split
5450  * partition (partOid) contains it.
5451  */
5452  if (partition_bound_accepts_nulls(boundinfo) &&
5453  partdesc->oids[boundinfo->null_index] == partOid)
5454  {
5455  if (!find_value_in_new_partitions_list(&key->partsupfunc[0],
5456  key->partcollation, parts, nparts, datum, true))
5457  {
5458  found = false;
5459  searchNull = true;
5460  }
5461  }
5462 
5463  /*
5464  * Search all values of split partition with partOid in PartitionDesc of
5465  * partitioned table.
5466  */
5467  for (i = 0; i < boundinfo->ndatums; i++)
5468  {
5469  if (partdesc->oids[boundinfo->indexes[i]] == partOid)
5470  {
5471  /* We found value that split partition contains. */
5472  datum = boundinfo->datums[i][0];
5473  if (!find_value_in_new_partitions_list(&key->partsupfunc[0],
5474  key->partcollation, parts, nparts, datum, false))
5475  {
5476  found = false;
5477  break;
5478  }
5479  }
5480  }
5481 
5482  if (!found)
5483  {
5484  Const *notFoundVal;
5485 
5486  if (!searchNull)
5487 
5488  /*
5489  * Make Const for getting string representation of not found
5490  * value.
5491  */
5492  notFoundVal = makeConst(key->parttypid[0],
5493  key->parttypmod[0],
5494  key->parttypcoll[0],
5495  key->parttyplen[0],
5496  datum,
5497  false, /* isnull */
5498  key->parttypbyval[0]);
5499 
5500  ereport(ERROR,
5501  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5502  errmsg("new partitions do not have value %s but split partition does",
5503  searchNull ? "NULL" : get_list_partvalue_string(notFoundVal))));
5504  }
5505 }
5506 
5507 /*
5508  * check_partitions_for_split
5509  *
5510  * Checks new partitions for SPLIT PARTITIONS command:
5511  * 1. DEFAULT partition should be one.
5512  * 2. New partitions should have different names
5513  * (with existing partitions too).
5514  * 3. Bounds of new partitions should not overlap with new and existing
5515  * partitions.
5516  * 4. In case split partition is DEFAULT partition, one of new partitions
5517  * should be DEFAULT.
5518  * 5. In case new partitions or existing partitions contains DEFAULT
5519  * partition, new partitions can have any bounds inside split
5520  * partition bound (can be spaces between partitions bounds).
5521  * 6. In case partitioned table does not have DEFAULT partition, DEFAULT
5522  * partition can be defined as one of new partition.
5523  * 7. In case new partitions not contains DEFAULT partition and
5524  * partitioned table does not have DEFAULT partition the following
5525  * should be true: sum bounds of new partitions should be equal
5526  * to bound of split partition.
5527  *
5528  * parent: partitioned table
5529  * splitPartOid: split partition Oid
5530  * splitPartName: split partition name
5531  * list: list of new partitions
5532  * pstate: pointer to ParseState struct for determine error position
5533  */
5534 void
5536  Oid splitPartOid,
5537  RangeVar *splitPartName,
5538  List *partlist,
5539  ParseState *pstate)
5540 {
5541  PartitionKey key;
5542  char strategy;
5543  Oid defaultPartOid;
5544  bool isSplitPartDefault;
5545  bool existsDefaultPart;
5546  ListCell *listptr;
5547  int default_index = -1;
5548  int i,
5549  j;
5550  SinglePartitionSpec **new_parts;
5551  SinglePartitionSpec *spsPrev = NULL;
5552  int nparts = 0;
5553 
5554  key = RelationGetPartitionKey(parent);
5555  strategy = get_partition_strategy(key);
5556 
5557  switch (strategy)
5558  {
5561  {
5562  /*
5563  * Make array new_parts with new partitions except DEFAULT
5564  * partition.
5565  */
5566  new_parts = (SinglePartitionSpec **)
5567  palloc0(list_length(partlist) * sizeof(SinglePartitionSpec *));
5568  i = 0;
5569  foreach(listptr, partlist)
5570  {
5571  SinglePartitionSpec *sps =
5572  (SinglePartitionSpec *) lfirst(listptr);
5573 
5574  if (sps->bound->is_default)
5575  {
5576  if (default_index >= 0)
5577  ereport(ERROR,
5578  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5579  errmsg("DEFAULT partition should be one")),
5580  parser_errposition(pstate, sps->name->location));
5581  default_index = i;
5582  }
5583  else
5584  {
5585  new_parts[nparts++] = sps;
5586  }
5587  i++;
5588  }
5589  }
5590  break;
5591 
5593  ereport(ERROR,
5594  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5595  errmsg("partition of hash-partitioned table cannot be split")));
5596  break;
5597 
5598  default:
5599  elog(ERROR, "unexpected partition strategy: %d",
5600  (int) key->strategy);
5601  break;
5602  }
5603 
5604  if (strategy == PARTITION_STRATEGY_RANGE)
5605  {
5606  PartitionRangeBound **lower_bounds;
5607  SinglePartitionSpec **tmp_new_parts;
5608 
5609  /*
5610  * For simplify check for ranges of new partitions need to sort all
5611  * partitions in ascending order of them bounds (we compare upper
5612  * bound only).
5613  */
5614  lower_bounds = (PartitionRangeBound **)
5615  palloc0(nparts * sizeof(PartitionRangeBound *));
5616 
5617  /* Create array of lower bounds. */
5618  for (i = 0; i < nparts; i++)
5619  {
5620  lower_bounds[i] = make_one_partition_rbound(key, i,
5621  new_parts[i]->bound->lowerdatums, true);
5622  }
5623 
5624  /* Sort array of lower bounds. */
5625  qsort_arg(lower_bounds, nparts, sizeof(PartitionRangeBound *),
5626  qsort_partition_rbound_cmp, (void *) key);
5627 
5628  /* Reorder array of partitions. */
5629  tmp_new_parts = new_parts;
5630  new_parts = (SinglePartitionSpec **)
5631  palloc0(nparts * sizeof(SinglePartitionSpec *));
5632  for (i = 0; i < nparts; i++)
5633  new_parts[i] = tmp_new_parts[lower_bounds[i]->index];
5634 
5635  pfree(tmp_new_parts);
5636  pfree(lower_bounds);
5637  }
5638 
5639  defaultPartOid =
5641 
5642  /* isSplitPartDefault flag: is split partition a DEFAULT partition? */
5643  isSplitPartDefault = (defaultPartOid == splitPartOid);
5644 
5645  if (isSplitPartDefault && default_index < 0)
5646  {
5647  ereport(ERROR,
5648  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5649  errmsg("one partition in the list should be DEFAULT because split partition is DEFAULT")),
5650  parser_errposition(pstate, ((SinglePartitionSpec *) linitial(partlist))->name->location));
5651  }
5652  else if (!isSplitPartDefault && (default_index >= 0) && OidIsValid(defaultPartOid))
5653  {
5654  SinglePartitionSpec *spsDef =
5655  (SinglePartitionSpec *) list_nth(partlist, default_index);
5656 
5657  ereport(ERROR,
5658  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
5659  errmsg("new partition cannot be DEFAULT because DEFAULT partition already exists")),
5660  parser_errposition(pstate, spsDef->name->location));
5661  }
5662 
5663  /* Indicator that partitioned table has (or will have) DEFAULT partition */
5664  existsDefaultPart = OidIsValid(defaultPartOid) || (default_index >= 0);
5665 
5666  for (i = 0; i < nparts; i++)
5667  {
5668  SinglePartitionSpec *sps = new_parts[i];
5669 
5670  if (isSplitPartDefault)
5671  {
5672  /*
5673  * In case split partition is DEFAULT partition we can use any
5674  * free ranges - as when creating a new partition.
5675  */
5676  check_new_partition_bound(sps->name->relname, parent, sps->bound,
5677  pstate);
5678  }
5679  else
5680  {
5681  /*
5682  * Checks that bound of current partition is inside bound of split
5683  * partition. For range partitioning: checks that upper bound of
5684  * previous partition is equal to lower bound of current
5685  * partition. For list partitioning: checks that split partition
5686  * contains all values of current partition.
5687  */
5688  if (strategy == PARTITION_STRATEGY_RANGE)
5689  {
5690  bool first = (i == 0);
5691  bool last = (i == (nparts - 1));
5692 
5694  splitPartOid, splitPartName,
5695  first, last,
5696  existsDefaultPart, pstate);
5697  }
5698  else
5700  sps->bound, splitPartOid, pstate);
5701  }
5702 
5703  /* Ranges of new partitions should not overlap. */
5704  if (strategy == PARTITION_STRATEGY_RANGE && spsPrev)
5705  check_two_partitions_bounds_range(parent, spsPrev->name, spsPrev->bound,
5706  sps->name, sps->bound, existsDefaultPart, pstate);
5707 
5708  spsPrev = sps;
5709 
5710  /* Check: new partitions should have different names. */
5711  for (j = i + 1; j < nparts; j++)
5712  {
5713  SinglePartitionSpec *sps2 = new_parts[j];
5714 
5715  if (equal(sps->name, sps2->name))
5716  ereport(ERROR,
5717  (errcode(ERRCODE_DUPLICATE_TABLE),
5718  errmsg("name \"%s\" is already used", sps2->name->relname)),
5719  parser_errposition(pstate, sps2->name->location));
5720  }
5721  }
5722 
5723  if (strategy == PARTITION_STRATEGY_LIST)
5724  {
5725  /* Values of new partitions should not overlap. */
5726  check_partitions_not_overlap_list(parent, new_parts, nparts,
5727  pstate);
5728 
5729  /*
5730  * Need to check that all values of split partition contains in new
5731  * partitions. Skip this check if DEFAULT partition exists.