PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
partition.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * partition.c
4  * Partitioning related data structures and functions.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/catalog/partition.c
12  *
13  *-------------------------------------------------------------------------
14 */
15 
16 #include "postgres.h"
17 
18 #include "access/heapam.h"
19 #include "access/htup_details.h"
20 #include "access/nbtree.h"
21 #include "access/sysattr.h"
22 #include "catalog/dependency.h"
23 #include "catalog/indexing.h"
24 #include "catalog/objectaddress.h"
25 #include "catalog/partition.h"
26 #include "catalog/pg_collation.h"
27 #include "catalog/pg_inherits.h"
28 #include "catalog/pg_inherits_fn.h"
29 #include "catalog/pg_opclass.h"
30 #include "catalog/pg_type.h"
31 #include "executor/executor.h"
32 #include "miscadmin.h"
33 #include "nodes/makefuncs.h"
34 #include "nodes/nodeFuncs.h"
35 #include "nodes/parsenodes.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/planmain.h"
38 #include "optimizer/var.h"
39 #include "rewrite/rewriteManip.h"
40 #include "storage/lmgr.h"
41 #include "utils/array.h"
42 #include "utils/builtins.h"
43 #include "utils/datum.h"
44 #include "utils/memutils.h"
45 #include "utils/fmgroids.h"
46 #include "utils/inval.h"
47 #include "utils/lsyscache.h"
48 #include "utils/rel.h"
49 #include "utils/ruleutils.h"
50 #include "utils/syscache.h"
51 
52 /*
53  * Information about bounds of a partitioned relation
54  *
55  * A list partition datum that is known to be NULL is never put into the
56  * datums array. Instead, it is tracked using the null_index field.
57  *
58  * In the case of range partitioning, ndatums will typically be far less than
59  * 2 * nparts, because a partition's upper bound and the next partition's lower
60  * bound are the same in most common cases, and we only store one of them (the
61  * upper bound).
62  *
63  * In the case of list partitioning, the indexes array stores one entry for
64  * every datum, which is the index of the partition that accepts a given datum.
65  * In case of range partitioning, it stores one entry per distinct range
66  * datum, which is the index of the partition for which a given datum
67  * is an upper bound.
68  */
69 
70 /* Ternary value to represent what's contained in a range bound datum */
71 typedef enum RangeDatumContent
72 {
73  RANGE_DATUM_FINITE = 0, /* actual datum stored elsewhere */
74  RANGE_DATUM_NEG_INF, /* negative infinity */
75  RANGE_DATUM_POS_INF /* positive infinity */
77 
78 typedef struct PartitionBoundInfoData
79 {
80  char strategy; /* list or range bounds? */
81  int ndatums; /* Length of the datums following array */
82  Datum **datums; /* Array of datum-tuples with key->partnatts
83  * datums each */
84  RangeDatumContent **content; /* what's contained in each range bound
85  * datum? (see the above enum); NULL for
86  * list partitioned tables */
87  int *indexes; /* Partition indexes; one entry per member of
88  * the datums array (plus one if range
89  * partitioned table) */
90  int null_index; /* Index of the null-accepting partition; -1
91  * if there isn't one */
93 
94 #define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
95 
96 /*
97  * When qsort'ing partition bounds after reading from the catalog, each bound
98  * is represented with one of the following structs.
99  */
100 
101 /* One value coming from some (index'th) list partition */
102 typedef struct PartitionListValue
103 {
104  int index;
107 
108 /* One bound of a range partition */
109 typedef struct PartitionRangeBound
110 {
111  int index;
112  Datum *datums; /* range bound datums */
113  RangeDatumContent *content; /* what's contained in each datum? */
114  bool lower; /* this is the lower (vs upper) bound */
116 
117 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
118  void *arg);
119 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
120  void *arg);
121 
122 static Oid get_partition_operator(PartitionKey key, int col,
123  StrategyNumber strategy, bool *need_relabel);
124 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
125  uint16 strategy, Expr *arg1, Expr *arg2);
126 static void get_range_key_properties(PartitionKey key, int keynum,
127  PartitionRangeDatum *ldatum,
128  PartitionRangeDatum *udatum,
129  ListCell **partexprs_item,
130  Expr **keyCol,
131  Const **lower_val, Const **upper_val);
135 
137  List *datums, bool lower);
139  Datum *datums1, RangeDatumContent *content1, bool lower1,
140  PartitionRangeBound *b2);
142  Datum *rb_datums, RangeDatumContent *rb_content,
143  Datum *tuple_datums);
144 
146  PartitionBoundInfo boundinfo,
147  int offset, void *probe, bool probe_is_bound);
149  PartitionBoundInfo boundinfo,
150  void *probe, bool probe_is_bound, bool *is_equal);
151 
152 /*
153  * RelationBuildPartitionDesc
154  * Form rel's partition descriptor
155  *
156  * Not flushed from the cache by RelationClearRelation() unless changed because
157  * of addition or removal of partition.
158  */
159 void
161 {
162  List *inhoids,
163  *partoids;
164  Oid *oids = NULL;
165  List *boundspecs = NIL;
166  ListCell *cell;
167  int i,
168  nparts;
171  MemoryContext oldcxt;
172 
173  int ndatums = 0;
174 
175  /* List partitioning specific */
176  PartitionListValue **all_values = NULL;
177  int null_index = -1;
178 
179  /* Range partitioning specific */
180  PartitionRangeBound **rbounds = NULL;
181 
182  /*
183  * The following could happen in situations where rel has a pg_class entry
184  * but not the pg_partitioned_table entry yet.
185  */
186  if (key == NULL)
187  return;
188 
189  /* Get partition oids from pg_inherits */
191 
192  /* Collect bound spec nodes in a list */
193  i = 0;
194  partoids = NIL;
195  foreach(cell, inhoids)
196  {
197  Oid inhrelid = lfirst_oid(cell);
198  HeapTuple tuple;
199  Datum datum;
200  bool isnull;
201  Node *boundspec;
202 
203  tuple = SearchSysCache1(RELOID, inhrelid);
204  if (!HeapTupleIsValid(tuple))
205  elog(ERROR, "cache lookup failed for relation %u", inhrelid);
206 
207  /*
208  * It is possible that the pg_class tuple of a partition has not been
209  * updated yet to set its relpartbound field. The only case where
210  * this happens is when we open the parent relation to check using its
211  * partition descriptor that a new partition's bound does not overlap
212  * some existing partition.
213  */
214  if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
215  {
216  ReleaseSysCache(tuple);
217  continue;
218  }
219 
220  datum = SysCacheGetAttr(RELOID, tuple,
222  &isnull);
223  Assert(!isnull);
224  boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
225  boundspecs = lappend(boundspecs, boundspec);
226  partoids = lappend_oid(partoids, inhrelid);
227  ReleaseSysCache(tuple);
228  }
229 
230  nparts = list_length(partoids);
231 
232  if (nparts > 0)
233  {
234  oids = (Oid *) palloc(nparts * sizeof(Oid));
235  i = 0;
236  foreach(cell, partoids)
237  oids[i++] = lfirst_oid(cell);
238 
239  /* Convert from node to the internal representation */
240  if (key->strategy == PARTITION_STRATEGY_LIST)
241  {
242  List *non_null_values = NIL;
243 
244  /*
245  * Create a unified list of non-null values across all partitions.
246  */
247  i = 0;
248  null_index = -1;
249  foreach(cell, boundspecs)
250  {
252  lfirst(cell));
253  ListCell *c;
254 
255  if (spec->strategy != PARTITION_STRATEGY_LIST)
256  elog(ERROR, "invalid strategy in partition bound spec");
257 
258  foreach(c, spec->listdatums)
259  {
260  Const *val = castNode(Const, lfirst(c));
261  PartitionListValue *list_value = NULL;
262 
263  if (!val->constisnull)
264  {
265  list_value = (PartitionListValue *)
266  palloc0(sizeof(PartitionListValue));
267  list_value->index = i;
268  list_value->value = val->constvalue;
269  }
270  else
271  {
272  /*
273  * Never put a null into the values array, flag
274  * instead for the code further down below where we
275  * construct the actual relcache struct.
276  */
277  if (null_index != -1)
278  elog(ERROR, "found null more than once");
279  null_index = i;
280  }
281 
282  if (list_value)
283  non_null_values = lappend(non_null_values,
284  list_value);
285  }
286 
287  i++;
288  }
289 
290  ndatums = list_length(non_null_values);
291 
292  /*
293  * Collect all list values in one array. Alongside the value, we
294  * also save the index of partition the value comes from.
295  */
296  all_values = (PartitionListValue **) palloc(ndatums *
297  sizeof(PartitionListValue *));
298  i = 0;
299  foreach(cell, non_null_values)
300  {
301  PartitionListValue *src = lfirst(cell);
302 
303  all_values[i] = (PartitionListValue *)
304  palloc(sizeof(PartitionListValue));
305  all_values[i]->value = src->value;
306  all_values[i]->index = src->index;
307  i++;
308  }
309 
310  qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
311  qsort_partition_list_value_cmp, (void *) key);
312  }
313  else if (key->strategy == PARTITION_STRATEGY_RANGE)
314  {
315  int j,
316  k;
317  PartitionRangeBound **all_bounds,
318  *prev;
319  bool *distinct_indexes;
320 
321  all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
322  sizeof(PartitionRangeBound *));
323  distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
324 
325  /*
326  * Create a unified list of range bounds across all the
327  * partitions.
328  */
329  i = j = 0;
330  foreach(cell, boundspecs)
331  {
333  lfirst(cell));
335  *upper;
336 
337  if (spec->strategy != PARTITION_STRATEGY_RANGE)
338  elog(ERROR, "invalid strategy in partition bound spec");
339 
340  lower = make_one_range_bound(key, i, spec->lowerdatums,
341  true);
342  upper = make_one_range_bound(key, i, spec->upperdatums,
343  false);
344  all_bounds[j] = lower;
345  all_bounds[j + 1] = upper;
346  j += 2;
347  i++;
348  }
349  Assert(j == 2 * nparts);
350 
351  /* Sort all the bounds in ascending order */
352  qsort_arg(all_bounds, 2 * nparts,
353  sizeof(PartitionRangeBound *),
355  (void *) key);
356 
357  /*
358  * Count the number of distinct bounds to allocate an array of
359  * that size.
360  */
361  ndatums = 0;
362  prev = NULL;
363  for (i = 0; i < 2 * nparts; i++)
364  {
365  PartitionRangeBound *cur = all_bounds[i];
366  bool is_distinct = false;
367  int j;
368 
369  /* Is current bound is distinct from the previous? */
370  for (j = 0; j < key->partnatts; j++)
371  {
372  Datum cmpval;
373 
374  if (prev == NULL)
375  {
376  is_distinct = true;
377  break;
378  }
379 
380  /*
381  * If either of them has infinite element, we can't equate
382  * them. Even when both are infinite, they'd have
383  * opposite signs, because only one of cur and prev is a
384  * lower bound).
385  */
386  if (cur->content[j] != RANGE_DATUM_FINITE ||
387  prev->content[j] != RANGE_DATUM_FINITE)
388  {
389  is_distinct = true;
390  break;
391  }
392  cmpval = FunctionCall2Coll(&key->partsupfunc[j],
393  key->partcollation[j],
394  cur->datums[j],
395  prev->datums[j]);
396  if (DatumGetInt32(cmpval) != 0)
397  {
398  is_distinct = true;
399  break;
400  }
401  }
402 
403  /*
404  * Count the current bound if it is distinct from the previous
405  * one. Also, store if the index i contains a distinct bound
406  * that we'd like put in the relcache array.
407  */
408  if (is_distinct)
409  {
410  distinct_indexes[i] = true;
411  ndatums++;
412  }
413  else
414  distinct_indexes[i] = false;
415 
416  prev = cur;
417  }
418 
419  /*
420  * Finally save them in an array from where they will be copied
421  * into the relcache.
422  */
423  rbounds = (PartitionRangeBound **) palloc(ndatums *
424  sizeof(PartitionRangeBound *));
425  k = 0;
426  for (i = 0; i < 2 * nparts; i++)
427  {
428  if (distinct_indexes[i])
429  rbounds[k++] = all_bounds[i];
430  }
431  Assert(k == ndatums);
432  }
433  else
434  elog(ERROR, "unexpected partition strategy: %d",
435  (int) key->strategy);
436  }
437 
438  /* Now build the actual relcache partition descriptor */
442  oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
443 
444  result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
445  result->nparts = nparts;
446  if (nparts > 0)
447  {
448  PartitionBoundInfo boundinfo;
449  int *mapping;
450  int next_index = 0;
451 
452  result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
453 
454  boundinfo = (PartitionBoundInfoData *)
456  boundinfo->strategy = key->strategy;
457  boundinfo->ndatums = ndatums;
458  boundinfo->null_index = -1;
459  boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
460 
461  /* Initialize mapping array with invalid values */
462  mapping = (int *) palloc(sizeof(int) * nparts);
463  for (i = 0; i < nparts; i++)
464  mapping[i] = -1;
465 
466  switch (key->strategy)
467  {
469  {
470  boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
471 
472  /*
473  * Copy values. Indexes of individual values are mapped
474  * to canonical values so that they match for any two list
475  * partitioned tables with same number of partitions and
476  * same lists per partition. One way to canonicalize is
477  * to assign the index in all_values[] of the smallest
478  * value of each partition, as the index of all of the
479  * partition's values.
480  */
481  for (i = 0; i < ndatums; i++)
482  {
483  boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
484  boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
485  key->parttypbyval[0],
486  key->parttyplen[0]);
487 
488  /* If the old index has no mapping, assign one */
489  if (mapping[all_values[i]->index] == -1)
490  mapping[all_values[i]->index] = next_index++;
491 
492  boundinfo->indexes[i] = mapping[all_values[i]->index];
493  }
494 
495  /*
496  * If null-accepting partition has no mapped index yet,
497  * assign one. This could happen if such partition
498  * accepts only null and hence not covered in the above
499  * loop which only handled non-null values.
500  */
501  if (null_index != -1)
502  {
503  Assert(null_index >= 0);
504  if (mapping[null_index] == -1)
505  mapping[null_index] = next_index++;
506  boundinfo->null_index = mapping[null_index];
507  }
508 
509  /* All partition must now have a valid mapping */
510  Assert(next_index == nparts);
511  break;
512  }
513 
515  {
516  boundinfo->content = (RangeDatumContent **) palloc(ndatums *
517  sizeof(RangeDatumContent *));
518  boundinfo->indexes = (int *) palloc((ndatums + 1) *
519  sizeof(int));
520 
521  for (i = 0; i < ndatums; i++)
522  {
523  int j;
524 
525  boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
526  sizeof(Datum));
527  boundinfo->content[i] = (RangeDatumContent *)
528  palloc(key->partnatts *
529  sizeof(RangeDatumContent));
530  for (j = 0; j < key->partnatts; j++)
531  {
532  if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
533  boundinfo->datums[i][j] =
534  datumCopy(rbounds[i]->datums[j],
535  key->parttypbyval[j],
536  key->parttyplen[j]);
537  /* Remember, we are storing the tri-state value. */
538  boundinfo->content[i][j] = rbounds[i]->content[j];
539  }
540 
541  /*
542  * There is no mapping for invalid indexes.
543  *
544  * Any lower bounds in the rbounds array have invalid
545  * indexes assigned, because the values between the
546  * previous bound (if there is one) and this (lower)
547  * bound are not part of the range of any existing
548  * partition.
549  */
550  if (rbounds[i]->lower)
551  boundinfo->indexes[i] = -1;
552  else
553  {
554  int orig_index = rbounds[i]->index;
555 
556  /* If the old index has no mapping, assign one */
557  if (mapping[orig_index] == -1)
558  mapping[orig_index] = next_index++;
559 
560  boundinfo->indexes[i] = mapping[orig_index];
561  }
562  }
563  boundinfo->indexes[i] = -1;
564  break;
565  }
566 
567  default:
568  elog(ERROR, "unexpected partition strategy: %d",
569  (int) key->strategy);
570  }
571 
572  result->boundinfo = boundinfo;
573 
574  /*
575  * Now assign OIDs from the original array into mapped indexes of the
576  * result array. Order of OIDs in the former is defined by the
577  * catalog scan that retrieved them, whereas that in the latter is
578  * defined by canonicalized representation of the list values or the
579  * range bounds.
580  */
581  for (i = 0; i < nparts; i++)
582  result->oids[mapping[i]] = oids[i];
583  pfree(mapping);
584  }
585 
586  MemoryContextSwitchTo(oldcxt);
587  rel->rd_partdesc = result;
588 }
589 
590 /*
591  * Are two partition bound collections logically equal?
592  *
593  * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
594  * This is also useful when b1 and b2 are bound collections of two separate
595  * relations, respectively, because PartitionBoundInfo is a canonical
596  * representation of partition bounds.
597  */
598 bool
601 {
602  int i;
603 
604  if (b1->strategy != b2->strategy)
605  return false;
606 
607  if (b1->ndatums != b2->ndatums)
608  return false;
609 
610  if (b1->null_index != b2->null_index)
611  return false;
612 
613  for (i = 0; i < b1->ndatums; i++)
614  {
615  int j;
616 
617  for (j = 0; j < key->partnatts; j++)
618  {
619  /* For range partitions, the bounds might not be finite. */
620  if (b1->content != NULL)
621  {
622  /*
623  * A finite bound always differs from an infinite bound, and
624  * different kinds of infinities differ from each other.
625  */
626  if (b1->content[i][j] != b2->content[i][j])
627  return false;
628 
629  /* Non-finite bounds are equal without further examination. */
630  if (b1->content[i][j] != RANGE_DATUM_FINITE)
631  continue;
632  }
633 
634  /*
635  * Compare the actual values. Note that it would be both incorrect
636  * and unsafe to invoke the comparison operator derived from the
637  * partitioning specification here. It would be incorrect because
638  * we want the relcache entry to be updated for ANY change to the
639  * partition bounds, not just those that the partitioning operator
640  * thinks are significant. It would be unsafe because we might
641  * reach this code in the context of an aborted transaction, and
642  * an arbitrary partitioning operator might not be safe in that
643  * context. datumIsEqual() should be simple enough to be safe.
644  */
645  if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
646  key->parttypbyval[j],
647  key->parttyplen[j]))
648  return false;
649  }
650 
651  if (b1->indexes[i] != b2->indexes[i])
652  return false;
653  }
654 
655  /* There are ndatums+1 indexes in case of range partitions */
656  if (key->strategy == PARTITION_STRATEGY_RANGE &&
657  b1->indexes[i] != b2->indexes[i])
658  return false;
659 
660  return true;
661 }
662 
663 /*
664  * check_new_partition_bound
665  *
666  * Checks if the new partition's bound overlaps any of the existing partitions
667  * of parent. Also performs additional checks as necessary per strategy.
668  */
669 void
670 check_new_partition_bound(char *relname, Relation parent,
671  PartitionBoundSpec *spec)
672 {
674  PartitionDesc partdesc = RelationGetPartitionDesc(parent);
675  ParseState *pstate = make_parsestate(NULL);
676  int with = -1;
677  bool overlap = false;
678 
679  switch (key->strategy)
680  {
682  {
684 
685  if (partdesc->nparts > 0)
686  {
687  PartitionBoundInfo boundinfo = partdesc->boundinfo;
688  ListCell *cell;
689 
690  Assert(boundinfo &&
691  boundinfo->strategy == PARTITION_STRATEGY_LIST &&
692  (boundinfo->ndatums > 0 ||
693  partition_bound_accepts_nulls(boundinfo)));
694 
695  foreach(cell, spec->listdatums)
696  {
697  Const *val = castNode(Const, lfirst(cell));
698 
699  if (!val->constisnull)
700  {
701  int offset;
702  bool equal;
703 
704  offset = partition_bound_bsearch(key, boundinfo,
705  &val->constvalue,
706  true, &equal);
707  if (offset >= 0 && equal)
708  {
709  overlap = true;
710  with = boundinfo->indexes[offset];
711  break;
712  }
713  }
714  else if (partition_bound_accepts_nulls(boundinfo))
715  {
716  overlap = true;
717  with = boundinfo->null_index;
718  break;
719  }
720  }
721  }
722 
723  break;
724  }
725 
727  {
729  *upper;
730 
732  lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
733  upper = make_one_range_bound(key, -1, spec->upperdatums, false);
734 
735  /*
736  * First check if the resulting range would be empty with
737  * specified lower and upper bounds
738  */
739  if (partition_rbound_cmp(key, lower->datums, lower->content, true,
740  upper) >= 0)
741  ereport(ERROR,
742  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
743  errmsg("cannot create range partition with empty range"),
744  parser_errposition(pstate, spec->location)));
745 
746  if (partdesc->nparts > 0)
747  {
748  PartitionBoundInfo boundinfo = partdesc->boundinfo;
749  int offset;
750  bool equal;
751 
752  Assert(boundinfo && boundinfo->ndatums > 0 &&
753  boundinfo->strategy == PARTITION_STRATEGY_RANGE);
754 
755  /*
756  * Test whether the new lower bound (which is treated
757  * inclusively as part of the new partition) lies inside an
758  * existing partition, or in a gap.
759  *
760  * If it's inside an existing partition, the bound at
761  * offset + 1 will be the upper bound of that partition,
762  * and its index will be >= 0.
763  *
764  * If it's in a gap, the bound at offset + 1 will be the
765  * lower bound of the next partition, and its index will be
766  * -1. This is also true if there is no next partition,
767  * since the index array is initialised with an extra -1 at
768  * the end.
769  */
770  offset = partition_bound_bsearch(key, boundinfo, lower,
771  true, &equal);
772 
773  if (boundinfo->indexes[offset + 1] < 0)
774  {
775  /*
776  * Check that the new partition will fit in the gap.
777  * For it to fit, the new upper bound must be less than
778  * or equal to the lower bound of the next partition,
779  * if there is one.
780  */
781  if (offset + 1 < boundinfo->ndatums)
782  {
783  int32 cmpval;
784 
785  cmpval = partition_bound_cmp(key, boundinfo,
786  offset + 1, upper,
787  true);
788  if (cmpval < 0)
789  {
790  /*
791  * The new partition overlaps with the existing
792  * partition between offset + 1 and offset + 2.
793  */
794  overlap = true;
795  with = boundinfo->indexes[offset + 2];
796  }
797  }
798  }
799  else
800  {
801  /*
802  * The new partition overlaps with the existing
803  * partition between offset and offset + 1.
804  */
805  overlap = true;
806  with = boundinfo->indexes[offset + 1];
807  }
808  }
809 
810  break;
811  }
812 
813  default:
814  elog(ERROR, "unexpected partition strategy: %d",
815  (int) key->strategy);
816  }
817 
818  if (overlap)
819  {
820  Assert(with >= 0);
821  ereport(ERROR,
822  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
823  errmsg("partition \"%s\" would overlap partition \"%s\"",
824  relname, get_rel_name(partdesc->oids[with])),
825  parser_errposition(pstate, spec->location)));
826  }
827 }
828 
829 /*
830  * get_partition_parent
831  *
832  * Returns inheritance parent of a partition by scanning pg_inherits
833  *
834  * Note: Because this function assumes that the relation whose OID is passed
835  * as an argument will have precisely one parent, it should only be called
836  * when it is known that the relation is a partition.
837  */
838 Oid
840 {
841  Form_pg_inherits form;
842  Relation catalogRelation;
843  SysScanDesc scan;
844  ScanKeyData key[2];
845  HeapTuple tuple;
846  Oid result;
847 
848  catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
849 
850  ScanKeyInit(&key[0],
852  BTEqualStrategyNumber, F_OIDEQ,
853  ObjectIdGetDatum(relid));
854  ScanKeyInit(&key[1],
856  BTEqualStrategyNumber, F_INT4EQ,
857  Int32GetDatum(1));
858 
859  scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
860  NULL, 2, key);
861 
862  tuple = systable_getnext(scan);
863  if (!HeapTupleIsValid(tuple))
864  elog(ERROR, "could not find tuple for parent of relation %u", relid);
865 
866  form = (Form_pg_inherits) GETSTRUCT(tuple);
867  result = form->inhparent;
868 
869  systable_endscan(scan);
870  heap_close(catalogRelation, AccessShareLock);
871 
872  return result;
873 }
874 
875 /*
876  * get_qual_from_partbound
877  * Given a parser node for partition bound, return the list of executable
878  * expressions as partition constraint
879  */
880 List *
882  PartitionBoundSpec *spec)
883 {
885  List *my_qual = NIL;
886 
887  Assert(key != NULL);
888 
889  switch (key->strategy)
890  {
893  my_qual = get_qual_for_list(key, spec);
894  break;
895 
898  my_qual = get_qual_for_range(key, spec);
899  break;
900 
901  default:
902  elog(ERROR, "unexpected partition strategy: %d",
903  (int) key->strategy);
904  }
905 
906  return my_qual;
907 }
908 
909 /*
910  * map_partition_varattnos - maps varattno of any Vars in expr from the
911  * parent attno to partition attno.
912  *
913  * We must allow for cases where physical attnos of a partition can be
914  * different from the parent's.
915  *
916  * Note: this will work on any node tree, so really the argument and result
917  * should be declared "Node *". But a substantial majority of the callers
918  * are working on Lists, so it's less messy to do the casts internally.
919  */
920 List *
921 map_partition_varattnos(List *expr, int target_varno,
922  Relation partrel, Relation parent)
923 {
924  AttrNumber *part_attnos;
925  bool found_whole_row;
926 
927  if (expr == NIL)
928  return NIL;
929 
930  part_attnos = convert_tuples_by_name_map(RelationGetDescr(partrel),
931  RelationGetDescr(parent),
932  gettext_noop("could not convert row type"));
933  expr = (List *) map_variable_attnos((Node *) expr,
934  target_varno, 0,
935  part_attnos,
936  RelationGetDescr(parent)->natts,
937  &found_whole_row);
938  /* There can never be a whole-row reference here */
939  if (found_whole_row)
940  elog(ERROR, "unexpected whole-row reference found in partition key");
941 
942  return expr;
943 }
944 
945 /*
946  * RelationGetPartitionQual
947  *
948  * Returns a list of partition quals
949  */
950 List *
952 {
953  /* Quick exit */
954  if (!rel->rd_rel->relispartition)
955  return NIL;
956 
957  return generate_partition_qual(rel);
958 }
959 
960 /*
961  * get_partition_qual_relid
962  *
963  * Returns an expression tree describing the passed-in relation's partition
964  * constraint.
965  */
966 Expr *
968 {
969  Relation rel = heap_open(relid, AccessShareLock);
970  Expr *result = NULL;
971  List *and_args;
972 
973  /* Do the work only if this relation is a partition. */
974  if (rel->rd_rel->relispartition)
975  {
976  and_args = generate_partition_qual(rel);
977  if (list_length(and_args) > 1)
978  result = makeBoolExpr(AND_EXPR, and_args, -1);
979  else
980  result = linitial(and_args);
981  }
982 
983  /* Keep the lock. */
984  heap_close(rel, NoLock);
985 
986  return result;
987 }
988 
989 /*
990  * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
991  * append pointer rel to the list 'parents'.
992  */
993 #define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \
994  do\
995  {\
996  int i;\
997  for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
998  {\
999  (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
1000  (parents) = lappend((parents), (rel));\
1001  }\
1002  } while(0)
1003 
1004 /*
1005  * RelationGetPartitionDispatchInfo
1006  * Returns information necessary to route tuples down a partition tree
1007  *
1008  * All the partitions will be locked with lockmode, unless it is NoLock.
1009  * A list of the OIDs of all the leaf partitions of rel is returned in
1010  * *leaf_part_oids.
1011  */
1014  int *num_parted, List **leaf_part_oids)
1015 {
1016  PartitionDispatchData **pd;
1017  List *all_parts = NIL,
1018  *all_parents = NIL,
1019  *parted_rels,
1020  *parted_rel_parents;
1021  ListCell *lc1,
1022  *lc2;
1023  int i,
1024  k,
1025  offset;
1026 
1027  /*
1028  * Lock partitions and make a list of the partitioned ones to prepare
1029  * their PartitionDispatch objects below.
1030  *
1031  * Cannot use find_all_inheritors() here, because then the order of OIDs
1032  * in parted_rels list would be unknown, which does not help, because we
1033  * assign indexes within individual PartitionDispatch in an order that is
1034  * predetermined (determined by the order of OIDs in individual partition
1035  * descriptors).
1036  */
1037  *num_parted = 1;
1038  parted_rels = list_make1(rel);
1039  /* Root partitioned table has no parent, so NULL for parent */
1040  parted_rel_parents = list_make1(NULL);
1041  APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
1042  forboth(lc1, all_parts, lc2, all_parents)
1043  {
1044  Relation partrel = heap_open(lfirst_oid(lc1), lockmode);
1045  Relation parent = lfirst(lc2);
1046  PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1047 
1048  /*
1049  * If this partition is a partitioned table, add its children to the
1050  * end of the list, so that they are processed as well.
1051  */
1052  if (partdesc)
1053  {
1054  (*num_parted)++;
1055  parted_rels = lappend(parted_rels, partrel);
1056  parted_rel_parents = lappend(parted_rel_parents, parent);
1057  APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
1058  }
1059  else
1060  heap_close(partrel, NoLock);
1061 
1062  /*
1063  * We keep the partitioned ones open until we're done using the
1064  * information being collected here (for example, see
1065  * ExecEndModifyTable).
1066  */
1067  }
1068 
1069  /*
1070  * We want to create two arrays - one for leaf partitions and another for
1071  * partitioned tables (including the root table and internal partitions).
1072  * While we only create the latter here, leaf partition array of suitable
1073  * objects (such as, ResultRelInfo) is created by the caller using the
1074  * list of OIDs we return. Indexes into these arrays get assigned in a
1075  * breadth-first manner, whereby partitions of any given level are placed
1076  * consecutively in the respective arrays.
1077  */
1078  pd = (PartitionDispatchData **) palloc(*num_parted *
1079  sizeof(PartitionDispatchData *));
1080  *leaf_part_oids = NIL;
1081  i = k = offset = 0;
1082  forboth(lc1, parted_rels, lc2, parted_rel_parents)
1083  {
1084  Relation partrel = lfirst(lc1);
1085  Relation parent = lfirst(lc2);
1086  PartitionKey partkey = RelationGetPartitionKey(partrel);
1087  TupleDesc tupdesc = RelationGetDescr(partrel);
1088  PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1089  int j,
1090  m;
1091 
1093  pd[i]->reldesc = partrel;
1094  pd[i]->key = partkey;
1095  pd[i]->keystate = NIL;
1096  pd[i]->partdesc = partdesc;
1097  if (parent != NULL)
1098  {
1099  /*
1100  * For every partitioned table other than root, we must store a
1101  * tuple table slot initialized with its tuple descriptor and a
1102  * tuple conversion map to convert a tuple from its parent's
1103  * rowtype to its own. That is to make sure that we are looking at
1104  * the correct row using the correct tuple descriptor when
1105  * computing its partition key for tuple routing.
1106  */
1107  pd[i]->tupslot = MakeSingleTupleTableSlot(tupdesc);
1109  tupdesc,
1110  gettext_noop("could not convert row type"));
1111  }
1112  else
1113  {
1114  /* Not required for the root partitioned table */
1115  pd[i]->tupslot = NULL;
1116  pd[i]->tupmap = NULL;
1117  }
1118  pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
1119 
1120  /*
1121  * Indexes corresponding to the internal partitions are multiplied by
1122  * -1 to distinguish them from those of leaf partitions. Encountering
1123  * an index >= 0 means we found a leaf partition, which is immediately
1124  * returned as the partition we are looking for. A negative index
1125  * means we found a partitioned table, whose PartitionDispatch object
1126  * is located at the above index multiplied back by -1. Using the
1127  * PartitionDispatch object, search is continued further down the
1128  * partition tree.
1129  */
1130  m = 0;
1131  for (j = 0; j < partdesc->nparts; j++)
1132  {
1133  Oid partrelid = partdesc->oids[j];
1134 
1135  if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
1136  {
1137  *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
1138  pd[i]->indexes[j] = k++;
1139  }
1140  else
1141  {
1142  /*
1143  * offset denotes the number of partitioned tables of upper
1144  * levels including those of the current level. Any partition
1145  * of this table must belong to the next level and hence will
1146  * be placed after the last partitioned table of this level.
1147  */
1148  pd[i]->indexes[j] = -(1 + offset + m);
1149  m++;
1150  }
1151  }
1152  i++;
1153 
1154  /*
1155  * This counts the number of partitioned tables at upper levels
1156  * including those of the current level.
1157  */
1158  offset += m;
1159  }
1160 
1161  return pd;
1162 }
1163 
1164 /* Module-local functions */
1165 
1166 /*
1167  * get_partition_operator
1168  *
1169  * Return oid of the operator of given strategy for a given partition key
1170  * column.
1171  */
1172 static Oid
1174  bool *need_relabel)
1175 {
1176  Oid operoid;
1177 
1178  /*
1179  * First check if there exists an operator of the given strategy, with
1180  * this column's type as both its lefttype and righttype, in the
1181  * partitioning operator family specified for the column.
1182  */
1183  operoid = get_opfamily_member(key->partopfamily[col],
1184  key->parttypid[col],
1185  key->parttypid[col],
1186  strategy);
1187 
1188  /*
1189  * If one doesn't exist, we must resort to using an operator in the same
1190  * operator family but with the operator class declared input type. It is
1191  * OK to do so, because the column's type is known to be binary-coercible
1192  * with the operator class input type (otherwise, the operator class in
1193  * question would not have been accepted as the partitioning operator
1194  * class). We must however inform the caller to wrap the non-Const
1195  * expression with a RelabelType node to denote the implicit coercion. It
1196  * ensures that the resulting expression structurally matches similarly
1197  * processed expressions within the optimizer.
1198  */
1199  if (!OidIsValid(operoid))
1200  {
1201  operoid = get_opfamily_member(key->partopfamily[col],
1202  key->partopcintype[col],
1203  key->partopcintype[col],
1204  strategy);
1205  *need_relabel = true;
1206  }
1207  else
1208  *need_relabel = false;
1209 
1210  if (!OidIsValid(operoid))
1211  elog(ERROR, "could not find operator for partitioning");
1212 
1213  return operoid;
1214 }
1215 
1216 /*
1217  * make_partition_op_expr
1218  * Returns an Expr for the given partition key column with arg1 and
1219  * arg2 as its leftop and rightop, respectively
1220  */
1221 static Expr *
1223  uint16 strategy, Expr *arg1, Expr *arg2)
1224 {
1225  Oid operoid;
1226  bool need_relabel = false;
1227  Expr *result = NULL;
1228 
1229  /* Get the correct btree operator for this partitioning column */
1230  operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
1231 
1232  /*
1233  * Chosen operator may be such that the non-Const operand needs to be
1234  * coerced, so apply the same; see the comment in
1235  * get_partition_operator().
1236  */
1237  if (!IsA(arg1, Const) &&
1238  (need_relabel ||
1239  key->partcollation[keynum] != key->parttypcoll[keynum]))
1240  arg1 = (Expr *) makeRelabelType(arg1,
1241  key->partopcintype[keynum],
1242  -1,
1243  key->partcollation[keynum],
1245 
1246  /* Generate the actual expression */
1247  switch (key->strategy)
1248  {
1250  {
1251  ScalarArrayOpExpr *saopexpr;
1252 
1253  /* Build leftop = ANY (rightop) */
1254  saopexpr = makeNode(ScalarArrayOpExpr);
1255  saopexpr->opno = operoid;
1256  saopexpr->opfuncid = get_opcode(operoid);
1257  saopexpr->useOr = true;
1258  saopexpr->inputcollid = key->partcollation[keynum];
1259  saopexpr->args = list_make2(arg1, arg2);
1260  saopexpr->location = -1;
1261 
1262  result = (Expr *) saopexpr;
1263  break;
1264  }
1265 
1267  result = make_opclause(operoid,
1268  BOOLOID,
1269  false,
1270  arg1, arg2,
1271  InvalidOid,
1272  key->partcollation[keynum]);
1273  break;
1274 
1275  default:
1276  elog(ERROR, "invalid partitioning strategy");
1277  break;
1278  }
1279 
1280  return result;
1281 }
1282 
1283 /*
1284  * get_qual_for_list
1285  *
1286  * Returns an implicit-AND list of expressions to use as a list partition's
1287  * constraint, given the partition key and bound structures.
1288  */
1289 static List *
1291 {
1292  List *result;
1293  Expr *keyCol;
1294  ArrayExpr *arr;
1295  Expr *opexpr;
1296  NullTest *nulltest;
1297  ListCell *cell;
1298  List *arrelems = NIL;
1299  bool list_has_null = false;
1300 
1301  /*
1302  * Only single-column list partitioning is supported, so we are worried
1303  * only about the partition key with index 0.
1304  */
1305  Assert(key->partnatts == 1);
1306 
1307  /* Construct Var or expression representing the partition column */
1308  if (key->partattrs[0] != 0)
1309  keyCol = (Expr *) makeVar(1,
1310  key->partattrs[0],
1311  key->parttypid[0],
1312  key->parttypmod[0],
1313  key->parttypcoll[0],
1314  0);
1315  else
1316  keyCol = (Expr *) copyObject(linitial(key->partexprs));
1317 
1318  /* Create list of Consts for the allowed values, excluding any nulls */
1319  foreach(cell, spec->listdatums)
1320  {
1321  Const *val = castNode(Const, lfirst(cell));
1322 
1323  if (val->constisnull)
1324  list_has_null = true;
1325  else
1326  arrelems = lappend(arrelems, copyObject(val));
1327  }
1328 
1329  if (arrelems)
1330  {
1331  /* Construct an ArrayExpr for the non-null partition values */
1332  arr = makeNode(ArrayExpr);
1333  arr->array_typeid = !type_is_array(key->parttypid[0])
1334  ? get_array_type(key->parttypid[0])
1335  : key->parttypid[0];
1336  arr->array_collid = key->parttypcoll[0];
1337  arr->element_typeid = key->parttypid[0];
1338  arr->elements = arrelems;
1339  arr->multidims = false;
1340  arr->location = -1;
1341 
1342  /* Generate the main expression, i.e., keyCol = ANY (arr) */
1344  keyCol, (Expr *) arr);
1345  }
1346  else
1347  {
1348  /* If there are no partition values, we don't need an = ANY expr */
1349  opexpr = NULL;
1350  }
1351 
1352  if (!list_has_null)
1353  {
1354  /*
1355  * Gin up a "col IS NOT NULL" test that will be AND'd with the main
1356  * expression. This might seem redundant, but the partition routing
1357  * machinery needs it.
1358  */
1359  nulltest = makeNode(NullTest);
1360  nulltest->arg = keyCol;
1361  nulltest->nulltesttype = IS_NOT_NULL;
1362  nulltest->argisrow = false;
1363  nulltest->location = -1;
1364 
1365  result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
1366  }
1367  else
1368  {
1369  /*
1370  * Gin up a "col IS NULL" test that will be OR'd with the main
1371  * expression.
1372  */
1373  nulltest = makeNode(NullTest);
1374  nulltest->arg = keyCol;
1375  nulltest->nulltesttype = IS_NULL;
1376  nulltest->argisrow = false;
1377  nulltest->location = -1;
1378 
1379  if (opexpr)
1380  {
1381  Expr *or;
1382 
1383  or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
1384  result = list_make1(or);
1385  }
1386  else
1387  result = list_make1(nulltest);
1388  }
1389 
1390  return result;
1391 }
1392 
1393 /*
1394  * get_range_key_properties
1395  * Returns range partition key information for a given column
1396  *
1397  * This is a subroutine for get_qual_for_range, and its API is pretty
1398  * specialized to that caller.
1399  *
1400  * Constructs an Expr for the key column (returned in *keyCol) and Consts
1401  * for the lower and upper range limits (returned in *lower_val and
1402  * *upper_val). For UNBOUNDED limits, NULL is returned instead of a Const.
1403  * All of these structures are freshly palloc'd.
1404  *
1405  * *partexprs_item points to the cell containing the next expression in
1406  * the key->partexprs list, or NULL. It may be advanced upon return.
1407  */
1408 static void
1410  PartitionRangeDatum *ldatum,
1411  PartitionRangeDatum *udatum,
1412  ListCell **partexprs_item,
1413  Expr **keyCol,
1414  Const **lower_val, Const **upper_val)
1415 {
1416  /* Get partition key expression for this column */
1417  if (key->partattrs[keynum] != 0)
1418  {
1419  *keyCol = (Expr *) makeVar(1,
1420  key->partattrs[keynum],
1421  key->parttypid[keynum],
1422  key->parttypmod[keynum],
1423  key->parttypcoll[keynum],
1424  0);
1425  }
1426  else
1427  {
1428  if (*partexprs_item == NULL)
1429  elog(ERROR, "wrong number of partition key expressions");
1430  *keyCol = copyObject(lfirst(*partexprs_item));
1431  *partexprs_item = lnext(*partexprs_item);
1432  }
1433 
1434  /* Get appropriate Const nodes for the bounds */
1435  if (!ldatum->infinite)
1436  *lower_val = castNode(Const, copyObject(ldatum->value));
1437  else
1438  *lower_val = NULL;
1439 
1440  if (!udatum->infinite)
1441  *upper_val = castNode(Const, copyObject(udatum->value));
1442  else
1443  *upper_val = NULL;
1444 }
1445 
1446 /*
1447  * get_qual_for_range
1448  *
1449  * Returns an implicit-AND list of expressions to use as a range partition's
1450  * constraint, given the partition key and bound structures.
1451  *
1452  * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
1453  * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
1454  * generate an expression tree of the following form:
1455  *
1456  * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
1457  * AND
1458  * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
1459  * AND
1460  * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
1461  *
1462  * It is often the case that a prefix of lower and upper bound tuples contains
1463  * the same values, for example, (al = au), in which case, we will emit an
1464  * expression tree of the following form:
1465  *
1466  * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
1467  * AND
1468  * (a = al)
1469  * AND
1470  * (b > bl OR (b = bl AND c >= cl))
1471  * AND
1472  * (b < bu) OR (b = bu AND c < cu))
1473  *
1474  * If cu happens to be UNBOUNDED, we need not emit any expression for it, so
1475  * the last line would be:
1476  *
1477  * (b < bu) OR (b = bu), which is simplified to (b <= bu)
1478  *
1479  * In most common cases with only one partition column, say a, the following
1480  * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
1481  *
1482  * If all values of both lower and upper bounds are UNBOUNDED, the partition
1483  * does not really have a constraint, except the IS NOT NULL constraint for
1484  * partition keys.
1485  *
1486  * If we end up with an empty result list, we return a single-member list
1487  * containing a constant TRUE, because callers expect a non-empty list.
1488  */
1489 static List *
1491 {
1492  List *result = NIL;
1493  ListCell *cell1,
1494  *cell2,
1495  *partexprs_item,
1496  *partexprs_item_saved;
1497  int i,
1498  j;
1499  PartitionRangeDatum *ldatum,
1500  *udatum;
1501  Expr *keyCol;
1502  Const *lower_val,
1503  *upper_val;
1504  NullTest *nulltest;
1505  List *lower_or_arms,
1506  *upper_or_arms;
1507  int num_or_arms,
1508  current_or_arm;
1509  ListCell *lower_or_start_datum,
1510  *upper_or_start_datum;
1511  bool need_next_lower_arm,
1512  need_next_upper_arm;
1513 
1514  lower_or_start_datum = list_head(spec->lowerdatums);
1515  upper_or_start_datum = list_head(spec->upperdatums);
1516  num_or_arms = key->partnatts;
1517 
1518  /*
1519  * A range-partitioned table does not currently allow partition keys to be
1520  * null, so emit an IS NOT NULL expression for each key column.
1521  */
1522  partexprs_item = list_head(key->partexprs);
1523  for (i = 0; i < key->partnatts; i++)
1524  {
1525  Expr *keyCol;
1526 
1527  if (key->partattrs[i] != 0)
1528  {
1529  keyCol = (Expr *) makeVar(1,
1530  key->partattrs[i],
1531  key->parttypid[i],
1532  key->parttypmod[i],
1533  key->parttypcoll[i],
1534  0);
1535  }
1536  else
1537  {
1538  if (partexprs_item == NULL)
1539  elog(ERROR, "wrong number of partition key expressions");
1540  keyCol = copyObject(lfirst(partexprs_item));
1541  partexprs_item = lnext(partexprs_item);
1542  }
1543 
1544  nulltest = makeNode(NullTest);
1545  nulltest->arg = keyCol;
1546  nulltest->nulltesttype = IS_NOT_NULL;
1547  nulltest->argisrow = false;
1548  nulltest->location = -1;
1549  result = lappend(result, nulltest);
1550  }
1551 
1552  /*
1553  * Iterate over the key columns and check if the corresponding lower and
1554  * upper datums are equal using the btree equality operator for the
1555  * column's type. If equal, we emit single keyCol = common_value
1556  * expression. Starting from the first column for which the corresponding
1557  * lower and upper bound datums are not equal, we generate OR expressions
1558  * as shown in the function's header comment.
1559  */
1560  i = 0;
1561  partexprs_item = list_head(key->partexprs);
1562  partexprs_item_saved = partexprs_item; /* placate compiler */
1563  forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
1564  {
1565  EState *estate;
1566  MemoryContext oldcxt;
1567  Expr *test_expr;
1568  ExprState *test_exprstate;
1569  Datum test_result;
1570  bool isNull;
1571 
1572  ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
1573  udatum = castNode(PartitionRangeDatum, lfirst(cell2));
1574 
1575  /*
1576  * Since get_range_key_properties() modifies partexprs_item, and we
1577  * might need to start over from the previous expression in the later
1578  * part of this function, save away the current value.
1579  */
1580  partexprs_item_saved = partexprs_item;
1581 
1582  get_range_key_properties(key, i, ldatum, udatum,
1583  &partexprs_item,
1584  &keyCol,
1585  &lower_val, &upper_val);
1586 
1587  /*
1588  * If either or both of lower_val and upper_val is NULL, they are
1589  * unequal, because being NULL means the column is unbounded in the
1590  * respective direction.
1591  */
1592  if (!lower_val || !upper_val)
1593  break;
1594 
1595  /* Create the test expression */
1596  estate = CreateExecutorState();
1597  oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
1598  test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
1599  (Expr *) lower_val,
1600  (Expr *) upper_val);
1601  fix_opfuncids((Node *) test_expr);
1602  test_exprstate = ExecInitExpr(test_expr, NULL);
1603  test_result = ExecEvalExprSwitchContext(test_exprstate,
1604  GetPerTupleExprContext(estate),
1605  &isNull);
1606  MemoryContextSwitchTo(oldcxt);
1607  FreeExecutorState(estate);
1608 
1609  /* If not equal, go generate the OR expressions */
1610  if (!DatumGetBool(test_result))
1611  break;
1612 
1613  /*
1614  * The bounds for the last key column can't be equal, because such a
1615  * range partition would never be allowed to be defined (it would have
1616  * an empty range otherwise).
1617  */
1618  if (i == key->partnatts - 1)
1619  elog(ERROR, "invalid range bound specification");
1620 
1621  /* Equal, so generate keyCol = lower_val expression */
1622  result = lappend(result,
1624  keyCol, (Expr *) lower_val));
1625 
1626  i++;
1627  }
1628 
1629  /* First pair of lower_val and upper_val that are not equal. */
1630  lower_or_start_datum = cell1;
1631  upper_or_start_datum = cell2;
1632 
1633  /* OR will have as many arms as there are key columns left. */
1634  num_or_arms = key->partnatts - i;
1635  current_or_arm = 0;
1636  lower_or_arms = upper_or_arms = NIL;
1637  need_next_lower_arm = need_next_upper_arm = true;
1638  while (current_or_arm < num_or_arms)
1639  {
1640  List *lower_or_arm_args = NIL,
1641  *upper_or_arm_args = NIL;
1642 
1643  /* Restart scan of columns from the i'th one */
1644  j = i;
1645  partexprs_item = partexprs_item_saved;
1646 
1647  for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
1648  {
1649  PartitionRangeDatum *ldatum_next = NULL,
1650  *udatum_next = NULL;
1651 
1652  ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
1653  if (lnext(cell1))
1654  ldatum_next = castNode(PartitionRangeDatum,
1655  lfirst(lnext(cell1)));
1656  udatum = castNode(PartitionRangeDatum, lfirst(cell2));
1657  if (lnext(cell2))
1658  udatum_next = castNode(PartitionRangeDatum,
1659  lfirst(lnext(cell2)));
1660  get_range_key_properties(key, j, ldatum, udatum,
1661  &partexprs_item,
1662  &keyCol,
1663  &lower_val, &upper_val);
1664 
1665  if (need_next_lower_arm && lower_val)
1666  {
1667  uint16 strategy;
1668 
1669  /*
1670  * For the non-last columns of this arm, use the EQ operator.
1671  * For the last or the last finite-valued column, use GE.
1672  */
1673  if (j - i < current_or_arm)
1674  strategy = BTEqualStrategyNumber;
1675  else if ((ldatum_next && ldatum_next->infinite) ||
1676  j == key->partnatts - 1)
1677  strategy = BTGreaterEqualStrategyNumber;
1678  else
1679  strategy = BTGreaterStrategyNumber;
1680 
1681  lower_or_arm_args = lappend(lower_or_arm_args,
1682  make_partition_op_expr(key, j,
1683  strategy,
1684  keyCol,
1685  (Expr *) lower_val));
1686  }
1687 
1688  if (need_next_upper_arm && upper_val)
1689  {
1690  uint16 strategy;
1691 
1692  /*
1693  * For the non-last columns of this arm, use the EQ operator.
1694  * For the last finite-valued column, use LE.
1695  */
1696  if (j - i < current_or_arm)
1697  strategy = BTEqualStrategyNumber;
1698  else if (udatum_next && udatum_next->infinite)
1699  strategy = BTLessEqualStrategyNumber;
1700  else
1701  strategy = BTLessStrategyNumber;
1702 
1703  upper_or_arm_args = lappend(upper_or_arm_args,
1704  make_partition_op_expr(key, j,
1705  strategy,
1706  keyCol,
1707  (Expr *) upper_val));
1708  }
1709 
1710  /*
1711  * Did we generate enough of OR's arguments? First arm considers
1712  * the first of the remaining columns, second arm considers first
1713  * two of the remaining columns, and so on.
1714  */
1715  ++j;
1716  if (j - i > current_or_arm)
1717  {
1718  /*
1719  * We need not emit the next arm if the new column that will
1720  * be considered is unbounded.
1721  */
1722  need_next_lower_arm = ldatum_next && !ldatum_next->infinite;
1723  need_next_upper_arm = udatum_next && !udatum_next->infinite;
1724  break;
1725  }
1726  }
1727 
1728  if (lower_or_arm_args != NIL)
1729  lower_or_arms = lappend(lower_or_arms,
1730  list_length(lower_or_arm_args) > 1
1731  ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
1732  : linitial(lower_or_arm_args));
1733 
1734  if (upper_or_arm_args != NIL)
1735  upper_or_arms = lappend(upper_or_arms,
1736  list_length(upper_or_arm_args) > 1
1737  ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
1738  : linitial(upper_or_arm_args));
1739 
1740  /* If no work to do in the next iteration, break away. */
1741  if (!need_next_lower_arm && !need_next_upper_arm)
1742  break;
1743 
1744  ++current_or_arm;
1745  }
1746 
1747  /*
1748  * Generate the OR expressions for each of lower and upper bounds (if
1749  * required), and append to the list of implicitly ANDed list of
1750  * expressions.
1751  */
1752  if (lower_or_arms != NIL)
1753  result = lappend(result,
1754  list_length(lower_or_arms) > 1
1755  ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
1756  : linitial(lower_or_arms));
1757  if (upper_or_arms != NIL)
1758  result = lappend(result,
1759  list_length(upper_or_arms) > 1
1760  ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
1761  : linitial(upper_or_arms));
1762 
1763  /* As noted above, caller expects the list to be non-empty. */
1764  if (result == NIL)
1765  result = list_make1(makeBoolConst(true, false));
1766 
1767  return result;
1768 }
1769 
1770 /*
1771  * generate_partition_qual
1772  *
1773  * Generate partition predicate from rel's partition bound expression
1774  *
1775  * Result expression tree is stored CacheMemoryContext to ensure it survives
1776  * as long as the relcache entry. But we should be running in a less long-lived
1777  * working context. To avoid leaking cache memory if this routine fails partway
1778  * through, we build in working memory and then copy the completed structure
1779  * into cache memory.
1780  */
1781 static List *
1783 {
1784  HeapTuple tuple;
1785  MemoryContext oldcxt;
1786  Datum boundDatum;
1787  bool isnull;
1788  PartitionBoundSpec *bound;
1789  List *my_qual = NIL,
1790  *result = NIL;
1791  Relation parent;
1792 
1793  /* Guard against stack overflow due to overly deep partition tree */
1795 
1796  /* Quick copy */
1797  if (rel->rd_partcheck != NIL)
1798  return copyObject(rel->rd_partcheck);
1799 
1800  /* Grab at least an AccessShareLock on the parent table */
1802  AccessShareLock);
1803 
1804  /* Get pg_class.relpartbound */
1805  tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
1806  if (!HeapTupleIsValid(tuple))
1807  elog(ERROR, "cache lookup failed for relation %u",
1808  RelationGetRelid(rel));
1809 
1810  boundDatum = SysCacheGetAttr(RELOID, tuple,
1812  &isnull);
1813  if (isnull) /* should not happen */
1814  elog(ERROR, "relation \"%s\" has relpartbound = null",
1816  bound = castNode(PartitionBoundSpec,
1817  stringToNode(TextDatumGetCString(boundDatum)));
1818  ReleaseSysCache(tuple);
1819 
1820  my_qual = get_qual_from_partbound(rel, parent, bound);
1821 
1822  /* Add the parent's quals to the list (if any) */
1823  if (parent->rd_rel->relispartition)
1824  result = list_concat(generate_partition_qual(parent), my_qual);
1825  else
1826  result = my_qual;
1827 
1828  /*
1829  * Change Vars to have partition's attnos instead of the parent's. We do
1830  * this after we concatenate the parent's quals, because we want every Var
1831  * in it to bear this relation's attnos. It's safe to assume varno = 1
1832  * here.
1833  */
1834  result = map_partition_varattnos(result, 1, rel, parent);
1835 
1836  /* Save a copy in the relcache */
1838  rel->rd_partcheck = copyObject(result);
1839  MemoryContextSwitchTo(oldcxt);
1840 
1841  /* Keep the parent locked until commit */
1842  heap_close(parent, NoLock);
1843 
1844  return result;
1845 }
1846 
1847 /* ----------------
1848  * FormPartitionKeyDatum
1849  * Construct values[] and isnull[] arrays for the partition key
1850  * of a tuple.
1851  *
1852  * pd Partition dispatch object of the partitioned table
1853  * slot Heap tuple from which to extract partition key
1854  * estate executor state for evaluating any partition key
1855  * expressions (must be non-NULL)
1856  * values Array of partition key Datums (output area)
1857  * isnull Array of is-null indicators (output area)
1858  *
1859  * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1860  * the heap tuple passed in.
1861  * ----------------
1862  */
1863 void
1865  TupleTableSlot *slot,
1866  EState *estate,
1867  Datum *values,
1868  bool *isnull)
1869 {
1870  ListCell *partexpr_item;
1871  int i;
1872 
1873  if (pd->key->partexprs != NIL && pd->keystate == NIL)
1874  {
1875  /* Check caller has set up context correctly */
1876  Assert(estate != NULL &&
1877  GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1878 
1879  /* First time through, set up expression evaluation state */
1880  pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1881  }
1882 
1883  partexpr_item = list_head(pd->keystate);
1884  for (i = 0; i < pd->key->partnatts; i++)
1885  {
1886  AttrNumber keycol = pd->key->partattrs[i];
1887  Datum datum;
1888  bool isNull;
1889 
1890  if (keycol != 0)
1891  {
1892  /* Plain column; get the value directly from the heap tuple */
1893  datum = slot_getattr(slot, keycol, &isNull);
1894  }
1895  else
1896  {
1897  /* Expression; need to evaluate it */
1898  if (partexpr_item == NULL)
1899  elog(ERROR, "wrong number of partition key expressions");
1900  datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1901  GetPerTupleExprContext(estate),
1902  &isNull);
1903  partexpr_item = lnext(partexpr_item);
1904  }
1905  values[i] = datum;
1906  isnull[i] = isNull;
1907  }
1908 
1909  if (partexpr_item != NULL)
1910  elog(ERROR, "wrong number of partition key expressions");
1911 }
1912 
1913 /*
1914  * get_partition_for_tuple
1915  * Finds a leaf partition for tuple contained in *slot
1916  *
1917  * Returned value is the sequence number of the leaf partition thus found,
1918  * or -1 if no leaf partition is found for the tuple. *failed_at is set
1919  * to the OID of the partitioned table whose partition was not found in
1920  * the latter case.
1921  */
1922 int
1924  TupleTableSlot *slot,
1925  EState *estate,
1926  PartitionDispatchData **failed_at,
1927  TupleTableSlot **failed_slot)
1928 {
1929  PartitionDispatch parent;
1931  bool isnull[PARTITION_MAX_KEYS];
1932  int cur_offset,
1933  cur_index;
1934  int i,
1935  result;
1936  ExprContext *ecxt = GetPerTupleExprContext(estate);
1937  TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
1938 
1939  /* start with the root partitioned table */
1940  parent = pd[0];
1941  while (true)
1942  {
1943  PartitionKey key = parent->key;
1944  PartitionDesc partdesc = parent->partdesc;
1945  TupleTableSlot *myslot = parent->tupslot;
1946  TupleConversionMap *map = parent->tupmap;
1947 
1948  if (myslot != NULL && map != NULL)
1949  {
1950  HeapTuple tuple = ExecFetchSlotTuple(slot);
1951 
1952  ExecClearTuple(myslot);
1953  tuple = do_convert_tuple(tuple, map);
1954  ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
1955  slot = myslot;
1956  }
1957 
1958  /* Quick exit */
1959  if (partdesc->nparts == 0)
1960  {
1961  *failed_at = parent;
1962  *failed_slot = slot;
1963  result = -1;
1964  goto error_exit;
1965  }
1966 
1967  /*
1968  * Extract partition key from tuple. Expression evaluation machinery
1969  * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
1970  * point to the correct tuple slot. The slot might have changed from
1971  * what was used for the parent table if the table of the current
1972  * partitioning level has different tuple descriptor from the parent.
1973  * So update ecxt_scantuple accordingly.
1974  */
1975  ecxt->ecxt_scantuple = slot;
1976  FormPartitionKeyDatum(parent, slot, estate, values, isnull);
1977 
1978  if (key->strategy == PARTITION_STRATEGY_RANGE)
1979  {
1980  /*
1981  * Since we cannot route tuples with NULL partition keys through a
1982  * range-partitioned table, simply return that no partition exists
1983  */
1984  for (i = 0; i < key->partnatts; i++)
1985  {
1986  if (isnull[i])
1987  {
1988  *failed_at = parent;
1989  *failed_slot = slot;
1990  result = -1;
1991  goto error_exit;
1992  }
1993  }
1994  }
1995 
1996  /*
1997  * A null partition key is only acceptable if null-accepting list
1998  * partition exists.
1999  */
2000  cur_index = -1;
2001  if (isnull[0] && partition_bound_accepts_nulls(partdesc->boundinfo))
2002  cur_index = partdesc->boundinfo->null_index;
2003  else if (!isnull[0])
2004  {
2005  /* Else bsearch in partdesc->boundinfo */
2006  bool equal = false;
2007 
2008  cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
2009  values, false, &equal);
2010  switch (key->strategy)
2011  {
2013  if (cur_offset >= 0 && equal)
2014  cur_index = partdesc->boundinfo->indexes[cur_offset];
2015  else
2016  cur_index = -1;
2017  break;
2018 
2020 
2021  /*
2022  * Offset returned is such that the bound at offset is
2023  * found to be less or equal with the tuple. So, the bound
2024  * at offset+1 would be the upper bound.
2025  */
2026  cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
2027  break;
2028 
2029  default:
2030  elog(ERROR, "unexpected partition strategy: %d",
2031  (int) key->strategy);
2032  }
2033  }
2034 
2035  /*
2036  * cur_index < 0 means we failed to find a partition of this parent.
2037  * cur_index >= 0 means we either found the leaf partition, or the
2038  * next parent to find a partition of.
2039  */
2040  if (cur_index < 0)
2041  {
2042  result = -1;
2043  *failed_at = parent;
2044  *failed_slot = slot;
2045  break;
2046  }
2047  else if (parent->indexes[cur_index] >= 0)
2048  {
2049  result = parent->indexes[cur_index];
2050  break;
2051  }
2052  else
2053  parent = pd[-parent->indexes[cur_index]];
2054  }
2055 
2056 error_exit:
2057  ecxt->ecxt_scantuple = ecxt_scantuple_old;
2058  return result;
2059 }
2060 
2061 /*
2062  * qsort_partition_list_value_cmp
2063  *
2064  * Compare two list partition bound datums
2065  */
2066 static int32
2067 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
2068 {
2069  Datum val1 = (*(const PartitionListValue **) a)->value,
2070  val2 = (*(const PartitionListValue **) b)->value;
2071  PartitionKey key = (PartitionKey) arg;
2072 
2074  key->partcollation[0],
2075  val1, val2));
2076 }
2077 
2078 /*
2079  * make_one_range_bound
2080  *
2081  * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
2082  * and a flag telling whether the bound is lower or not. Made into a function
2083  * because there are multiple sites that want to use this facility.
2084  */
2085 static PartitionRangeBound *
2087 {
2088  PartitionRangeBound *bound;
2089  ListCell *lc;
2090  int i;
2091 
2092  bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
2093  bound->index = index;
2094  bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
2095  bound->content = (RangeDatumContent *) palloc0(key->partnatts *
2096  sizeof(RangeDatumContent));
2097  bound->lower = lower;
2098 
2099  i = 0;
2100  foreach(lc, datums)
2101  {
2103 
2104  /* What's contained in this range datum? */
2105  bound->content[i] = !datum->infinite
2107  : (lower ? RANGE_DATUM_NEG_INF
2109 
2110  if (bound->content[i] == RANGE_DATUM_FINITE)
2111  {
2112  Const *val = castNode(Const, datum->value);
2113 
2114  if (val->constisnull)
2115  elog(ERROR, "invalid range bound datum");
2116  bound->datums[i] = val->constvalue;
2117  }
2118 
2119  i++;
2120  }
2121 
2122  return bound;
2123 }
2124 
2125 /* Used when sorting range bounds across all range partitions */
2126 static int32
2127 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
2128 {
2129  PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
2130  PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
2131  PartitionKey key = (PartitionKey) arg;
2132 
2133  return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2);
2134 }
2135 
2136 /*
2137  * partition_rbound_cmp
2138  *
2139  * Return for two range bounds whether the 1st one (specified in datum1,
2140  * content1, and lower1) is <, =, or > the bound specified in *b2.
2141  *
2142  * Note that if the values of the two range bounds compare equal, then we take
2143  * into account whether they are upper or lower bounds, and an upper bound is
2144  * considered to be smaller than a lower bound. This is important to the way
2145  * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
2146  * structure, which only stores the upper bound of a common boundary between
2147  * two contiguous partitions.
2148  */
2149 static int32
2151  Datum *datums1, RangeDatumContent *content1, bool lower1,
2152  PartitionRangeBound *b2)
2153 {
2154  int32 cmpval = 0; /* placate compiler */
2155  int i;
2156  Datum *datums2 = b2->datums;
2157  RangeDatumContent *content2 = b2->content;
2158  bool lower2 = b2->lower;
2159 
2160  for (i = 0; i < key->partnatts; i++)
2161  {
2162  /*
2163  * First, handle cases where the column is unbounded, which should not
2164  * invoke the comparison procedure, and should not consider any later
2165  * columns.
2166  */
2167  if (content1[i] != RANGE_DATUM_FINITE ||
2168  content2[i] != RANGE_DATUM_FINITE)
2169  {
2170  /*
2171  * If the bound values are equal, fall through and compare whether
2172  * they are upper or lower bounds.
2173  */
2174  if (content1[i] == content2[i])
2175  break;
2176 
2177  /* Otherwise, one bound is definitely larger than the other */
2178  if (content1[i] == RANGE_DATUM_NEG_INF)
2179  return -1;
2180  else if (content1[i] == RANGE_DATUM_POS_INF)
2181  return 1;
2182  else if (content2[i] == RANGE_DATUM_NEG_INF)
2183  return 1;
2184  else if (content2[i] == RANGE_DATUM_POS_INF)
2185  return -1;
2186  }
2187 
2188  cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
2189  key->partcollation[i],
2190  datums1[i],
2191  datums2[i]));
2192  if (cmpval != 0)
2193  break;
2194  }
2195 
2196  /*
2197  * If the comparison is anything other than equal, we're done. If they
2198  * compare equal though, we still have to consider whether the boundaries
2199  * are inclusive or exclusive. Exclusive one is considered smaller of the
2200  * two.
2201  */
2202  if (cmpval == 0 && lower1 != lower2)
2203  cmpval = lower1 ? 1 : -1;
2204 
2205  return cmpval;
2206 }
2207 
2208 /*
2209  * partition_rbound_datum_cmp
2210  *
2211  * Return whether range bound (specified in rb_datums, rb_content, and
2212  * rb_lower) <=, =, >= partition key of tuple (tuple_datums)
2213  */
2214 static int32
2216  Datum *rb_datums, RangeDatumContent *rb_content,
2217  Datum *tuple_datums)
2218 {
2219  int i;
2220  int32 cmpval = -1;
2221 
2222  for (i = 0; i < key->partnatts; i++)
2223  {
2224  if (rb_content[i] != RANGE_DATUM_FINITE)
2225  return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
2226 
2227  cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
2228  key->partcollation[i],
2229  rb_datums[i],
2230  tuple_datums[i]));
2231  if (cmpval != 0)
2232  break;
2233  }
2234 
2235  return cmpval;
2236 }
2237 
2238 /*
2239  * partition_bound_cmp
2240  *
2241  * Return whether the bound at offset in boundinfo is <=, =, >= the argument
2242  * specified in *probe.
2243  */
2244 static int32
2246  int offset, void *probe, bool probe_is_bound)
2247 {
2248  Datum *bound_datums = boundinfo->datums[offset];
2249  int32 cmpval = -1;
2250 
2251  switch (key->strategy)
2252  {
2254  cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
2255  key->partcollation[0],
2256  bound_datums[0],
2257  *(Datum *) probe));
2258  break;
2259 
2261  {
2262  RangeDatumContent *content = boundinfo->content[offset];
2263 
2264  if (probe_is_bound)
2265  {
2266  /*
2267  * We need to pass whether the existing bound is a lower
2268  * bound, so that two equal-valued lower and upper bounds
2269  * are not regarded equal.
2270  */
2271  bool lower = boundinfo->indexes[offset] < 0;
2272 
2273  cmpval = partition_rbound_cmp(key,
2274  bound_datums, content, lower,
2275  (PartitionRangeBound *) probe);
2276  }
2277  else
2278  cmpval = partition_rbound_datum_cmp(key,
2279  bound_datums, content,
2280  (Datum *) probe);
2281  break;
2282  }
2283 
2284  default:
2285  elog(ERROR, "unexpected partition strategy: %d",
2286  (int) key->strategy);
2287  }
2288 
2289  return cmpval;
2290 }
2291 
2292 /*
2293  * Binary search on a collection of partition bounds. Returns greatest
2294  * bound in array boundinfo->datums which is less than or equal to *probe.
2295  * If all bounds in the array are greater than *probe, -1 is returned.
2296  *
2297  * *probe could either be a partition bound or a Datum array representing
2298  * the partition key of a tuple being routed; probe_is_bound tells which.
2299  * We pass that down to the comparison function so that it can interpret the
2300  * contents of *probe accordingly.
2301  *
2302  * *is_equal is set to whether the bound at the returned index is equal with
2303  * *probe.
2304  */
2305 static int
2307  void *probe, bool probe_is_bound, bool *is_equal)
2308 {
2309  int lo,
2310  hi,
2311  mid;
2312 
2313  lo = -1;
2314  hi = boundinfo->ndatums - 1;
2315  while (lo < hi)
2316  {
2317  int32 cmpval;
2318 
2319  mid = (lo + hi + 1) / 2;
2320  cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
2321  probe_is_bound);
2322  if (cmpval <= 0)
2323  {
2324  lo = mid;
2325  *is_equal = (cmpval == 0);
2326 
2327  if (*is_equal)
2328  break;
2329  }
2330  else
2331  hi = mid - 1;
2332  }
2333 
2334  return lo;
2335 }
Datum constvalue
Definition: primnodes.h:196
#define list_make2(x1, x2)
Definition: pg_list.h:140
Node * map_variable_attnos(Node *node, int target_varno, int sublevels_up, const AttrNumber *attno_map, int map_length, bool *found_whole_row)
bool multidims
Definition: primnodes.h:956
#define NIL
Definition: pg_list.h:69
TupleTableSlot * ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree)
Definition: execTuples.c:320
static int32 partition_rbound_datum_cmp(PartitionKey key, Datum *rb_datums, RangeDatumContent *rb_content, Datum *tuple_datums)
Definition: partition.c:2215
struct PartitionDescData * rd_partdesc
Definition: rel.h:131
#define Anum_pg_inherits_inhrelid
Definition: pg_inherits.h:50
void * stringToNode(char *str)
Definition: read.c:38
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
struct PartitionDispatchData * PartitionDispatch
Definition: partition.h:71
static Expr * make_partition_op_expr(PartitionKey key, int keynum, uint16 strategy, Expr *arg1, Expr *arg2)
Definition: partition.c:1222
PartitionDesc partdesc
Definition: partition.h:65
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:282
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:499
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
#define DatumGetInt32(X)
Definition: postgres.h:478
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:2962
#define RelationGetDescr(relation)
Definition: rel.h:428
Oid * partopfamily
Definition: rel.h:61
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:43
FmgrInfo * partsupfunc
Definition: rel.h:63
#define castNode(_type_, nodeptr)
Definition: nodes.h:578
Oid get_array_type(Oid typid)
Definition: lsyscache.c:2512
void fix_opfuncids(Node *node)
Definition: nodeFuncs.c:1582
char get_rel_relkind(Oid relid)
Definition: lsyscache.c:1801
static Oid get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, bool *need_relabel)
Definition: partition.c:1173
bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:219
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:439
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static int32 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
Definition: partition.c:2127
TupleConversionMap * tupmap
Definition: partition.h:67
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
#define gettext_noop(x)
Definition: c.h:139
Definition: nodes.h:509
#define partition_bound_accepts_nulls(bi)
Definition: partition.c:94
struct cursor * cur
Definition: ecpg.c:28
uint16 StrategyNumber
Definition: stratnum.h:22
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PARTITION_MAX_KEYS
Oid array_typeid
Definition: primnodes.h:952
int get_partition_for_tuple(PartitionDispatch *pd, TupleTableSlot *slot, EState *estate, PartitionDispatchData **failed_at, TupleTableSlot **failed_slot)
Definition: partition.c:1923
List * list_concat(List *list1, List *list2)
Definition: list.c:321
return result
Definition: formatting.c:1633
struct PartitionListValue PartitionListValue
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: clauses.c:172
#define heap_close(r, l)
Definition: heapam.h:97
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:74
List * partexprs
Definition: rel.h:58
char strategy
Definition: rel.h:54
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1047
Form_pg_class rd_rel
Definition: rel.h:114
unsigned int Oid
Definition: postgres_ext.h:31
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
#define OidIsValid(objectId)
Definition: c.h:538
List * get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec)
Definition: partition.c:881
void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec)
Definition: partition.c:670
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:328
void RelationBuildPartitionDesc(Relation rel)
Definition: partition.c:160
#define SearchSysCache1(cacheId, key1)
Definition: syscache.h:156
signed int int32
Definition: c.h:256
Expr * makeBoolExpr(BoolExprType boolop, List *args, int location)
Definition: makefuncs.c:366
PartitionBoundInfo boundinfo
Definition: partition.h:37
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
ParseState * make_parsestate(ParseState *parentParseState)
Definition: parse_node.c:44
Definition: type.h:89
#define list_make1(x1)
Definition: pg_list.h:139
void FreeExecutorState(EState *estate)
Definition: execUtils.c:178
#define GetPerTupleExprContext(estate)
Definition: executor.h:457
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:416
unsigned short uint16
Definition: c.h:267
void pfree(void *pointer)
Definition: mcxt.c:950
MemoryContext es_query_cxt
Definition: execnodes.h:468
Oid * parttypcoll
Definition: rel.h:74
#define linitial(l)
Definition: pg_list.h:111
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
static List * get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
Definition: partition.c:1490
#define ERROR
Definition: elog.h:43
static int32 partition_rbound_cmp(PartitionKey key, Datum *datums1, RangeDatumContent *content1, bool lower1, PartitionRangeBound *b2)
Definition: partition.c:2150
Oid get_partition_parent(Oid relid)
Definition: partition.c:839
Node * makeBoolConst(bool value, bool isnull)
Definition: makefuncs.c:354
Expr * arg
Definition: primnodes.h:1180
static struct @121 value
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:165
static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
Definition: partition.c:2067
static int partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, void *probe, bool probe_is_bound, bool *is_equal)
Definition: partition.c:2306
char * c
void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
Definition: partition.c:1864
#define NoLock
Definition: lockdefs.h:34
RelabelType * makeRelabelType(Expr *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat)
Definition: makefuncs.c:399
void check_stack_depth(void)
Definition: postgres.c:3117
static void get_range_key_properties(PartitionKey key, int keynum, PartitionRangeDatum *ldatum, PartitionRangeDatum *udatum, ListCell **partexprs_item, Expr **keyCol, Const **lower_val, Const **upper_val)
Definition: partition.c:1409
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:163
#define DatumGetBool(X)
Definition: postgres.h:399
#define RelationGetRelationName(relation)
Definition: rel.h:436
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
List * elements
Definition: primnodes.h:955
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc)
Definition: execTuples.c:199
RangeDatumContent
Definition: partition.c:71
#define lnext(lc)
Definition: pg_list.h:105
#define ereport(elevel, rest)
Definition: elog.h:122
Var * makeVar(Index varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:67
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:128
Oid * parttypid
Definition: rel.h:69
EState * CreateExecutorState(void)
Definition: execUtils.c:80
TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc, const char *msg)
Definition: tupconvert.c:205
List * ExecPrepareExprList(List *nodes, EState *estate)
Definition: execExpr.c:511
List * lappend(List *list, void *datum)
Definition: list.c:128
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
AttrNumber * convert_tuples_by_name_map(TupleDesc indesc, TupleDesc outdesc, const char *msg)
Definition: tupconvert.c:283
#define RELKIND_PARTITIONED_TABLE
Definition: pg_class.h:168
Oid * partcollation
Definition: rel.h:66
#define TextDatumGetCString(d)
Definition: builtins.h:92
List * RelationGetPartitionQual(Relation rel)
Definition: partition.c:951
static PartitionRangeBound * make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
Definition: partition.c:2086
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:322
void * palloc0(Size size)
Definition: mcxt.c:878
int location
Definition: primnodes.h:957
AttrNumber * partattrs
Definition: rel.h:56
uintptr_t Datum
Definition: postgres.h:372
int16 partnatts
Definition: rel.h:55
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1117
TupleTableSlot * tupslot
Definition: partition.h:66
#define Anum_pg_inherits_inhseqno
Definition: pg_inherits.h:52
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1279
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1284
NullTestType nulltesttype
Definition: primnodes.h:1181
int32 * parttypmod
Definition: rel.h:70
struct PartitionBoundInfoData PartitionBoundInfoData
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1094
List * find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
Definition: pg_inherits.c:57
RangeDatumContent * content
Definition: partition.c:113
#define for_both_cell(cell1, initcell1, cell2, initcell2)
Definition: pg_list.h:194
#define Anum_pg_class_relpartbound
Definition: pg_class.h:135
#define makeNode(_type_)
Definition: nodes.h:557
bool * parttypbyval
Definition: rel.h:72
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
MemoryContext rd_pdcxt
Definition: rel.h:130
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
int16 * parttyplen
Definition: rel.h:71
static List * get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
Definition: partition.c:1290
Oid array_collid
Definition: primnodes.h:953
#define InheritsRelidSeqnoIndexId
Definition: indexing.h:167
int location
Definition: primnodes.h:1183
FormData_pg_inherits * Form_pg_inherits
Definition: pg_inherits.h:43
static int list_length(const List *l)
Definition: pg_list.h:89
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:111
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:197
#define type_is_array(typid)
Definition: lsyscache.h:180
#define BOOLOID
Definition: pg_type.h:288
static List * generate_partition_qual(Relation rel)
Definition: partition.c:1782
#define PARTITION_STRATEGY_LIST
Definition: parsenodes.h:786
#define RelationGetPartitionKey(relation)
Definition: rel.h:584
HeapTuple do_convert_tuple(HeapTuple tuple, TupleConversionMap *map)
Definition: tupconvert.c:343
HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot)
Definition: execTuples.c:618
Oid element_typeid
Definition: primnodes.h:954
#define InheritsRelationId
Definition: pg_inherits.h:29
Expr * get_partition_qual_relid(Oid relid)
Definition: partition.c:967
PartitionDispatch * RelationGetPartitionDispatchInfo(Relation rel, int lockmode, int *num_parted, List **leaf_part_oids)
Definition: partition.c:1013
static Datum values[MAXATTR]
Definition: bootstrap.c:163
FormData_pg_class * Form_pg_class
Definition: pg_class.h:95
#define PARTITION_STRATEGY_RANGE
Definition: parsenodes.h:787
#define Int32GetDatum(X)
Definition: postgres.h:485
void * palloc(Size size)
Definition: mcxt.c:849
struct PartitionKeyData * PartitionKey
Definition: rel.h:77
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define APPEND_REL_PARTITION_OIDS(rel, partoids, parents)
Definition: partition.c:993
Oid * partopcintype
Definition: rel.h:62
int i
Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: heaptuple.c:1141
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
void * arg
bool partition_bounds_equal(PartitionKey key, PartitionBoundInfo b1, PartitionBoundInfo b2)
Definition: partition.c:599
bool argisrow
Definition: primnodes.h:1182
struct PartitionRangeBound PartitionRangeBound
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:113
#define elog
Definition: elog.h:219
#define copyObject(obj)
Definition: nodes.h:622
#define BTLessStrategyNumber
Definition: stratnum.h:29
Definition: pg_list.h:45
char * get_rel_name(Oid relid)
Definition: lsyscache.c:1726
int16 AttrNumber
Definition: attnum.h:21
#define RelationGetRelid(relation)
Definition: rel.h:416
List * rd_partcheck
Definition: rel.h:132
long val
Definition: informix.c:689
static int32 partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, int offset, void *probe, bool probe_is_bound)
Definition: partition.c:2245
bool constisnull
Definition: primnodes.h:197
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
#define lfirst_oid(lc)
Definition: pg_list.h:108
#define RelationGetPartitionDesc(relation)
Definition: rel.h:632
List * map_partition_varattnos(List *expr, int target_varno, Relation partrel, Relation parent)
Definition: partition.c:921
MemoryContext CacheMemoryContext
Definition: mcxt.c:46
PartitionKey key
Definition: partition.h:63
RangeDatumContent ** content
Definition: partition.c:84