PostgreSQL Source Code  git master
partdesc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * partdesc.c
4  * Support routines for manipulating partition descriptors
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/partitioning/partdesc.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/genam.h"
18 #include "access/htup_details.h"
19 #include "access/table.h"
20 #include "catalog/indexing.h"
21 #include "catalog/partition.h"
22 #include "catalog/pg_inherits.h"
24 #include "partitioning/partdesc.h"
25 #include "storage/bufmgr.h"
26 #include "storage/sinval.h"
27 #include "utils/builtins.h"
28 #include "utils/fmgroids.h"
29 #include "utils/hsearch.h"
30 #include "utils/inval.h"
31 #include "utils/lsyscache.h"
32 #include "utils/memutils.h"
33 #include "utils/partcache.h"
34 #include "utils/rel.h"
35 #include "utils/syscache.h"
36 
37 typedef struct PartitionDirectoryData
38 {
42 
44 {
49 
50 /*
51  * RelationBuildPartitionDesc
52  * Form rel's partition descriptor, and store in relcache entry
53  *
54  * Note: the descriptor won't be flushed from the cache by
55  * RelationClearRelation() unless it's changed because of
56  * addition or removal of a partition. Hence, code holding a lock
57  * that's sufficient to prevent that can assume that rd_partdesc
58  * won't change underneath it.
    *
    * Outline of the visible steps: for every child OID in inhoids, fetch its
    * PartitionBoundSpec (syscache first, then a direct scan of pg_class),
    * sanity-check it, and remember it together with the OID.  Then build the
    * bound info, copy it into rel->rd_pdcxt along with oids[] and is_leaf[]
    * arrays arranged by the mapping that partition_bounds_create() returns,
    * and finally assign rel->rd_partdesc.  Problems are reported with
    * elog(ERROR).
    *
    * NOTE(review): this listing omits several source lines (the embedded
    * numbering skips 61, 71, 81, 167, 192, 194-196 and 199), so the function
    * name/parameter line and the statements that set "key", "inhoids",
    * "partdefid", rel->rd_pdcxt and the partdesc allocation are not visible
    * here.  The cross-reference index on this page gives the signature as
    * void RelationBuildPartitionDesc(Relation rel).
59  */
60 void
62 {
63  PartitionDesc partdesc;
64  PartitionBoundInfo boundinfo = NULL;
65  List *inhoids;
66  PartitionBoundSpec **boundspecs = NULL;
67  Oid *oids = NULL;
68  ListCell *cell;
69  int i,
70  nparts;
72  MemoryContext oldcxt;
73  int *mapping;
74 
75  /*
76  * Get partition oids from pg_inherits. This uses a single snapshot to
77  * fetch the list of children, so while more children may be getting added
78  * concurrently, whatever this function returns will be accurate as of
79  * some well-defined point in time.
80  */
82  nparts = list_length(inhoids);
83 
84  /* Allocate arrays for OIDs and boundspecs. */
85  if (nparts > 0)
86  {
87  oids = palloc(nparts * sizeof(Oid));
88  boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *));
89  }
90 
91  /* Collect bound spec nodes for each partition. */
92  i = 0;
93  foreach(cell, inhoids)
94  {
95  Oid inhrelid = lfirst_oid(cell);
96  HeapTuple tuple;
97  PartitionBoundSpec *boundspec = NULL;
98 
99  /* Try fetching the tuple from the catcache, for speed. */
100  tuple = SearchSysCache1(RELOID, inhrelid);
101  if (HeapTupleIsValid(tuple))
102  {
103  Datum datum;
104  bool isnull;
105 
106  datum = SysCacheGetAttr(RELOID, tuple,
107  Anum_pg_class_relpartbound,
108  &isnull);
109  if (!isnull)
110  boundspec = stringToNode(TextDatumGetCString(datum));
111  ReleaseSysCache(tuple);
112  }
113 
114  /*
115  * The system cache may be out of date; if so, we may find no pg_class
116  * tuple or an old one where relpartbound is NULL. In that case, try
117  * the table directly. We can't just AcceptInvalidationMessages() and
118  * retry the system cache lookup because it's possible that a
119  * concurrent ATTACH PARTITION operation has removed itself from the
120  * ProcArray but not yet added invalidation messages to the shared queue;
121  * InvalidateSystemCaches() would work, but seems excessive.
122  *
123  * Note that this algorithm assumes that the PartitionBoundSpec we manage
124  * to fetch is the right one -- so this is only good enough for
125  * concurrent ATTACH PARTITION, not concurrent DETACH PARTITION or
126  * some hypothetical operation that changes the partition bounds.
127  */
128  if (boundspec == NULL)
129  {
130  Relation pg_class;
131  SysScanDesc scan;
132  ScanKeyData key[1];
133  Datum datum;
134  bool isnull;
135 
136  pg_class = table_open(RelationRelationId, AccessShareLock);
137  ScanKeyInit(&key[0],
138  Anum_pg_class_oid,
139  BTEqualStrategyNumber, F_OIDEQ,
140  ObjectIdGetDatum(inhrelid));
141  scan = systable_beginscan(pg_class, ClassOidIndexId, true,
142  NULL, 1, key);
     /*
      * NOTE(review): the syscache path above guards its tuple with
      * HeapTupleIsValid(), but the result of systable_getnext() is passed
      * to heap_getattr() unchecked here.  Presumably the scan yields an
      * invalid tuple when no pg_class row matches inhrelid -- confirm, and
      * if so guard this so that the "missing relpartbound" elog below is
      * reached instead of dereferencing an invalid tuple.
      */
143  tuple = systable_getnext(scan);
144  datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
145  RelationGetDescr(pg_class), &isnull);
146  if (!isnull)
147  boundspec = stringToNode(TextDatumGetCString(datum));
148  systable_endscan(scan);
149  table_close(pg_class, AccessShareLock);
150  }
151 
152  /* Sanity checks. */
153  if (!boundspec)
154  elog(ERROR, "missing relpartbound for relation %u", inhrelid);
155  if (!IsA(boundspec, PartitionBoundSpec))
156  elog(ERROR, "invalid relpartbound for relation %u", inhrelid);
157 
158  /*
159  * If the PartitionBoundSpec says this is the default partition, its
160  * OID should match pg_partitioned_table.partdefid; if not, the
161  * catalog is corrupt.
162  */
163  if (boundspec->is_default)
164  {
165  Oid partdefid;
166 
168  if (partdefid != inhrelid)
169  elog(ERROR, "expected partdefid %u, but got %u",
170  inhrelid, partdefid);
171  }
172 
173  /* Save results. */
174  oids[i] = inhrelid;
175  boundspecs[i] = boundspec;
176  ++i;
177  }
178 
179  /* Assert we aren't about to leak any old data structure */
180  Assert(rel->rd_pdcxt == NULL);
181  Assert(rel->rd_partdesc == NULL);
182 
183  /*
184  * Now build the actual relcache partition descriptor. Note that the
185  * order of operations here is fairly critical. If we fail partway
186  * through this code, we won't have leaked memory because the rd_pdcxt is
187  * attached to the relcache entry immediately, so it'll be freed whenever
188  * the entry is rebuilt or destroyed. However, we don't assign to
189  * rd_partdesc until the cached data structure is fully complete and
190  * valid, so that no other code might try to use it.
191  */
193  "partition descriptor",
197 
198  partdesc = (PartitionDescData *)
200  partdesc->nparts = nparts;
201  /* If there are no partitions, the rest of the partdesc can stay zero */
202  if (nparts > 0)
203  {
204  /* Create PartitionBoundInfo, using the caller's context. */
205  boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
206 
207  /* Now copy all info into relcache's partdesc. */
208  oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
209  partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
210  partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
211  partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
212  MemoryContextSwitchTo(oldcxt);
213 
214  /*
215  * Assign OIDs from the original array into mapped indexes of the
216  * result array. The order of OIDs in the former is defined by the
217  * catalog scan that retrieved them, whereas that in the latter is
218  * defined by canonicalized representation of the partition bounds.
219  *
220  * Also record leaf-ness of each partition. For this we use
221  * get_rel_relkind() which may leak memory, so be sure to run it in
222  * the caller's context.
223  */
224  for (i = 0; i < nparts; i++)
225  {
226  int index = mapping[i];
227 
228  partdesc->oids[index] = oids[i];
229  partdesc->is_leaf[index] =
230  (get_rel_relkind(oids[i]) != RELKIND_PARTITIONED_TABLE);
231  }
232  }
233 
     /* Publish the descriptor only now that it is completely filled in. */
234  rel->rd_partdesc = partdesc;
235 }
236 
237 /*
238  * CreatePartitionDirectory
239  * Create a new partition directory object.
     *
     * The directory struct is palloc'd while mcxt is the current memory
     * context, and the hash table is told to use mcxt as well (ctl.hcxt).
     * Hash keys are Oids and entries are PartitionDirectoryEntry structs.
     * The caller's memory context is restored before returning the new
     * directory.
     *
     * NOTE(review): the signature lines (source lines 241-242) and the
     * final argument line of the hash_create() call (source line 256) are
     * missing from this listing.  The cross-reference index on this page
     * gives the signature as
     * PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt);
     * the missing argument is presumably the hash flags -- confirm against
     * the real file.
240  */
243 {
244  MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
245  PartitionDirectory pdir;
246  HASHCTL ctl;
247 
     /* Start from an all-zero control struct; only the fields below are set. */
248  MemSet(&ctl, 0, sizeof(HASHCTL));
249  ctl.keysize = sizeof(Oid);
250  ctl.entrysize = sizeof(PartitionDirectoryEntry);
251  ctl.hcxt = mcxt;
252 
253  pdir = palloc(sizeof(PartitionDirectoryData));
254  pdir->pdir_mcxt = mcxt;
255  pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
257 
258  MemoryContextSwitchTo(oldcontext);
259  return pdir;
260 }
261 
262 /*
263  * PartitionDirectoryLookup
264  * Look up the partition descriptor for a relation in the directory.
265  *
266  * The purpose of this function is to ensure that we get the same
267  * PartitionDesc for each relation every time we look it up. In the
268  * face of concurrent DDL, different PartitionDescs may be constructed with
269  * different views of the catalog state, but any single particular OID
270  * will always get the same PartitionDesc for as long as the same
271  * PartitionDirectory is used.
     *
     * The hash table is keyed by the relation's OID.  On the first lookup
     * of a relation the new entry records rel and its current partition
     * descriptor; later lookups return the descriptor stored in the entry.
     *
     * NOTE(review): the signature lines (source lines 273-274), the
     * declaration of "pde" (276) and the statement at source line 287 are
     * missing from this listing.  Line 287 is presumably the reference
     * count increment that the comment inside the function describes --
     * confirm against the real file.
272  */
275 {
277  Oid relid = RelationGetRelid(rel);
278  bool found;
279 
     /* HASH_ENTER: "found" reports whether the entry already existed. */
280  pde = hash_search(pdir->pdir_hash, &relid, HASH_ENTER, &found);
281  if (!found)
282  {
283  /*
284  * We must keep a reference count on the relation so that the
285  * PartitionDesc to which we are pointing can't get destroyed.
286  */
288  pde->rel = rel;
289  pde->pd = RelationGetPartitionDesc(rel);
290  Assert(pde->pd != NULL);
291  }
292  return pde->pd;
293 }
294 
295 /*
296  * DestroyPartitionDirectory
297  * Destroy a partition directory.
298  *
299  * Release the reference counts we're holding.
     *
     * Walks every entry of pdir->pdir_hash with a sequential hash scan.
     *
     * NOTE(review): the parameter line (source line 302), the local
     * declarations of "status" and "pde" (304-305) and the loop body (309)
     * are missing from this listing.  The loop body is presumably the
     * per-entry reference count release mentioned above -- confirm against
     * the real file.  Nothing visible here frees the directory's memory.
300  */
301 void
303 {
306 
307  hash_seq_init(&status, pdir->pdir_hash);
308  while ((pde = hash_seq_search(&status)) != NULL)
310 }
311 
312 /*
313  * equalPartitionDescs
314  * Compare two partition descriptors for logical equality
     *
     * Returns true when both descriptors are NULL, or when both are
     * non-NULL, have the same nparts, hold identical oids[] element by
     * element, and have bound infos that are either both NULL or compare
     * equal.  Returns false otherwise.  key may be NULL only when
     * partdesc1 has no partitions (see the Assert below).
     *
     * NOTE(review): the line carrying the function name and first
     * parameter (source line 317) and the first line of the bounds
     * comparison (351) are missing from this listing.  The cross-reference
     * index on this page gives the signature as
     * bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
     * PartitionDesc partdesc2).
315  */
316 bool
318  PartitionDesc partdesc2)
319 {
320  int i;
321 
322  if (partdesc1 != NULL)
323  {
324  if (partdesc2 == NULL)
325  return false;
326  if (partdesc1->nparts != partdesc2->nparts)
327  return false;
328 
329  Assert(key != NULL || partdesc1->nparts == 0);
330 
331  /*
332  * Same oids? If the partitioning structure did not change, that is,
333  * no partitions were added to or removed from the relation, the oids
334  * array should still match element-by-element.
335  */
336  for (i = 0; i < partdesc1->nparts; i++)
337  {
338  if (partdesc1->oids[i] != partdesc2->oids[i])
339  return false;
340  }
341 
342  /*
343  * Now compare partition bound collections. The logic to iterate over
344  * the collections is private to partbounds.c.
345  */
346  if (partdesc1->boundinfo != NULL)
347  {
348  if (partdesc2->boundinfo == NULL)
349  return false;
350 
352  key->parttypbyval,
353  partdesc1->boundinfo,
354  partdesc2->boundinfo))
355  return false;
356  }
357  else if (partdesc2->boundinfo != NULL)
358  return false;
359  }
360  else if (partdesc2 != NULL)
361  return false;
362 
363  return true;
364 }
365 
366 /*
367  * get_default_oid_from_partdesc
368  *
369  * Given a partition descriptor, return the OID of the default partition, if
370  * one exists; else, return InvalidOid.
     *
     * A NULL partdesc or a NULL partdesc->boundinfo yields InvalidOid.
     * Otherwise the OID is read from oids[] at boundinfo->default_index.
     *
     * NOTE(review): the line carrying the function name and parameter
     * (source line 373) and the final part of the condition (376) are
     * missing from this listing.  Line 376 is presumably the test that a
     * default partition exists -- confirm against the real file.
371  */
372 Oid
374 {
375  if (partdesc && partdesc->boundinfo &&
377  return partdesc->oids[partdesc->boundinfo->default_index];
378 
379  return InvalidOid;
380 }
struct PartitionDescData * rd_partdesc
Definition: rel.h:99
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define AllocSetContextCreate
Definition: memutils.h:170
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:525
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
#define RelationGetDescr(relation)
Definition: rel.h:448
char get_rel_relkind(Oid relid)
Definition: lsyscache.c:1805
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:202
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt)
Definition: partdesc.c:242
#define AccessShareLock
Definition: lockdefs.h:36
Size entrysize
Definition: hsearch.h:73
void * stringToNode(const char *str)
Definition: read.c:89
#define MemSet(start, val, len)
Definition: c.h:962
bool * is_leaf
Definition: partdesc.h:26
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:906
unsigned int Oid
Definition: postgres_ext.h:31
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:352
PartitionBoundInfo boundinfo
Definition: partdesc.h:29
Definition: type.h:89
Definition: dynahash.c:208
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:444
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2068
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
void DestroyPartitionDirectory(PartitionDirectory pdir)
Definition: partdesc.c:302
bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, PartitionDesc partdesc2)
Definition: partdesc.c:317
void RelationBuildPartitionDesc(Relation rel)
Definition: partdesc.c:61
PartitionDesc pd
Definition: partdesc.c:47
#define MemoryContextCopyAndSetIdentifier(cxt, id)
Definition: memutils.h:97
#define NoLock
Definition: lockdefs.h:34
MemoryContext pdir_mcxt
Definition: partdesc.c:39
#define RelationGetRelationName(relation)
Definition: rel.h:456
#define partition_bound_has_default(bi)
Definition: partbounds.h:75
#define ClassOidIndexId
Definition: indexing.h:114
struct PartitionDirectoryEntry PartitionDirectoryEntry
Oid get_default_partition_oid(Oid parentId)
Definition: partition.c:298
#define heap_getattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:762
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
#define HASH_BLOBS
Definition: hsearch.h:88
#define TextDatumGetCString(d)
Definition: builtins.h:84
PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key)
Definition: partbounds.c:780
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2055
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:316
uintptr_t Datum
Definition: postgres.h:367
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
Size keysize
Definition: hsearch.h:72
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
#define InvalidOid
Definition: postgres_ext.h:36
List * find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
Definition: pg_inherits.c:55
bool * parttypbyval
Definition: partcache.h:44
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
MemoryContext rd_pdcxt
Definition: rel.h:100
#define Assert(condition)
Definition: c.h:739
int16 * parttyplen
Definition: partcache.h:43
static int list_length(const List *l)
Definition: pg_list.h:169
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1389
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1379
#define RelationGetPartitionKey(relation)
Definition: rel.h:603
void * palloc(Size size)
Definition: mcxt.c:949
bool partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, PartitionBoundInfo b1, PartitionBoundInfo b2)
Definition: partbounds.c:667
#define elog(elevel,...)
Definition: elog.h:228
int i
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition: partdesc.c:274
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:226
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
Definition: pg_list.h:50
#define RelationGetRelid(relation)
Definition: rel.h:422
PartitionBoundInfo partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:173
#define BTEqualStrategyNumber
Definition: stratnum.h:31
struct PartitionDirectoryData PartitionDirectoryData
#define lfirst_oid(lc)
Definition: pg_list.h:192
#define RelationGetPartitionDesc(relation)
Definition: rel.h:609
MemoryContext CacheMemoryContext
Definition: mcxt.c:47
Oid get_default_oid_from_partdesc(PartitionDesc partdesc)
Definition: partdesc.c:373