PostgreSQL Source Code  git master
partdesc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * partdesc.c
4  * Support routines for manipulating partition descriptors
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/partitioning/partdesc.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/genam.h"
18 #include "access/htup_details.h"
19 #include "access/table.h"
20 #include "catalog/indexing.h"
21 #include "catalog/partition.h"
22 #include "catalog/pg_inherits.h"
24 #include "partitioning/partdesc.h"
25 #include "storage/bufmgr.h"
26 #include "storage/sinval.h"
27 #include "utils/builtins.h"
28 #include "utils/fmgroids.h"
29 #include "utils/hsearch.h"
30 #include "utils/inval.h"
31 #include "utils/lsyscache.h"
32 #include "utils/memutils.h"
33 #include "utils/partcache.h"
34 #include "utils/rel.h"
35 #include "utils/syscache.h"
36 
37 typedef struct PartitionDirectoryData
38 {
42 
44 {
49 
50 static void RelationBuildPartitionDesc(Relation rel);
51 
52 
53 /*
54  * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned
55  *
56  * Note: we arrange for partition descriptors to not get freed until the
57  * relcache entry's refcount goes to zero (see hacks in RelationClose,
58  * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even
59  * though we hand back a direct pointer into the relcache entry, it's safe
60  * for callers to continue to use that pointer as long as (a) they hold the
61  * relation open, and (b) they hold a relation lock strong enough to ensure
62  * that the data doesn't become stale.
63  */
66 {
67  if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
68  return NULL;
69 
70  if (unlikely(rel->rd_partdesc == NULL))
72 
73  return rel->rd_partdesc;
74 }
75 
76 /*
77  * RelationBuildPartitionDesc
78  * Form rel's partition descriptor, and store in relcache entry
79  *
80  * Partition descriptor is a complex structure; to avoid complicated logic to
81  * free individual elements whenever the relcache entry is flushed, we give it
82  * its own memory context, a child of CacheMemoryContext, which can easily be
83  * deleted on its own. To avoid leaking memory in that context in case of an
84  * error partway through this function, the context is initially created as a
85  * child of CurTransactionContext and only re-parented to CacheMemoryContext
86  * at the end, when no further errors are possible. Also, we don't make this
87  * context the current context except in very brief code sections, out of fear
88  * that some of our callees allocate memory on their own which would be leaked
89  * permanently.
90  */
91 static void
93 {
94  PartitionDesc partdesc;
95  PartitionBoundInfo boundinfo = NULL;
96  List *inhoids;
97  PartitionBoundSpec **boundspecs = NULL;
98  Oid *oids = NULL;
99  bool *is_leaf = NULL;
100  ListCell *cell;
101  int i,
102  nparts;
104  MemoryContext new_pdcxt;
105  MemoryContext oldcxt;
106  int *mapping;
107 
108  /*
109  * Get partition oids from pg_inherits. This uses a single snapshot to
110  * fetch the list of children, so while more children may be getting added
111  * concurrently, whatever this function returns will be accurate as of
112  * some well-defined point in time.
113  */
115  nparts = list_length(inhoids);
116 
117  /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
118  if (nparts > 0)
119  {
120  oids = (Oid *) palloc(nparts * sizeof(Oid));
121  is_leaf = (bool *) palloc(nparts * sizeof(bool));
122  boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *));
123  }
124 
125  /* Collect bound spec nodes for each partition. */
126  i = 0;
127  foreach(cell, inhoids)
128  {
129  Oid inhrelid = lfirst_oid(cell);
130  HeapTuple tuple;
131  PartitionBoundSpec *boundspec = NULL;
132 
133  /* Try fetching the tuple from the catcache, for speed. */
134  tuple = SearchSysCache1(RELOID, inhrelid);
135  if (HeapTupleIsValid(tuple))
136  {
137  Datum datum;
138  bool isnull;
139 
140  datum = SysCacheGetAttr(RELOID, tuple,
141  Anum_pg_class_relpartbound,
142  &isnull);
143  if (!isnull)
144  boundspec = stringToNode(TextDatumGetCString(datum));
145  ReleaseSysCache(tuple);
146  }
147 
148  /*
149  * The system cache may be out of date; if so, we may find no pg_class
150  * tuple or an old one where relpartbound is NULL. In that case, try
151  * the table directly. We can't just AcceptInvalidationMessages() and
152  * retry the system cache lookup because it's possible that a
153  * concurrent ATTACH PARTITION operation has removed itself to the
154  * ProcArray but yet added invalidation messages to the shared queue;
155  * InvalidateSystemCaches() would work, but seems excessive.
156  *
157  * Note that this algorithm assumes that PartitionBoundSpec we manage
158  * to fetch is the right one -- so this is only good enough for
159  * concurrent ATTACH PARTITION, not concurrent DETACH PARTITION or
160  * some hypothetical operation that changes the partition bounds.
161  */
162  if (boundspec == NULL)
163  {
164  Relation pg_class;
165  SysScanDesc scan;
166  ScanKeyData key[1];
167  Datum datum;
168  bool isnull;
169 
170  pg_class = table_open(RelationRelationId, AccessShareLock);
171  ScanKeyInit(&key[0],
172  Anum_pg_class_oid,
173  BTEqualStrategyNumber, F_OIDEQ,
174  ObjectIdGetDatum(inhrelid));
175  scan = systable_beginscan(pg_class, ClassOidIndexId, true,
176  NULL, 1, key);
177  tuple = systable_getnext(scan);
178  datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
179  RelationGetDescr(pg_class), &isnull);
180  if (!isnull)
181  boundspec = stringToNode(TextDatumGetCString(datum));
182  systable_endscan(scan);
183  table_close(pg_class, AccessShareLock);
184  }
185 
186  /* Sanity checks. */
187  if (!boundspec)
188  elog(ERROR, "missing relpartbound for relation %u", inhrelid);
189  if (!IsA(boundspec, PartitionBoundSpec))
190  elog(ERROR, "invalid relpartbound for relation %u", inhrelid);
191 
192  /*
193  * If the PartitionBoundSpec says this is the default partition, its
194  * OID should match pg_partitioned_table.partdefid; if not, the
195  * catalog is corrupt.
196  */
197  if (boundspec->is_default)
198  {
199  Oid partdefid;
200 
202  if (partdefid != inhrelid)
203  elog(ERROR, "expected partdefid %u, but got %u",
204  inhrelid, partdefid);
205  }
206 
207  /* Save results. */
208  oids[i] = inhrelid;
209  is_leaf[i] = (get_rel_relkind(inhrelid) != RELKIND_PARTITIONED_TABLE);
210  boundspecs[i] = boundspec;
211  ++i;
212  }
213 
214  /*
215  * Create PartitionBoundInfo and mapping, working in the caller's context.
216  * This could fail, but we haven't done any damage if so.
217  */
218  if (nparts > 0)
219  boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
220 
221  /*
222  * Now build the actual relcache partition descriptor, copying all the
223  * data into a new, small context. As per above comment, we don't make
224  * this a long-lived context until it's finished.
225  */
227  "partition descriptor",
231 
232  partdesc = (PartitionDescData *)
233  MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
234  partdesc->nparts = nparts;
235  /* If there are no partitions, the rest of the partdesc can stay zero */
236  if (nparts > 0)
237  {
238  oldcxt = MemoryContextSwitchTo(new_pdcxt);
239  partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
240  partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
241  partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
242 
243  /*
244  * Assign OIDs from the original array into mapped indexes of the
245  * result array. The order of OIDs in the former is defined by the
246  * catalog scan that retrieved them, whereas that in the latter is
247  * defined by canonicalized representation of the partition bounds.
248  * Also save leaf-ness of each partition.
249  */
250  for (i = 0; i < nparts; i++)
251  {
252  int index = mapping[i];
253 
254  partdesc->oids[index] = oids[i];
255  partdesc->is_leaf[index] = is_leaf[i];
256  }
257  MemoryContextSwitchTo(oldcxt);
258  }
259 
260  /*
261  * We have a fully valid partdesc ready to store into the relcache.
262  * Reparent it so it has the right lifespan.
263  */
265 
266  /*
267  * But first, a kluge: if there's an old rd_pdcxt, it contains an old
268  * partition descriptor that may still be referenced somewhere. Preserve
269  * it, while not leaking it, by reattaching it as a child context of the
270  * new rd_pdcxt. Eventually it will get dropped by either RelationClose
271  * or RelationClearRelation.
272  */
273  if (rel->rd_pdcxt != NULL)
274  MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
275  rel->rd_pdcxt = new_pdcxt;
276  rel->rd_partdesc = partdesc;
277 }
278 
279 /*
280  * CreatePartitionDirectory
281  * Create a new partition directory object.
282  */
285 {
286  MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
287  PartitionDirectory pdir;
288  HASHCTL ctl;
289 
290  MemSet(&ctl, 0, sizeof(HASHCTL));
291  ctl.keysize = sizeof(Oid);
292  ctl.entrysize = sizeof(PartitionDirectoryEntry);
293  ctl.hcxt = mcxt;
294 
295  pdir = palloc(sizeof(PartitionDirectoryData));
296  pdir->pdir_mcxt = mcxt;
297  pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
299 
300  MemoryContextSwitchTo(oldcontext);
301  return pdir;
302 }
303 
304 /*
305  * PartitionDirectoryLookup
306  * Look up the partition descriptor for a relation in the directory.
307  *
308  * The purpose of this function is to ensure that we get the same
309  * PartitionDesc for each relation every time we look it up. In the
310  * face of concurrent DDL, different PartitionDescs may be constructed with
311  * different views of the catalog state, but any single particular OID
312  * will always get the same PartitionDesc for as long as the same
313  * PartitionDirectory is used.
314  */
317 {
319  Oid relid = RelationGetRelid(rel);
320  bool found;
321 
322  pde = hash_search(pdir->pdir_hash, &relid, HASH_ENTER, &found);
323  if (!found)
324  {
325  /*
326  * We must keep a reference count on the relation so that the
327  * PartitionDesc to which we are pointing can't get destroyed.
328  */
330  pde->rel = rel;
331  pde->pd = RelationGetPartitionDesc(rel);
332  Assert(pde->pd != NULL);
333  }
334  return pde->pd;
335 }
336 
337 /*
338  * DestroyPartitionDirectory
339  * Destroy a partition directory.
340  *
341  * Release the reference counts we're holding.
342  */
343 void
345 {
348 
349  hash_seq_init(&status, pdir->pdir_hash);
350  while ((pde = hash_seq_search(&status)) != NULL)
352 }
353 
354 /*
355  * get_default_oid_from_partdesc
356  *
357  * Given a partition descriptor, return the OID of the default partition, if
358  * one exists; else, return InvalidOid.
359  */
360 Oid
362 {
363  if (partdesc && partdesc->boundinfo &&
365  return partdesc->oids[partdesc->boundinfo->default_index];
366 
367  return InvalidOid;
368 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:577
#define AllocSetContextCreate
Definition: memutils.h:170
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:529
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
#define RelationGetDescr(relation)
Definition: rel.h:461
void MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
Definition: mcxt.c:354
char get_rel_relkind(Oid relid)
Definition: lsyscache.c:1831
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:202
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt)
Definition: partdesc.c:284
#define AccessShareLock
Definition: lockdefs.h:36
Size entrysize
Definition: hsearch.h:73
MemoryContext CurTransactionContext
Definition: mcxt.c:50
void * stringToNode(const char *str)
Definition: read.c:89
#define MemSet(start, val, len)
Definition: c.h:971
bool * is_leaf
Definition: partdesc.h:26
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:907
PartitionKey RelationGetPartitionKey(Relation rel)
Definition: partcache.c:54
Form_pg_class rd_rel
Definition: rel.h:89
unsigned int Oid
Definition: postgres_ext.h:31
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:356
PartitionBoundInfo boundinfo
Definition: partdesc.h:29
Definition: type.h:89
Definition: dynahash.c:209
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:448
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2057
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
void DestroyPartitionDirectory(PartitionDirectory pdir)
Definition: partdesc.c:344
PartitionDesc pd
Definition: partdesc.c:47
#define MemoryContextCopyAndSetIdentifier(cxt, id)
Definition: memutils.h:97
#define NoLock
Definition: lockdefs.h:34
MemoryContext pdir_mcxt
Definition: partdesc.c:39
PartitionDesc RelationGetPartitionDesc(Relation rel)
Definition: partdesc.c:65
#define RelationGetRelationName(relation)
Definition: rel.h:469
#define partition_bound_has_default(bi)
Definition: partbounds.h:75
PartitionDesc rd_partdesc
Definition: rel.h:108
#define ClassOidIndexId
Definition: indexing.h:114
struct PartitionDirectoryEntry PartitionDirectoryEntry
Oid get_default_partition_oid(Oid parentId)
Definition: partition.c:290
#define heap_getattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:762
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
#define HASH_BLOBS
Definition: hsearch.h:88
#define TextDatumGetCString(d)
Definition: builtins.h:88
PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key)
Definition: partbounds.c:784
static void RelationBuildPartitionDesc(Relation rel)
Definition: partdesc.c:92
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2044
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:317
uintptr_t Datum
Definition: postgres.h:367
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
Size keysize
Definition: hsearch.h:72
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
#define InvalidOid
Definition: postgres_ext.h:36
List * find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
Definition: pg_inherits.c:55
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
MemoryContext rd_pdcxt
Definition: rel.h:109
#define Assert(condition)
Definition: c.h:738
static int list_length(const List *l)
Definition: pg_list.h:169
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1390
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1380
void * palloc(Size size)
Definition: mcxt.c:949
#define elog(elevel,...)
Definition: elog.h:214
int i
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define unlikely(x)
Definition: c.h:206
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition: partdesc.c:316
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
Definition: pg_list.h:50
#define RelationGetRelid(relation)
Definition: rel.h:435
PartitionBoundInfo partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:172
#define BTEqualStrategyNumber
Definition: stratnum.h:31
struct PartitionDirectoryData PartitionDirectoryData
#define lfirst_oid(lc)
Definition: pg_list.h:192
MemoryContext CacheMemoryContext
Definition: mcxt.c:47
Oid get_default_oid_from_partdesc(PartitionDesc partdesc)
Definition: partdesc.c:361