PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/parallel.h"
37 #include "access/reloptions.h"
38 #include "access/sysattr.h"
39 #include "access/table.h"
40 #include "access/tableam.h"
41 #include "access/tupdesc_details.h"
42 #include "access/xact.h"
43 #include "access/xlog.h"
44 #include "catalog/binary_upgrade.h"
45 #include "catalog/catalog.h"
46 #include "catalog/indexing.h"
47 #include "catalog/namespace.h"
48 #include "catalog/partition.h"
49 #include "catalog/pg_am.h"
50 #include "catalog/pg_amproc.h"
51 #include "catalog/pg_attrdef.h"
53 #include "catalog/pg_authid.h"
54 #include "catalog/pg_constraint.h"
55 #include "catalog/pg_database.h"
56 #include "catalog/pg_namespace.h"
57 #include "catalog/pg_opclass.h"
58 #include "catalog/pg_proc.h"
59 #include "catalog/pg_publication.h"
60 #include "catalog/pg_rewrite.h"
61 #include "catalog/pg_shseclabel.h"
64 #include "catalog/pg_tablespace.h"
65 #include "catalog/pg_trigger.h"
66 #include "catalog/pg_type.h"
67 #include "catalog/schemapg.h"
68 #include "catalog/storage.h"
69 #include "commands/policy.h"
71 #include "commands/trigger.h"
72 #include "miscadmin.h"
73 #include "nodes/makefuncs.h"
74 #include "nodes/nodeFuncs.h"
75 #include "optimizer/optimizer.h"
76 #include "pgstat.h"
77 #include "rewrite/rewriteDefine.h"
78 #include "rewrite/rowsecurity.h"
79 #include "storage/lmgr.h"
80 #include "storage/smgr.h"
81 #include "utils/array.h"
82 #include "utils/builtins.h"
83 #include "utils/datum.h"
84 #include "utils/fmgroids.h"
85 #include "utils/inval.h"
86 #include "utils/lsyscache.h"
87 #include "utils/memutils.h"
88 #include "utils/relmapper.h"
89 #include "utils/resowner_private.h"
90 #include "utils/snapmgr.h"
91 #include "utils/syscache.h"
92 
/* Magic number identifying the relcache init file format. */
#define RELCACHE_INIT_FILEMAGIC     0x573266    /* version ID value */

/*
 * Whether to bother checking if relation cache memory needs to be freed
 * eagerly.  See also RelationBuildDesc() and pg_config_manual.h.
 *
 * RECOVER_RELATION_BUILD_MEMORY is forced to 0 when it is undefined (or
 * defined as 0); MAYBE_RECOVER_RELATION_BUILD_MEMORY is defined when either
 * that setting is nonzero or DISCARD_CACHES_ENABLED is defined.
 */
#if defined(RECOVER_RELATION_BUILD_MEMORY) && (RECOVER_RELATION_BUILD_MEMORY != 0)
#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
#else
#define RECOVER_RELATION_BUILD_MEMORY 0
#ifdef DISCARD_CACHES_ENABLED
#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
#endif
#endif
107 
108 /*
109  * hardcoded tuple descriptors, contents generated by genbki.pl
110  */
111 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
112 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
113 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
114 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
115 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
116 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
117 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
118 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
119 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
120 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
121 
122 /*
123  * Hash tables that index the relation cache
124  *
125  * We used to index the cache by both name and OID, but now there
126  * is only an index by OID.
127  */
128 typedef struct relidcacheent
129 {
133 
135 
/*
 * This flag is false until we have prepared the critical relcache entries
 * that are needed to do indexscans on the tables read by relcache building.
 */
bool        criticalRelcachesBuilt = false;

/*
 * This flag is false until we have prepared the critical relcache entries
 * for shared catalogs (which are the tables needed for login).
 */
bool        criticalSharedRelcachesBuilt = false;
147 
148 /*
149  * This counter counts relcache inval events received since backend startup
150  * (but only for rels that are actually in cache). Presently, we use it only
151  * to detect whether data about to be written by write_relcache_init_file()
152  * might already be obsolete.
153  */
154 static long relcacheInvalsReceived = 0L;
155 
156 /*
157  * in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
158  * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
159  * It critically relies on each backend absorbing those changes no later than
160  * next transaction start. Hence, RelationBuildDesc() loops until it finishes
161  * without accepting a relevant invalidation. (Most invalidation consumers
162  * don't do this.)
163  */
164 typedef struct inprogressent
165 {
166  Oid reloid; /* OID of relation being built */
167  bool invalidated; /* whether an invalidation arrived for it */
169 
173 
174 /*
175  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
176  * cleanup work. This list intentionally has limited size; if it overflows,
177  * we fall back to scanning the whole hashtable. There is no value in a very
178  * large list because (1) at some point, a hash_seq_search scan is faster than
179  * retail lookups, and (2) the value of this is to reduce EOXact work for
180  * short transactions, which can't have dirtied all that many tables anyway.
181  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
182  * cleanup processing must be idempotent.
183  */
184 #define MAX_EOXACT_LIST 32
186 static int eoxact_list_len = 0;
187 static bool eoxact_list_overflowed = false;
188 
189 #define EOXactListAdd(rel) \
190  do { \
191  if (eoxact_list_len < MAX_EOXACT_LIST) \
192  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
193  else \
194  eoxact_list_overflowed = true; \
195  } while (0)
196 
197 /*
198  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
199  * cleanup work. The array expands as needed; there is no hashtable because
200  * we don't need to access individual items except at EOXact.
201  */
203 static int NextEOXactTupleDescNum = 0;
204 static int EOXactTupleDescArrayLen = 0;
205 
/*
 *      macros to manipulate the lookup hashtable
 */

/*
 * RelationCacheInsert
 *      Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * If an entry for that OID already exists, replace_allowed is asserted and
 * the entry is repointed at RELATION.  The old descriptor is destroyed if
 * its reference count is zero; otherwise it is left alone and (except in
 * bootstrap mode) a WARNING is emitted.
 */
#define RelationCacheInsert(RELATION, replace_allowed)  \
do { \
    RelIdCacheEnt *hentry; bool found; \
    hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                           &((RELATION)->rd_id), \
                                           HASH_ENTER, &found); \
    if (found) \
    { \
        /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
        Relation _old_rel = hentry->reldesc; \
        Assert(replace_allowed); \
        hentry->reldesc = (RELATION); \
        if (RelationHasReferenceCountZero(_old_rel)) \
            RelationDestroyRelation(_old_rel, false); \
        else if (!IsBootstrapProcessingMode()) \
            elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
                 RelationGetRelationName(_old_rel)); \
    } \
    else \
        hentry->reldesc = (RELATION); \
} while(0)
230 
/*
 * RelationIdCacheLookup
 *      Look up ID in RelationIdCache and assign the cached descriptor to
 *      RELATION, or NULL if there is no entry.  ID must be an lvalue, since
 *      its address is passed to hash_search.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
    RelIdCacheEnt *hentry; \
    hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                           &(ID), \
                                           HASH_FIND, NULL); \
    if (hentry) \
        RELATION = hentry->reldesc; \
    else \
        RELATION = NULL; \
} while(0)
242 
/*
 * RelationCacheDelete
 *      Remove RELATION's entry (keyed by rd_id) from RelationIdCache.
 *      A missing entry is reported with a WARNING rather than an error.
 */
#define RelationCacheDelete(RELATION) \
do { \
    RelIdCacheEnt *hentry; \
    hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                           &((RELATION)->rd_id), \
                                           HASH_REMOVE, NULL); \
    if (hentry == NULL) \
        elog(WARNING, "failed to delete relcache entry for OID %u", \
             (RELATION)->rd_id); \
} while(0)
253 
254 
255 /*
256  * Special cache for opclass-related information
257  *
258  * Note: only default support procs get cached, ie, those with
259  * lefttype = righttype = opcintype.
260  */
261 typedef struct opclasscacheent
262 {
263  Oid opclassoid; /* lookup key: OID of opclass */
264  bool valid; /* set true after successful fill-in */
265  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
266  Oid opcfamily; /* OID of opclass's family */
267  Oid opcintype; /* OID of opclass's declared input type */
268  RegProcedure *supportProcs; /* OIDs of support procedures */
270 
271 static HTAB *OpClassCache = NULL;
272 
273 
274 /* non-export function prototypes */
275 
276 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
277 static void RelationClearRelation(Relation relation, bool rebuild);
278 
279 static void RelationReloadIndexInfo(Relation relation);
280 static void RelationReloadNailed(Relation relation);
281 static void RelationFlushRelation(Relation relation);
283 #ifdef USE_ASSERT_CHECKING
284 static void AssertPendingSyncConsistency(Relation relation);
285 #endif
286 static void AtEOXact_cleanup(Relation relation, bool isCommit);
287 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
288  SubTransactionId mySubid, SubTransactionId parentSubid);
289 static bool load_relcache_init_file(bool shared);
290 static void write_relcache_init_file(bool shared);
291 static void write_item(const void *data, Size len, FILE *fp);
292 
293 static void formrdesc(const char *relationName, Oid relationReltype,
294  bool isshared, int natts, const FormData_pg_attribute *attrs);
295 
296 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
298 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
299 static void RelationBuildTupleDesc(Relation relation);
300 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
301 static void RelationInitPhysicalAddr(Relation relation);
302 static void load_critical_index(Oid indexoid, Oid heapoid);
303 static TupleDesc GetPgClassDescriptor(void);
304 static TupleDesc GetPgIndexDescriptor(void);
305 static void AttrDefaultFetch(Relation relation, int ndef);
306 static int AttrDefaultCmp(const void *a, const void *b);
307 static void CheckConstraintFetch(Relation relation);
308 static int CheckConstraintCmp(const void *a, const void *b);
309 static void InitIndexAmRoutine(Relation relation);
310 static void IndexSupportInitialize(oidvector *indclass,
311  RegProcedure *indexSupport,
312  Oid *opFamily,
313  Oid *opcInType,
314  StrategyNumber maxSupportNumber,
315  AttrNumber maxAttributeNumber);
316 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
317  StrategyNumber numSupport);
318 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
319 static void unlink_initfile(const char *initfilename, int elevel);
320 
321 
322 /*
323  * ScanPgRelation
324  *
325  * This is used by RelationBuildDesc to find a pg_class
326  * tuple matching targetRelId. The caller must hold at least
327  * AccessShareLock on the target relid to prevent concurrent-update
328  * scenarios; it isn't guaranteed that all scans used to build the
329  * relcache entry will use the same snapshot. If, for example,
330  * an attribute were to be added after scanning pg_class and before
331  * scanning pg_attribute, relnatts wouldn't match.
332  *
333  * NB: the returned tuple has been copied into palloc'd storage
334  * and must eventually be freed with heap_freetuple.
335  */
336 static HeapTuple
337 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
338 {
339  HeapTuple pg_class_tuple;
340  Relation pg_class_desc;
341  SysScanDesc pg_class_scan;
342  ScanKeyData key[1];
343  Snapshot snapshot = NULL;
344 
345  /*
346  * If something goes wrong during backend startup, we might find ourselves
347  * trying to read pg_class before we've selected a database. That ain't
348  * gonna work, so bail out with a useful error message. If this happens,
349  * it probably means a relcache entry that needs to be nailed isn't.
350  */
351  if (!OidIsValid(MyDatabaseId))
352  elog(FATAL, "cannot read pg_class without having selected a database");
353 
354  /*
355  * form a scan key
356  */
357  ScanKeyInit(&key[0],
358  Anum_pg_class_oid,
359  BTEqualStrategyNumber, F_OIDEQ,
360  ObjectIdGetDatum(targetRelId));
361 
362  /*
363  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
364  * built the critical relcache entries (this includes initdb and startup
365  * without a pg_internal.init file). The caller can also force a heap
366  * scan by setting indexOK == false.
367  */
368  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
369 
370  /*
371  * The caller might need a tuple that's newer than the one the historic
372  * snapshot; currently the only case requiring to do so is looking up the
373  * relfilenumber of non mapped system relations during decoding. That
374  * snapshot can't change in the midst of a relcache build, so there's no
375  * need to register the snapshot.
376  */
377  if (force_non_historic)
378  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
379 
380  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
381  indexOK && criticalRelcachesBuilt,
382  snapshot,
383  1, key);
384 
385  pg_class_tuple = systable_getnext(pg_class_scan);
386 
387  /*
388  * Must copy tuple before releasing buffer.
389  */
390  if (HeapTupleIsValid(pg_class_tuple))
391  pg_class_tuple = heap_copytuple(pg_class_tuple);
392 
393  /* all done */
394  systable_endscan(pg_class_scan);
395  table_close(pg_class_desc, AccessShareLock);
396 
397  return pg_class_tuple;
398 }
399 
400 /*
401  * AllocateRelationDesc
402  *
403  * This is used to allocate memory for a new relation descriptor
404  * and initialize the rd_rel field from the given pg_class tuple.
405  */
406 static Relation
408 {
409  Relation relation;
410  MemoryContext oldcxt;
411  Form_pg_class relationForm;
412 
413  /* Relcache entries must live in CacheMemoryContext */
415 
416  /*
417  * allocate and zero space for new relation descriptor
418  */
419  relation = (Relation) palloc0(sizeof(RelationData));
420 
421  /* make sure relation is marked as having no open file yet */
422  relation->rd_smgr = NULL;
423 
424  /*
425  * Copy the relation tuple form
426  *
427  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
428  * variable-length fields (relacl, reloptions) are NOT stored in the
429  * relcache --- there'd be little point in it, since we don't copy the
430  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
431  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
432  * it from the syscache if you need it. The same goes for the original
433  * form of reloptions (however, we do store the parsed form of reloptions
434  * in rd_options).
435  */
436  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
437 
438  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
439 
440  /* initialize relation tuple form */
441  relation->rd_rel = relationForm;
442 
443  /* and allocate attribute tuple form storage */
444  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
445  /* which we mark as a reference-counted tupdesc */
446  relation->rd_att->tdrefcount = 1;
447 
448  MemoryContextSwitchTo(oldcxt);
449 
450  return relation;
451 }
452 
453 /*
454  * RelationParseRelOptions
455  * Convert pg_class.reloptions into pre-parsed rd_options
456  *
457  * tuple is the real pg_class tuple (not rd_rel!) for relation
458  *
459  * Note: rd_rel and (if an index) rd_indam must be valid already
460  */
461 static void
463 {
464  bytea *options;
465  amoptions_function amoptsfn;
466 
467  relation->rd_options = NULL;
468 
469  /*
470  * Look up any AM-specific parse function; fall out if relkind should not
471  * have options.
472  */
473  switch (relation->rd_rel->relkind)
474  {
475  case RELKIND_RELATION:
476  case RELKIND_TOASTVALUE:
477  case RELKIND_VIEW:
478  case RELKIND_MATVIEW:
479  case RELKIND_PARTITIONED_TABLE:
480  amoptsfn = NULL;
481  break;
482  case RELKIND_INDEX:
483  case RELKIND_PARTITIONED_INDEX:
484  amoptsfn = relation->rd_indam->amoptions;
485  break;
486  default:
487  return;
488  }
489 
490  /*
491  * Fetch reloptions from tuple; have to use a hardwired descriptor because
492  * we might not have any other for pg_class yet (consider executing this
493  * code for pg_class itself)
494  */
495  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
496 
497  /*
498  * Copy parsed data into CacheMemoryContext. To guard against the
499  * possibility of leaks in the reloptions code, we want to do the actual
500  * parsing in the caller's memory context and copy the results into
501  * CacheMemoryContext after the fact.
502  */
503  if (options)
504  {
506  VARSIZE(options));
507  memcpy(relation->rd_options, options, VARSIZE(options));
508  pfree(options);
509  }
510 }
511 
512 /*
513  * RelationBuildTupleDesc
514  *
515  * Form the relation's tuple descriptor from information in
516  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
517  */
518 static void
520 {
521  HeapTuple pg_attribute_tuple;
522  Relation pg_attribute_desc;
523  SysScanDesc pg_attribute_scan;
524  ScanKeyData skey[2];
525  int need;
526  TupleConstr *constr;
527  AttrMissing *attrmiss = NULL;
528  int ndef = 0;
529 
530  /* fill rd_att's type ID fields (compare heap.c's AddNewRelationTuple) */
531  relation->rd_att->tdtypeid =
532  relation->rd_rel->reltype ? relation->rd_rel->reltype : RECORDOID;
533  relation->rd_att->tdtypmod = -1; /* just to be sure */
534 
536  sizeof(TupleConstr));
537  constr->has_not_null = false;
538  constr->has_generated_stored = false;
539 
540  /*
541  * Form a scan key that selects only user attributes (attnum > 0).
542  * (Eliminating system attribute rows at the index level is lots faster
543  * than fetching them.)
544  */
545  ScanKeyInit(&skey[0],
546  Anum_pg_attribute_attrelid,
547  BTEqualStrategyNumber, F_OIDEQ,
549  ScanKeyInit(&skey[1],
550  Anum_pg_attribute_attnum,
551  BTGreaterStrategyNumber, F_INT2GT,
552  Int16GetDatum(0));
553 
554  /*
555  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
556  * built the critical relcache entries (this includes initdb and startup
557  * without a pg_internal.init file).
558  */
559  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
560  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
561  AttributeRelidNumIndexId,
563  NULL,
564  2, skey);
565 
566  /*
567  * add attribute data to relation->rd_att
568  */
569  need = RelationGetNumberOfAttributes(relation);
570 
571  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
572  {
573  Form_pg_attribute attp;
574  int attnum;
575 
576  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
577 
578  attnum = attp->attnum;
579  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
580  elog(ERROR, "invalid attribute number %d for relation \"%s\"",
581  attp->attnum, RelationGetRelationName(relation));
582 
583  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
584  attp,
586 
587  /* Update constraint/default info */
588  if (attp->attnotnull)
589  constr->has_not_null = true;
590  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
591  constr->has_generated_stored = true;
592  if (attp->atthasdef)
593  ndef++;
594 
595  /* If the column has a "missing" value, put it in the attrmiss array */
596  if (attp->atthasmissing)
597  {
598  Datum missingval;
599  bool missingNull;
600 
601  /* Do we have a missing value? */
602  missingval = heap_getattr(pg_attribute_tuple,
603  Anum_pg_attribute_attmissingval,
604  pg_attribute_desc->rd_att,
605  &missingNull);
606  if (!missingNull)
607  {
608  /* Yes, fetch from the array */
609  MemoryContext oldcxt;
610  bool is_null;
611  int one = 1;
612  Datum missval;
613 
614  if (attrmiss == NULL)
615  attrmiss = (AttrMissing *)
617  relation->rd_rel->relnatts *
618  sizeof(AttrMissing));
619 
620  missval = array_get_element(missingval,
621  1,
622  &one,
623  -1,
624  attp->attlen,
625  attp->attbyval,
626  attp->attalign,
627  &is_null);
628  Assert(!is_null);
629  if (attp->attbyval)
630  {
631  /* for copy by val just copy the datum direct */
632  attrmiss[attnum - 1].am_value = missval;
633  }
634  else
635  {
636  /* otherwise copy in the correct context */
638  attrmiss[attnum - 1].am_value = datumCopy(missval,
639  attp->attbyval,
640  attp->attlen);
641  MemoryContextSwitchTo(oldcxt);
642  }
643  attrmiss[attnum - 1].am_present = true;
644  }
645  }
646  need--;
647  if (need == 0)
648  break;
649  }
650 
651  /*
652  * end the scan and close the attribute relation
653  */
654  systable_endscan(pg_attribute_scan);
655  table_close(pg_attribute_desc, AccessShareLock);
656 
657  if (need != 0)
658  elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
659  need, RelationGetRelid(relation));
660 
661  /*
662  * The attcacheoff values we read from pg_attribute should all be -1
663  * ("unknown"). Verify this if assert checking is on. They will be
664  * computed when and if needed during tuple access.
665  */
666 #ifdef USE_ASSERT_CHECKING
667  {
668  int i;
669 
670  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
671  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
672  }
673 #endif
674 
675  /*
676  * However, we can easily set the attcacheoff value for the first
677  * attribute: it must be zero. This eliminates the need for special cases
678  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
679  */
680  if (RelationGetNumberOfAttributes(relation) > 0)
681  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
682 
683  /*
684  * Set up constraint/default info
685  */
686  if (constr->has_not_null ||
687  constr->has_generated_stored ||
688  ndef > 0 ||
689  attrmiss ||
690  relation->rd_rel->relchecks > 0)
691  {
692  relation->rd_att->constr = constr;
693 
694  if (ndef > 0) /* DEFAULTs */
695  AttrDefaultFetch(relation, ndef);
696  else
697  constr->num_defval = 0;
698 
699  constr->missing = attrmiss;
700 
701  if (relation->rd_rel->relchecks > 0) /* CHECKs */
702  CheckConstraintFetch(relation);
703  else
704  constr->num_check = 0;
705  }
706  else
707  {
708  pfree(constr);
709  relation->rd_att->constr = NULL;
710  }
711 }
712 
713 /*
714  * RelationBuildRuleLock
715  *
716  * Form the relation's rewrite rules from information in
717  * the pg_rewrite system catalog.
718  *
719  * Note: The rule parsetrees are potentially very complex node structures.
720  * To allow these trees to be freed when the relcache entry is flushed,
721  * we make a private memory context to hold the RuleLock information for
722  * each relcache entry that has associated rules. The context is used
723  * just for rule info, not for any other subsidiary data of the relcache
724  * entry, because that keeps the update logic in RelationClearRelation()
725  * manageable. The other subsidiary data structures are simple enough
726  * to be easy to free explicitly, anyway.
727  *
728  * Note: The relation's reloptions must have been extracted first.
729  */
730 static void
732 {
733  MemoryContext rulescxt;
734  MemoryContext oldcxt;
735  HeapTuple rewrite_tuple;
736  Relation rewrite_desc;
737  TupleDesc rewrite_tupdesc;
738  SysScanDesc rewrite_scan;
740  RuleLock *rulelock;
741  int numlocks;
742  RewriteRule **rules;
743  int maxlocks;
744 
745  /*
746  * Make the private context. Assume it'll not contain much data.
747  */
749  "relation rules",
751  relation->rd_rulescxt = rulescxt;
753  RelationGetRelationName(relation));
754 
755  /*
756  * allocate an array to hold the rewrite rules (the array is extended if
757  * necessary)
758  */
759  maxlocks = 4;
760  rules = (RewriteRule **)
761  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
762  numlocks = 0;
763 
764  /*
765  * form a scan key
766  */
767  ScanKeyInit(&key,
768  Anum_pg_rewrite_ev_class,
769  BTEqualStrategyNumber, F_OIDEQ,
771 
772  /*
773  * open pg_rewrite and begin a scan
774  *
775  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
776  * be reading the rules in name order, except possibly during
777  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
778  * ensures that rules will be fired in name order.
779  */
780  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
781  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
782  rewrite_scan = systable_beginscan(rewrite_desc,
783  RewriteRelRulenameIndexId,
784  true, NULL,
785  1, &key);
786 
787  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
788  {
789  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
790  bool isnull;
791  Datum rule_datum;
792  char *rule_str;
793  RewriteRule *rule;
794  Oid check_as_user;
795 
796  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
797  sizeof(RewriteRule));
798 
799  rule->ruleId = rewrite_form->oid;
800 
801  rule->event = rewrite_form->ev_type - '0';
802  rule->enabled = rewrite_form->ev_enabled;
803  rule->isInstead = rewrite_form->is_instead;
804 
805  /*
806  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
807  * rule strings are often large enough to be toasted. To avoid
808  * leaking memory in the caller's context, do the detoasting here so
809  * we can free the detoasted version.
810  */
811  rule_datum = heap_getattr(rewrite_tuple,
812  Anum_pg_rewrite_ev_action,
813  rewrite_tupdesc,
814  &isnull);
815  Assert(!isnull);
816  rule_str = TextDatumGetCString(rule_datum);
817  oldcxt = MemoryContextSwitchTo(rulescxt);
818  rule->actions = (List *) stringToNode(rule_str);
819  MemoryContextSwitchTo(oldcxt);
820  pfree(rule_str);
821 
822  rule_datum = heap_getattr(rewrite_tuple,
823  Anum_pg_rewrite_ev_qual,
824  rewrite_tupdesc,
825  &isnull);
826  Assert(!isnull);
827  rule_str = TextDatumGetCString(rule_datum);
828  oldcxt = MemoryContextSwitchTo(rulescxt);
829  rule->qual = (Node *) stringToNode(rule_str);
830  MemoryContextSwitchTo(oldcxt);
831  pfree(rule_str);
832 
833  /*
834  * If this is a SELECT rule defining a view, and the view has
835  * "security_invoker" set, we must perform all permissions checks on
836  * relations referred to by the rule as the invoking user.
837  *
838  * In all other cases (including non-SELECT rules on security invoker
839  * views), perform the permissions checks as the relation owner.
840  */
841  if (rule->event == CMD_SELECT &&
842  relation->rd_rel->relkind == RELKIND_VIEW &&
843  RelationHasSecurityInvoker(relation))
844  check_as_user = InvalidOid;
845  else
846  check_as_user = relation->rd_rel->relowner;
847 
848  /*
849  * Scan through the rule's actions and set the checkAsUser field on
850  * all RTEPermissionInfos. We have to look at the qual as well, in
851  * case it contains sublinks.
852  *
853  * The reason for doing this when the rule is loaded, rather than when
854  * it is stored, is that otherwise ALTER TABLE OWNER would have to
855  * grovel through stored rules to update checkAsUser fields. Scanning
856  * the rule tree during load is relatively cheap (compared to
857  * constructing it in the first place), so we do it here.
858  */
859  setRuleCheckAsUser((Node *) rule->actions, check_as_user);
860  setRuleCheckAsUser(rule->qual, check_as_user);
861 
862  if (numlocks >= maxlocks)
863  {
864  maxlocks *= 2;
865  rules = (RewriteRule **)
866  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
867  }
868  rules[numlocks++] = rule;
869  }
870 
871  /*
872  * end the scan and close the attribute relation
873  */
874  systable_endscan(rewrite_scan);
875  table_close(rewrite_desc, AccessShareLock);
876 
877  /*
878  * there might not be any rules (if relhasrules is out-of-date)
879  */
880  if (numlocks == 0)
881  {
882  relation->rd_rules = NULL;
883  relation->rd_rulescxt = NULL;
884  MemoryContextDelete(rulescxt);
885  return;
886  }
887 
888  /*
889  * form a RuleLock and insert into relation
890  */
891  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
892  rulelock->numLocks = numlocks;
893  rulelock->rules = rules;
894 
895  relation->rd_rules = rulelock;
896 }
897 
898 /*
899  * equalRuleLocks
900  *
901  * Determine whether two RuleLocks are equivalent
902  *
903  * Probably this should be in the rules code someplace...
904  */
905 static bool
907 {
908  int i;
909 
910  /*
911  * As of 7.3 we assume the rule ordering is repeatable, because
912  * RelationBuildRuleLock should read 'em in a consistent order. So just
913  * compare corresponding slots.
914  */
915  if (rlock1 != NULL)
916  {
917  if (rlock2 == NULL)
918  return false;
919  if (rlock1->numLocks != rlock2->numLocks)
920  return false;
921  for (i = 0; i < rlock1->numLocks; i++)
922  {
923  RewriteRule *rule1 = rlock1->rules[i];
924  RewriteRule *rule2 = rlock2->rules[i];
925 
926  if (rule1->ruleId != rule2->ruleId)
927  return false;
928  if (rule1->event != rule2->event)
929  return false;
930  if (rule1->enabled != rule2->enabled)
931  return false;
932  if (rule1->isInstead != rule2->isInstead)
933  return false;
934  if (!equal(rule1->qual, rule2->qual))
935  return false;
936  if (!equal(rule1->actions, rule2->actions))
937  return false;
938  }
939  }
940  else if (rlock2 != NULL)
941  return false;
942  return true;
943 }
944 
945 /*
946  * equalPolicy
947  *
948  * Determine whether two policies are equivalent
949  */
950 static bool
952 {
953  int i;
954  Oid *r1,
955  *r2;
956 
957  if (policy1 != NULL)
958  {
959  if (policy2 == NULL)
960  return false;
961 
962  if (policy1->polcmd != policy2->polcmd)
963  return false;
964  if (policy1->hassublinks != policy2->hassublinks)
965  return false;
966  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
967  return false;
968  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
969  return false;
970 
971  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
972  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
973 
974  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
975  {
976  if (r1[i] != r2[i])
977  return false;
978  }
979 
980  if (!equal(policy1->qual, policy2->qual))
981  return false;
982  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
983  return false;
984  }
985  else if (policy2 != NULL)
986  return false;
987 
988  return true;
989 }
990 
991 /*
992  * equalRSDesc
993  *
994  * Determine whether two RowSecurityDesc's are equivalent
 *
 * Two NULL descriptors are equivalent; a NULL and a non-NULL one are not.
 * Otherwise the policy lists must have the same length, and the lists are
 * walked in parallel with forboth(), requiring equalPolicy() to accept
 * every pair.
 *
 * NOTE(review): l and r are presumably the policies found at lc and rc;
 * their declarations are not visible in this listing -- confirm.
995  */
996 static bool
998 {
999  ListCell *lc,
1000  *rc;
1001 
1002  if (rsdesc1 == NULL && rsdesc2 == NULL)
1003  return true;
1004 
1005  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1006  (rsdesc1 == NULL && rsdesc2 != NULL))
1007  return false;
1008 
1009  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1010  return false;
1011 
1012  /* RelationBuildRowSecurity should build policies in order */
1013  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1014  {
1017 
1018  if (!equalPolicy(l, r))
1019  return false;
1020  }
1021 
1022  return true;
1023 }
1024 
1025 /*
1026  * RelationBuildDesc
1027  *
1028  * Build a relation descriptor. The caller must hold at least
1029  * AccessShareLock on the target relid.
1030  *
1031  * The new descriptor is inserted into the hash table if insertIt is true.
1032  *
1033  * Returns NULL if no pg_class row could be found for the given relid
1034  * (suggesting we are trying to access a just-deleted relation).
1035  * Any other error is reported via elog.
 *
 * If the in-progress entry for this build is marked invalidated by the time
 * the descriptor has been filled in, the partially built descriptor is
 * destroyed and the build restarts from the pg_class lookup.
1036  */
1037 static Relation
1038 RelationBuildDesc(Oid targetRelId, bool insertIt)
1039 {
1040  int in_progress_offset;
1041  Relation relation;
1042  Oid relid;
1043  HeapTuple pg_class_tuple;
1044  Form_pg_class relp;
1045 
1046  /*
1047  * This function and its subroutines can allocate a good deal of transient
1048  * data in CurrentMemoryContext. Traditionally we've just leaked that
1049  * data, reasoning that the caller's context is at worst of transaction
1050  * scope, and relcache loads shouldn't happen so often that it's essential
1051  * to recover transient data before end of statement/transaction. However
1052  * that's definitely not true when debug_discard_caches is active, and
1053  * perhaps it's not true in other cases.
1054  *
1055  * When debug_discard_caches is active or when forced to by
1056  * RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a
1057  * temporary context that we'll free before returning. Make it a child of
1058  * caller's context so that it will get cleaned up appropriately if we
1059  * error out partway through.
1060  */
1061 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1062  MemoryContext tmpcxt = NULL;
1063  MemoryContext oldcxt = NULL;
1064 
1066  {
1068  "RelationBuildDesc workspace",
1070  oldcxt = MemoryContextSwitchTo(tmpcxt);
1071  }
1072 #endif
1073 
1074  /* Register to catch invalidation messages */
1076  {
1077  int allocsize;
1078 
1079  allocsize = in_progress_list_maxlen * 2;
1081  allocsize * sizeof(*in_progress_list));
1082  in_progress_list_maxlen = allocsize;
1083  }
1084  in_progress_offset = in_progress_list_len++;
1085  in_progress_list[in_progress_offset].reloid = targetRelId;
1086 retry:
1087  in_progress_list[in_progress_offset].invalidated = false;
1088 
1089  /*
1090  * find the tuple in pg_class corresponding to the given relation id
1091  */
1092  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1093 
1094  /*
1095  * if no such tuple exists, return NULL
1096  */
1097  if (!HeapTupleIsValid(pg_class_tuple))
1098  {
1099 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1100  if (tmpcxt)
1101  {
1102  /* Return to caller's context, and blow away the temporary context */
1103  MemoryContextSwitchTo(oldcxt);
1104  MemoryContextDelete(tmpcxt);
1105  }
1106 #endif
1107  Assert(in_progress_offset + 1 == in_progress_list_len);
1109  return NULL;
1110  }
1111 
1112  /*
1113  * get information from the pg_class_tuple
1114  */
1115  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1116  relid = relp->oid;
1117  Assert(relid == targetRelId);
1118 
1119  /*
1120  * allocate storage for the relation descriptor, and copy pg_class_tuple
1121  * to relation->rd_rel.
1122  */
1123  relation = AllocateRelationDesc(relp);
1124 
1125  /*
1126  * initialize the relation's relation id (relation->rd_id)
1127  */
1128  RelationGetRelid(relation) = relid;
1129 
1130  /*
1131  * Normal relations are not nailed into the cache. Since we don't flush
1132  * new relations, it won't be new. It could be temp though.
1133  */
1134  relation->rd_refcnt = 0;
1135  relation->rd_isnailed = false;
1140  switch (relation->rd_rel->relpersistence)
1141  {
1142  case RELPERSISTENCE_UNLOGGED:
1143  case RELPERSISTENCE_PERMANENT:
1144  relation->rd_backend = InvalidBackendId;
1145  relation->rd_islocaltemp = false;
1146  break;
1147  case RELPERSISTENCE_TEMP:
1148  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1149  {
1150  relation->rd_backend = BackendIdForTempRelations();
1151  relation->rd_islocaltemp = true;
1152  }
1153  else
1154  {
1155  /*
1156  * If it's a temp table, but not one of ours, we have to use
1157  * the slow, grotty method to figure out the owning backend.
1158  *
1159  * Note: it's possible that rd_backend gets set to MyBackendId
1160  * here, in case we are looking at a pg_class entry left over
1161  * from a crashed backend that coincidentally had the same
1162  * BackendId we're using. We should *not* consider such a
1163  * table to be "ours"; this is why we need the separate
1164  * rd_islocaltemp flag. The pg_class entry will get flushed
1165  * if/when we clean out the corresponding temp table namespace
1166  * in preparation for using it.
1167  */
1168  relation->rd_backend =
1169  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1170  Assert(relation->rd_backend != InvalidBackendId);
1171  relation->rd_islocaltemp = false;
1172  }
1173  break;
1174  default:
1175  elog(ERROR, "invalid relpersistence: %c",
1176  relation->rd_rel->relpersistence);
1177  break;
1178  }
1179 
1180  /*
1181  * initialize the tuple descriptor (relation->rd_att).
1182  */
1183  RelationBuildTupleDesc(relation);
1184 
1185  /* foreign key data is not loaded till asked for */
1186  relation->rd_fkeylist = NIL;
1187  relation->rd_fkeyvalid = false;
1188 
1189  /* partitioning data is not loaded till asked for */
1190  relation->rd_partkey = NULL;
1191  relation->rd_partkeycxt = NULL;
1192  relation->rd_partdesc = NULL;
1193  relation->rd_partdesc_nodetached = NULL;
1195  relation->rd_pdcxt = NULL;
1196  relation->rd_pddcxt = NULL;
1197  relation->rd_partcheck = NIL;
1198  relation->rd_partcheckvalid = false;
1199  relation->rd_partcheckcxt = NULL;
1200 
1201  /*
1202  * initialize access method information
1203  */
1204  if (relation->rd_rel->relkind == RELKIND_INDEX ||
1205  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
1206  RelationInitIndexAccessInfo(relation);
1207  else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) ||
1208  relation->rd_rel->relkind == RELKIND_SEQUENCE)
1210  else
1211  Assert(relation->rd_rel->relam == InvalidOid);
1212 
1213  /* extract reloptions if any */
1214  RelationParseRelOptions(relation, pg_class_tuple);
1215 
1216  /*
1217  * Fetch rules, triggers and row security policies for this relation.
1218  *
1219  * Note that RelationBuildRuleLock() relies on this being done after
1220  * extracting the relation's reloptions.
1221  */
1222  if (relation->rd_rel->relhasrules)
1223  RelationBuildRuleLock(relation);
1224  else
1225  {
1226  relation->rd_rules = NULL;
1227  relation->rd_rulescxt = NULL;
1228  }
1229 
1230  if (relation->rd_rel->relhastriggers)
1231  RelationBuildTriggers(relation);
1232  else
1233  relation->trigdesc = NULL;
1234 
1235  if (relation->rd_rel->relrowsecurity)
1236  RelationBuildRowSecurity(relation);
1237  else
1238  relation->rd_rsdesc = NULL;
1239 
1240  /*
1241  * initialize the relation lock manager information
1242  */
1243  RelationInitLockInfo(relation); /* see lmgr.c */
1244 
1245  /*
1246  * initialize physical addressing information for the relation
1247  */
1248  RelationInitPhysicalAddr(relation);
1249 
1250  /* make sure relation is marked as having no open file yet */
1251  relation->rd_smgr = NULL;
1252 
1253  /*
1254  * now we can free the memory allocated for pg_class_tuple
1255  */
1256  heap_freetuple(pg_class_tuple);
1257 
1258  /*
1259  * If an invalidation arrived mid-build, start over. Between here and the
1260  * end of this function, don't add code that does or reasonably could read
1261  * system catalogs. That range must be free from invalidation processing
1262  * for the !insertIt case. For the insertIt case, RelationCacheInsert()
1263  * will enroll this relation in ordinary relcache invalidation processing.
1264  */
1265  if (in_progress_list[in_progress_offset].invalidated)
1266  {
1267  RelationDestroyRelation(relation, false);
1268  goto retry;
1269  }
1270  Assert(in_progress_offset + 1 == in_progress_list_len);
1272 
1273  /*
1274  * Insert newly created relation into relcache hash table, if requested.
1275  *
1276  * There is one scenario in which we might find a hashtable entry already
1277  * present, even though our caller failed to find it: if the relation is a
1278  * system catalog or index that's used during relcache load, we might have
1279  * recursively created the same relcache entry during the preceding steps.
1280  * So allow RelationCacheInsert to delete any already-present relcache
1281  * entry for the same OID. The already-present entry should have refcount
1282  * zero (else somebody forgot to close it); in the event that it doesn't,
1283  * we'll elog a WARNING and leak the already-present entry.
1284  */
1285  if (insertIt)
1286  RelationCacheInsert(relation, true);
1287 
1288  /* It's fully valid */
1289  relation->rd_isvalid = true;
1290 
1291 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1292  if (tmpcxt)
1293  {
1294  /* Return to caller's context, and blow away the temporary context */
1295  MemoryContextSwitchTo(oldcxt);
1296  MemoryContextDelete(tmpcxt);
1297  }
1298 #endif
1299 
1300  return relation;
1301 }
1302 
1303 /*
1304  * Initialize the physical addressing info (RelFileLocator) for a relcache entry
1305  *
1306  * Note: at the physical level, relations in the pg_global tablespace must
1307  * be treated as shared, even if relisshared isn't set. Hence we do not
1308  * look at relisshared here.
 *
 * Returns without touching rd_locator for relation kinds that have no
 * storage. Reports elog(ERROR) if the pg_class row needed to refresh the
 * filenode cannot be found, or if the relation mapper yields no valid
 * file number.
1309  */
1310 static void
1312 {
1313  RelFileNumber oldnumber = relation->rd_locator.relNumber; /* compared with the new value at the end */
1314 
1315  /* these relation kinds never have storage */
1316  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1317  return;
1318 
1319  if (relation->rd_rel->reltablespace)
1320  relation->rd_locator.spcOid = relation->rd_rel->reltablespace;
1321  else
1323  if (relation->rd_locator.spcOid == GLOBALTABLESPACE_OID)
1324  relation->rd_locator.dbOid = InvalidOid;
1325  else
1326  relation->rd_locator.dbOid = MyDatabaseId;
1327 
1328  if (relation->rd_rel->relfilenode)
1329  {
1330  /*
1331  * Even if we are using a decoding snapshot that doesn't represent the
1332  * current state of the catalog we need to make sure the filenode
1333  * points to the current file since the older file will be gone (or
1334  * truncated). The new file will still contain older rows so lookups
1335  * in them will work correctly. This wouldn't work correctly if
1336  * rewrites were allowed to change the schema in an incompatible way,
1337  * but those are prevented both on catalog tables and on user tables
1338  * declared as additional catalog tables.
1339  */
1342  && IsTransactionState())
1343  {
1344  HeapTuple phys_tuple;
1345  Form_pg_class physrel;
1346 
1347  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1348  RelationGetRelid(relation) != ClassOidIndexId,
1349  true);
1350  if (!HeapTupleIsValid(phys_tuple))
1351  elog(ERROR, "could not find pg_class entry for %u",
1352  RelationGetRelid(relation));
1353  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1354 
1355  relation->rd_rel->reltablespace = physrel->reltablespace;
1356  relation->rd_rel->relfilenode = physrel->relfilenode;
1357  heap_freetuple(phys_tuple);
1358  }
1359 
1360  relation->rd_locator.relNumber = relation->rd_rel->relfilenode;
1361  }
1362  else
1363  {
1364  /* Consult the relation mapper */
1365  relation->rd_locator.relNumber =
1367  relation->rd_rel->relisshared);
1368  if (!RelFileNumberIsValid(relation->rd_locator.relNumber))
1369  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1370  RelationGetRelationName(relation), relation->rd_id);
1371  }
1372 
1373  /*
1374  * For RelationNeedsWAL() to answer correctly on parallel workers, restore
1375  * rd_firstRelfilelocatorSubid. No subtransactions start or end while in
1376  * parallel mode, so the specific SubTransactionId does not matter.
1377  */
1378  if (IsParallelWorker() && oldnumber != relation->rd_locator.relNumber)
1379  {
1380  if (RelFileLocatorSkippingWAL(relation->rd_locator))
1382  else
1384  }
1385 }
1386 
1387 /*
1388  * Fill in the IndexAmRoutine for an index relation.
1389  *
1390  * relation's rd_amhandler and rd_indexcxt must be valid already.
 *
 * The struct returned by GetIndexAmRoutine() is copied into memory
 * allocated in rd_indexcxt, the copy is stored in rd_indam, and the
 * original is pfree'd.
1391  */
1392 static void
1394 {
1395  IndexAmRoutine *cached,
1396  *tmp;
1397 
1398  /*
1399  * Call the amhandler in the current, short-lived memory context, just in
1400  * case it leaks anything (it probably won't, but let's be paranoid).
1401  */
1402  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1403 
1404  /* OK, now transfer the data into relation's rd_indexcxt. */
1405  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1406  sizeof(IndexAmRoutine));
1407  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1408  relation->rd_indam = cached;
1409 
1410  pfree(tmp);
1411 }
1412 
1413 /*
1414  * Initialize index-access-method support data for an index relation
 *
 * Copies the index's pg_index tuple into the relcache entry, records the
 * access method's handler OID, creates the per-index memory context, and
 * fills the opfamily, opcintype, support-procedure, collation and
 * indoption arrays. Expression, predicate, exclusion and AM caches are
 * only reset here; they are filled later.
 *
 * Reports elog(ERROR) if the pg_index or pg_am syscache lookup fails, or
 * if relnatts disagrees with indnatts.
1415  */
1416 void
1418 {
1419  HeapTuple tuple;
1420  Form_pg_am aform;
1421  Datum indcollDatum;
1422  Datum indclassDatum;
1423  Datum indoptionDatum;
1424  bool isnull;
1425  oidvector *indcoll;
1426  oidvector *indclass;
1427  int2vector *indoption;
1428  MemoryContext indexcxt;
1429  MemoryContext oldcontext;
1430  int indnatts;
1431  int indnkeyatts;
1432  uint16 amsupport;
1433 
1434  /*
1435  * Make a copy of the pg_index entry for the index. Since pg_index
1436  * contains variable-length and possibly-null fields, we have to do this
1437  * honestly rather than just treating it as a Form_pg_index struct.
1438  */
1439  tuple = SearchSysCache1(INDEXRELID,
1440  ObjectIdGetDatum(RelationGetRelid(relation)));
1441  if (!HeapTupleIsValid(tuple))
1442  elog(ERROR, "cache lookup failed for index %u",
1443  RelationGetRelid(relation));
1445  relation->rd_indextuple = heap_copytuple(tuple);
1446  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1447  MemoryContextSwitchTo(oldcontext);
1448  ReleaseSysCache(tuple);
1449 
1450  /*
1451  * Look up the index's access method, save the OID of its handler function
1452  */
1453  Assert(relation->rd_rel->relam != InvalidOid);
1454  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1455  if (!HeapTupleIsValid(tuple))
1456  elog(ERROR, "cache lookup failed for access method %u",
1457  relation->rd_rel->relam);
1458  aform = (Form_pg_am) GETSTRUCT(tuple);
1459  relation->rd_amhandler = aform->amhandler;
1460  ReleaseSysCache(tuple);
1461 
1462  indnatts = RelationGetNumberOfAttributes(relation);
1463  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1464  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1465  RelationGetRelid(relation));
1466  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1467 
1468  /*
1469  * Make the private context to hold index access info. The reason we need
1470  * a context, and not just a couple of pallocs, is so that we won't leak
1471  * any subsidiary info attached to fmgr lookup records.
1472  */
1474  "index info",
1476  relation->rd_indexcxt = indexcxt;
1478  RelationGetRelationName(relation));
1479 
1480  /*
1481  * Now we can fetch the index AM's API struct
1482  */
1483  InitIndexAmRoutine(relation);
1484 
1485  /*
1486  * Allocate arrays to hold data. Opclasses are not used for included
1487  * columns, so allocate them for indnkeyatts only.
1488  */
1489  relation->rd_opfamily = (Oid *)
1490  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1491  relation->rd_opcintype = (Oid *)
1492  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1493 
1494  amsupport = relation->rd_indam->amsupport;
1495  if (amsupport > 0)
1496  {
1497  int nsupport = indnatts * amsupport;
1498 
1499  relation->rd_support = (RegProcedure *)
1500  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1501  relation->rd_supportinfo = (FmgrInfo *)
1502  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1503  }
1504  else
1505  {
1506  relation->rd_support = NULL;
1507  relation->rd_supportinfo = NULL;
1508  }
1509 
1510  relation->rd_indcollation = (Oid *)
1511  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1512 
1513  relation->rd_indoption = (int16 *)
1514  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1515 
1516  /*
1517  * indcollation cannot be referenced directly through the C struct,
1518  * because it comes after the variable-width indkey field. Must extract
1519  * the datum the hard way...
1520  */
1521  indcollDatum = fastgetattr(relation->rd_indextuple,
1522  Anum_pg_index_indcollation,
1524  &isnull);
1525  Assert(!isnull);
1526  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1527  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1528 
1529  /*
1530  * indclass cannot be referenced directly through the C struct, because it
1531  * comes after the variable-width indkey field. Must extract the datum
1532  * the hard way...
1533  */
1534  indclassDatum = fastgetattr(relation->rd_indextuple,
1535  Anum_pg_index_indclass,
1537  &isnull);
1538  Assert(!isnull);
1539  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1540 
1541  /*
1542  * Fill the support procedure OID array, as well as the info about
1543  * opfamilies and opclass input types. (aminfo and supportinfo are left
1544  * as zeroes, and are filled on-the-fly when used)
1545  */
1546  IndexSupportInitialize(indclass, relation->rd_support,
1547  relation->rd_opfamily, relation->rd_opcintype,
1548  amsupport, indnkeyatts);
1549 
1550  /*
1551  * Similarly extract indoption and copy it to the cache entry
1552  */
1553  indoptionDatum = fastgetattr(relation->rd_indextuple,
1554  Anum_pg_index_indoption,
1556  &isnull);
1557  Assert(!isnull);
1558  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1559  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1560 
1561  (void) RelationGetIndexAttOptions(relation, false);
1562 
1563  /*
1564  * expressions, predicate, exclusion caches will be filled later
1565  */
1566  relation->rd_indexprs = NIL;
1567  relation->rd_indpred = NIL;
1568  relation->rd_exclops = NULL;
1569  relation->rd_exclprocs = NULL;
1570  relation->rd_exclstrats = NULL;
1571  relation->rd_amcache = NULL;
1572 }
1573 
1574 /*
1575  * IndexSupportInitialize
1576  * Initializes an index's cached opclass information,
1577  * given the index's pg_index.indclass entry.
1578  *
1579  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1580  * which are arrays allocated by the caller.
1581  *
1582  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1583  * indicate the size of the arrays it has allocated --- but in practice these
1584  * numbers must always match those obtainable from the system catalog entries
1585  * for the index and access method.
 *
 * indexSupport is filled as consecutive runs of maxSupportNumber entries,
 * one run per attribute; it is not touched when maxSupportNumber is zero.
 * Reports elog(ERROR) if any of the first maxAttributeNumber entries of
 * indclass is not a valid OID.
1586  */
1587 static void
1589  RegProcedure *indexSupport,
1590  Oid *opFamily,
1591  Oid *opcInType,
1592  StrategyNumber maxSupportNumber,
1593  AttrNumber maxAttributeNumber)
1594 {
1595  int attIndex;
1596 
1597  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1598  {
1599  OpClassCacheEnt *opcentry;
1600 
1601  if (!OidIsValid(indclass->values[attIndex]))
1602  elog(ERROR, "bogus pg_index tuple");
1603 
1604  /* look up the info for this opclass, using a cache */
1605  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1606  maxSupportNumber);
1607 
1608  /* copy cached data into relcache entry */
1609  opFamily[attIndex] = opcentry->opcfamily;
1610  opcInType[attIndex] = opcentry->opcintype;
1611  if (maxSupportNumber > 0)
1612  memcpy(&indexSupport[attIndex * maxSupportNumber],
1613  opcentry->supportProcs,
1614  maxSupportNumber * sizeof(RegProcedure));
1615  }
1616 }
1617 
1618 /*
1619  * LookupOpclassInfo
1620  *
1621  * This routine maintains a per-opclass cache of the information needed
1622  * by IndexSupportInitialize(). This is more efficient than relying on
1623  * the catalog cache, because we can load all the info about a particular
1624  * opclass in a single indexscan of pg_amproc.
1625  *
1626  * The information from pg_am about expected range of support function
1627  * numbers is passed in, rather than being looked up, mainly because the
1628  * caller will have it already.
1629  *
1630  * Note there is no provision for flushing the cache. This is OK at the
1631  * moment because there is no way to ALTER any interesting properties of an
1632  * existing opclass --- all you can do is drop it, which will result in
1633  * a useless but harmless dead entry in the cache. To support altering
1634  * opclass membership (not the same as opfamily membership!), we'd need to
1635  * be able to flush this cache as well as the contents of relcache entries
1636  * for indexes.
 *
 * Returns the cache entry for operatorClassOid, loading it first if it is
 * not yet marked valid. Reports elog(ERROR) if no pg_opclass row is found
 * or if a pg_amproc row carries an amprocnum outside 1..numSupport.
1637  */
1638 static OpClassCacheEnt *
1639 LookupOpclassInfo(Oid operatorClassOid,
1640  StrategyNumber numSupport)
1641 {
1642  OpClassCacheEnt *opcentry;
1643  bool found;
1644  Relation rel;
1645  SysScanDesc scan;
1646  ScanKeyData skey[3];
1647  HeapTuple htup;
1648  bool indexOK;
1649 
1650  if (OpClassCache == NULL)
1651  {
1652  /* First time through: initialize the opclass cache */
1653  HASHCTL ctl;
1654 
1655  /* Also make sure CacheMemoryContext exists */
1656  if (!CacheMemoryContext)
1658 
1659  ctl.keysize = sizeof(Oid);
1660  ctl.entrysize = sizeof(OpClassCacheEnt);
1661  OpClassCache = hash_create("Operator class cache", 64,
1662  &ctl, HASH_ELEM | HASH_BLOBS);
1663  }
1664 
1665  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1666  &operatorClassOid,
1667  HASH_ENTER, &found);
1668 
1669  if (!found)
1670  {
1671  /* Initialize new entry */
1672  opcentry->valid = false; /* until known OK */
1673  opcentry->numSupport = numSupport;
1674  opcentry->supportProcs = NULL; /* filled below */
1675  }
1676  else
1677  {
1678  Assert(numSupport == opcentry->numSupport);
1679  }
1680 
1681  /*
1682  * When aggressively testing cache-flush hazards, we disable the operator
1683  * class cache and force reloading of the info on each call. This models
1684  * no real-world behavior, since the cache entries are never invalidated
1685  * otherwise. However it can be helpful for detecting bugs in the cache
1686  * loading logic itself, such as reliance on a non-nailed index. Given
1687  * the limited use-case and the fact that this adds a great deal of
1688  * expense, we enable it only for high values of debug_discard_caches.
1689  */
1690 #ifdef DISCARD_CACHES_ENABLED
1691  if (debug_discard_caches > 2)
1692  opcentry->valid = false;
1693 #endif
1694 
1695  if (opcentry->valid)
1696  return opcentry;
1697 
1698  /*
1699  * Need to fill in new entry. First allocate space, unless we already did
1700  * so in some previous attempt.
1701  */
1702  if (opcentry->supportProcs == NULL && numSupport > 0)
1703  opcentry->supportProcs = (RegProcedure *)
1705  numSupport * sizeof(RegProcedure));
1706 
1707  /*
1708  * To avoid infinite recursion during startup, force heap scans if we're
1709  * looking up info for the opclasses used by the indexes we would like to
1710  * reference here.
1711  */
1712  indexOK = criticalRelcachesBuilt ||
1713  (operatorClassOid != OID_BTREE_OPS_OID &&
1714  operatorClassOid != INT2_BTREE_OPS_OID);
1715 
1716  /*
1717  * We have to fetch the pg_opclass row to determine its opfamily and
1718  * opcintype, which are needed to look up related operators and functions.
1719  * It'd be convenient to use the syscache here, but that probably doesn't
1720  * work while bootstrapping.
1721  */
1722  ScanKeyInit(&skey[0],
1723  Anum_pg_opclass_oid,
1724  BTEqualStrategyNumber, F_OIDEQ,
1725  ObjectIdGetDatum(operatorClassOid));
1726  rel = table_open(OperatorClassRelationId, AccessShareLock);
1727  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1728  NULL, 1, skey);
1729 
1730  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1731  {
1732  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1733 
1734  opcentry->opcfamily = opclassform->opcfamily;
1735  opcentry->opcintype = opclassform->opcintype;
1736  }
1737  else
1738  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1739 
1740  systable_endscan(scan);
1742 
1743  /*
1744  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1745  * the default ones (those with lefttype = righttype = opcintype).
1746  */
1747  if (numSupport > 0)
1748  {
1749  ScanKeyInit(&skey[0],
1750  Anum_pg_amproc_amprocfamily,
1751  BTEqualStrategyNumber, F_OIDEQ,
1752  ObjectIdGetDatum(opcentry->opcfamily));
1753  ScanKeyInit(&skey[1],
1754  Anum_pg_amproc_amproclefttype,
1755  BTEqualStrategyNumber, F_OIDEQ,
1756  ObjectIdGetDatum(opcentry->opcintype));
1757  ScanKeyInit(&skey[2],
1758  Anum_pg_amproc_amprocrighttype,
1759  BTEqualStrategyNumber, F_OIDEQ,
1760  ObjectIdGetDatum(opcentry->opcintype));
1761  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1762  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1763  NULL, 3, skey);
1764 
1765  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1766  {
1767  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1768 
1769  if (amprocform->amprocnum <= 0 ||
1770  (StrategyNumber) amprocform->amprocnum > numSupport)
1771  elog(ERROR, "invalid amproc number %d for opclass %u",
1772  amprocform->amprocnum, operatorClassOid);
1773 
1774  opcentry->supportProcs[amprocform->amprocnum - 1] = /* amprocnum is 1-based, the array 0-based */
1775  amprocform->amproc;
1776  }
1777 
1778  systable_endscan(scan);
1780  }
1781 
1782  opcentry->valid = true;
1783  return opcentry;
1784 }
1785 
1786 /*
1787  * Fill in the TableAmRoutine for a relation
1788  *
1789  * relation's rd_amhandler must be valid already.
 *
 * Stores the result of GetTableAmRoutine(rd_amhandler) in rd_tableam.
1790  */
1791 static void
1793 {
1794  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1795 }
1796 
1797 /*
1798  * Initialize table access method support for a table-like relation
 *
 * Chooses rd_amhandler in one of three ways: sequences and catalog
 * relations get F_HEAP_TABLEAM_HANDLER directly, while any other relation
 * has its handler read from the pg_am row for relam via the syscache.
 * The table AM routine is then fetched with InitTableAmRoutine().
 *
 * Reports elog(ERROR) if the pg_am lookup fails.
1799  */
1800 void
1802 {
1803  HeapTuple tuple;
1804  Form_pg_am aform;
1805 
1806  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1807  {
1808  /*
1809  * Sequences are currently accessed like heap tables, but it doesn't
1810  * seem prudent to show that in the catalog. So just overwrite it
1811  * here.
1812  */
1813  Assert(relation->rd_rel->relam == InvalidOid);
1814  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1815  }
1816  else if (IsCatalogRelation(relation))
1817  {
1818  /*
1819  * Avoid doing a syscache lookup for catalog tables.
1820  */
1821  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1822  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1823  }
1824  else
1825  {
1826  /*
1827  * Look up the table access method, save the OID of its handler
1828  * function.
1829  */
1830  Assert(relation->rd_rel->relam != InvalidOid);
1831  tuple = SearchSysCache1(AMOID,
1832  ObjectIdGetDatum(relation->rd_rel->relam));
1833  if (!HeapTupleIsValid(tuple))
1834  elog(ERROR, "cache lookup failed for access method %u",
1835  relation->rd_rel->relam);
1836  aform = (Form_pg_am) GETSTRUCT(tuple);
1837  relation->rd_amhandler = aform->amhandler;
1838  ReleaseSysCache(tuple);
1839  }
1840 
1841  /*
1842  * Now we can fetch the table AM's API struct
1843  */
1844  InitTableAmRoutine(relation);
1845 }
1846 
1847 /*
1848  * formrdesc
1849  *
1850  * This is a special cut-down version of RelationBuildDesc(),
1851  * used while initializing the relcache.
1852  * The relation descriptor is built just from the supplied parameters,
1853  * without actually looking at any system table entries. We cheat
1854  * quite a lot since we only need to work for a few basic system
1855  * catalogs.
1856  *
1857  * The catalogs this is used for can't have constraints (except attnotnull),
1858  * default values, rules, or triggers, since we don't cope with any of that.
1859  * (Well, actually, this only matters for properties that need to be valid
1860  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1861  * these properties matter then...)
1862  *
1863  * NOTE: we assume we are already switched into CacheMemoryContext.
 *
 * The relation OID is taken from attrs[0].attrelid, and the finished
 * descriptor is added to the relcache with rd_refcnt 1 and rd_isnailed set.
1864  */
1865 static void
1866 formrdesc(const char *relationName, Oid relationReltype,
1867  bool isshared,
1868  int natts, const FormData_pg_attribute *attrs)
1869 {
1870  Relation relation;
1871  int i;
1872  bool has_not_null;
1873 
1874  /*
1875  * allocate new relation desc, clear all fields of reldesc
1876  */
1877  relation = (Relation) palloc0(sizeof(RelationData));
1878 
1879  /* make sure relation is marked as having no open file yet */
1880  relation->rd_smgr = NULL;
1881 
1882  /*
1883  * initialize reference count: 1 because it is nailed in cache
1884  */
1885  relation->rd_refcnt = 1;
1886 
1887  /*
1888  * all entries built with this routine are nailed-in-cache; none are for
1889  * new or temp relations.
1890  */
1891  relation->rd_isnailed = true;
1896  relation->rd_backend = InvalidBackendId;
1897  relation->rd_islocaltemp = false;
1898 
1899  /*
1900  * initialize relation tuple form
1901  *
1902  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1903  * get us launched. RelationCacheInitializePhase3() will read the real
1904  * data from pg_class and replace what we've done here. Note in
1905  * particular that relowner is left as zero; this cues
1906  * RelationCacheInitializePhase3 that the real data isn't there yet.
1907  */
1909 
1910  namestrcpy(&relation->rd_rel->relname, relationName);
1911  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1912  relation->rd_rel->reltype = relationReltype;
1913 
1914  /*
1915  * It's important to distinguish between shared and non-shared relations,
1916  * even at bootstrap time, to make sure we know where they are stored.
1917  */
1918  relation->rd_rel->relisshared = isshared;
1919  if (isshared)
1920  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1921 
1922  /* formrdesc is used only for permanent relations */
1923  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1924 
1925  /* ... and they're always populated, too */
1926  relation->rd_rel->relispopulated = true;
1927 
1928  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1929  relation->rd_rel->relpages = 0;
1930  relation->rd_rel->reltuples = -1;
1931  relation->rd_rel->relallvisible = 0;
1932  relation->rd_rel->relkind = RELKIND_RELATION;
1933  relation->rd_rel->relnatts = (int16) natts;
1934  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1935 
1936  /*
1937  * initialize attribute tuple form
1938  *
1939  * Unlike the case with the relation tuple, this data had better be right
1940  * because it will never be replaced. The data comes from
1941  * src/include/catalog/ headers via genbki.pl.
1942  */
1943  relation->rd_att = CreateTemplateTupleDesc(natts);
1944  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1945 
1946  relation->rd_att->tdtypeid = relationReltype;
1947  relation->rd_att->tdtypmod = -1; /* just to be sure */
1948 
1949  /*
1950  * initialize tuple desc info
1951  */
1952  has_not_null = false;
1953  for (i = 0; i < natts; i++)
1954  {
1955  memcpy(TupleDescAttr(relation->rd_att, i),
1956  &attrs[i],
1958  has_not_null |= attrs[i].attnotnull;
1959  /* make sure attcacheoff is valid */
1960  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1961  }
1962 
1963  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1964  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1965 
1966  /* mark not-null status */
1967  if (has_not_null)
1968  {
1969  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1970 
1971  constr->has_not_null = true;
1972  relation->rd_att->constr = constr;
1973  }
1974 
1975  /*
1976  * initialize relation id from info in att array (my, this is ugly)
1977  */
1978  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1979 
1980  /*
1981  * All relations made with formrdesc are mapped. This is necessarily so
1982  * because there is no other way to know what filenumber they currently
1983  * have. In bootstrap mode, add them to the initial relation mapper data,
1984  * specifying that the initial filenumber is the same as the OID.
1985  */
1986  relation->rd_rel->relfilenode = InvalidRelFileNumber;
1989  RelationGetRelid(relation),
1990  isshared, true);
1991 
1992  /*
1993  * initialize the relation lock manager information
1994  */
1995  RelationInitLockInfo(relation); /* see lmgr.c */
1996 
1997  /*
1998  * initialize physical addressing information for the relation
1999  */
2000  RelationInitPhysicalAddr(relation);
2001 
2002  /*
2003  * initialize the table am handler
2004  */
2005  relation->rd_rel->relam = HEAP_TABLE_AM_OID; /* same value as assigned earlier in this function */
2006  relation->rd_tableam = GetHeapamTableAmRoutine();
2007 
2008  /*
2009  * initialize the rel-has-index flag, using hardwired knowledge
2010  */
2012  {
2013  /* In bootstrap mode, we have no indexes */
2014  relation->rd_rel->relhasindex = false;
2015  }
2016  else
2017  {
2018  /* Otherwise, all the rels formrdesc is used for have indexes */
2019  relation->rd_rel->relhasindex = true;
2020  }
2021 
2022  /*
2023  * add new reldesc to relcache
2024  */
2025  RelationCacheInsert(relation, false);
2026 
2027  /* It's fully valid */
2028  relation->rd_isvalid = true;
2029 }
2030 
2031 
2032 /* ----------------------------------------------------------------
2033  * Relation Descriptor Lookup Interface
2034  * ----------------------------------------------------------------
2035  */
2036 
2037 /*
2038  * RelationIdGetRelation
2039  *
2040  * Lookup a reldesc by OID; make one if not already in cache.
2041  *
2042  * Returns NULL if no pg_class row could be found for the given relid
2043  * (suggesting we are trying to access a just-deleted relation).
2044  * Any other error is reported via elog.
2045  *
2046  * NB: caller should already have at least AccessShareLock on the
2047  * relation ID, else there are nasty race conditions.
2048  *
2049  * NB: relation ref count is incremented, or set to 1 if new entry.
2050  * Caller should eventually decrement count. (Usually,
2051  * that happens by calling RelationClose().)
2052  */
2053 Relation
2055 {
2056  Relation rd;
2057 
2058  /* Make sure we're in an xact, even if this ends up being a cache hit */
2060 
2061  /*
2062  * first try to find reldesc in the cache
2063  */
2064  RelationIdCacheLookup(relationId, rd);
2065 
2066  if (RelationIsValid(rd))
2067  {
2068  /* return NULL for dropped relations */
2070  {
2071  Assert(!rd->rd_isvalid);
2072  return NULL;
2073  }
2074 
2076  /* revalidate cache entry if necessary */
2077  if (!rd->rd_isvalid)
2078  {
2079  /*
2080  * Indexes only have a limited number of possible schema changes,
2081  * and we don't want to use the full-blown procedure because it's
2082  * a headache for indexes that reload itself depends on.
2083  */
2084  if (rd->rd_rel->relkind == RELKIND_INDEX ||
2085  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2087  else
2088  RelationClearRelation(rd, true);
2089 
2090  /*
2091  * Normally entries need to be valid here, but before the relcache
2092  * has been initialized, not enough infrastructure exists to
2093  * perform pg_class lookups. The structure of such entries doesn't
2094  * change, but we still want to update the rd_rel entry. So
2095  * rd_isvalid = false is left in place for a later lookup.
2096  */
2097  Assert(rd->rd_isvalid ||
2099  }
2100  return rd;
2101  }
2102 
2103  /*
2104  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2105  * it.
2106  */
2107  rd = RelationBuildDesc(relationId, true);
2108  if (RelationIsValid(rd))
2110  return rd;
2111 }
2112 
2113 /* ----------------------------------------------------------------
2114  * cache invalidation support routines
2115  * ----------------------------------------------------------------
2116  */
2117 
2118 /*
2119  * RelationIncrementReferenceCount
2120  * Increments relation reference count.
2121  *
2122  * Note: bootstrap mode has its own weird ideas about relation refcount
2123  * behavior; we ought to fix it someday, but for now, just disable
2124  * reference count ownership tracking in bootstrap mode.
2125  */
2126 void
2128 {
2130  rel->rd_refcnt += 1;
2133 }
2134 
2135 /*
2136  * RelationDecrementReferenceCount
2137  * Decrements relation reference count.
2138  */
2139 void
2141 {
2142  Assert(rel->rd_refcnt > 0);
2143  rel->rd_refcnt -= 1;
2146 }
2147 
2148 /*
2149  * RelationClose - close an open relation
2150  *
2151  * Actually, we just decrement the refcount.
2152  *
2153  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2154  * will be freed as soon as their refcount goes to zero. In combination
2155  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2156  * to catch references to already-released relcache entries. It slows
2157  * things down quite a bit, however.
2158  */
2159 void
2161 {
2162  /* Note: no locking manipulations needed */
2164 
2165  /*
2166  * If the relation is no longer open in this session, we can clean up any
2167  * stale partition descriptors it has. This is unlikely, so check to see
2168  * if there are child contexts before expending a call to mcxt.c.
2169  */
2170  if (RelationHasReferenceCountZero(relation))
2171  {
2172  if (relation->rd_pdcxt != NULL &&
2173  relation->rd_pdcxt->firstchild != NULL)
2175 
2176  if (relation->rd_pddcxt != NULL &&
2177  relation->rd_pddcxt->firstchild != NULL)
2179  }
2180 
2181 #ifdef RELCACHE_FORCE_RELEASE
2182  if (RelationHasReferenceCountZero(relation) &&
2183  relation->rd_createSubid == InvalidSubTransactionId &&
2185  RelationClearRelation(relation, false);
2186 #endif
2187 }
2188 
2189 /*
2190  * RelationReloadIndexInfo - reload minimal information for an open index
2191  *
2192  * This function is used only for indexes. A relcache inval on an index
2193  * can mean that its pg_class or pg_index row changed. There are only
2194  * very limited changes that are allowed to an existing index's schema,
2195  * so we can update the relcache entry without a complete rebuild; which
2196  * is fortunate because we can't rebuild an index entry that is "nailed"
2197  * and/or in active use. We support full replacement of the pg_class row,
2198  * as well as updates of a few simple fields of the pg_index row.
2199  *
2200  * We can't necessarily reread the catalog rows right away; we might be
2201  * in a failed transaction when we receive the SI notification. If so,
2202  * RelationClearRelation just marks the entry as invalid by setting
2203  * rd_isvalid to false. This routine is called to fix the entry when it
2204  * is next needed.
2205  *
2206  * We assume that at the time we are called, we have at least AccessShareLock
2207  * on the target index. (Note: in the calls from RelationClearRelation,
2208  * this is legitimate because we know the rel has positive refcount.)
2209  *
2210  * If the target index is an index on pg_class or pg_index, we'd better have
2211  * previously gotten at least AccessShareLock on its underlying catalog,
2212  * else we are at risk of deadlock against someone trying to exclusive-lock
2213  * the heap and index in that order. This is ensured in current usage by
2214  * only applying this to indexes being opened or having positive refcount.
2215  */
2216 static void
2218 {
2219  bool indexOK;
2220  HeapTuple pg_class_tuple;
2221  Form_pg_class relp;
2222 
2223  /* Should be called only for invalidated, live indexes */
2224  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2225  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2226  !relation->rd_isvalid &&
2228 
2229  /* Ensure it's closed at smgr level */
2230  RelationCloseSmgr(relation);
2231 
2232  /* Must free any AM cached data upon relcache flush */
2233  if (relation->rd_amcache)
2234  pfree(relation->rd_amcache);
2235  relation->rd_amcache = NULL;
2236 
2237  /*
2238  * If it's a shared index, we might be called before backend startup has
2239  * finished selecting a database, in which case we have no way to read
2240  * pg_class yet. However, a shared index can never have any significant
2241  * schema updates, so it's okay to ignore the invalidation signal. Just
2242  * mark it valid and return without doing anything more.
2243  */
2244  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2245  {
2246  relation->rd_isvalid = true;
2247  return;
2248  }
2249 
2250  /*
2251  * Read the pg_class row
2252  *
2253  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2254  * for pg_class_oid_index ...
2255  */
2256  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2257  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2258  if (!HeapTupleIsValid(pg_class_tuple))
2259  elog(ERROR, "could not find pg_class tuple for index %u",
2260  RelationGetRelid(relation));
2261  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2262  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2263  /* Reload reloptions in case they changed */
2264  if (relation->rd_options)
2265  pfree(relation->rd_options);
2266  RelationParseRelOptions(relation, pg_class_tuple);
2267  /* done with pg_class tuple */
2268  heap_freetuple(pg_class_tuple);
2269  /* We must recalculate physical address in case it changed */
2270  RelationInitPhysicalAddr(relation);
2271 
2272  /*
2273  * For a non-system index, there are fields of the pg_index row that are
2274  * allowed to change, so re-read that row and update the relcache entry.
2275  * Most of the info derived from pg_index (such as support function lookup
2276  * info) cannot change, and indeed the whole point of this routine is to
2277  * update the relcache entry without clobbering that data; so wholesale
2278  * replacement is not appropriate.
2279  */
2280  if (!IsSystemRelation(relation))
2281  {
2282  HeapTuple tuple;
2284 
2285  tuple = SearchSysCache1(INDEXRELID,
2286  ObjectIdGetDatum(RelationGetRelid(relation)));
2287  if (!HeapTupleIsValid(tuple))
2288  elog(ERROR, "cache lookup failed for index %u",
2289  RelationGetRelid(relation));
2290  index = (Form_pg_index) GETSTRUCT(tuple);
2291 
2292  /*
2293  * Basically, let's just copy all the bool fields. There are one or
2294  * two of these that can't actually change in the current code, but
2295  * it's not worth it to track exactly which ones they are. None of
2296  * the array fields are allowed to change, though.
2297  */
2298  relation->rd_index->indisunique = index->indisunique;
2299  relation->rd_index->indnullsnotdistinct = index->indnullsnotdistinct;
2300  relation->rd_index->indisprimary = index->indisprimary;
2301  relation->rd_index->indisexclusion = index->indisexclusion;
2302  relation->rd_index->indimmediate = index->indimmediate;
2303  relation->rd_index->indisclustered = index->indisclustered;
2304  relation->rd_index->indisvalid = index->indisvalid;
2305  relation->rd_index->indcheckxmin = index->indcheckxmin;
2306  relation->rd_index->indisready = index->indisready;
2307  relation->rd_index->indislive = index->indislive;
2308  relation->rd_index->indisreplident = index->indisreplident;
2309 
2310  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2312  HeapTupleHeaderGetXmin(tuple->t_data));
2313 
2314  ReleaseSysCache(tuple);
2315  }
2316 
2317  /* Okay, now it's valid again */
2318  relation->rd_isvalid = true;
2319 }
2320 
2321 /*
2322  * RelationReloadNailed - reload minimal information for nailed relations.
2323  *
2324  * The structure of a nailed relation can never change (which is good, because
2325  * we rely on knowing their structure to be able to read catalog content). But
2326  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2327  * accurate content for. Therefore those need to be reloaded after the arrival
2328  * of invalidations.
2329  */
2330 static void
2332 {
2333  Assert(relation->rd_isnailed);
2334 
2335  /*
2336  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2337  * mapping changed.
2338  */
2339  RelationInitPhysicalAddr(relation);
2340 
2341  /* flag as needing to be revalidated */
2342  relation->rd_isvalid = false;
2343 
2344  /*
2345  * Can only reread catalog contents if in a transaction. If the relation
2346  * is currently open (not counting the nailed refcount), do so
2347  * immediately. Otherwise we've already marked the entry as possibly
2348  * invalid, and it'll be fixed when next opened.
2349  */
2350  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2351  return;
2352 
2353  if (relation->rd_rel->relkind == RELKIND_INDEX)
2354  {
2355  /*
2356  * If it's a nailed-but-not-mapped index, then we need to re-read the
2357  * pg_class row to see if its relfilenumber changed.
2358  */
2359  RelationReloadIndexInfo(relation);
2360  }
2361  else
2362  {
2363  /*
2364  * Reload a non-index entry. We can't easily do so if relcaches
2365  * aren't yet built, but that's fine because at that stage the
2366  * attributes that need to be current (like relfrozenxid) aren't yet
2367  * accessed. To ensure the entry will later be revalidated, we leave
2368  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2369  */
2371  {
2372  HeapTuple pg_class_tuple;
2373  Form_pg_class relp;
2374 
2375  /*
2376  * NB: Mark the entry as valid before starting to scan, to avoid
2377  * self-recursion when re-building pg_class.
2378  */
2379  relation->rd_isvalid = true;
2380 
2381  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2382  true, false);
2383  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2384  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2385  heap_freetuple(pg_class_tuple);
2386 
2387  /*
2388  * Again mark as valid, to protect against concurrently arriving
2389  * invalidations.
2390  */
2391  relation->rd_isvalid = true;
2392  }
2393  }
2394 }
2395 
2396 /*
2397  * RelationDestroyRelation
2398  *
2399  * Physically delete a relation cache entry and all subsidiary data.
2400  * Caller must already have unhooked the entry from the hash table.
2401  */
2402 static void
2403 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2404 {
2406 
2407  /*
2408  * Make sure smgr and lower levels close the relation's files, if they
2409  * weren't closed already. (This was probably done by caller, but let's
2410  * just be real sure.)
2411  */
2412  RelationCloseSmgr(relation);
2413 
2414  /* break mutual link with stats entry */
2415  pgstat_unlink_relation(relation);
2416 
2417  /*
2418  * Free all the subsidiary data structures of the relcache entry, then the
2419  * entry itself.
2420  */
2421  if (relation->rd_rel)
2422  pfree(relation->rd_rel);
2423  /* can't use DecrTupleDescRefCount here */
2424  Assert(relation->rd_att->tdrefcount > 0);
2425  if (--relation->rd_att->tdrefcount == 0)
2426  {
2427  /*
2428  * If we Rebuilt a relcache entry during a transaction then its
2429  * possible we did that because the TupDesc changed as the result of
2430  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2431  * possible someone copied that TupDesc, in which case the copy would
2432  * point to free'd memory. So if we rebuild an entry we keep the
2433  * TupDesc around until end of transaction, to be safe.
2434  */
2435  if (remember_tupdesc)
2437  else
2438  FreeTupleDesc(relation->rd_att);
2439  }
2440  FreeTriggerDesc(relation->trigdesc);
2441  list_free_deep(relation->rd_fkeylist);
2442  list_free(relation->rd_indexlist);
2443  list_free(relation->rd_statlist);
2444  bms_free(relation->rd_keyattr);
2445  bms_free(relation->rd_pkattr);
2446  bms_free(relation->rd_idattr);
2447  bms_free(relation->rd_hotblockingattr);
2448  bms_free(relation->rd_summarizedattr);
2449  if (relation->rd_pubdesc)
2450  pfree(relation->rd_pubdesc);
2451  if (relation->rd_options)
2452  pfree(relation->rd_options);
2453  if (relation->rd_indextuple)
2454  pfree(relation->rd_indextuple);
2455  if (relation->rd_amcache)
2456  pfree(relation->rd_amcache);
2457  if (relation->rd_fdwroutine)
2458  pfree(relation->rd_fdwroutine);
2459  if (relation->rd_indexcxt)
2460  MemoryContextDelete(relation->rd_indexcxt);
2461  if (relation->rd_rulescxt)
2462  MemoryContextDelete(relation->rd_rulescxt);
2463  if (relation->rd_rsdesc)
2464  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2465  if (relation->rd_partkeycxt)
2467  if (relation->rd_pdcxt)
2468  MemoryContextDelete(relation->rd_pdcxt);
2469  if (relation->rd_pddcxt)
2470  MemoryContextDelete(relation->rd_pddcxt);
2471  if (relation->rd_partcheckcxt)
2473  pfree(relation);
2474 }
2475 
2476 /*
2477  * RelationClearRelation
2478  *
2479  * Physically blow away a relation cache entry, or reset it and rebuild
2480  * it from scratch (that is, from catalog entries). The latter path is
2481  * used when we are notified of a change to an open relation (one with
2482  * refcount > 0).
2483  *
2484  * NB: when rebuilding, we'd better hold some lock on the relation,
2485  * else the catalog data we need to read could be changing under us.
2486  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2487  * a sinval reset could happen while we're accessing the catalogs, and
2488  * the rel would get blown away underneath us by RelationCacheInvalidate
2489  * if it has zero refcnt.
2490  *
2491  * The "rebuild" parameter is redundant in current usage because it has
2492  * to match the relation's refcnt status, but we keep it as a crosscheck
2493  * that we're doing what the caller expects.
2494  */
2495 static void
2496 RelationClearRelation(Relation relation, bool rebuild)
2497 {
2498  /*
2499  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2500  * course it would be an equally bad idea to blow away one with nonzero
2501  * refcnt, since that would leave someone somewhere with a dangling
2502  * pointer. All callers are expected to have verified that this holds.
2503  */
2504  Assert(rebuild ?
2505  !RelationHasReferenceCountZero(relation) :
2506  RelationHasReferenceCountZero(relation));
2507 
2508  /*
2509  * Make sure smgr and lower levels close the relation's files, if they
2510  * weren't closed already. If the relation is not getting deleted, the
2511  * next smgr access should reopen the files automatically. This ensures
2512  * that the low-level file access state is updated after, say, a vacuum
2513  * truncation.
2514  */
2515  RelationCloseSmgr(relation);
2516 
2517  /* Free AM cached data, if any */
2518  if (relation->rd_amcache)
2519  pfree(relation->rd_amcache);
2520  relation->rd_amcache = NULL;
2521 
2522  /*
2523  * Treat nailed-in system relations separately, they always need to be
2524  * accessible, so we can't blow them away.
2525  */
2526  if (relation->rd_isnailed)
2527  {
2528  RelationReloadNailed(relation);
2529  return;
2530  }
2531 
2532  /* Mark it invalid until we've finished rebuild */
2533  relation->rd_isvalid = false;
2534 
2535  /* See RelationForgetRelation(). */
2536  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2537  return;
2538 
2539  /*
2540  * Even non-system indexes should not be blown away if they are open and
2541  * have valid index support information. This avoids problems with active
2542  * use of the index support information. As with nailed indexes, we
2543  * re-read the pg_class row to handle possible physical relocation of the
2544  * index, and we check for pg_index updates too.
2545  */
2546  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2547  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2548  relation->rd_refcnt > 0 &&
2549  relation->rd_indexcxt != NULL)
2550  {
2551  if (IsTransactionState())
2552  RelationReloadIndexInfo(relation);
2553  return;
2554  }
2555 
2556  /*
2557  * If we're really done with the relcache entry, blow it away. But if
2558  * someone is still using it, reconstruct the whole deal without moving
2559  * the physical RelationData record (so that the someone's pointer is
2560  * still valid).
2561  */
2562  if (!rebuild)
2563  {
2564  /* Remove it from the hash table */
2565  RelationCacheDelete(relation);
2566 
2567  /* And release storage */
2568  RelationDestroyRelation(relation, false);
2569  }
2570  else if (!IsTransactionState())
2571  {
2572  /*
2573  * If we're not inside a valid transaction, we can't do any catalog
2574  * access so it's not possible to rebuild yet. Just exit, leaving
2575  * rd_isvalid = false so that the rebuild will occur when the entry is
2576  * next opened.
2577  *
2578  * Note: it's possible that we come here during subtransaction abort,
2579  * and the reason for wanting to rebuild is that the rel is open in
2580  * the outer transaction. In that case it might seem unsafe to not
2581  * rebuild immediately, since whatever code has the rel already open
2582  * will keep on using the relcache entry as-is. However, in such a
2583  * case the outer transaction should be holding a lock that's
2584  * sufficient to prevent any significant change in the rel's schema,
2585  * so the existing entry contents should be good enough for its
2586  * purposes; at worst we might be behind on statistics updates or the
2587  * like. (See also CheckTableNotInUse() and its callers.) These same
2588  * remarks also apply to the cases above where we exit without having
2589  * done RelationReloadIndexInfo() yet.
2590  */
2591  return;
2592  }
2593  else
2594  {
2595  /*
2596  * Our strategy for rebuilding an open relcache entry is to build a
2597  * new entry from scratch, swap its contents with the old entry, and
2598  * finally delete the new entry (along with any infrastructure swapped
2599  * over from the old entry). This is to avoid trouble in case an
2600  * error causes us to lose control partway through. The old entry
2601  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2602  * on next access. Meanwhile it's not any less valid than it was
2603  * before, so any code that might expect to continue accessing it
2604  * isn't hurt by the rebuild failure. (Consider for example a
2605  * subtransaction that ALTERs a table and then gets canceled partway
2606  * through the cache entry rebuild. The outer transaction should
2607  * still see the not-modified cache entry as valid.) The worst
2608  * consequence of an error is leaking the necessarily-unreferenced new
2609  * entry, and this shouldn't happen often enough for that to be a big
2610  * problem.
2611  *
2612  * When rebuilding an open relcache entry, we must preserve ref count,
2613  * rd_*Subid, and rd_toastoid state. Also attempt to preserve the
2614  * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key,
2615  * and partition descriptor substructures in place, because various
2616  * places assume that these structures won't move while they are
2617  * working with an open relcache entry. (Note: the refcount
2618  * mechanism for tupledescs might someday allow us to remove this hack
2619  * for the tupledesc.)
2620  *
2621  * Note that this process does not touch CurrentResourceOwner; which
2622  * is good because whatever ref counts the entry may have do not
2623  * necessarily belong to that resource owner.
2624  */
2625  Relation newrel;
2626  Oid save_relid = RelationGetRelid(relation);
2627  bool keep_tupdesc;
2628  bool keep_rules;
2629  bool keep_policies;
2630  bool keep_partkey;
2631 
2632  /* Build temporary entry, but don't link it into hashtable */
2633  newrel = RelationBuildDesc(save_relid, false);
2634 
2635  /*
2636  * Between here and the end of the swap, don't add code that does or
2637  * reasonably could read system catalogs. That range must be free
2638  * from invalidation processing. See RelationBuildDesc() manipulation
2639  * of in_progress_list.
2640  */
2641 
2642  if (newrel == NULL)
2643  {
2644  /*
2645  * We can validly get here, if we're using a historic snapshot in
2646  * which a relation, accessed from outside logical decoding, is
2647  * still invisible. In that case it's fine to just mark the
2648  * relation as invalid and return - it'll fully get reloaded by
2649  * the cache reset at the end of logical decoding (or at the next
2650  * access). During normal processing we don't want to ignore this
2651  * case as it shouldn't happen there, as explained below.
2652  */
2653  if (HistoricSnapshotActive())
2654  return;
2655 
2656  /*
2657  * This shouldn't happen as dropping a relation is intended to be
2658  * impossible if still referenced (cf. CheckTableNotInUse()). But
2659  * if we get here anyway, we can't just delete the relcache entry,
2660  * as it possibly could get accessed later (as e.g. the error
2661  * might get trapped and handled via a subtransaction rollback).
2662  */
2663  elog(ERROR, "relation %u deleted while still in use", save_relid);
2664  }
2665 
2666  /*
2667  * If we were to, again, have cases of the relkind of a relcache entry
2668  * changing, we would need to ensure that pgstats does not get
2669  * confused.
2670  */
2671  Assert(relation->rd_rel->relkind == newrel->rd_rel->relkind);
2672 
2673  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2674  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2675  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2676  /* partkey is immutable once set up, so we can always keep it */
2677  keep_partkey = (relation->rd_partkey != NULL);
2678 
2679  /*
2680  * Perform swapping of the relcache entry contents. Within this
2681  * process the old entry is momentarily invalid, so there *must* be no
2682  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2683  * all-in-line code for safety.
2684  *
2685  * Since the vast majority of fields should be swapped, our method is
2686  * to swap the whole structures and then re-swap those few fields we
2687  * didn't want swapped.
2688  */
2689 #define SWAPFIELD(fldtype, fldname) \
2690  do { \
2691  fldtype _tmp = newrel->fldname; \
2692  newrel->fldname = relation->fldname; \
2693  relation->fldname = _tmp; \
2694  } while (0)
2695 
2696  /* swap all Relation struct fields */
2697  {
2698  RelationData tmpstruct;
2699 
2700  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2701  memcpy(newrel, relation, sizeof(RelationData));
2702  memcpy(relation, &tmpstruct, sizeof(RelationData));
2703  }
2704 
2705  /* rd_smgr must not be swapped, due to back-links from smgr level */
2706  SWAPFIELD(SMgrRelation, rd_smgr);
2707  /* rd_refcnt must be preserved */
2708  SWAPFIELD(int, rd_refcnt);
2709  /* isnailed shouldn't change */
2710  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2711  /* creation sub-XIDs must be preserved */
2712  SWAPFIELD(SubTransactionId, rd_createSubid);
2713  SWAPFIELD(SubTransactionId, rd_newRelfilelocatorSubid);
2714  SWAPFIELD(SubTransactionId, rd_firstRelfilelocatorSubid);
2715  SWAPFIELD(SubTransactionId, rd_droppedSubid);
2716  /* un-swap rd_rel pointers, swap contents instead */
2717  SWAPFIELD(Form_pg_class, rd_rel);
2718  /* ... but actually, we don't have to update newrel->rd_rel */
2719  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2720  /* preserve old tupledesc, rules, policies if no logical change */
2721  if (keep_tupdesc)
2722  SWAPFIELD(TupleDesc, rd_att);
2723  if (keep_rules)
2724  {
2725  SWAPFIELD(RuleLock *, rd_rules);
2726  SWAPFIELD(MemoryContext, rd_rulescxt);
2727  }
2728  if (keep_policies)
2729  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2730  /* toast OID override must be preserved */
2731  SWAPFIELD(Oid, rd_toastoid);
2732  /* pgstat_info / enabled must be preserved */
2733  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2734  SWAPFIELD(bool, pgstat_enabled);
2735  /* preserve old partition key if we have one */
2736  if (keep_partkey)
2737  {
2738  SWAPFIELD(PartitionKey, rd_partkey);
2739  SWAPFIELD(MemoryContext, rd_partkeycxt);
2740  }
2741  if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL)
2742  {
2743  /*
2744  * We are rebuilding a partitioned relation with a non-zero
2745  * reference count, so we must keep the old partition descriptor
2746  * around, in case there's a PartitionDirectory with a pointer to
2747  * it. This means we can't free the old rd_pdcxt yet. (This is
2748  * necessary because RelationGetPartitionDesc hands out direct
2749  * pointers to the relcache's data structure, unlike our usual
2750  * practice which is to hand out copies. We'd have the same
2751  * problem with rd_partkey, except that we always preserve that
2752  * once created.)
2753  *
2754  * To ensure that it's not leaked completely, re-attach it to the
2755  * new reldesc, or make it a child of the new reldesc's rd_pdcxt
2756  * in the unlikely event that there is one already. (Compare hack
2757  * in RelationBuildPartitionDesc.) RelationClose will clean up
2758  * any such contexts once the reference count reaches zero.
2759  *
2760  * In the case where the reference count is zero, this code is not
2761  * reached, which should be OK because in that case there should
2762  * be no PartitionDirectory with a pointer to the old entry.
2763  *
2764  * Note that newrel and relation have already been swapped, so the
2765  * "old" partition descriptor is actually the one hanging off of
2766  * newrel.
2767  */
2768  relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
2769  relation->rd_partdesc_nodetached = NULL;
2771  if (relation->rd_pdcxt != NULL) /* probably never happens */
2772  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2773  else
2774  relation->rd_pdcxt = newrel->rd_pdcxt;
2775  if (relation->rd_pddcxt != NULL)
2776  MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt);
2777  else
2778  relation->rd_pddcxt = newrel->rd_pddcxt;
2779  /* drop newrel's pointers so we don't destroy it below */
2780  newrel->rd_partdesc = NULL;
2781  newrel->rd_partdesc_nodetached = NULL;
2783  newrel->rd_pdcxt = NULL;
2784  newrel->rd_pddcxt = NULL;
2785  }
2786 
2787 #undef SWAPFIELD
2788 
2789  /* And now we can throw away the temporary entry */
2790  RelationDestroyRelation(newrel, !keep_tupdesc);
2791  }
2792 }
2793 
2794 /*
2795  * RelationFlushRelation
2796  *
2797  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2798  * This is used when we receive a cache invalidation event for the rel.
2799  */
2800 static void
2802 {
2803  if (relation->rd_createSubid != InvalidSubTransactionId ||
2805  {
2806  /*
2807  * New relcache entries are always rebuilt, not flushed; else we'd
2808  * forget the "new" status of the relation. Ditto for the
2809  * new-relfilenumber status.
2810  *
2811  * The rel could have zero refcnt here, so temporarily increment the
2812  * refcnt to ensure it's safe to rebuild it. We can assume that the
2813  * current transaction has some lock on the rel already.
2814  */
2816  RelationClearRelation(relation, true);
2818  }
2819  else
2820  {
2821  /*
2822  * Pre-existing rels can be dropped from the relcache if not open.
2823  */
2824  bool rebuild = !RelationHasReferenceCountZero(relation);
2825 
2826  RelationClearRelation(relation, rebuild);
2827  }
2828 }
2829 
2830 /*
2831  * RelationForgetRelation - caller reports that it dropped the relation
2832  */
2833 void
2835 {
2836  Relation relation;
2837 
2838  RelationIdCacheLookup(rid, relation);
2839 
2840  if (!PointerIsValid(relation))
2841  return; /* not in cache, nothing to do */
2842 
2843  if (!RelationHasReferenceCountZero(relation))
2844  elog(ERROR, "relation %u is still open", rid);
2845 
2847  if (relation->rd_createSubid != InvalidSubTransactionId ||
2849  {
2850  /*
2851  * In the event of subtransaction rollback, we must not forget
2852  * rd_*Subid. Mark the entry "dropped" so RelationClearRelation()
2853  * invalidates it in lieu of destroying it. (If we're in a top
2854  * transaction, we could opt to destroy the entry.)
2855  */
2857  }
2858 
2859  RelationClearRelation(relation, false);
2860 }
2861 
2862 /*
2863  * RelationCacheInvalidateEntry
2864  *
2865  * This routine is invoked for SI cache flush messages.
2866  *
2867  * Any relcache entry matching the relid must be flushed. (Note: caller has
2868  * already determined that the relid belongs to our database or is a shared
2869  * relation.)
2870  *
2871  * We used to skip local relations, on the grounds that they could
2872  * not be targets of cross-backend SI update messages; but it seems
2873  * safer to process them, so that our *own* SI update messages will
2874  * have the same effects during CommandCounterIncrement for both
2875  * local and nonlocal relations.
2876  */
2877 void
2879 {
2880  Relation relation;
2881 
2882  RelationIdCacheLookup(relationId, relation);
2883 
2884  if (PointerIsValid(relation))
2885  {
2887  RelationFlushRelation(relation);
2888  }
2889  else
2890  {
2891  int i;
2892 
2893  for (i = 0; i < in_progress_list_len; i++)
2894  if (in_progress_list[i].reloid == relationId)
2895  in_progress_list[i].invalidated = true;
2896  }
2897 }
2898 
2899 /*
2900  * RelationCacheInvalidate
2901  * Blow away cached relation descriptors that have zero reference counts,
2902  * and rebuild those with positive reference counts. Also reset the smgr
2903  * relation cache and re-read relation mapping data.
2904  *
2905  * Apart from debug_discard_caches, this is currently used only to recover
2906  * from SI message buffer overflow, so we do not touch relations having
2907  * new-in-transaction relfilenumbers; they cannot be targets of cross-backend
2908  * SI updates (and our own updates now go through a separate linked list
2909  * that isn't limited by the SI message buffer size).
2910  *
2911  * We do this in two phases: the first pass deletes deletable items, and
2912  * the second one rebuilds the rebuildable items. This is essential for
2913  * safety, because hash_seq_search only copes with concurrent deletion of
2914  * the element it is currently visiting. If a second SI overflow were to
2915  * occur while we are walking the table, resulting in recursive entry to
2916  * this routine, we could crash because the inner invocation blows away
2917  * the entry next to be visited by the outer scan. But this way is OK,
2918  * because (a) during the first pass we won't process any more SI messages,
2919  * so hash_seq_search will complete safely; (b) during the second pass we
2920  * only hold onto pointers to nondeletable entries.
2921  *
2922  * The two-phase approach also makes it easy to update relfilenumbers for
2923  * mapped relations before we do anything else, and to ensure that the
2924  * second pass processes nailed-in-cache items before other nondeletable
2925  * items. This should ensure that system catalogs are up to date before
2926  * we attempt to use them to reload information about other open relations.
2927  *
2928  * After those two phases of work having immediate effects, we normally
2929  * signal any RelationBuildDesc() on the stack to start over. However, we
2930  * don't do this if called as part of debug_discard_caches. Otherwise,
2931  * RelationBuildDesc() would become an infinite loop.
2932  */
2933 void
2934 RelationCacheInvalidate(bool debug_discard)
2935 {
2936  HASH_SEQ_STATUS status;
2937  RelIdCacheEnt *idhentry;
2938  Relation relation;
2939  List *rebuildFirstList = NIL;
2940  List *rebuildList = NIL;
2941  ListCell *l;
2942  int i;
2943 
2944  /*
2945  * Reload relation mapping data before starting to reconstruct cache.
2946  */
2948 
2949  /* Phase 1 */
2950  hash_seq_init(&status, RelationIdCache);
2951 
2952  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2953  {
2954  relation = idhentry->reldesc;
2955 
2956  /* Must close all smgr references to avoid leaving dangling ptrs */
2957  RelationCloseSmgr(relation);
2958 
2959  /*
2960  * Ignore new relations; no other backend will manipulate them before
2961  * we commit. Likewise, before replacing a relation's relfilelocator,
2962  * we shall have acquired AccessExclusiveLock and drained any
2963  * applicable pending invalidations.
2964  */
2965  if (relation->rd_createSubid != InvalidSubTransactionId ||
2967  continue;
2968 
2970 
2971  if (RelationHasReferenceCountZero(relation))
2972  {
2973  /* Delete this entry immediately */
2974  Assert(!relation->rd_isnailed);
2975  RelationClearRelation(relation, false);
2976  }
2977  else
2978  {
2979  /*
2980  * If it's a mapped relation, immediately update its rd_locator in
2981  * case its relfilenumber changed. We must do this during phase 1
2982  * in case the relation is consulted during rebuild of other
2983  * relcache entries in phase 2. It's safe since consulting the
2984  * map doesn't involve any access to relcache entries.
2985  */
2986  if (RelationIsMapped(relation))
2987  RelationInitPhysicalAddr(relation);
2988 
2989  /*
2990  * Add this entry to list of stuff to rebuild in second pass.
2991  * pg_class goes to the front of rebuildFirstList while
2992  * pg_class_oid_index goes to the back of rebuildFirstList, so
2993  * they are done first and second respectively. Other nailed
2994  * relations go to the front of rebuildList, so they'll be done
2995  * next in no particular order; and everything else goes to the
2996  * back of rebuildList.
2997  */
2998  if (RelationGetRelid(relation) == RelationRelationId)
2999  rebuildFirstList = lcons(relation, rebuildFirstList);
3000  else if (RelationGetRelid(relation) == ClassOidIndexId)
3001  rebuildFirstList = lappend(rebuildFirstList, relation);
3002  else if (relation->rd_isnailed)
3003  rebuildList = lcons(relation, rebuildList);
3004  else
3005  rebuildList = lappend(rebuildList, relation);
3006  }
3007  }
3008 
3009  /*
3010  * Now zap any remaining smgr cache entries. This must happen before we
3011  * start to rebuild entries, since that may involve catalog fetches which
3012  * will re-open catalog files.
3013  */
3014  smgrcloseall();
3015 
3016  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
3017  foreach(l, rebuildFirstList)
3018  {
3019  relation = (Relation) lfirst(l);
3020  RelationClearRelation(relation, true);
3021  }
3022  list_free(rebuildFirstList);
3023  foreach(l, rebuildList)
3024  {
3025  relation = (Relation) lfirst(l);
3026  RelationClearRelation(relation, true);
3027  }
3028  list_free(rebuildList);
3029 
3030  if (!debug_discard)
3031  /* Any RelationBuildDesc() on the stack must start over. */
3032  for (i = 0; i < in_progress_list_len; i++)
3033  in_progress_list[i].invalidated = true;
3034 }
3035 
3036 /*
3037  * RelationCloseSmgrByOid - close a relcache entry's smgr link
3038  *
3039  * Needed in some cases where we are changing a relation's physical mapping.
3040  * The link will be automatically reopened on next use.
3041  */
3042 void
3044 {
3045  Relation relation;
3046 
3047  RelationIdCacheLookup(relationId, relation);
3048 
3049  if (!PointerIsValid(relation))
3050  return; /* not in cache, nothing to do */
3051 
3052  RelationCloseSmgr(relation);
3053 }
3054 
3055 static void
3057 {
3058  if (EOXactTupleDescArray == NULL)
3059  {
3060  MemoryContext oldcxt;
3061 
3063 
3064  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
3067  MemoryContextSwitchTo(oldcxt);
3068  }
3070  {
3071  int32 newlen = EOXactTupleDescArrayLen * 2;
3072 
3074 
3076  newlen * sizeof(TupleDesc));
3077  EOXactTupleDescArrayLen = newlen;
3078  }
3079 
3081 }
3082 
#ifdef USE_ASSERT_CHECKING
/*
 * AssertPendingSyncConsistency
 *
 * Assert, for one relcache entry, that the relcache's idea of whether WAL is
 * being skipped for the relation matches RelFileLocatorSkippingWAL(), and
 * that an entry marked dropped is invalid and belongs to a relation that was
 * created, or given its first new relfilelocator, in this transaction.
 */
static void
AssertPendingSyncConsistency(Relation relation)
{
	bool		relcache_verdict =
		RelationIsPermanent(relation) &&
		((relation->rd_createSubid != InvalidSubTransactionId &&
		  RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
		 relation->rd_firstRelfilelocatorSubid != InvalidSubTransactionId);

	Assert(relcache_verdict == RelFileLocatorSkippingWAL(relation->rd_locator));

	if (relation->rd_droppedSubid != InvalidSubTransactionId)
		Assert(!relation->rd_isvalid &&
			   (relation->rd_createSubid != InvalidSubTransactionId ||
				relation->rd_firstRelfilelocatorSubid != InvalidSubTransactionId));
}

/*
 * AssertPendingSyncs_RelationCache
 *
 *	Assert that relcache.c and storage.c agree on whether to skip WAL.
 */
void
AssertPendingSyncs_RelationCache(void)
{
	HASH_SEQ_STATUS status;
	LOCALLOCK  *locallock;
	Relation   *rels;
	int			maxrels;
	int			nrels;
	RelIdCacheEnt *idhentry;
	int			i;

	/*
	 * Open every relation that this transaction has locked.  If, for some
	 * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
	 * a CommandCounterIncrement() typically yields a local invalidation
	 * message that destroys the relcache entry.  By recreating such entries
	 * here, we detect the problem.
	 */
	PushActiveSnapshot(GetTransactionSnapshot());
	maxrels = 1;
	rels = palloc(maxrels * sizeof(*rels));
	nrels = 0;
	hash_seq_init(&status, GetLockMethodLocalHash());
	while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
	{
		Oid			relid;
		Relation	r;

		if (locallock->nLocks <= 0)
			continue;
		if ((LockTagType) locallock->tag.lock.locktag_type !=
			LOCKTAG_RELATION)
			continue;

		/*
		 * The lock tag field already holds the relation OID as a plain
		 * integer; no Datum conversion is involved.
		 */
		relid = (Oid) locallock->tag.lock.locktag_field2;
		r = RelationIdGetRelation(relid);
		if (!RelationIsValid(r))
			continue;
		if (nrels >= maxrels)
		{
			maxrels *= 2;
			rels = repalloc(rels, maxrels * sizeof(*rels));
		}
		rels[nrels++] = r;
	}

	/* Check every entry now present in the relcache */
	hash_seq_init(&status, RelationIdCache);
	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
		AssertPendingSyncConsistency(idhentry->reldesc);

	/* Release the pins taken by RelationIdGetRelation() above */
	for (i = 0; i < nrels; i++)
		RelationClose(rels[i]);
	PopActiveSnapshot();
}
#endif
3160 
3161 /*
3162  * AtEOXact_RelationCache
3163  *
3164  * Clean up the relcache at main-transaction commit or abort.
3165  *
3166  * Note: this must be called *before* processing invalidation messages.
3167  * In the case of abort, we don't want to try to rebuild any invalidated
3168  * cache entries (since we can't safely do database accesses). Therefore
3169  * we must reset refcnts before handling pending invalidations.
3170  *
3171  * As of PostgreSQL 8.1, relcache refcnts should get released by the
3172  * ResourceOwner mechanism. This routine just does a debugging
3173  * cross-check that no pins remain. However, we also need to do special
3174  * cleanup when the current transaction created any relations or made use
3175  * of forced index lists.
3176  */
3177 void
3179 {
3180  HASH_SEQ_STATUS status;
3181  RelIdCacheEnt *idhentry;
3182  int i;
3183 
3184  /*
3185  * Forget in_progress_list. This is relevant when we're aborting due to
3186  * an error during RelationBuildDesc().
3187  */
3188  Assert(in_progress_list_len == 0 || !isCommit);
3190 
3191  /*
3192  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3193  * listed in it. Otherwise fall back on a hash_seq_search scan.
3194  *
3195  * For simplicity, eoxact_list[] entries are not deleted till end of
3196  * top-level transaction, even though we could remove them at
3197  * subtransaction end in some cases, or remove relations from the list if
3198  * they are cleared for other reasons. Therefore we should expect the
3199  * case that list entries are not found in the hashtable; if not, there's
3200  * nothing to do for them.
3201  */
3203  {
3204  hash_seq_init(&status, RelationIdCache);
3205  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3206  {
3207  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3208  }
3209  }
3210  else
3211  {
3212  for (i = 0; i < eoxact_list_len; i++)
3213  {
3214  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3215  &eoxact_list[i],
3216  HASH_FIND,
3217  NULL);
3218  if (idhentry != NULL)
3219  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3220  }
3221  }
3222 
3223  if (EOXactTupleDescArrayLen > 0)
3224  {
3225  Assert(EOXactTupleDescArray != NULL);
3226  for (i = 0; i < NextEOXactTupleDescNum; i++)
3229  EOXactTupleDescArray = NULL;
3230  }
3231 
3232  /* Now we're out of the transaction and can clear the lists */
3233  eoxact_list_len = 0;
3234  eoxact_list_overflowed = false;
3237 }
3238 
3239 /*
3240  * AtEOXact_cleanup
3241  *
3242  * Clean up a single rel at main-transaction commit or abort
3243  *
3244  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3245  * bother to prevent duplicate entries in eoxact_list[].
3246  */
3247 static void
3248 AtEOXact_cleanup(Relation relation, bool isCommit)
3249 {
3250  bool clear_relcache = false;
3251 
3252  /*
3253  * The relcache entry's ref count should be back to its normal
3254  * not-in-a-transaction state: 0 unless it's nailed in cache.
3255  *
3256  * In bootstrap mode, this is NOT true, so don't check it --- the
3257  * bootstrap code expects relations to stay open across start/commit
3258  * transaction calls. (That seems bogus, but it's not worth fixing.)
3259  *
3260  * Note: ideally this check would be applied to every relcache entry, not
3261  * just those that have eoxact work to do. But it's not worth forcing a
3262  * scan of the whole relcache just for this. (Moreover, doing so would
3263  * mean that assert-enabled testing never tests the hash_search code path
3264  * above, which seems a bad idea.)
3265  */
3266 #ifdef USE_ASSERT_CHECKING
3268  {
3269  int expected_refcnt;
3270 
3271  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3272  Assert(relation->rd_refcnt == expected_refcnt);
3273  }
3274 #endif
3275 
3276  /*
3277  * Is the relation live after this transaction ends?
3278  *
3279  * During commit, clear the relcache entry if it is preserved after
3280  * relation drop, in order not to orphan the entry. During rollback,
3281  * clear the relcache entry if the relation is created in the current
3282  * transaction since it isn't interesting any longer once we are out of
3283  * the transaction.
3284  */
3285  clear_relcache =
3286  (isCommit ?
3289 
3290  /*
3291  * Since we are now out of the transaction, reset the subids to zero. That
3292  * also lets RelationClearRelation() drop the relcache entry.
3293  */
3298 
3299  if (clear_relcache)
3300  {
3301  if (RelationHasReferenceCountZero(relation))
3302  {
3303  RelationClearRelation(relation, false);
3304  return;
3305  }
3306  else
3307  {
3308  /*
3309  * Hmm, somewhere there's a (leaked?) reference to the relation.
3310  * We daren't remove the entry for fear of dereferencing a
3311  * dangling pointer later. Bleat, and mark it as not belonging to
3312  * the current transaction. Hopefully it'll get cleaned up
3313  * eventually. This must be just a WARNING to avoid
3314  * error-during-error-recovery loops.
3315  */
3316  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3317  RelationGetRelationName(relation));
3318  }
3319  }
3320 }
3321 
3322 /*
3323  * AtEOSubXact_RelationCache
3324  *
3325  * Clean up the relcache at sub-transaction commit or abort.
3326  *
3327  * Note: this must be called *before* processing invalidation messages.
3328  */
3329 void
3331  SubTransactionId parentSubid)
3332 {
3333  HASH_SEQ_STATUS status;
3334  RelIdCacheEnt *idhentry;
3335  int i;
3336 
3337  /*
3338  * Forget in_progress_list. This is relevant when we're aborting due to
3339  * an error during RelationBuildDesc(). We don't commit subtransactions
3340  * during RelationBuildDesc().
3341  */
3342  Assert(in_progress_list_len == 0 || !isCommit);
3344 
3345  /*
3346  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3347  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3348  * logic as in AtEOXact_RelationCache.
3349  */
3351  {
3352  hash_seq_init(&status, RelationIdCache);
3353  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3354  {
3355  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3356  mySubid, parentSubid);
3357  }
3358  }
3359  else
3360  {
3361  for (i = 0; i < eoxact_list_len; i++)
3362  {
3363  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3364  &eoxact_list[i],
3365  HASH_FIND,
3366  NULL);
3367  if (idhentry != NULL)
3368  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3369  mySubid, parentSubid);
3370  }
3371  }
3372 
3373  /* Don't reset the list; we still need more cleanup later */
3374 }
3375 
3376 /*
3377  * AtEOSubXact_cleanup
3378  *
3379  * Clean up a single rel at subtransaction commit or abort
3380  *
3381  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3382  * bother to prevent duplicate entries in eoxact_list[].
3383  */
3384 static void
3385 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3386  SubTransactionId mySubid, SubTransactionId parentSubid)
3387 {
3388  /*
3389  * Is it a relation created in the current subtransaction?
3390  *
3391  * During subcommit, mark it as belonging to the parent, instead, as long
3392  * as it has not been dropped. Otherwise simply delete the relcache entry.
3393  * --- it isn't interesting any longer.
3394  */
3395  if (relation->rd_createSubid == mySubid)
3396  {
3397  /*
3398  * Valid rd_droppedSubid means the corresponding relation is dropped
3399  * but the relcache entry is preserved for at-commit pending sync. We
3400  * need to drop it explicitly here not to make the entry orphan.
3401  */
3402  Assert(relation->rd_droppedSubid == mySubid ||
3404  if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
3405  relation->rd_createSubid = parentSubid;
3406  else if (RelationHasReferenceCountZero(relation))
3407  {
3408  /* allow the entry to be removed */
3413  RelationClearRelation(relation, false);
3414  return;
3415  }
3416  else
3417  {
3418  /*
3419  * Hmm, somewhere there's a (leaked?) reference to the relation.
3420  * We daren't remove the entry for fear of dereferencing a
3421  * dangling pointer later. Bleat, and transfer it to the parent
3422  * subtransaction so we can try again later. This must be just a
3423  * WARNING to avoid error-during-error-recovery loops.
3424  */
3425  relation->rd_createSubid = parentSubid;
3426  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3427  RelationGetRelationName(relation));
3428  }
3429  }
3430 
3431  /*
3432  * Likewise, update or drop any new-relfilenumber-in-subtransaction record
3433  * or drop record.
3434  */
3435  if (relation->rd_newRelfilelocatorSubid == mySubid)
3436  {
3437  if (isCommit)
3438  relation->rd_newRelfilelocatorSubid = parentSubid;
3439  else
3441  }
3442 
3443  if (relation->rd_firstRelfilelocatorSubid == mySubid)
3444  {
3445  if (isCommit)
3446  relation->rd_firstRelfilelocatorSubid = parentSubid;
3447  else
3449  }
3450 
3451  if (relation->rd_droppedSubid == mySubid)
3452  {
3453  if (isCommit)
3454  relation->rd_droppedSubid = parentSubid;
3455  else
3457  }
3458 }
3459 
3460 
3461 /*
3462  * RelationBuildLocalRelation
3463  * Build a relcache entry for an about-to-be-created relation,
3464  * and enter it into the relcache.
3465  */
3466 Relation
3468  Oid relnamespace,
3469  TupleDesc tupDesc,
3470  Oid relid,
3471  Oid accessmtd,
3472  RelFileNumber relfilenumber,
3473  Oid reltablespace,
3474  bool shared_relation,
3475  bool mapped_relation,
3476  char relpersistence,
3477  char relkind)
3478 {
3479  Relation rel;
3480  MemoryContext oldcxt;
3481  int natts = tupDesc->natts;
3482  int i;
3483  bool has_not_null;
3484  bool nailit;
3485 
3486  Assert(natts >= 0);
3487 
3488  /*
3489  * check for creation of a rel that must be nailed in cache.
3490  *
3491  * XXX this list had better match the relations specially handled in
3492  * RelationCacheInitializePhase2/3.
3493  */
3494  switch (relid)
3495  {
3496  case DatabaseRelationId:
3497  case AuthIdRelationId:
3498  case AuthMemRelationId:
3499  case RelationRelationId:
3500  case AttributeRelationId:
3501  case ProcedureRelationId:
3502  case TypeRelationId:
3503  nailit = true;
3504  break;
3505  default:
3506  nailit = false;
3507  break;
3508  }
3509 
3510  /*
3511  * check that hardwired list of shared rels matches what's in the
3512  * bootstrap .bki file. If you get a failure here during initdb, you
3513  * probably need to fix IsSharedRelation() to match whatever you've done
3514  * to the set of shared relations.
3515  */
3516  if (shared_relation != IsSharedRelation(relid))
3517  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3518  relname, relid);
3519 
3520  /* Shared relations had better be mapped, too */
3521  Assert(mapped_relation || !shared_relation);
3522 
3523  /*
3524  * switch to the cache context to create the relcache entry.
3525  */
3526  if (!CacheMemoryContext)
3528 
3530 
3531  /*
3532  * allocate a new relation descriptor and fill in basic state fields.
3533  */
3534  rel = (Relation) palloc0(sizeof(RelationData));
3535 
3536  /* make sure relation is marked as having no open file yet */
3537  rel->rd_smgr = NULL;
3538 
3539  /* mark it nailed if appropriate */
3540  rel->rd_isnailed = nailit;
3541 
3542  rel->rd_refcnt = nailit ? 1 : 0;
3543 
3544  /* it's being created in this transaction */
3549 
3550  /*
3551  * create a new tuple descriptor from the one passed in. We do this
3552  * partly to copy it into the cache context, and partly because the new
3553  * relation can't have any defaults or constraints yet; they have to be
3554  * added in later steps, because they require additions to multiple system
3555  * catalogs. We can copy attnotnull constraints here, however.
3556  */
3557  rel->rd_att = CreateTupleDescCopy(tupDesc);
3558  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3559  has_not_null = false;
3560  for (i = 0; i < natts; i++)
3561  {
3562  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3563  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3564 
3565  datt->attidentity = satt->attidentity;
3566  datt->attgenerated = satt->attgenerated;
3567  datt->attnotnull = satt->attnotnull;
3568  has_not_null |= satt->attnotnull;
3569  }
3570 
3571  if (has_not_null)
3572  {
3573  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3574 
3575  constr->has_not_null = true;
3576  rel->rd_att->constr = constr;
3577  }
3578 
3579  /*
3580  * initialize relation tuple form (caller may add/override data later)
3581  */
3583 
3584  namestrcpy(&rel->rd_rel->relname, relname);
3585  rel->rd_rel->relnamespace = relnamespace;
3586 
3587  rel->rd_rel->relkind = relkind;
3588  rel->rd_rel->relnatts = natts;
3589  rel->rd_rel->reltype = InvalidOid;
3590  /* needed when bootstrapping: */
3591  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3592 
3593  /* set up persistence and relcache fields dependent on it */
3594  rel->rd_rel->relpersistence = relpersistence;
3595  switch (relpersistence)
3596  {
3597  case RELPERSISTENCE_UNLOGGED:
3598  case RELPERSISTENCE_PERMANENT:
3600  rel->rd_islocaltemp = false;
3601  break;
3602  case RELPERSISTENCE_TEMP:
3603  Assert(isTempOrTempToastNamespace(relnamespace));
3605  rel->rd_islocaltemp = true;
3606  break;
3607  default:
3608  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3609  break;
3610  }
3611 
3612  /* if it's a materialized view, it's not populated initially */
3613  if (relkind == RELKIND_MATVIEW)
3614  rel->rd_rel->relispopulated = false;
3615  else
3616  rel->rd_rel->relispopulated = true;
3617 
3618  /* set replica identity -- system catalogs and non-tables don't have one */
3619  if (!IsCatalogNamespace(relnamespace) &&
3620  (relkind == RELKIND_RELATION ||
3621  relkind == RELKIND_MATVIEW ||
3622  relkind == RELKIND_PARTITIONED_TABLE))
3623  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3624  else
3625  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3626 
3627  /*
3628  * Insert relation physical and logical identifiers (OIDs) into the right
3629  * places. For a mapped relation, we set relfilenumber to zero and rely
3630  * on RelationInitPhysicalAddr to consult the map.
3631  */
3632  rel->rd_rel->relisshared = shared_relation;
3633 
3634  RelationGetRelid(rel) = relid;
3635 
3636  for (i = 0; i < natts; i++)
3637  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3638 
3639  rel->rd_rel->reltablespace = reltablespace;
3640 
3641  if (mapped_relation)
3642  {
3643  rel->rd_rel->relfilenode = InvalidRelFileNumber;
3644  /* Add it to the active mapping information */
3645  RelationMapUpdateMap(relid, relfilenumber, shared_relation, true);
3646  }
3647  else
3648  rel->rd_rel->relfilenode = relfilenumber;
3649 
3650  RelationInitLockInfo(rel); /* see lmgr.c */
3651 
3653 
3654  rel->rd_rel->relam = accessmtd;
3655 
3656  /*
3657  * RelationInitTableAccessMethod will do syscache lookups, so we mustn't
3658  * run it in CacheMemoryContext. Fortunately, the remaining steps don't
3659  * require a long-lived current context.
3660  */
3661  MemoryContextSwitchTo(oldcxt);
3662 
3663  if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_SEQUENCE)
3665 
3666  /*
3667  * Okay to insert into the relcache hash table.
3668  *
3669  * Ordinarily, there should certainly not be an existing hash entry for
3670  * the same OID; but during bootstrap, when we create a "real" relcache
3671  * entry for one of the bootstrap relations, we'll be overwriting the
3672  * phony one created with formrdesc. So allow that to happen for nailed
3673  * rels.
3674  */
3675  RelationCacheInsert(rel, nailit);
3676 
3677  /*
3678  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3679  * can't do this before storing relid in it.
3680  */
3681  EOXactListAdd(rel);
3682 
3683  /* It's fully valid */
3684  rel->rd_isvalid = true;
3685 
3686  /*
3687  * Caller expects us to pin the returned entry.
3688  */
3690 
3691  return rel;
3692 }
3693 
3694 
3695 /*
3696  * RelationSetNewRelfilenumber
3697  *
3698  * Assign a new relfilenumber (physical file name), and possibly a new
3699  * persistence setting, to the relation.
3700  *
3701  * This allows a full rewrite of the relation to be done with transactional
3702  * safety (since the filenumber assignment can be rolled back). Note however
3703  * that there is no simple way to access the relation's old data for the
3704  * remainder of the current transaction. This limits the usefulness to cases
3705  * such as TRUNCATE or rebuilding an index from scratch.
3706  *
3707  * Caller must already hold exclusive lock on the relation.
3708  */
3709 void
3710 RelationSetNewRelfilenumber(Relation relation, char persistence)
3711 {
3712  RelFileNumber newrelfilenumber;
3713  Relation pg_class;
3714  HeapTuple tuple;
3715  Form_pg_class classform;
3716  MultiXactId minmulti = InvalidMultiXactId;
3717  TransactionId freezeXid = InvalidTransactionId;
3718  RelFileLocator newrlocator;
3719 
3720  if (!IsBinaryUpgrade)
3721  {
3722  /* Allocate a new relfilenumber */
3723  newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
3724  NULL, persistence);
3725  }
3726  else if (relation->rd_rel->relkind == RELKIND_INDEX)
3727  {
3729  ereport(ERROR,
3730  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3731  errmsg("index relfilenumber value not set when in binary upgrade mode")));
3732 
3735  }
3736  else if (relation->rd_rel->relkind == RELKIND_RELATION)
3737  {
3739  ereport(ERROR,
3740  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3741  errmsg("heap relfilenumber value not set when in binary upgrade mode")));
3742 
3745  }
3746  else
3747  ereport(ERROR,
3748  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3749  errmsg("unexpected request for new relfilenumber in binary upgrade mode")));
3750 
3751  /*
3752  * Get a writable copy of the pg_class tuple for the given relation.
3753  */
3754  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3755 
3756  tuple = SearchSysCacheCopy1(RELOID,
3757  ObjectIdGetDatum(RelationGetRelid(relation)));
3758  if (!HeapTupleIsValid(tuple))
3759  elog(ERROR, "could not find tuple for relation %u",
3760  RelationGetRelid(relation));
3761  classform = (Form_pg_class) GETSTRUCT(tuple);
3762 
3763  /*
3764  * Schedule unlinking of the old storage at transaction commit, except
3765  * when performing a binary upgrade, when we must do it immediately.
3766  */
3767  if (IsBinaryUpgrade)
3768  {
3769  SMgrRelation srel;
3770 
3771  /*
3772  * During a binary upgrade, we use this code path to ensure that
3773  * pg_largeobject and its index have the same relfilenumbers as in the
3774  * old cluster. This is necessary because pg_upgrade treats
3775  * pg_largeobject like a user table, not a system table. It is however
3776  * possible that a table or index may need to end up with the same
3777  * relfilenumber in the new cluster as what it had in the old cluster.
3778  * Hence, we can't wait until commit time to remove the old storage.
3779  *
3780  * In general, this function needs to have transactional semantics,
3781  * and removing the old storage before commit time surely isn't.
3782  * However, it doesn't really matter, because if a binary upgrade
3783  * fails at this stage, the new cluster will need to be recreated
3784  * anyway.
3785  */
3786  srel = smgropen(relation->rd_locator, relation->rd_backend);
3787  smgrdounlinkall(&srel, 1, false);
3788  smgrclose(srel);
3789  }
3790  else
3791  {
3792  /* Not a binary upgrade, so just schedule it to happen later. */
3793  RelationDropStorage(relation);
3794  }
3795 
3796  /*
3797  * Create storage for the main fork of the new relfilenumber. If it's a
3798  * table-like object, call into the table AM to do so, which'll also
3799  * create the table's init fork if needed.
3800  *
3801  * NOTE: If relevant for the AM, any conflict in relfilenumber value will
3802  * be caught here, if GetNewRelFileNumber messes up for any reason.
3803  */
3804  newrlocator = relation->rd_locator;
3805  newrlocator.relNumber = newrelfilenumber;
3806 
3807  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3808  {
3809  table_relation_set_new_filelocator(relation, &newrlocator,
3810  persistence,
3811  &freezeXid, &minmulti);
3812  }
3813  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
3814  {
3815  /* handle these directly, at least for now */
3816  SMgrRelation srel;
3817 
3818  srel = RelationCreateStorage(newrlocator, persistence, true);
3819  smgrclose(srel);
3820  }
3821  else
3822  {
3823  /* we shouldn't be called for anything else */
3824  elog(ERROR, "relation \"%s\" does not have storage",
3825  RelationGetRelationName(relation));
3826  }
3827 
3828  /*
3829  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3830  * change; instead we have to send the update to the relation mapper.
3831  *
3832  * For mapped indexes, we don't actually change the pg_class entry at all;
3833  * this is essential when reindexing pg_class itself. That leaves us with
3834  * possibly-inaccurate values of relpages etc, but those will be fixed up
3835  * later.
3836  */
3837  if (RelationIsMapped(relation))
3838  {
3839  /* This case is only supported for indexes */
3840  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3841 
3842  /* Since we're not updating pg_class, these had better not change */
3843  Assert(classform->relfrozenxid == freezeXid);
3844  Assert(classform->relminmxid == minmulti);
3845  Assert(classform->relpersistence == persistence);
3846 
3847  /*
3848  * In some code paths it's possible that the tuple update we'd
3849  * otherwise do here is the only thing that would assign an XID for
3850  * the current transaction. However, we must have an XID to delete
3851  * files, so make sure one is assigned.
3852  */
3853  (void) GetCurrentTransactionId();
3854 
3855  /* Do the deed */
3857  newrelfilenumber,
3858  relation->rd_rel->relisshared,
3859  false);
3860 
3861  /* Since we're not updating pg_class, must trigger inval manually */
3862  CacheInvalidateRelcache(relation);
3863  }
3864  else
3865  {
3866  /* Normal case, update the pg_class entry */
3867  classform->relfilenode = newrelfilenumber;
3868 
3869  /* relpages etc. never change for sequences */
3870  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3871  {
3872  classform->relpages = 0; /* it's empty until further notice */
3873  classform->reltuples = -1;
3874  classform->relallvisible = 0;
3875  }
3876  classform->relfrozenxid = freezeXid;
3877  classform->relminmxid = minmulti;
3878  classform->relpersistence = persistence;
3879 
3880  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3881  }
3882 
3883  heap_freetuple(tuple);
3884 
3885  table_close(pg_class, RowExclusiveLock);
3886 
3887  /*
3888  * Make the pg_class row change or relation map change visible. This will
3889  * cause the relcache entry to get updated, too.
3890  */
3892 
3894 }
3895 
3896 /*
3897  * RelationAssumeNewRelfilelocator
3898  *
3899  * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
3900  * this. The call shall precede any code that might insert WAL records whose
3901  * replay would modify bytes in the new RelFileLocator, and the call shall follow
3902  * any WAL modifying bytes in the prior RelFileLocator. See struct RelationData.
3903  * Ideally, call this as near as possible to the CommandCounterIncrement()
3904  * that makes the pg_class change visible (before it or after it); that
3905  * minimizes the chance of future development adding a forbidden WAL insertion
3906  * between RelationAssumeNewRelfilelocator() and CommandCounterIncrement().
3907  */
/*
 * NOTE(review): the embedded listing numbers skip 3909 and 3911-3913 in this
 * block, so the line carrying the function name and parameter list, and the
 * statements that set "these fields" (see the comment below), are not
 * visible here.  Going by the header comment above, this is presumably
 * RelationAssumeNewRelfilelocator -- confirm against the upstream file and
 * restore the dropped lines before building.
 */
3908 void
3910 {
3914 
3915  /* Flag relation as needing eoxact cleanup (to clear these fields) */
3916  EOXactListAdd(relation);
3917 }
3918 
3919 
3920 /*
3921  * RelationCacheInitialize
3922  *
3923  * This initializes the relation descriptor cache. At the time
3924  * that this is invoked, we can't do database access yet (mainly
3925  * because the transaction subsystem is not up); all we are doing
3926  * is making an empty cache hashtable. This must be done before
3927  * starting the initialization transaction, because otherwise
3928  * AtEOXact_RelationCache would crash if that transaction aborts
3929  * before we can get the relcache set up.
3930  */
3931 
/*
 * INITRELCACHESIZE is passed to hash_create() below as the size argument for
 * the "Relcache by OID" hash table.
 *
 * NOTE(review): embedded listing numbers 3935, 3944, 3958-3959 and 3966 are
 * absent from this block.  The function-name line, the statement governed by
 * "if (!CacheMemoryContext)", the start of the in_progress_list allocation
 * call, and the relation-mapper call announced by the last comment are
 * therefore not visible.  As listed, the "if" would govern the ctl.keysize
 * assignment instead -- restore the dropped lines from the upstream file.
 */
3932 #define INITRELCACHESIZE 400
3933 
3934 void
3936 {
3937  HASHCTL ctl;
3938  int allocsize;
3939 
3940  /*
3941  * make sure cache memory context exists
3942  */
3943  if (!CacheMemoryContext)
3945 
3946  /*
3947  * create hashtable that indexes the relcache
3948  */
 /* keys are Oid-sized and entries are RelIdCacheEnt structs */
3949  ctl.keysize = sizeof(Oid);
3950  ctl.entrysize = sizeof(RelIdCacheEnt);
3951  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3952  &ctl, HASH_ELEM | HASH_BLOBS);
3953 
3954  /*
3955  * reserve enough in_progress_list slots for many cases
3956  */
3957  allocsize = 4;
 /* only the tail of the allocation call (its size argument) survives here */
3960  allocsize * sizeof(*in_progress_list));
 /* record the capacity that was just reserved */
3961  in_progress_list_maxlen = allocsize;
3962 
3963  /*
3964  * relation mapper needs to be initialized too
3965  */
3967 }
3968 
3969 /*
3970  * RelationCacheInitializePhase2
3971  *
3972  * This is called to prepare for access to shared catalogs during startup.
3973  * We must at least set up nailed reldescs for pg_database, pg_authid,
3974  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like to have reldescs
3975  * for their indexes, too. We attempt to load this information from the
3976  * shared relcache init file. If that's missing or broken, just make
3977  * phony entries for the catalogs themselves.
3978  * RelationCacheInitializePhase3 will clean up as needed.
3979  */
/*
 * NOTE(review): embedded listing numbers 3981, 3988, 3994 and 4000 are absent
 * from this block.  The function-name line, the relation-mapper call, the
 * condition guarding the early "return", and the statement that assigns
 * oldcxt are therefore not visible.  As listed, the "return" at 3995 is
 * unconditional and oldcxt is read without being set -- restore the dropped
 * lines from the upstream file before building.
 */
3980 void
3982 {
 /* context to switch back to at the end (see MemoryContextSwitchTo below) */
3983  MemoryContext oldcxt;
3984 
3985  /*
3986  * relation mapper needs to be initialized too
3987  */
3989 
3990  /*
3991  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3992  * nothing.
3993  */
3995  return;
3996 
3997  /*
3998  * switch to cache memory context
3999  */
4001 
4002  /*
4003  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
4004  * the cache with pre-made descriptors for the critical shared catalogs.
4005  */
4006  if (!load_relcache_init_file(true))
4007  {
 /* one formrdesc() call per critical shared catalog; third argument is true for all of them */
4008  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
4009  Natts_pg_database, Desc_pg_database);
4010  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
4011  Natts_pg_authid, Desc_pg_authid);
4012  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
4013  Natts_pg_auth_members, Desc_pg_auth_members);
4014  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
4015  Natts_pg_shseclabel, Desc_pg_shseclabel);
4016  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
4017  Natts_pg_subscription, Desc_pg_subscription);
4018 
 /* count of the formrdesc() calls above; as a macro it is not scoped to this brace block */
4019 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
4020  }
4021 
4022  MemoryContextSwitchTo(oldcxt);
4023 }
4024 
4025 /*
4026  * RelationCacheInitializePhase3
4027  *
4028  * This is called as soon as the catcache and transaction system
4029  * are functional and we have determined MyDatabaseId. At this point
4030  * we can actually read data from the database's system catalogs.
4031  * We first try to read pre-computed relcache entries from the local
4032  * relcache init file. If that's missing or broken, make phony entries
4033  * for the minimum set of nailed-in-cache relations. Then (unless
4034  * bootstrapping) make sure we have entries for the critical system
4035  * indexes. Once we've done all this, we have enough infrastructure to
4036  * open any system catalog or use any catcache. The last step is to
4037  * rewrite the cache files if needed.
4038  */
/*
 * NOTE(review): this is a line-numbered listing with gaps.  Embedded numbers
 * 4040, 4050, 4055, 4082, 4110, 4144, 4161, 4190, 4279, 4286, 4308 and 4311
 * do not appear in this block, so the function-name line and several
 * statements and conditions are not visible.  As listed, the "return" at
 * 4083 is unconditional, the two load_critical_index groups are bare brace
 * blocks, and oldcxt is read without being set.  Restore the dropped lines
 * from the upstream file before building.
 */
4039 void
4041 {
4042  HASH_SEQ_STATUS status;
4043  RelIdCacheEnt *idhentry;
4044  MemoryContext oldcxt;
 /* starts as the negation of criticalSharedRelcachesBuilt; forced true below if the local init file is not loaded */
4045  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
4046 
4047  /*
4048  * relation mapper needs to be initialized too
4049  */
4051 
4052  /*
4053  * switch to cache memory context
4054  */
4056 
4057  /*
4058  * Try to load the local relcache cache file. If unsuccessful, bootstrap
4059  * the cache with pre-made descriptors for the critical "nailed-in" system
4060  * catalogs.
4061  */
4062  if (IsBootstrapProcessingMode() ||
4063  !load_relcache_init_file(false))
4064  {
4065  needNewCacheFile = true;
4066 
4067  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
4068  Natts_pg_class, Desc_pg_class);
4069  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
4070  Natts_pg_attribute, Desc_pg_attribute);
4071  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
4072  Natts_pg_proc, Desc_pg_proc);
4073  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
4074  Natts_pg_type, Desc_pg_type);
4075 
 /* count of the formrdesc() calls above; as a macro it is not scoped to this brace block */
4076 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
4077  }
4078 
4079  MemoryContextSwitchTo(oldcxt);
4080 
4081  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
4083  return;
4084 
4085  /*
4086  * If we didn't get the critical system indexes loaded into relcache, do
4087  * so now. These are critical because the catcache and/or opclass cache
4088  * depend on them for fetches done during relcache load. Thus, we have an
4089  * infinite-recursion problem. We can break the recursion by doing
4090  * heapscans instead of indexscans at certain key spots. To avoid hobbling
4091  * performance, we only want to do that until we have the critical indexes
4092  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
4093  * decide whether to do heapscan or indexscan at the key spots, and we set
4094  * it true after we've loaded the critical indexes.
4095  *
4096  * The critical indexes are marked as "nailed in cache", partly to make it
4097  * easy for load_relcache_init_file to count them, but mainly because we
4098  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
4099  * true. (NOTE: perhaps it would be possible to reload them by
4100  * temporarily setting criticalRelcachesBuilt to false again. For now,
4101  * though, we just nail 'em in.)
4102  *
4103  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
4104  * in the same way as the others, because the critical catalogs don't
4105  * (currently) have any rules or triggers, and so these indexes can be
4106  * rebuilt without inducing recursion. However they are used during
4107  * relcache load when a rel does have rules or triggers, so we choose to
4108  * nail them for performance reasons.
4109  */
 /* NOTE(review): the condition that should introduce this block (listing number 4110) is not visible */
4111  {
4112  load_critical_index(ClassOidIndexId,
4113  RelationRelationId);
4114  load_critical_index(AttributeRelidNumIndexId,
4115  AttributeRelationId);
4116  load_critical_index(IndexRelidIndexId,
4117  IndexRelationId);
4118  load_critical_index(OpclassOidIndexId,
4119  OperatorClassRelationId);
4120  load_critical_index(AccessMethodProcedureIndexId,
4121  AccessMethodProcedureRelationId);
4122  load_critical_index(RewriteRelRulenameIndexId,
4123  RewriteRelationId);
4124  load_critical_index(TriggerRelidNameIndexId,
4125  TriggerRelationId);
4126 
4127 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
4128 
4129  criticalRelcachesBuilt = true;
4130  }
4131 
4132  /*
4133  * Process critical shared indexes too.
4134  *
4135  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
4136  * initial lookup of MyDatabaseId, without which we'll never find any
4137  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
4138  * database OID, so it instead depends on DatabaseOidIndexId. We also
4139  * need to nail up some indexes on pg_authid and pg_auth_members for use
4140  * during client authentication. SharedSecLabelObjectIndexId isn't
4141  * critical for the core system, but authentication hooks might be
4142  * interested in it.
4143  */
 /* NOTE(review): the condition (listing number 4144) and the statement before the closing brace (4161) are not visible */
4145  {
4146  load_critical_index(DatabaseNameIndexId,
4147  DatabaseRelationId);
4148  load_critical_index(DatabaseOidIndexId,
4149  DatabaseRelationId);
4150  load_critical_index(AuthIdRolnameIndexId,
4151  AuthIdRelationId);
4152  load_critical_index(AuthIdOidIndexId,
4153  AuthIdRelationId);
4154  load_critical_index(AuthMemMemRoleIndexId,
4155  AuthMemRelationId);
4156  load_critical_index(SharedSecLabelObjectIndexId,
4157  SharedSecLabelRelationId);
4158 
4159 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
4160 
4162  }
4163 
4164  /*
4165  * Now, scan all the relcache entries and update anything that might be
4166  * wrong in the results from formrdesc or the relcache cache file. If we
4167  * faked up relcache entries using formrdesc, then read the real pg_class
4168  * rows and replace the fake entries with them. Also, if any of the
4169  * relcache entries have rules, triggers, or security policies, load that
4170  * info the hard way since it isn't recorded in the cache file.
4171  *
4172  * Whenever we access the catalogs to read data, there is a possibility of
4173  * a shared-inval cache flush causing relcache entries to be removed.
4174  * Since hash_seq_search only guarantees to still work after the *current*
4175  * entry is removed, it's unsafe to continue the hashtable scan afterward.
4176  * We handle this by restarting the scan from scratch after each access.
4177  * This is theoretically O(N^2), but the number of entries that actually
4178  * need to be fixed is small enough that it doesn't matter.
4179  */
4180  hash_seq_init(&status, RelationIdCache);
4181 
4182  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4183  {
4184  Relation relation = idhentry->reldesc;
 /* set by every fix-up branch below; checked at the bottom of the loop to restart the scan */
4185  bool restart = false;
4186 
4187  /*
4188  * Make sure *this* entry doesn't get flushed while we work with it.
4189  */
 /* NOTE(review): the statement announced by the comment above (listing number 4190) is not visible */
4191 
4192  /*
4193  * If it's a faked-up entry, read the real pg_class tuple.
4194  */
 /* an InvalidOid relowner is the marker used here to recognize a faked-up entry */
4195  if (relation->rd_rel->relowner == InvalidOid)
4196  {
4197  HeapTuple htup;
4198  Form_pg_class relp;
4199 
4200  htup = SearchSysCache1(RELOID,
4201  ObjectIdGetDatum(RelationGetRelid(relation)));
4202  if (!HeapTupleIsValid(htup))
4203  elog(FATAL, "cache lookup failed for relation %u",
4204  RelationGetRelid(relation));
4205  relp = (Form_pg_class) GETSTRUCT(htup);
4206 
4207  /*
4208  * Copy tuple to relation->rd_rel. (See notes in
4209  * AllocateRelationDesc())
4210  */
4211  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
4212 
4213  /* Update rd_options while we have the tuple */
4214  if (relation->rd_options)
4215  pfree(relation->rd_options);
4216  RelationParseRelOptions(relation, htup);
4217 
4218  /*
4219  * Check the values in rd_att were set up correctly. (We cannot
4220  * just copy them over now: formrdesc must have set up the rd_att
4221  * data correctly to start with, because it may already have been
4222  * copied into one or more catcache entries.)
4223  */
4224  Assert(relation->rd_att->tdtypeid == relp->reltype);
4225  Assert(relation->rd_att->tdtypmod == -1);
4226 
4227  ReleaseSysCache(htup);
4228 
4229  /* relowner had better be OK now, else we'll loop forever */
4230  if (relation->rd_rel->relowner == InvalidOid)
4231  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
4232  RelationGetRelationName(relation));
4233 
4234  restart = true;
4235  }
4236 
4237  /*
4238  * Fix data that isn't saved in relcache cache file.
4239  *
4240  * relhasrules or relhastriggers could possibly be wrong or out of
4241  * date. If we don't actually find any rules or triggers, clear the
4242  * local copy of the flag so that we don't get into an infinite loop
4243  * here. We don't make any attempt to fix the pg_class entry, though.
4244  */
4245  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
4246  {
4247  RelationBuildRuleLock(relation);
4248  if (relation->rd_rules == NULL)
4249  relation->rd_rel->relhasrules = false;
4250  restart = true;
4251  }
4252  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
4253  {
4254  RelationBuildTriggers(relation);
4255  if (relation->trigdesc == NULL)
4256  relation->rd_rel->relhastriggers = false;
4257  restart = true;
4258  }
4259 
4260  /*
4261  * Re-load the row security policies if the relation has them, since
4262  * they are not preserved in the cache. Note that we can never NOT
4263  * have a policy while relrowsecurity is true,
4264  * RelationBuildRowSecurity will create a single default-deny policy
4265  * if there is no policy defined in pg_policy.
4266  */
4267  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
4268  {
4269  RelationBuildRowSecurity(relation);
4270 
4271  Assert(relation->rd_rsdesc != NULL);
4272  restart = true;
4273  }
4274 
4275  /* Reload tableam data if needed */
4276  if (relation->rd_tableam == NULL &&
4277  (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || relation->rd_rel->relkind == RELKIND_SEQUENCE))
4278  {
 /* NOTE(review): the statement that should set rd_tableam (listing number 4279) is not visible */
4280  Assert(relation->rd_tableam != NULL);
4281 
4282  restart = true;
4283  }
4284 
4285  /* Release hold on the relation */
 /* NOTE(review): the statement announced by the comment above (listing number 4286) is not visible */
4287 
4288  /* Now, restart the hashtable scan if needed */
4289  if (restart)
4290  {
 /* end the current scan explicitly before starting a new one over the same table */
4291  hash_seq_term(&status);
4292  hash_seq_init(&status, RelationIdCache);
4293  }
4294  }
4295 
4296  /*
4297  * Lastly, write out new relcache cache files if needed. We don't bother
4298  * to distinguish cases where only one of the two needs an update.
4299  */
4300  if (needNewCacheFile)
4301  {
4302  /*
4303  * Force all the catcaches to finish initializing and thereby open the
4304  * catalogs and indexes they use. This will preload the relcache with
4305  * entries for all the most important system catalogs and indexes, so
4306  * that the init files will be most useful for future backends.
4307  */
 /* NOTE(review): the call announced by the comment above (listing number 4308) is not visible */
4309 
4310  /* now write the files */
 /* NOTE(review): listing number 4311 is absent; only the write with argument false is visible */
4312  write_relcache_init_file(false);
4313  }
4314 }
4315 
4316 /*
4317  * Load one critical system index into the relcache
4318  *
4319  * indexoid is the OID of the target index, heapoid is the OID of the catalog
4320  * it belongs to.
4321  */
4322 static void
4323 load_critical_index(Oid indexoid, Oid heapoid)
4324 {
4325  Relation ird;
4326 
4327  /*
4328  * We must lock the underlying catalog before locking the index to avoid
4329  * deadlock, since RelationBuildDesc might well need to read the catalog,
4330  * and if anyone else is exclusive-locking this catalog and index they'll
4331  * be doing it in that order.
4332  */
4333  LockRelationOid(heapoid, AccessShareLock);
4334  LockRelationOid(indexoid, AccessShareLock);
4335  ird = RelationBuildDesc(indexoid, true);
 /* a critical index that cannot be built is reported at PANIC level */
4336  if (ird == NULL)
4337  elog(PANIC, "could not open critical system index %u", indexoid);
 /* mark the new entry as nailed and give it a reference count of 1 */
4338  ird->rd_isnailed = true;
4339  ird->rd_refcnt = 1;
 /*
  * NOTE(review): embedded listing numbers 4340-4341 are absent here, and no
  * release of the two AccessShareLocks taken above is visible in this view.
  * Check the upstream file for the dropped statements.
  */
4342 
 /* return value is deliberately discarded; presumably called to fill the entry's cached attribute options -- confirm */
4343  (void) RelationGetIndexAttOptions(ird, false);
4344 }
4345 
4346 /*
4347  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4348  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4349  *
4350  * We need this kluge because we have to be able to access non-fixed-width
4351  * fields of pg_class and pg_index before we have the standard catalog caches
4352  * available. We use predefined data that's set up in just the same way as
4353  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4354  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4355  * does it have a TupleConstr field. But it's good enough for the purpose of
4356  * extracting fields.
4357  */
/*
 * Build a TupleDesc with natts attributes by copying the fixed-size part of
 * each entry of attrs[].  The callers visible in this file pass
 * (Natts_pg_class, Desc_pg_class) and (Natts_pg_index, Desc_pg_index) to
 * BuildHardcodedDescriptor, which is presumably this function.
 *
 * NOTE(review): embedded listing numbers 4359 and 4365 are absent, so the
 * function-name/parameter line and the statement that assigns oldcxt are not
 * visible.  Restore them from the upstream file before building.
 */
4358 static TupleDesc
4360 {
4361  TupleDesc result;
4362  MemoryContext oldcxt;
4363  int i;
4364 
4366 
4367  result = CreateTemplateTupleDesc(natts);
4368  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4369  result->tdtypmod = -1;
4370 
4371  for (i = 0; i < natts; i++)
4372  {
 /* only the first ATTRIBUTE_FIXED_PART_SIZE bytes of each source entry are copied */
4373  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4374  /* make sure attcacheoff is valid */
4375  TupleDescAttr(result, i)->attcacheoff = -1;
4376  }
4377 
4378  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
 /* unconditional access to attribute 0: assumes natts >= 1 */
4379  TupleDescAttr(result, 0)->attcacheoff = 0;
4380 
4381  /* Note: we don't bother to set up a TupleConstr entry */
4382 
4383  MemoryContextSwitchTo(oldcxt);
4384 
4385  return result;
4386 }
4387 
/*
 * Return the hardcoded tuple descriptor for pg_class, building it on the
 * first call and handing back the same pointer on later calls.
 *
 * NOTE(review): embedded listing number 4389 (the function-name line) is
 * absent; going by the header comment further up, this is presumably
 * GetPgClassDescriptor -- confirm against the upstream file.
 */
4388 static TupleDesc
4390 {
 /* function-local static: keeps its value across calls */
4391  static TupleDesc pgclassdesc = NULL;
4392 
4393  /* Already done? */
4394  if (pgclassdesc == NULL)
4395  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4396  Desc_pg_class);
4397 
4398  return pgclassdesc;
4399 }
4400 
/*
 * Return the hardcoded tuple descriptor for pg_index, building it on the
 * first call and handing back the same pointer on later calls.
 *
 * NOTE(review): embedded listing number 4402 (the function-name line) is
 * absent; going by the header comment further up, this is presumably
 * GetPgIndexDescriptor -- confirm against the upstream file.
 */
4401 static TupleDesc
4403 {
 /* function-local static: keeps its value across calls */
4404  static TupleDesc pgindexdesc = NULL;
4405 
4406  /* Already done? */
4407  if (pgindexdesc == NULL)
4408  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4409  Desc_pg_index);
4410 
4411  return pgindexdesc;
4412 }
4413 
4414 /*
4415  * Load any default attribute value definitions for the relation.
4416  *
4417  * ndef is the number of attributes that were marked atthasdef.
4418  *
4419  * Note: we don't make it a hard error to be missing some pg_attrdef records.
4420  * We can limp along as long as nothing needs to use the default value. Code
4421  * that fails to find an expected AttrDefault record should throw an error.
4422  */
4423 static void
4424 AttrDefaultFetch(Relation relation, int ndef)
4425 {
4426  AttrDefault *attrdef;
4427  Relation adrel;
4428  SysScanDesc adscan;
4429  ScanKeyData skey;
4430  HeapTuple htup;
 /* number of array slots filled so far; may end up smaller than ndef */
4431  int found = 0;
4432 
4433  /* Allocate array with room for as many entries as expected */
 /*
  * NOTE(review): embedded listing number 4435 is absent, so the allocator
  * call between the cast and the size argument is not visible.  Restore it
  * from the upstream file before building.
  */
4434  attrdef = (AttrDefault *)
4436  ndef * sizeof(AttrDefault));
4437 
4438  /* Search pg_attrdef for relevant entries */
4439  ScanKeyInit(&skey,
4440  Anum_pg_attrdef_adrelid,
4441  BTEqualStrategyNumber, F_OIDEQ,
4442  ObjectIdGetDatum(RelationGetRelid(relation)));
4443 
4444  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4445  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4446  NULL, 1, &skey);
4447 
4448  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4449  {
4450  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4451  Datum val;
4452  bool isnull;
4453 
4454  /* protect limited size of array */
4455  if (found >= ndef)
4456  {
4457  elog(WARNING, "unexpected pg_attrdef record found for attribute %d of relation \"%s\"",
4458  adform->adnum, RelationGetRelationName(relation));
 /* stop scanning: any further rows are ignored */
4459  break;
4460  }
4461 
4462  val = fastgetattr(htup,
4463  Anum_pg_attrdef_adbin,
4464  adrel->rd_att, &isnull);
 /* a null adbin only warns; the row is skipped without consuming an array slot */
4465  if (isnull)
4466  elog(WARNING, "null adbin for attribute %d of relation \"%s\"",
4467  adform->adnum, RelationGetRelationName(relation));
4468  else
4469  {
4470  /* detoast and convert to cstring in caller's context */
4471  char *s = TextDatumGetCString(val);
4472 
4473  attrdef[found].adnum = adform->adnum;
 /* keep a copy in CacheMemoryContext, then free the temporary string */
4474  attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4475  pfree(s);
4476  found++;
4477  }
4478  }
4479 
4480  systable_endscan(adscan);
4481  table_close(adrel, AccessShareLock);
4482 
 /* a count mismatch is only a WARNING; see the header comment for why */
4483  if (found != ndef)
4484  elog(WARNING, "%d pg_attrdef record(s) missing for relation \"%s\"",
4485  ndef - found, RelationGetRelationName(relation));
4486 
4487  /*
4488  * Sort the AttrDefault entries by adnum, for the convenience of
4489  * equalTupleDescs(). (Usually, they already will be in order, but this
4490  * might not be so if systable_getnext isn't using an index.)
4491  */
4492  if (found > 1)
4493  qsort(attrdef, found, sizeof(AttrDefault), AttrDefaultCmp);
4494 
4495  /* Install array only after it's fully valid */
 /* num_defval records how many entries were actually loaded, not ndef */
4496  relation->rd_att->constr->defval = attrdef;
4497  relation->rd_att->constr->num_defval = found;
4498 }
4499 
4500 /*
4501  * qsort comparator to sort AttrDefault entries by adnum
4502  */
4503 static int
4504 AttrDefaultCmp(const void *a, const void *b)
4505 {
4506  const AttrDefault *ada = (const AttrDefault *) a;
4507  const AttrDefault *adb = (const AttrDefault *) b;
4508 
4509  return ada->adnum - adb->adnum;
4510 }
4511 
4512 /*
4513  * Load any check constraints for the relation.
4514  *
4515  * As with defaults, if we don't find the expected number of them, just warn
4516  * here. The executor should throw an error if an INSERT/UPDATE is attempted.
4517  */
/*
 * NOTE(review): embedded listing numbers 4519, 4531, 4546 and 4564 are absent
 * from this block.  The function-name line, the allocator call for the check
 * array, the declaration of "conform", and the start of the statement that
 * ends with NameStr(conform->conname) are therefore not visible.  Restore
 * them from the upstream file before building.
 */
4518 static void
4520 {
4521  ConstrCheck *check;
 /* expected number of check constraints, taken from the relation's pg_class data */
4522  int ncheck = relation->rd_rel->relchecks;
4523  Relation conrel;
4524  SysScanDesc conscan;
4525  ScanKeyData skey[1];
4526  HeapTuple htup;
 /* number of array slots filled so far; may end up smaller than ncheck */
4527  int found = 0;
4528 
4529  /* Allocate array with room for as many entries as expected */
4530  check = (ConstrCheck *)
4532  ncheck * sizeof(ConstrCheck));
4533 
4534  /* Search pg_constraint for relevant entries */
4535  ScanKeyInit(&skey[0],
4536  Anum_pg_constraint_conrelid,
4537  BTEqualStrategyNumber, F_OIDEQ,
4538  ObjectIdGetDatum(RelationGetRelid(relation)));
4539 
4540  conrel = table_open(ConstraintRelationId, AccessShareLock);
4541  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4542  NULL, 1, skey);
4543 
4544  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4545  {
4547  Datum val;
4548  bool isnull;
4549 
4550  /* We want check constraints only */
4551  if (conform->contype != CONSTRAINT_CHECK)
4552  continue;
4553 
4554  /* protect limited size of array */
4555  if (found >= ncheck)
4556  {
4557  elog(WARNING, "unexpected pg_constraint record found for relation \"%s\"",
4558  RelationGetRelationName(relation));
 /* stop scanning: any further rows are ignored */
4559  break;
4560  }
4561 
 /* slot "found" is filled in before conbin is checked; it only counts once "found" is incremented below */
4562  check[found].ccvalid = conform->convalidated;
4563  check[found].ccnoinherit = conform->connoinherit;
4565  NameStr(conform->conname));
4566 
4567  /* Grab and test conbin is actually set */
4568  val = fastgetattr(htup,
4569  Anum_pg_constraint_conbin,
4570  conrel->rd_att, &isnull);
 /* a null conbin only warns; "found" is not advanced, so the slot is reused by the next row */
4571  if (isnull)
4572  elog(WARNING, "null conbin for relation \"%s\"",
4573  RelationGetRelationName(relation));
4574  else
4575  {
4576  /* detoast and convert to cstring in caller's context */
4577  char *s = TextDatumGetCString(val);
4578 
 /* keep a copy in CacheMemoryContext, then free the temporary string */
4579  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4580  pfree(s);
4581  found++;
4582  }
4583  }
4584 
4585  systable_endscan(conscan);
4586  table_close(conrel, AccessShareLock);
4587 
 /* a count mismatch is only a WARNING; see the header comment for why */
4588  if (found != ncheck)
4589  elog(WARNING, "%d pg_constraint record(s) missing for relation \"%s\"",
4590  ncheck - found, RelationGetRelationName(relation));
4591 
4592  /*
4593  * Sort the records by name. This ensures that CHECKs are applied in a
4594  * deterministic order, and it also makes equalTupleDescs() faster.
4595  */
4596  if (found > 1)
4597  qsort(check, found, sizeof(ConstrCheck), CheckConstraintCmp);
4598 
4599  /* Install array only after it's fully valid */
 /* num_check records how many entries were actually loaded, not ncheck */
4600  relation->rd_att->constr->check = check;
4601  relation->rd_att->constr->num_check = found;
4602 }
4603 
4604 /*
4605  * qsort comparator to sort ConstrCheck entries by name
4606  */
4607 static int
4608 CheckConstraintCmp(const void *a, const void *b)
4609 {
4610  const ConstrCheck *ca = (const ConstrCheck *) a;
4611  const ConstrCheck *cb = (const ConstrCheck *) b;
4612 
4613  return strcmp(ca->ccname, cb->ccname);
4614 }
4615 
4616 /*
4617  * RelationGetFKeyList -- get a list of foreign key info for the relation
4618  *
4619  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4620  * the given relation. This data is a direct copy of relevant fields from
4621  * pg_constraint. The list items are in no particular order.
4622  *
4623  * CAUTION: the returned list is part of the relcache's data, and could
4624  * vanish in a relcache entry reset. Callers must inspect or copy it
4625  * before doing anything that might trigger a cache flush, such as
4626  * system catalog accesses. copyObject() can be used if desired.
4627  * (We define it this way because current callers want to filter and
4628  * modify the list entries anyway, so copying would be a waste of time.)
4629  */
/*
 * NOTE(review): embedded listing numbers 4631 and 4696 are absent from this
 * block, so the function-name/parameter line and the statement that assigns
 * oldcxt are not visible.  As listed, oldcxt is read without being set.
 * Restore the dropped lines from the upstream file before building.
 */
4630 List *
4632 {
4633  List *result;
4634  Relation conrel;
4635  SysScanDesc conscan;
4636  ScanKeyData skey;
4637  HeapTuple htup;
4638  List *oldlist;
4639  MemoryContext oldcxt;
4640 
4641  /* Quick exit if we already computed the list. */
 /* this path hands back the relcache's own list, not a copy (see CAUTION in the header comment) */
4642  if (relation->rd_fkeyvalid)
4643  return relation->rd_fkeylist;
4644 
4645  /* Fast path: non-partitioned tables without triggers can't have FKs */
 /* note that this path returns without setting rd_fkeyvalid */
4646  if (!relation->rd_rel->relhastriggers &&
4647  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4648  return NIL;
4649 
4650  /*
4651  * We build the list we intend to return (in the caller's context) while
4652  * doing the scan. After successfully completing the scan, we copy that
4653  * list into the relcache entry. This avoids cache-context memory leakage
4654  * if we get some sort of error partway through.
4655  */
4656  result = NIL;
4657 
4658  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4659  ScanKeyInit(&skey,
4660  Anum_pg_constraint_conrelid,
4661  BTEqualStrategyNumber, F_OIDEQ,
4662  ObjectIdGetDatum(RelationGetRelid(relation)));
4663 
4664  conrel = table_open(ConstraintRelationId, AccessShareLock);
4665  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4666  NULL, 1, &skey);
4667 
4668  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4669  {
4670  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4671  ForeignKeyCacheInfo *info;
4672 
4673  /* consider only foreign keys */
4674  if (constraint->contype != CONSTRAINT_FOREIGN)
4675  continue;
4676 
4677  info = makeNode(ForeignKeyCacheInfo);
4678  info->conoid = constraint->oid;
4679  info->conrelid = constraint->conrelid;
4680  info->confrelid = constraint->confrelid;
4681 
 /* fill nkeys, conkey, confkey and conpfeqop from the row; the four outputs passed as NULL are not requested */
4682  DeconstructFkConstraintRow(htup, &info->nkeys,
4683  info->conkey,
4684  info->confkey,
4685  info->conpfeqop,
4686  NULL, NULL, NULL, NULL);
4687 
4688  /* Add FK's node to the result list */
4689  result = lappend(result, info);
4690  }
4691 
4692  systable_endscan(conscan);
4693  table_close(conrel, AccessShareLock);
4694 
4695  /* Now save a copy of the completed list in the relcache entry. */
 /* remember the previous list so it can be freed once the new one is installed */
4697  oldlist = relation->rd_fkeylist;
4698  relation->rd_fkeylist = copyObject(result);
4699  relation->rd_fkeyvalid = true;
4700  MemoryContextSwitchTo(oldcxt);
4701 
4702  /* Don't leak the old list, if there is one */
4703  list_free_deep(oldlist);
4704 
 /* the freshly built list is returned here, not the copy stored in the relcache entry */
4705  return result;
4706 }
4707 
4708 /*
4709  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4710  *
4711  * The index list is created only if someone requests it. We scan pg_index
4712  * to find relevant indexes, and add the list to the relcache entry so that
4713  * we won't have to compute it again. Note that shared cache inval of a
4714  * relcache entry will delete the old list and set rd_indexvalid to false,
4715  * so that we must recompute the index list on next request. This handles
4716  * creation or deletion of an index.
4717  *
4718  * Indexes that are marked not indislive are omitted from the returned list.
4719  * Such indexes are expected to be dropped momentarily, and should not be
4720  * touched at all by any caller of this function.
4721  *
4722  * The returned list is guaranteed to be sorted in order by OID. This is
4723  * needed by the executor, since for index types that we obtain exclusive
4724  * locks on when updating the index, all backends must lock the indexes in
4725  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4726  * consistent ordering would do, but ordering by OID is easy.
4727  *
4728  * Since shared cache inval causes the relcache's copy of the list to go away,
4729  * we return a copy of the list palloc'd in the caller's context. The caller
4730  * may list_free() the returned list after scanning it. This is necessary
4731  * since the caller will typically be doing syscache lookups on the relevant
4732  * indexes, and syscache lookup could cause SI messages to be processed!
4733  *
4734  * In exactly the same way, we update rd_pkindex, which is the OID of the
4735  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4736  * which is the pg_class OID of an index to be used as the relation's
4737  * replication identity index, or InvalidOid if there is no such index.
4738  */
4739 List *
4741 {
4742  Relation indrel;
4743  SysScanDesc indscan;
4744  ScanKeyData skey;
4745  HeapTuple htup;
4746  List *result;
4747  List *oldlist;
4748  char replident = relation->rd_rel->relreplident;
4749  Oid pkeyIndex = InvalidOid;
4750  Oid candidateIndex = InvalidOid;
4751  MemoryContext oldcxt;
4752 
4753  /* Quick exit if we already computed the list. */
4754  if (relation->rd_indexvalid)
4755  return list_copy(relation->rd_indexlist);
4756 
4757  /*
4758  * We build the list we intend to return (in the caller's context) while
4759  * doing the scan. After successfully completing the scan, we copy that
4760  * list into the relcache entry. This avoids cache-context memory leakage
4761  * if we get some sort of error partway through.
4762  */
4763  result = NIL;
4764 
4765  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4766  ScanKeyInit(&skey,
4767  Anum_pg_index_indrelid,
4768  BTEqualStrategyNumber, F_OIDEQ,
4769  ObjectIdGetDatum(RelationGetRelid(relation)));
4770 
4771  indrel = table_open(IndexRelationId, AccessShareLock);
4772  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4773  NULL, 1, &skey);
4774 
4775  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4776  {
4778 
4779  /*
4780  * Ignore any indexes that are currently being dropped. This will
4781  * prevent them from being searched, inserted into, or considered in
4782  * HOT-safety decisions. It's unsafe to touch such an index at all
4783  * since its catalog entries could disappear at any instant.
4784  */
4785  if (!index->indislive)
4786  continue;
4787 
4788  /* add index's OID to result list */
4789  result = lappend_oid(result, index->indexrelid);
4790 
4791  /*
4792  * Non-unique, non-immediate or predicate indexes aren't interesting
4793  * for either oid indexes or replication identity indexes, so don't
4794  * check them.
4795  */
4796  if (!index->indisunique ||
4797  !index->indimmediate ||
4798  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4799  continue;
4800 
4801  /*
4802  * Remember primary key index, if any. We do this only if the index
4803  * is valid; but if the table is partitioned, then we do it even if
4804  * it's invalid.
4805  *
4806  * The reason for returning invalid primary keys for foreign tables is
4807  * because of pg_dump of NOT NULL constraints, and the fact that PKs
4808  * remain marked invalid until the partitions' PKs are attached to it.
4809  * If we make rd_pkindex invalid, then the attnotnull flag is reset
4810  * after the PK is created, which causes the ALTER INDEX ATTACH
4811  * PARTITION to fail with 'column ... is not marked NOT NULL'. With
4812  * this, dropconstraint_internal() will believe that the columns must
4813  * not have attnotnull reset, so the PKs-on-partitions can be attached
4814  * correctly, until finally the PK-on-parent is marked valid.
4815  *
4816  * Also, this doesn't harm anything, because rd_pkindex is not a
4817  * "real" index anyway, but a RELKIND_PARTITIONED_INDEX.
4818  */
4819  if (index->indisprimary &&
4820  (index->indisvalid ||
4821  relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
4822  pkeyIndex = index->indexrelid;
4823 
4824  if (!index->indisvalid)
4825  continue;
4826 
4827  /* remember explicitly chosen replica index */
4828  if (index->indisreplident)
4829  candidateIndex = index->indexrelid;
4830  }
4831 
4832  systable_endscan(indscan);
4833 
4834  table_close(indrel, AccessShareLock);
4835 
4836  /* Sort the result list into OID order, per API spec. */
4837  list_sort(result, list_oid_cmp);
4838 
4839  /* Now save a copy of the completed list in the relcache entry. */
4841  oldlist = relation->rd_indexlist;
4842  relation->rd_indexlist = list_copy(result);
4843  relation->rd_pkindex = pkeyIndex;
4844  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4845  relation->rd_replidindex = pkeyIndex;
4846  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4847  relation->rd_replidindex = candidateIndex;
4848  else
4849  relation->rd_replidindex = InvalidOid;
4850  relation->rd_indexvalid = true;
4851  MemoryContextSwitchTo(oldcxt);
4852 
4853  /* Don't leak the old list, if there is one */
4854  list_free(oldlist);
4855 
4856  return result;
4857 }
4858 
4859 /*
4860  * RelationGetStatExtList
4861  * get a list of OIDs of statistics objects on this relation
4862  *
4863  * The statistics list is created only if someone requests it, in a way
4864  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4865  * relevant statistics, and add the list to the relcache entry so that we
4866  * won't have to compute it again. Note that shared cache inval of a
4867  * relcache entry will delete the old list and set rd_statvalid to 0,
4868  * so that we must recompute the statistics list on next request. This
4869  * handles creation or deletion of a statistics object.
4870  *
4871  * The returned list is guaranteed to be sorted in order by OID, although
4872  * this is not currently needed.
4873  *
4874  * Since shared cache inval causes the relcache's copy of the list to go away,
4875  * we return a copy of the list palloc'd in the caller's context. The caller
4876  * may list_free() the returned list after scanning it. This is necessary
4877  * since the caller will typically be doing syscache lookups on the relevant
4878  * statistics, and syscache lookup could cause SI messages to be processed!
4879  */
4880 List *
4882 {
4883  Relation indrel;
4884  SysScanDesc indscan;
4885  ScanKeyData skey;
4886  HeapTuple htup;
4887  List *result;
4888  List *oldlist;
4889  MemoryContext oldcxt;
4890 
4891  /* Quick exit if we already computed the list. */
4892  if (relation->rd_statvalid != 0)
4893  return list_copy(relation->rd_statlist);
4894 
4895  /*
4896  * We build the list we intend to return (in the caller's context) while
4897  * doing the scan. After successfully completing the scan, we copy that
4898  * list into the relcache entry. This avoids cache-context memory leakage
4899  * if we get some sort of error partway through.
4900  */
4901  result = NIL;
4902 
4903  /*
4904  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4905  * rel.
4906  */
4907  ScanKeyInit(&skey,
4908  Anum_pg_statistic_ext_stxrelid,
4909  BTEqualStrategyNumber, F_OIDEQ,
4910  ObjectIdGetDatum(RelationGetRelid(relation)));
4911 
4912  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4913  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4914  NULL, 1, &skey);
4915 
4916  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4917  {
4918  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4919 
4920  result = lappend_oid(result, oid);
4921  }
4922 
4923  systable_endscan(indscan);
4924 
4925  table_close(indrel, AccessShareLock);
4926 
4927  /* Sort the result list into OID order, per API spec. */
4928  list_sort(result, list_oid_cmp);
4929 
4930  /* Now save a copy of the completed list in the relcache entry. */
4932  oldlist = relation->rd_statlist;
4933  relation->rd_statlist = list_copy(result);
4934 
4935  relation->rd_statvalid = true;
4936  MemoryContextSwitchTo(oldcxt);
4937 
4938  /* Don't leak the old list, if there is one */
4939  list_free(oldlist);
4940 
4941  return result;
4942 }
4943 
4944 /*
4945  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4946  *
4947  * Returns InvalidOid if there is no such index.
4948  */
4949 Oid
4951 {
4952  List *ilist;
4953 
4954  if (!relation->rd_indexvalid)
4955  {
4956  /* RelationGetIndexList does the heavy lifting. */
4957  ilist = RelationGetIndexList(relation);
4958  list_free(ilist);
4959  Assert(relation->rd_indexvalid);
4960  }
4961 
4962  return relation->rd_pkindex;
4963 }
4964 
4965 /*
4966  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4967  *
4968  * Returns InvalidOid if there is no such index.
4969  */
4970 Oid
4972 {
4973  List *ilist;
4974 
4975  if (!relation->rd_indexvalid)
4976  {
4977  /* RelationGetIndexList does the heavy lifting. */
4978  ilist = RelationGetIndexList(relation);
4979  list_free(ilist);
4980  Assert(relation->rd_indexvalid);
4981  }
4982 
4983  return relation->rd_replidindex;
4984 }
4985 
4986 /*
4987  * RelationGetIndexExpressions -- get the index expressions for an index
4988  *
4989  * We cache the result of transforming pg_index.indexprs into a node tree.
4990  * If the rel is not an index or has no expressional columns, we return NIL.
4991  * Otherwise, the returned tree is copied into the caller's memory context.
4992  * (We don't want to return a pointer to the relcache copy, since it could
4993  * disappear due to relcache invalidation.)
4994  */
4995 List *
4997 {
4998  List *result;
4999  Datum exprsDatum;
5000  bool isnull;
5001  char *exprsString;
5002  MemoryContext oldcxt;
5003 
5004  /* Quick exit if we already computed the result. */
5005  if (relation->rd_indexprs)
5006  return copyObject(relation->rd_indexprs);
5007 
5008  /* Quick exit if there is nothing to do. */
5009  if (relation->rd_indextuple == NULL ||
5010  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
5011  return NIL;
5012 
5013  /*
5014  * We build the tree we intend to return in the caller's context. After
5015  * successfully completing the work, we copy it into the relcache entry.
5016  * This avoids problems if we get some sort of error partway through.
5017  */
5018  exprsDatum = heap_getattr(relation->rd_indextuple,
5019  Anum_pg_index_indexprs,
5021  &isnull);
5022  Assert(!isnull);
5023  exprsString = TextDatumGetCString(exprsDatum);
5024  result = (List *) stringToNode(exprsString);
5025  pfree(exprsString);
5026 
5027  /*
5028  * Run the expressions through eval_const_expressions. This is not just an
5029  * optimization, but is necessary, because the planner will be comparing
5030  * them to similarly-processed qual clauses, and may fail to detect valid
5031  * matches without this. We must not use canonicalize_qual, however,
5032  * since these aren't qual expressions.
5033  */
5034  result = (List *) eval_const_expressions(NULL, (Node *) result);
5035 
5036  /* May as well fix opfuncids too */
5037  fix_opfuncids((Node *) result);
5038 
5039  /* Now save a copy of the completed tree in the relcache entry. */
5040  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5041  relation->rd_indexprs = copyObject(result);
5042  MemoryContextSwitchTo(oldcxt);
5043 
5044  return result;
5045 }
5046 
5047 /*
5048  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
5049  *
5050  * Return a list of dummy expressions (just Const nodes) with the same
5051  * types/typmods/collations as the index's real expressions. This is
5052  * useful in situations where we don't want to run any user-defined code.
5053  */
5054 List *
5056 {
5057  List *result;
5058  Datum exprsDatum;
5059  bool isnull;
5060  char *exprsString;
5061  List *rawExprs;
5062  ListCell *lc;
5063 
5064  /* Quick exit if there is nothing to do. */
5065  if (relation->rd_indextuple == NULL ||
5066  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
5067  return NIL;
5068 
5069  /* Extract raw node tree(s) from index tuple. */
5070  exprsDatum = heap_getattr(relation->rd_indextuple,
5071  Anum_pg_index_indexprs,
5073  &isnull);
5074  Assert(!isnull);
5075  exprsString = TextDatumGetCString(exprsDatum);
5076  rawExprs = (List *) stringToNode(exprsString);
5077  pfree(exprsString);
5078 
5079  /* Construct null Consts; the typlen and typbyval are arbitrary. */
5080  result = NIL;
5081  foreach(lc, rawExprs)
5082  {
5083  Node *rawExpr = (Node *) lfirst(lc);
5084 
5085  result = lappend(result,
5086  makeConst(exprType(rawExpr),
5087  exprTypmod(rawExpr),
5088  exprCollation(rawExpr),
5089  1,
5090  (Datum) 0,
5091  true,
5092  true));
5093  }
5094 
5095  return result;
5096 }
5097 
5098 /*
5099  * RelationGetIndexPredicate -- get the index predicate for an index
5100  *
5101  * We cache the result of transforming pg_index.indpred into an implicit-AND
5102  * node tree (suitable for use in planning).
5103  * If the rel is not an index or has no predicate, we return NIL.
5104  * Otherwise, the returned tree is copied into the caller's memory context.
5105  * (We don't want to return a pointer to the relcache copy, since it could
5106  * disappear due to relcache invalidation.)
5107  */
5108 List *
5110 {
5111  List *result;
5112  Datum predDatum;
5113  bool isnull;
5114  char *predString;
5115  MemoryContext oldcxt;
5116 
5117  /* Quick exit if we already computed the result. */
5118  if (relation->rd_indpred)
5119  return copyObject(relation->rd_indpred);
5120 
5121  /* Quick exit if there is nothing to do. */
5122  if (relation->rd_indextuple == NULL ||
5123  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
5124  return NIL;
5125 
5126  /*
5127  * We build the tree we intend to return in the caller's context. After
5128  * successfully completing the work, we copy it into the relcache entry.
5129  * This avoids problems if we get some sort of error partway through.
5130  */
5131  predDatum = heap_getattr(relation->rd_indextuple,
5132  Anum_pg_index_indpred,
5134  &isnull);
5135  Assert(!isnull);
5136  predString = TextDatumGetCString(predDatum);
5137  result = (List *) stringToNode(predString);
5138  pfree(predString);
5139 
5140  /*
5141  * Run the expression through const-simplification and canonicalization.
5142  * This is not just an optimization, but is necessary, because the planner
5143  * will be comparing it to similarly-processed qual clauses, and may fail
5144  * to detect valid matches without this. This must match the processing
5145  * done to qual clauses in preprocess_expression()! (We can skip the
5146  * stuff involving subqueries, however, since we don't allow any in index
5147  * predicates.)
5148  */
5149  result = (List *) eval_const_expressions(NULL, (Node *) result);
5150 
5151  result = (List *) canonicalize_qual((Expr *) result, false);
5152 
5153  /* Also convert to implicit-AND format */
5154  result = make_ands_implicit((Expr *) result);
5155 
5156  /* May as well fix opfuncids too */
5157  fix_opfuncids((Node *) result);
5158 
5159  /* Now save a copy of the completed tree in the relcache entry. */
5160  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5161  relation->rd_indpred = copyObject(result);
5162  MemoryContextSwitchTo(oldcxt);
5163 
5164  return result;
5165 }
5166 
5167 /*
5168  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
5169  *
5170  * The result has a bit set for each attribute used anywhere in the index
5171  * definitions of all the indexes on this relation. (This includes not only
5172  * simple index keys, but attributes used in expressions and partial-index
5173  * predicates.)
5174  *
5175  * Depending on attrKind, a bitmap covering attnums for certain columns is
5176  * returned:
5177  * INDEX_ATTR_BITMAP_KEY Columns in non-partial unique indexes not
5178  * in expressions (i.e., usable for FKs)
5179  * INDEX_ATTR_BITMAP_PRIMARY_KEY Columns in the table's primary key
5180  * INDEX_ATTR_BITMAP_IDENTITY_KEY Columns in the table's replica identity
5181  * index (empty if FULL)
5182  * INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT
5183  * INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes
5184  *
5185  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
5186  * we can include system attributes (e.g., OID) in the bitmap representation.
5187  *
5188  * Caller had better hold at least RowExclusiveLock on the target relation
5189  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
5190  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
5191  * that lock level doesn't guarantee a stable set of indexes, so we have to
5192  * be prepared to retry here in case of a change in the set of indexes.
5193  *
5194  * The returned result is palloc'd in the caller's memory context and should
5195  * be bms_free'd when not needed anymore.
5196  */
5197 Bitmapset *
5199 {
5200  Bitmapset *uindexattrs; /* columns in unique indexes */
5201  Bitmapset *pkindexattrs; /* columns in the primary index */
5202  Bitmapset *idindexattrs; /* columns in the replica identity */
5203  Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */
5204  Bitmapset *summarizedattrs; /* columns with summarizing indexes */
5205  List *indexoidlist;
5206  List *newindexoidlist;
5207  Oid relpkindex;
5208  Oid relreplindex;
5209  ListCell *l;
5210  MemoryContext oldcxt;
5211 
5212  /* Quick exit if we already computed the result. */
5213  if (relation->rd_attrsvalid)
5214  {
5215  switch (attrKind)
5216  {
5217  case INDEX_ATTR_BITMAP_KEY:
5218  return bms_copy(relation->rd_keyattr);
5220  return bms_copy(relation->rd_pkattr);
5222  return bms_copy(relation->rd_idattr);
5224  return bms_copy(relation->rd_hotblockingattr);
5226  return bms_copy(relation->rd_summarizedattr);
5227  default:
5228  elog(ERROR, "unknown attrKind %u", attrKind);
5229  }
5230  }
5231 
5232  /* Fast path if definitely no indexes */
5233  if (!RelationGetForm(relation)->relhasindex)
5234  return NULL;
5235 
5236  /*
5237  * Get cached list of index OIDs. If we have to start over, we do so here.
5238  */
5239 restart:
5240  indexoidlist = RelationGetIndexList(relation);
5241 
5242  /* Fall out if no indexes (but relhasindex was set) */
5243  if (indexoidlist == NIL)
5244  return NULL;
5245 
5246  /*
5247  * Copy the rd_pkindex and rd_replidindex values computed by
5248  * RelationGetIndexList before proceeding. This is needed because a
5249  * relcache flush could occur inside index_open below, resetting the
5250  * fields managed by RelationGetIndexList. We need to do the work with
5251  * stable values of these fields.
5252  */
5253  relpkindex = relation->rd_pkindex;
5254  relreplindex = relation->rd_replidindex;
5255 
5256  /*
5257  * For each index, add referenced attributes to indexattrs.
5258  *
5259  * Note: we consider all indexes returned by RelationGetIndexList, even if
5260  * they are not indisready or indisvalid. This is important because an
5261  * index for which CREATE INDEX CONCURRENTLY has just started must be
5262  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
5263  * CONCURRENTLY is far enough along that we should ignore the index, it
5264  * won't be returned at all by RelationGetIndexList.
5265  */
5266  uindexattrs = NULL;
5267  pkindexattrs = NULL;
5268  idindexattrs = NULL;
5269  hotblockingattrs = NULL;
5270  summarizedattrs = NULL;
5271  foreach(l, indexoidlist)
5272  {
5273  Oid indexOid = lfirst_oid(l);
5274  Relation indexDesc;
5275  Datum datum;
5276  bool isnull;
5277  Node *indexExpressions;
5278  Node *indexPredicate;
5279  int i;
5280  bool isKey; /* candidate key */
5281  bool isPK; /* primary key */
5282  bool isIDKey; /* replica identity index */
5283  Bitmapset **attrs;
5284 
5285  indexDesc = index_open(indexOid, AccessShareLock);
5286 
5287  /*
5288  * Extract index expressions and index predicate. Note: Don't use
5289  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
5290  * those might run constant expressions evaluation, which needs a
5291  * snapshot, which we might not have here. (Also, it's probably more
5292  * sound to collect the bitmaps before any transformations that might
5293  * eliminate columns, but the practical impact of this is limited.)
5294  */
5295 
5296  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
5297  GetPgIndexDescriptor(), &isnull);
5298  if (!isnull)
5299  indexExpressions = stringToNode(TextDatumGetCString(datum));
5300  else
5301  indexExpressions = NULL;
5302 
5303  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
5304  GetPgIndexDescriptor(), &isnull);
5305  if (!isnull)
5306  indexPredicate = stringToNode(TextDatumGetCString(datum));
5307  else
5308  indexPredicate = NULL;
5309 
5310  /* Can this index be referenced by a foreign key? */
5311  isKey = indexDesc->rd_index->indisunique &&
5312  indexExpressions == NULL &&
5313  indexPredicate == NULL;
5314 
5315  /* Is this a primary key? */
5316  isPK = (indexOid == relpkindex);
5317 
5318  /* Is this index the configured (or default) replica identity? */
5319  isIDKey = (indexOid == relreplindex);
5320 
5321  /*
5322  * If the index is summarizing, it doesn't block HOT updates, but we
5323  * may still need to update it (if the attributes were modified). So
5324  * decide which bitmap we'll update in the following loop.
5325  */
5326  if (indexDesc->rd_indam->amsummarizing)
5327  attrs = &summarizedattrs;
5328  else
5329  attrs = &hotblockingattrs;
5330 
5331  /* Collect simple attribute references */
5332  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5333  {
5334  int attrnum = indexDesc->rd_index->indkey.values[i];
5335 
5336  /*
5337  * Since we have covering indexes with non-key columns, we must
5338  * handle them accurately here. non-key columns must be added into
5339  * hotblockingattrs or summarizedattrs, since they are in index,
5340  * and update shouldn't miss them.
5341  *
5342  * Summarizing indexes do not block HOT, but do need to be updated
5343  * when the column value changes, thus require a separate
5344  * attribute bitmapset.
5345  *
5346  * Obviously, non-key columns couldn't be referenced by foreign
5347  * key or identity key. Hence we do not include them into
5348  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
5349  */
5350  if (attrnum != 0)
5351  {
5352  *attrs = bms_add_member(*attrs,
5354 
5355  if (isKey && i < indexDesc->rd_index->indnkeyatts)
5356  uindexattrs = bms_add_member(uindexattrs,
5358 
5359  if (isPK && i < indexDesc->rd_index->indnkeyatts)
5360  pkindexattrs = bms_add_member(pkindexattrs,
5362 
5363  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
5364  idindexattrs = bms_add_member(idindexattrs,
5366  }
5367  }
5368 
5369  /* Collect all attributes used in expressions, too */
5370  pull_varattnos(indexExpressions, 1, attrs);
5371 
5372  /* Collect all attributes in the index predicate, too */
5373  pull_varattnos(indexPredicate, 1, attrs);
5374 
5375  index_close(indexDesc, AccessShareLock);
5376  }
5377 
5378  /*
5379  * During one of the index_opens in the above loop, we might have received
5380  * a relcache flush event on this relcache entry, which might have been
5381  * signaling a change in the rel's index list. If so, we'd better start
5382  * over to ensure we deliver up-to-date attribute bitmaps.
5383  */
5384  newindexoidlist = RelationGetIndexList(relation);
5385  if (equal(indexoidlist, newindexoidlist) &&
5386  relpkindex == relation->rd_pkindex &&
5387  relreplindex == relation->rd_replidindex)
5388  {
5389  /* Still the same index set, so proceed */
5390  list_free(newindexoidlist);
5391  list_free(indexoidlist);
5392  }
5393  else
5394  {
5395  /* Gotta do it over ... might as well not leak memory */
5396  list_free(newindexoidlist);
5397  list_free(indexoidlist);
5398  bms_free(uindexattrs);
5399  bms_free(pkindexattrs);
5400  bms_free(idindexattrs);
5401  bms_free(hotblockingattrs);
5402  bms_free(summarizedattrs);
5403 
5404  goto restart;
5405  }
5406 
5407  /* Don't leak the old values of these bitmaps, if any */
5408  relation->rd_attrsvalid = false;
5409  bms_free(relation->rd_keyattr);
5410  relation->rd_keyattr = NULL;
5411  bms_free(relation->rd_pkattr);
5412  relation->rd_pkattr = NULL;
5413  bms_free(relation->rd_idattr);
5414  relation->rd_idattr = NULL;
5415  bms_free(relation->rd_hotblockingattr);
5416  relation->rd_hotblockingattr = NULL;
5417  bms_free(relation->rd_summarizedattr);
5418  relation->rd_summarizedattr = NULL;
5419 
5420  /*
5421  * Now save copies of the bitmaps in the relcache entry. We intentionally
5422  * set rd_attrsvalid last, because that's the one that signals validity of
5423  * the values; if we run out of memory before making that copy, we won't
5424  * leave the relcache entry looking like the other ones are valid but
5425  * empty.
5426  */
5428  relation->rd_keyattr = bms_copy(uindexattrs);
5429  relation->rd_pkattr = bms_copy(pkindexattrs);
5430  relation->rd_idattr = bms_copy(idindexattrs);
5431  relation->rd_hotblockingattr = bms_copy(hotblockingattrs);
5432  relation->rd_summarizedattr = bms_copy(summarizedattrs);
5433  relation->rd_attrsvalid = true;
5434  MemoryContextSwitchTo(oldcxt);
5435 
5436  /* We return our original working copy for caller to play with */
5437  switch (attrKind)
5438  {
5439  case INDEX_ATTR_BITMAP_KEY:
5440  return uindexattrs;
5442  return pkindexattrs;
5444  return idindexattrs;
5446  return hotblockingattrs;
5448  return summarizedattrs;
5449  default:
5450  elog(ERROR, "unknown attrKind %u", attrKind);
5451  return NULL;
5452  }
5453 }
5454 
5455 /*
5456  * RelationGetIdentityKeyBitmap -- get a bitmap of replica identity attribute
5457  * numbers
5458  *
5459  * A bitmap of index attribute numbers for the configured replica identity
5460  * index is returned.
5461  *
5462  * See also comments of RelationGetIndexAttrBitmap().
5463  *
5464  * This is a special purpose function used during logical replication. Here,
5465  * unlike RelationGetIndexAttrBitmap(), we don't acquire a lock on the required
5466  * index as we build the cache entry using a historic snapshot and all the
5467  * later changes are absorbed while decoding WAL. Due to this reason, we don't
5468  * need to retry here in case of a change in the set of indexes.
5469  */
5470 Bitmapset *
5472 {
5473  Bitmapset *idindexattrs = NULL; /* columns in the replica identity */
5474  Relation indexDesc;
5475  int i;
5476  Oid replidindex;
5477  MemoryContext oldcxt;
5478 
5479  /* Quick exit if we already computed the result */
5480  if (relation->rd_idattr != NULL)
5481  return bms_copy(relation->rd_idattr);
5482 
5483  /* Fast path if definitely no indexes */
5484  if (!RelationGetForm(relation)->relhasindex)
5485  return NULL;
5486 
5487  /* Historic snapshot must be set. */
5489 
5490  replidindex = RelationGetReplicaIndex(relation);
5491 
5492  /* Fall out if there is no replica identity index */
5493  if (!OidIsValid(replidindex))
5494  return NULL;
5495 
5496  /* Look up the description for the replica identity index */
5497  indexDesc = RelationIdGetRelation(replidindex);
5498 
5499  if (!RelationIsValid(indexDesc))
5500  elog(ERROR, "could not open relation with OID %u",
5501  relation->rd_replidindex);
5502 
5503  /* Add referenced attributes to idindexattrs */
5504  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5505  {
5506  int attrnum = indexDesc->rd_index->indkey.values[i];
5507 
5508  /*
5509  * We don't include non-key columns into idindexattrs bitmaps. See
5510  * RelationGetIndexAttrBitmap.
5511  */
5512  if (attrnum != 0)
5513  {
5514  if (i < indexDesc->rd_index->indnkeyatts)
5515  idindexattrs = bms_add_member(idindexattrs,
5517  }
5518  }
5519 
5520  RelationClose(indexDesc);
5521 
5522  /* Don't leak the old values of these bitmaps, if any */
5523  bms_free(relation->rd_idattr);
5524  relation->rd_idattr = NULL;
5525 
5526  /* Now save copy of the bitmap in the relcache entry */
5528  relation->rd_idattr = bms_copy(idindexattrs);
5529  MemoryContextSwitchTo(oldcxt);
5530 
5531  /* We return our original working copy for caller to play with */
5532  return idindexattrs;
5533 }
5534 
5535 /*
5536  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5537  *
5538  * This should be called only for an index that is known to have an
5539  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5540  * context) of the exclusion operator OIDs, their underlying functions'
5541  * OIDs, and their strategy numbers in the index's opclasses. We cache
5542  * all this information since it requires a fair amount of work to get.
5543  */
5544 void
5546  Oid **operators,
5547  Oid **procs,
5548  uint16 **strategies)
5549 {
5550  int indnkeyatts;
5551  Oid *ops;
5552  Oid *funcs;
5553  uint16 *strats;
5554  Relation conrel;
5555  SysScanDesc conscan;
5556  ScanKeyData skey[1];
5557  HeapTuple htup;
5558  bool found;
5559  MemoryContext oldcxt;
5560  int i;
5561 
5562  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5563 
5564  /* Allocate result space in caller context */
5565  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5566  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5567  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5568 
5569  /* Quick exit if we have the data cached already */
5570  if (indexRelation->rd_exclstrats != NULL)
5571  {
5572  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5573  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5574  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5575  return;
5576  }
5577 
5578  /*
5579  * Search pg_constraint for the constraint associated with the index. To
5580  * make this not too painfully slow, we use the index on conrelid; that
5581  * will hold the parent relation's OID not the index's own OID.
5582  *
5583  * Note: if we wanted to rely on the constraint name matching the index's
5584  * name, we could just do a direct lookup using pg_constraint's unique
5585  * index. For the moment it doesn't seem worth requiring that.
5586  */
5587  ScanKeyInit(&skey[0],
5588  Anum_pg_constraint_conrelid,
5589  BTEqualStrategyNumber, F_OIDEQ,
5590  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5591 
5592  conrel = table_open(ConstraintRelationId, AccessShareLock);
5593  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5594  NULL, 1, skey);
5595  found = false;
5596 
5597  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5598  {
5600  Datum val;
5601  bool isnull;
5602  ArrayType *arr;
5603  int nelem;
5604 
5605  /* We want the exclusion constraint owning the index */
5606  if (conform->contype != CONSTRAINT_EXCLUSION ||
5607  conform->conindid != RelationGetRelid(indexRelation))
5608  continue;
5609 
5610  /* There should be only one */
5611  if (found)
5612  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5613  RelationGetRelationName(indexRelation));
5614  found = true;
5615 
5616  /* Extract the operator OIDS from conexclop */
5617  val = fastgetattr(htup,
5618  Anum_pg_constraint_conexclop,
5619  conrel->rd_att, &isnull);
5620  if (isnull)
5621  elog(ERROR, "null conexclop for rel %s",
5622  RelationGetRelationName(indexRelation));
5623 
5624  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5625  nelem = ARR_DIMS(arr)[0];
5626  if (ARR_NDIM(arr) != 1 ||
5627  nelem != indnkeyatts ||
5628  ARR_HASNULL(arr) ||
5629  ARR_ELEMTYPE(arr) != OIDOID)
5630  elog(ERROR, "conexclop is not a 1-D Oid array");
5631 
5632  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5633  }
5634 
5635  systable_endscan(conscan);
5636  table_close(conrel, AccessShareLock);
5637 
5638  if (!found)
5639  elog(ERROR, "exclusion constraint record missing for rel %s",
5640  RelationGetRelationName(indexRelation));
5641 
5642  /* We need the func OIDs and strategy numbers too */
5643  for (i = 0; i < indnkeyatts; i++)
5644  {
5645  funcs[i] = get_opcode(ops[i]);
5646  strats[i] = get_op_opfamily_strategy(ops[i],
5647  indexRelation->rd_opfamily[i]);
5648  /* shouldn't fail, since it was checked at index creation */
5649  if (strats[i] == InvalidStrategy)
5650  elog(ERROR, "could not find strategy for operator %u in family %u",
5651  ops[i], indexRelation->rd_opfamily[i]);
5652  }
5653 
5654  /* Save a copy of the results in the relcache entry. */
5655  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5656  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5657  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5658  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5659  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5660  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5661  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5662  MemoryContextSwitchTo(oldcxt);
5663 }
5664 
5665 /*
5666  * Get the publication information for the given relation.
5667  *
5668  * Traverse all the publications which the relation is in to get the
5669  * publication actions and validate the row filter expressions for such
5670  * publications if any. We consider the row filter expression as invalid if it
5671  * references any column which is not part of REPLICA IDENTITY.
5672  *
5673  * To avoid fetching the publication information repeatedly, we cache the
5674  * publication actions and row filter validation information.
5675  */
5676 void
5678 {
5679  List *puboids;
5680  ListCell *lc;
5681  MemoryContext oldcxt;
5682  Oid schemaid;
5683  List *ancestors = NIL;
5684  Oid relid = RelationGetRelid(relation);
5685 
5686  /*
5687  * If not publishable, it publishes no actions. (pgoutput_change() will
5688  * ignore it.)
5689  */
5690  if (!is_publishable_relation(relation))
5691  {
5692  memset(pubdesc, 0, sizeof(PublicationDesc));
5693  pubdesc->rf_valid_for_update = true;
5694  pubdesc->rf_valid_for_delete = true;
5695  pubdesc->cols_valid_for_update = true;
5696  pubdesc->cols_valid_for_delete = true;
5697  return;
5698  }
5699 
5700  if (relation->rd_pubdesc)
5701  {
5702  memcpy(pubdesc, relation->rd_pubdesc, sizeof(PublicationDesc));
5703  return;
5704  }
5705 
5706  memset(pubdesc, 0, sizeof(PublicationDesc));
5707  pubdesc->rf_valid_for_update = true;
5708  pubdesc->rf_valid_for_delete = true;
5709  pubdesc->cols_valid_for_update = true;
5710  pubdesc->cols_valid_for_delete = true;
5711 
5712  /* Fetch the publication membership info. */
5713  puboids = GetRelationPublications(relid);
5714  schemaid = RelationGetNamespace(relation);
5715  puboids = list_concat_unique_oid(puboids, GetSchemaPublications(schemaid));
5716 
5717  if (relation->rd_rel->relispartition)
5718  {
5719  /* Add publications that the ancestors are in too. */
5720  ancestors = get_partition_ancestors(relid);
5721 
5722  foreach(lc, ancestors)
5723  {
5724  Oid ancestor = lfirst_oid(lc);
5725 
5726  puboids = list_concat_unique_oid(puboids,
5727  GetRelationPublications(ancestor));
5728  schemaid = get_rel_namespace(ancestor);
5729  puboids = list_concat_unique_oid(puboids,
5730  GetSchemaPublications(schemaid));
5731  }
5732  }
5733  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5734 
5735  foreach(lc, puboids)
5736  {
5737  Oid pubid = lfirst_oid(lc);
5738  HeapTuple tup;
5739  Form_pg_publication pubform;
5740 
5742 
5743  if (!HeapTupleIsValid(tup))
5744  elog(ERROR, "cache lookup failed for publication %u", pubid);
5745 
5746  pubform = (Form_pg_publication) GETSTRUCT(tup);
5747 
5748  pubdesc->pubactions.pubinsert |= pubform->pubinsert;
5749  pubdesc->pubactions.pubupdate |= pubform->pubupdate;
5750  pubdesc->pubactions.pubdelete |= pubform->pubdelete;
5751  pubdesc->pubactions.pubtruncate |= pubform->pubtruncate;
5752 
5753  /*
5754  * Check if all columns referenced in the filter expression are part
5755  * of the REPLICA IDENTITY index or not.
5756  *
5757  * If the publication is FOR ALL TABLES then it means the table has no
5758  * row filters and we can skip the validation.
5759  */
5760  if (!pubform->puballtables &&
5761  (pubform->pubupdate || pubform->pubdelete) &&
5762  pub_rf_contains_invalid_column(pubid, relation, ancestors,
5763  pubform->pubviaroot))
5764  {
5765  if (pubform->pubupdate)
5766  pubdesc->rf_valid_for_update = false;
5767  if (pubform->pubdelete)
5768  pubdesc->rf_valid_for_delete = false;
5769  }
5770 
5771  /*
5772  * Check if all columns are part of the REPLICA IDENTITY index or not.
5773  *
5774  * If the publication is FOR ALL TABLES then it means the table has no
5775  * column list and we can skip the validation.
5776  */
5777  if (!pubform->puballtables &&
5778  (pubform->pubupdate || pubform->pubdelete) &&
5779  pub_collist_contains_invalid_column(pubid, relation, ancestors,
5780  pubform->pubviaroot))
5781  {
5782  if (pubform->pubupdate)
5783  pubdesc->cols_valid_for_update = false;
5784  if (pubform->pubdelete)
5785  pubdesc->cols_valid_for_delete = false;
5786  }
5787 
5788  ReleaseSysCache(tup);
5789 
5790  /*
5791  * If we know everything is replicated and the row filter is invalid
5792  * for update and delete, there is no point to check for other
5793  * publications.
5794  */
5795  if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
5796  pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
5797  !pubdesc->rf_valid_for_update && !pubdesc->rf_valid_for_delete)
5798  break;
5799 
5800  /*
5801  * If we know everything is replicated and the column list is invalid
5802  * for update and delete, there is no point to check for other
5803  * publications.
5804  */
5805  if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
5806  pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
5807  !pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
5808  break;
5809  }
5810 
5811  if (relation->rd_pubdesc)
5812  {
5813  pfree(relation->rd_pubdesc);
5814  relation->rd_pubdesc = NULL;
5815  }
5816 
5817  /* Now save copy of the descriptor in the relcache entry. */
5819  relation->rd_pubdesc = palloc(sizeof(PublicationDesc));
5820  memcpy(relation->rd_pubdesc, pubdesc, sizeof(PublicationDesc));
5821  MemoryContextSwitchTo(oldcxt);
5822 }
5823 
5824 /*
5825  * RelationGetIndexRawAttOptions -- get AM/opclass-specific options for the index
5826  */
5827