PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/table.h"
39 #include "access/tableam.h"
40 #include "access/tupdesc_details.h"
41 #include "access/xact.h"
42 #include "access/xlog.h"
43 #include "catalog/catalog.h"
44 #include "catalog/indexing.h"
45 #include "catalog/namespace.h"
46 #include "catalog/partition.h"
47 #include "catalog/pg_am.h"
48 #include "catalog/pg_amproc.h"
49 #include "catalog/pg_attrdef.h"
51 #include "catalog/pg_authid.h"
52 #include "catalog/pg_constraint.h"
53 #include "catalog/pg_database.h"
54 #include "catalog/pg_namespace.h"
55 #include "catalog/pg_opclass.h"
57 #include "catalog/pg_proc.h"
58 #include "catalog/pg_publication.h"
59 #include "catalog/pg_rewrite.h"
60 #include "catalog/pg_shseclabel.h"
63 #include "catalog/pg_tablespace.h"
64 #include "catalog/pg_trigger.h"
65 #include "catalog/pg_type.h"
66 #include "catalog/schemapg.h"
67 #include "catalog/storage.h"
68 #include "commands/policy.h"
69 #include "commands/trigger.h"
70 #include "miscadmin.h"
71 #include "nodes/makefuncs.h"
72 #include "nodes/nodeFuncs.h"
73 #include "optimizer/optimizer.h"
75 #include "partitioning/partdesc.h"
76 #include "rewrite/rewriteDefine.h"
77 #include "rewrite/rowsecurity.h"
78 #include "storage/lmgr.h"
79 #include "storage/smgr.h"
80 #include "utils/array.h"
81 #include "utils/builtins.h"
82 #include "utils/datum.h"
83 #include "utils/fmgroids.h"
84 #include "utils/inval.h"
85 #include "utils/lsyscache.h"
86 #include "utils/memutils.h"
87 #include "utils/partcache.h"
88 #include "utils/relmapper.h"
89 #include "utils/resowner_private.h"
90 #include "utils/snapmgr.h"
91 #include "utils/syscache.h"
92 
93 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
94 
95 /*
96  * Default policy for whether to apply RECOVER_RELATION_BUILD_MEMORY:
97  * do so in clobber-cache builds but not otherwise. This choice can be
98  * overridden at compile time with -DRECOVER_RELATION_BUILD_MEMORY=1 or =0.
99  */
100 #ifndef RECOVER_RELATION_BUILD_MEMORY
101 #if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)
102 #define RECOVER_RELATION_BUILD_MEMORY 1
103 #else
104 #define RECOVER_RELATION_BUILD_MEMORY 0
105 #endif
106 #endif
107 
108 /*
109  * hardcoded tuple descriptors, contents generated by genbki.pl
110  */
111 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
112 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
113 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
114 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
115 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
116 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
117 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
118 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
119 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
120 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
121 
122 /*
123  * Hash tables that index the relation cache
124  *
125  * We used to index the cache by both name and OID, but now there
126  * is only an index by OID.
127  */
128 typedef struct relidcacheent
129 {
132 } RelIdCacheEnt;
133 
135 
136 /*
137  * This flag is false until we have prepared the critical relcache entries
138  * that are needed to do indexscans on the tables read by relcache building.
139  */
141 
142 /*
143  * This flag is false until we have prepared the critical relcache entries
144  * for shared catalogs (which are the tables needed for login).
145  */
147 
148 /*
149  * This counter counts relcache inval events received since backend startup
150  * (but only for rels that are actually in cache). Presently, we use it only
151  * to detect whether data about to be written by write_relcache_init_file()
152  * might already be obsolete.
153  */
154 static long relcacheInvalsReceived = 0L;
155 
156 /*
157  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
158  * cleanup work. This list intentionally has limited size; if it overflows,
159  * we fall back to scanning the whole hashtable. There is no value in a very
160  * large list because (1) at some point, a hash_seq_search scan is faster than
161  * retail lookups, and (2) the value of this is to reduce EOXact work for
162  * short transactions, which can't have dirtied all that many tables anyway.
163  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
164  * cleanup processing must be idempotent.
165  */
166 #define MAX_EOXACT_LIST 32
168 static int eoxact_list_len = 0;
169 static bool eoxact_list_overflowed = false;
170 
/*
 * EOXactListAdd
 *		Note that "rel" may need end-of-transaction cleanup.
 *
 * While eoxact_list[] still has room, the relation's OID is appended to it.
 * Once the list is full, only the overflow flag is raised.  Duplicate entries
 * are not filtered out.
 */
#define EOXactListAdd(rel) \
	do { \
		if (eoxact_list_len >= MAX_EOXACT_LIST) \
			eoxact_list_overflowed = true; \
		else \
			eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
	} while (0)
178 
179 /*
180  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
181  * cleanup work. The array expands as needed; there is no hashtable because
182  * we don't need to access individual items except at EOXact.
183  */
185 static int NextEOXactTupleDescNum = 0;
186 static int EOXactTupleDescArrayLen = 0;
187 
188 /*
189  * macros to manipulate the lookup hashtable
190  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * When an entry for that OID is already present it is overwritten, and
 * replace_allowed is asserted.  The displaced descriptor is destroyed if its
 * reference count is zero; otherwise it is left in place (leaked) and, unless
 * we are in bootstrap mode, a WARNING is emitted.
 */
#define RelationCacheInsert(RELATION, replace_allowed) \
do { \
	bool		found; \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_ENTER, &found); \
	if (!found) \
		hentry->reldesc = (RELATION); \
	else \
	{ \
		/* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
		Relation	_old_rel = hentry->reldesc; \
		Assert(replace_allowed); \
		hentry->reldesc = (RELATION); \
		if (!RelationHasReferenceCountZero(_old_rel)) \
		{ \
			if (!IsBootstrapProcessingMode()) \
				elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
					 RelationGetRelationName(_old_rel)); \
		} \
		else \
			RelationDestroyRelation(_old_rel, false); \
	} \
} while(0)
212 
/*
 * RelationIdCacheLookup
 *		Probe RelationIdCache for the OID held in ID.
 *
 * RELATION is assigned the cached descriptor, or NULL when the hash table
 * has no entry for that OID.  ID must be an lvalue, since its address is
 * passed to hash_search().
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
														  (void *) &(ID), \
														  HASH_FIND, NULL); \
	RELATION = (hentry != NULL) ? hentry->reldesc : NULL; \
} while(0)
224 
/*
 * RelationCacheDelete
 *		Remove RELATION's entry (keyed by rd_id) from RelationIdCache.
 *
 * A missing entry is reported with a WARNING and is otherwise ignored.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
														  (void *) &((RELATION)->rd_id), \
														  HASH_REMOVE, NULL); \
	if (!hentry) \
		elog(WARNING, "failed to delete relcache entry for OID %u", \
			 (RELATION)->rd_id); \
} while(0)
235 
236 
237 /*
238  * Special cache for opclass-related information
239  *
240  * Note: only default support procs get cached, ie, those with
241  * lefttype = righttype = opcintype.
242  */
243 typedef struct opclasscacheent
244 {
245  Oid opclassoid; /* lookup key: OID of opclass */
246  bool valid; /* set true after successful fill-in */
247  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
248  Oid opcfamily; /* OID of opclass's family */
249  Oid opcintype; /* OID of opclass's declared input type */
250  RegProcedure *supportProcs; /* OIDs of support procedures */
252 
253 static HTAB *OpClassCache = NULL;
254 
255 
256 /* non-export function prototypes */
257 
258 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
259 static void RelationClearRelation(Relation relation, bool rebuild);
260 
261 static void RelationReloadIndexInfo(Relation relation);
262 static void RelationReloadNailed(Relation relation);
263 static void RelationFlushRelation(Relation relation);
265 static void AtEOXact_cleanup(Relation relation, bool isCommit);
266 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
267  SubTransactionId mySubid, SubTransactionId parentSubid);
268 static bool load_relcache_init_file(bool shared);
269 static void write_relcache_init_file(bool shared);
270 static void write_item(const void *data, Size len, FILE *fp);
271 
272 static void formrdesc(const char *relationName, Oid relationReltype,
273  bool isshared, int natts, const FormData_pg_attribute *attrs);
274 
275 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
277 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
278 static void RelationBuildTupleDesc(Relation relation);
279 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
280 static void RelationInitPhysicalAddr(Relation relation);
281 static void load_critical_index(Oid indexoid, Oid heapoid);
282 static TupleDesc GetPgClassDescriptor(void);
283 static TupleDesc GetPgIndexDescriptor(void);
284 static void AttrDefaultFetch(Relation relation);
285 static void CheckConstraintFetch(Relation relation);
286 static int CheckConstraintCmp(const void *a, const void *b);
287 static void InitIndexAmRoutine(Relation relation);
288 static void IndexSupportInitialize(oidvector *indclass,
289  RegProcedure *indexSupport,
290  Oid *opFamily,
291  Oid *opcInType,
292  StrategyNumber maxSupportNumber,
293  AttrNumber maxAttributeNumber);
294 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
295  StrategyNumber numSupport);
296 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
297 static void unlink_initfile(const char *initfilename, int elevel);
298 
299 
300 /*
301  * ScanPgRelation
302  *
303  * This is used by RelationBuildDesc to find a pg_class
304  * tuple matching targetRelId. The caller must hold at least
305  * AccessShareLock on the target relid to prevent concurrent-update
306  * scenarios; it isn't guaranteed that all scans used to build the
307  * relcache entry will use the same snapshot. If, for example,
308  * an attribute were to be added after scanning pg_class and before
309  * scanning pg_attribute, relnatts wouldn't match.
310  *
311  * NB: the returned tuple has been copied into palloc'd storage
312  * and must eventually be freed with heap_freetuple.
313  */
314 static HeapTuple
315 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
316 {
317  HeapTuple pg_class_tuple;
318  Relation pg_class_desc;
319  SysScanDesc pg_class_scan;
320  ScanKeyData key[1];
321  Snapshot snapshot;
322 
323  /*
324  * If something goes wrong during backend startup, we might find ourselves
325  * trying to read pg_class before we've selected a database. That ain't
326  * gonna work, so bail out with a useful error message. If this happens,
327  * it probably means a relcache entry that needs to be nailed isn't.
328  */
329  if (!OidIsValid(MyDatabaseId))
330  elog(FATAL, "cannot read pg_class without having selected a database");
331 
332  /*
333  * form a scan key
334  */
335  ScanKeyInit(&key[0],
336  Anum_pg_class_oid,
337  BTEqualStrategyNumber, F_OIDEQ,
338  ObjectIdGetDatum(targetRelId));
339 
340  /*
341  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
342  * built the critical relcache entries (this includes initdb and startup
343  * without a pg_internal.init file). The caller can also force a heap
344  * scan by setting indexOK == false.
345  */
346  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
347 
348  /*
349  * The caller might need a tuple that's newer than the one the historic
350  * snapshot; currently the only case requiring to do so is looking up the
351  * relfilenode of non mapped system relations during decoding.
352  */
353  if (force_non_historic)
354  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
355  else
356  snapshot = GetCatalogSnapshot(RelationRelationId);
357 
358  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
359  indexOK && criticalRelcachesBuilt,
360  snapshot,
361  1, key);
362 
363  pg_class_tuple = systable_getnext(pg_class_scan);
364 
365  /*
366  * Must copy tuple before releasing buffer.
367  */
368  if (HeapTupleIsValid(pg_class_tuple))
369  pg_class_tuple = heap_copytuple(pg_class_tuple);
370 
371  /* all done */
372  systable_endscan(pg_class_scan);
373  table_close(pg_class_desc, AccessShareLock);
374 
375  return pg_class_tuple;
376 }
377 
378 /*
379  * AllocateRelationDesc
380  *
381  * This is used to allocate memory for a new relation descriptor
382  * and initialize the rd_rel field from the given pg_class tuple.
383  */
384 static Relation
386 {
387  Relation relation;
388  MemoryContext oldcxt;
389  Form_pg_class relationForm;
390 
391  /* Relcache entries must live in CacheMemoryContext */
393 
394  /*
395  * allocate and zero space for new relation descriptor
396  */
397  relation = (Relation) palloc0(sizeof(RelationData));
398 
399  /* make sure relation is marked as having no open file yet */
400  relation->rd_smgr = NULL;
401 
402  /*
403  * Copy the relation tuple form
404  *
405  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
406  * variable-length fields (relacl, reloptions) are NOT stored in the
407  * relcache --- there'd be little point in it, since we don't copy the
408  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
409  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
410  * it from the syscache if you need it. The same goes for the original
411  * form of reloptions (however, we do store the parsed form of reloptions
412  * in rd_options).
413  */
414  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
415 
416  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
417 
418  /* initialize relation tuple form */
419  relation->rd_rel = relationForm;
420 
421  /* and allocate attribute tuple form storage */
422  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
423  /* which we mark as a reference-counted tupdesc */
424  relation->rd_att->tdrefcount = 1;
425 
426  MemoryContextSwitchTo(oldcxt);
427 
428  return relation;
429 }
430 
431 /*
432  * RelationParseRelOptions
433  * Convert pg_class.reloptions into pre-parsed rd_options
434  *
435  * tuple is the real pg_class tuple (not rd_rel!) for relation
436  *
437  * Note: rd_rel and (if an index) rd_indam must be valid already
438  */
439 static void
441 {
442  bytea *options;
443  amoptions_function amoptsfn;
444 
445  relation->rd_options = NULL;
446 
447  /*
448  * Look up any AM-specific parse function; fall out if relkind should not
449  * have options.
450  */
451  switch (relation->rd_rel->relkind)
452  {
453  case RELKIND_RELATION:
454  case RELKIND_TOASTVALUE:
455  case RELKIND_VIEW:
456  case RELKIND_MATVIEW:
457  case RELKIND_PARTITIONED_TABLE:
458  amoptsfn = NULL;
459  break;
460  case RELKIND_INDEX:
461  case RELKIND_PARTITIONED_INDEX:
462  amoptsfn = relation->rd_indam->amoptions;
463  break;
464  default:
465  return;
466  }
467 
468  /*
469  * Fetch reloptions from tuple; have to use a hardwired descriptor because
470  * we might not have any other for pg_class yet (consider executing this
471  * code for pg_class itself)
472  */
473  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
474 
475  /*
476  * Copy parsed data into CacheMemoryContext. To guard against the
477  * possibility of leaks in the reloptions code, we want to do the actual
478  * parsing in the caller's memory context and copy the results into
479  * CacheMemoryContext after the fact.
480  */
481  if (options)
482  {
484  VARSIZE(options));
485  memcpy(relation->rd_options, options, VARSIZE(options));
486  pfree(options);
487  }
488 }
489 
490 /*
491  * RelationBuildTupleDesc
492  *
493  * Form the relation's tuple descriptor from information in
494  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
495  */
496 static void
498 {
499  HeapTuple pg_attribute_tuple;
500  Relation pg_attribute_desc;
501  SysScanDesc pg_attribute_scan;
502  ScanKeyData skey[2];
503  int need;
504  TupleConstr *constr;
505  AttrDefault *attrdef = NULL;
506  AttrMissing *attrmiss = NULL;
507  int ndef = 0;
508 
509  /* copy some fields from pg_class row to rd_att */
510  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
511  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
512 
514  sizeof(TupleConstr));
515  constr->has_not_null = false;
516  constr->has_generated_stored = false;
517 
518  /*
519  * Form a scan key that selects only user attributes (attnum > 0).
520  * (Eliminating system attribute rows at the index level is lots faster
521  * than fetching them.)
522  */
523  ScanKeyInit(&skey[0],
524  Anum_pg_attribute_attrelid,
525  BTEqualStrategyNumber, F_OIDEQ,
527  ScanKeyInit(&skey[1],
528  Anum_pg_attribute_attnum,
529  BTGreaterStrategyNumber, F_INT2GT,
530  Int16GetDatum(0));
531 
532  /*
533  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
534  * built the critical relcache entries (this includes initdb and startup
535  * without a pg_internal.init file).
536  */
537  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
538  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
541  NULL,
542  2, skey);
543 
544  /*
545  * add attribute data to relation->rd_att
546  */
547  need = RelationGetNumberOfAttributes(relation);
548 
549  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
550  {
551  Form_pg_attribute attp;
552  int attnum;
553 
554  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
555 
556  attnum = attp->attnum;
557  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
558  elog(ERROR, "invalid attribute number %d for %s",
559  attp->attnum, RelationGetRelationName(relation));
560 
561 
562  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
563  attp,
565 
566  /* Update constraint/default info */
567  if (attp->attnotnull)
568  constr->has_not_null = true;
569  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
570  constr->has_generated_stored = true;
571 
572  /* If the column has a default, fill it into the attrdef array */
573  if (attp->atthasdef)
574  {
575  if (attrdef == NULL)
576  attrdef = (AttrDefault *)
579  sizeof(AttrDefault));
580  attrdef[ndef].adnum = attnum;
581  attrdef[ndef].adbin = NULL;
582 
583  ndef++;
584  }
585 
586  /* Likewise for a missing value */
587  if (attp->atthasmissing)
588  {
589  Datum missingval;
590  bool missingNull;
591 
592  /* Do we have a missing value? */
593  missingval = heap_getattr(pg_attribute_tuple,
594  Anum_pg_attribute_attmissingval,
595  pg_attribute_desc->rd_att,
596  &missingNull);
597  if (!missingNull)
598  {
599  /* Yes, fetch from the array */
600  MemoryContext oldcxt;
601  bool is_null;
602  int one = 1;
603  Datum missval;
604 
605  if (attrmiss == NULL)
606  attrmiss = (AttrMissing *)
608  relation->rd_rel->relnatts *
609  sizeof(AttrMissing));
610 
611  missval = array_get_element(missingval,
612  1,
613  &one,
614  -1,
615  attp->attlen,
616  attp->attbyval,
617  attp->attalign,
618  &is_null);
619  Assert(!is_null);
620  if (attp->attbyval)
621  {
622  /* for copy by val just copy the datum direct */
623  attrmiss[attnum - 1].am_value = missval;
624  }
625  else
626  {
627  /* otherwise copy in the correct context */
629  attrmiss[attnum - 1].am_value = datumCopy(missval,
630  attp->attbyval,
631  attp->attlen);
632  MemoryContextSwitchTo(oldcxt);
633  }
634  attrmiss[attnum - 1].am_present = true;
635  }
636  }
637  need--;
638  if (need == 0)
639  break;
640  }
641 
642  /*
643  * end the scan and close the attribute relation
644  */
645  systable_endscan(pg_attribute_scan);
646  table_close(pg_attribute_desc, AccessShareLock);
647 
648  if (need != 0)
649  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
650  need, RelationGetRelid(relation));
651 
652  /*
653  * The attcacheoff values we read from pg_attribute should all be -1
654  * ("unknown"). Verify this if assert checking is on. They will be
655  * computed when and if needed during tuple access.
656  */
657 #ifdef USE_ASSERT_CHECKING
658  {
659  int i;
660 
661  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
662  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
663  }
664 #endif
665 
666  /*
667  * However, we can easily set the attcacheoff value for the first
668  * attribute: it must be zero. This eliminates the need for special cases
669  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
670  */
671  if (RelationGetNumberOfAttributes(relation) > 0)
672  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
673 
674  /*
675  * Set up constraint/default info
676  */
677  if (constr->has_not_null || ndef > 0 ||
678  attrmiss || relation->rd_rel->relchecks)
679  {
680  relation->rd_att->constr = constr;
681 
682  if (ndef > 0) /* DEFAULTs */
683  {
684  if (ndef < RelationGetNumberOfAttributes(relation))
685  constr->defval = (AttrDefault *)
686  repalloc(attrdef, ndef * sizeof(AttrDefault));
687  else
688  constr->defval = attrdef;
689  constr->num_defval = ndef;
690  AttrDefaultFetch(relation);
691  }
692  else
693  constr->num_defval = 0;
694 
695  constr->missing = attrmiss;
696 
697  if (relation->rd_rel->relchecks > 0) /* CHECKs */
698  {
699  constr->num_check = relation->rd_rel->relchecks;
700  constr->check = (ConstrCheck *)
702  constr->num_check * sizeof(ConstrCheck));
703  CheckConstraintFetch(relation);
704  }
705  else
706  constr->num_check = 0;
707  }
708  else
709  {
710  pfree(constr);
711  relation->rd_att->constr = NULL;
712  }
713 }
714 
715 /*
716  * RelationBuildRuleLock
717  *
718  * Form the relation's rewrite rules from information in
719  * the pg_rewrite system catalog.
720  *
721  * Note: The rule parsetrees are potentially very complex node structures.
722  * To allow these trees to be freed when the relcache entry is flushed,
723  * we make a private memory context to hold the RuleLock information for
724  * each relcache entry that has associated rules. The context is used
725  * just for rule info, not for any other subsidiary data of the relcache
726  * entry, because that keeps the update logic in RelationClearRelation()
727  * manageable. The other subsidiary data structures are simple enough
728  * to be easy to free explicitly, anyway.
729  */
730 static void
732 {
733  MemoryContext rulescxt;
734  MemoryContext oldcxt;
735  HeapTuple rewrite_tuple;
736  Relation rewrite_desc;
737  TupleDesc rewrite_tupdesc;
738  SysScanDesc rewrite_scan;
740  RuleLock *rulelock;
741  int numlocks;
742  RewriteRule **rules;
743  int maxlocks;
744 
745  /*
746  * Make the private context. Assume it'll not contain much data.
747  */
749  "relation rules",
751  relation->rd_rulescxt = rulescxt;
753  RelationGetRelationName(relation));
754 
755  /*
756  * allocate an array to hold the rewrite rules (the array is extended if
757  * necessary)
758  */
759  maxlocks = 4;
760  rules = (RewriteRule **)
761  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
762  numlocks = 0;
763 
764  /*
765  * form a scan key
766  */
767  ScanKeyInit(&key,
768  Anum_pg_rewrite_ev_class,
769  BTEqualStrategyNumber, F_OIDEQ,
771 
772  /*
773  * open pg_rewrite and begin a scan
774  *
775  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
776  * be reading the rules in name order, except possibly during
777  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
778  * ensures that rules will be fired in name order.
779  */
780  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
781  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
782  rewrite_scan = systable_beginscan(rewrite_desc,
784  true, NULL,
785  1, &key);
786 
787  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
788  {
789  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
790  bool isnull;
791  Datum rule_datum;
792  char *rule_str;
793  RewriteRule *rule;
794 
795  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
796  sizeof(RewriteRule));
797 
798  rule->ruleId = rewrite_form->oid;
799 
800  rule->event = rewrite_form->ev_type - '0';
801  rule->enabled = rewrite_form->ev_enabled;
802  rule->isInstead = rewrite_form->is_instead;
803 
804  /*
805  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
806  * rule strings are often large enough to be toasted. To avoid
807  * leaking memory in the caller's context, do the detoasting here so
808  * we can free the detoasted version.
809  */
810  rule_datum = heap_getattr(rewrite_tuple,
811  Anum_pg_rewrite_ev_action,
812  rewrite_tupdesc,
813  &isnull);
814  Assert(!isnull);
815  rule_str = TextDatumGetCString(rule_datum);
816  oldcxt = MemoryContextSwitchTo(rulescxt);
817  rule->actions = (List *) stringToNode(rule_str);
818  MemoryContextSwitchTo(oldcxt);
819  pfree(rule_str);
820 
821  rule_datum = heap_getattr(rewrite_tuple,
822  Anum_pg_rewrite_ev_qual,
823  rewrite_tupdesc,
824  &isnull);
825  Assert(!isnull);
826  rule_str = TextDatumGetCString(rule_datum);
827  oldcxt = MemoryContextSwitchTo(rulescxt);
828  rule->qual = (Node *) stringToNode(rule_str);
829  MemoryContextSwitchTo(oldcxt);
830  pfree(rule_str);
831 
832  /*
833  * We want the rule's table references to be checked as though by the
834  * table owner, not the user referencing the rule. Therefore, scan
835  * through the rule's actions and set the checkAsUser field on all
836  * rtable entries. We have to look at the qual as well, in case it
837  * contains sublinks.
838  *
839  * The reason for doing this when the rule is loaded, rather than when
840  * it is stored, is that otherwise ALTER TABLE OWNER would have to
841  * grovel through stored rules to update checkAsUser fields. Scanning
842  * the rule tree during load is relatively cheap (compared to
843  * constructing it in the first place), so we do it here.
844  */
845  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
846  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
847 
848  if (numlocks >= maxlocks)
849  {
850  maxlocks *= 2;
851  rules = (RewriteRule **)
852  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
853  }
854  rules[numlocks++] = rule;
855  }
856 
857  /*
858  * end the scan and close the pg_rewrite relation
859  */
860  systable_endscan(rewrite_scan);
861  table_close(rewrite_desc, AccessShareLock);
862 
863  /*
864  * there might not be any rules (if relhasrules is out-of-date)
865  */
866  if (numlocks == 0)
867  {
868  relation->rd_rules = NULL;
869  relation->rd_rulescxt = NULL;
870  MemoryContextDelete(rulescxt);
871  return;
872  }
873 
874  /*
875  * form a RuleLock and insert into relation
876  */
877  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
878  rulelock->numLocks = numlocks;
879  rulelock->rules = rules;
880 
881  relation->rd_rules = rulelock;
882 }
883 
884 /*
885  * equalRuleLocks
886  *
887  * Determine whether two RuleLocks are equivalent
888  *
889  * Probably this should be in the rules code someplace...
890  */
891 static bool
893 {
894  int i;
895 
896  /*
897  * As of 7.3 we assume the rule ordering is repeatable, because
898  * RelationBuildRuleLock should read 'em in a consistent order. So just
899  * compare corresponding slots.
900  */
901  if (rlock1 != NULL)
902  {
903  if (rlock2 == NULL)
904  return false;
905  if (rlock1->numLocks != rlock2->numLocks)
906  return false;
907  for (i = 0; i < rlock1->numLocks; i++)
908  {
909  RewriteRule *rule1 = rlock1->rules[i];
910  RewriteRule *rule2 = rlock2->rules[i];
911 
912  if (rule1->ruleId != rule2->ruleId)
913  return false;
914  if (rule1->event != rule2->event)
915  return false;
916  if (rule1->enabled != rule2->enabled)
917  return false;
918  if (rule1->isInstead != rule2->isInstead)
919  return false;
920  if (!equal(rule1->qual, rule2->qual))
921  return false;
922  if (!equal(rule1->actions, rule2->actions))
923  return false;
924  }
925  }
926  else if (rlock2 != NULL)
927  return false;
928  return true;
929 }
930 
931 /*
932  * equalPolicy
933  *
934  * Determine whether two policies are equivalent
935  */
936 static bool
938 {
939  int i;
940  Oid *r1,
941  *r2;
942 
943  if (policy1 != NULL)
944  {
945  if (policy2 == NULL)
946  return false;
947 
948  if (policy1->polcmd != policy2->polcmd)
949  return false;
950  if (policy1->hassublinks != policy2->hassublinks)
951  return false;
952  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
953  return false;
954  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
955  return false;
956 
957  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
958  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
959 
960  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
961  {
962  if (r1[i] != r2[i])
963  return false;
964  }
965 
966  if (!equal(policy1->qual, policy2->qual))
967  return false;
968  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
969  return false;
970  }
971  else if (policy2 != NULL)
972  return false;
973 
974  return true;
975 }
976 
977 /*
978  * equalRSDesc
979  *
980  * Determine whether two RowSecurityDesc's are equivalent
981  */
982 static bool
984 {
985  ListCell *lc,
986  *rc;
987 
988  if (rsdesc1 == NULL && rsdesc2 == NULL)
989  return true;
990 
991  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
992  (rsdesc1 == NULL && rsdesc2 != NULL))
993  return false;
994 
995  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
996  return false;
997 
998  /* RelationBuildRowSecurity should build policies in order */
999  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1000  {
1003 
1004  if (!equalPolicy(l, r))
1005  return false;
1006  }
1007 
1008  return true;
1009 }
1010 
1011 /*
1012  * RelationBuildDesc
1013  *
1014  * Build a relation descriptor. The caller must hold at least
1015  * AccessShareLock on the target relid.
1016  *
1017  * The new descriptor is inserted into the hash table if insertIt is true.
1018  *
1019  * Returns NULL if no pg_class row could be found for the given relid
1020  * (suggesting we are trying to access a just-deleted relation).
1021  * Any other error is reported via elog.
1022  */
1023 static Relation
1024 RelationBuildDesc(Oid targetRelId, bool insertIt)
1025 {
1026  Relation relation;
1027  Oid relid;
1028  HeapTuple pg_class_tuple;
1029  Form_pg_class relp;
1030 
1031  /*
1032  * This function and its subroutines can allocate a good deal of transient
1033  * data in CurrentMemoryContext. Traditionally we've just leaked that
1034  * data, reasoning that the caller's context is at worst of transaction
1035  * scope, and relcache loads shouldn't happen so often that it's essential
1036  * to recover transient data before end of statement/transaction. However
1037  * that's definitely not true in clobber-cache test builds, and perhaps
1038  * it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not
1039  * zero, arrange to allocate the junk in a temporary context that we'll
1040  * free before returning. Make it a child of caller's context so that it
1041  * will get cleaned up appropriately if we error out partway through.
1042  */
1043 #if RECOVER_RELATION_BUILD_MEMORY
1044  MemoryContext tmpcxt;
1045  MemoryContext oldcxt;
1046 
1048  "RelationBuildDesc workspace",
1050  oldcxt = MemoryContextSwitchTo(tmpcxt);
1051 #endif
1052 
1053  /*
1054  * find the tuple in pg_class corresponding to the given relation id
1055  */
1056  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1057 
1058  /*
1059  * if no such tuple exists, return NULL
1060  */
1061  if (!HeapTupleIsValid(pg_class_tuple))
1062  {
1063 #if RECOVER_RELATION_BUILD_MEMORY
1064  /* Return to caller's context, and blow away the temporary context */
1065  MemoryContextSwitchTo(oldcxt);
1066  MemoryContextDelete(tmpcxt);
1067 #endif
1068  return NULL;
1069  }
1070 
1071  /*
1072  * get information from the pg_class_tuple
1073  */
1074  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1075  relid = relp->oid;
1076  Assert(relid == targetRelId);
1077 
1078  /*
1079  * allocate storage for the relation descriptor, and copy pg_class_tuple
1080  * to relation->rd_rel.
1081  */
1082  relation = AllocateRelationDesc(relp);
1083 
1084  /*
1085  * initialize the relation's relation id (relation->rd_id)
1086  */
1087  RelationGetRelid(relation) = relid;
1088 
1089  /*
1090  * normal relations are not nailed into the cache; nor can a pre-existing
1091  * relation be new. It could be temp though. (Actually, it could be new
1092  * too, but it's okay to forget that fact if forced to flush the entry.)
1093  */
1094  relation->rd_refcnt = 0;
1095  relation->rd_isnailed = false;
1098  switch (relation->rd_rel->relpersistence)
1099  {
1100  case RELPERSISTENCE_UNLOGGED:
1101  case RELPERSISTENCE_PERMANENT:
1102  relation->rd_backend = InvalidBackendId;
1103  relation->rd_islocaltemp = false;
1104  break;
1105  case RELPERSISTENCE_TEMP:
1106  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1107  {
1108  relation->rd_backend = BackendIdForTempRelations();
1109  relation->rd_islocaltemp = true;
1110  }
1111  else
1112  {
1113  /*
1114  * If it's a temp table, but not one of ours, we have to use
1115  * the slow, grotty method to figure out the owning backend.
1116  *
1117  * Note: it's possible that rd_backend gets set to MyBackendId
1118  * here, in case we are looking at a pg_class entry left over
1119  * from a crashed backend that coincidentally had the same
1120  * BackendId we're using. We should *not* consider such a
1121  * table to be "ours"; this is why we need the separate
1122  * rd_islocaltemp flag. The pg_class entry will get flushed
1123  * if/when we clean out the corresponding temp table namespace
1124  * in preparation for using it.
1125  */
1126  relation->rd_backend =
1127  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1128  Assert(relation->rd_backend != InvalidBackendId);
1129  relation->rd_islocaltemp = false;
1130  }
1131  break;
1132  default:
1133  elog(ERROR, "invalid relpersistence: %c",
1134  relation->rd_rel->relpersistence);
1135  break;
1136  }
1137 
1138  /*
1139  * initialize the tuple descriptor (relation->rd_att).
1140  */
1141  RelationBuildTupleDesc(relation);
1142 
1143  /*
1144  * Fetch rules and triggers that affect this relation
1145  */
1146  if (relation->rd_rel->relhasrules)
1147  RelationBuildRuleLock(relation);
1148  else
1149  {
1150  relation->rd_rules = NULL;
1151  relation->rd_rulescxt = NULL;
1152  }
1153 
1154  if (relation->rd_rel->relhastriggers)
1155  RelationBuildTriggers(relation);
1156  else
1157  relation->trigdesc = NULL;
1158 
1159  if (relation->rd_rel->relrowsecurity)
1160  RelationBuildRowSecurity(relation);
1161  else
1162  relation->rd_rsdesc = NULL;
1163 
1164  /* foreign key data is not loaded till asked for */
1165  relation->rd_fkeylist = NIL;
1166  relation->rd_fkeyvalid = false;
1167 
1168  /* if a partitioned table, initialize key and partition descriptor info */
1169  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1170  {
1171  RelationBuildPartitionKey(relation);
1172  RelationBuildPartitionDesc(relation);
1173  }
1174  else
1175  {
1176  relation->rd_partkey = NULL;
1177  relation->rd_partkeycxt = NULL;
1178  relation->rd_partdesc = NULL;
1179  relation->rd_pdcxt = NULL;
1180  }
1181  /* ... but partcheck is not loaded till asked for */
1182  relation->rd_partcheck = NIL;
1183  relation->rd_partcheckvalid = false;
1184  relation->rd_partcheckcxt = NULL;
1185 
1186  /*
1187  * initialize access method information
1188  */
1189  switch (relation->rd_rel->relkind)
1190  {
1191  case RELKIND_INDEX:
1192  case RELKIND_PARTITIONED_INDEX:
1193  Assert(relation->rd_rel->relam != InvalidOid);
1194  RelationInitIndexAccessInfo(relation);
1195  break;
1196  case RELKIND_RELATION:
1197  case RELKIND_TOASTVALUE:
1198  case RELKIND_MATVIEW:
1199  Assert(relation->rd_rel->relam != InvalidOid);
1201  break;
1202  case RELKIND_SEQUENCE:
1203  Assert(relation->rd_rel->relam == InvalidOid);
1205  break;
1206  case RELKIND_VIEW:
1207  case RELKIND_COMPOSITE_TYPE:
1208  case RELKIND_FOREIGN_TABLE:
1209  case RELKIND_PARTITIONED_TABLE:
1210  Assert(relation->rd_rel->relam == InvalidOid);
1211  break;
1212  }
1213 
1214  /* extract reloptions if any */
1215  RelationParseRelOptions(relation, pg_class_tuple);
1216 
1217  /*
1218  * initialize the relation lock manager information
1219  */
1220  RelationInitLockInfo(relation); /* see lmgr.c */
1221 
1222  /*
1223  * initialize physical addressing information for the relation
1224  */
1225  RelationInitPhysicalAddr(relation);
1226 
1227  /* make sure relation is marked as having no open file yet */
1228  relation->rd_smgr = NULL;
1229 
1230  /*
1231  * now we can free the memory allocated for pg_class_tuple
1232  */
1233  heap_freetuple(pg_class_tuple);
1234 
1235  /*
1236  * Insert newly created relation into relcache hash table, if requested.
1237  *
1238  * There is one scenario in which we might find a hashtable entry already
1239  * present, even though our caller failed to find it: if the relation is a
1240  * system catalog or index that's used during relcache load, we might have
1241  * recursively created the same relcache entry during the preceding steps.
1242  * So allow RelationCacheInsert to delete any already-present relcache
1243  * entry for the same OID. The already-present entry should have refcount
1244  * zero (else somebody forgot to close it); in the event that it doesn't,
1245  * we'll elog a WARNING and leak the already-present entry.
1246  */
1247  if (insertIt)
1248  RelationCacheInsert(relation, true);
1249 
1250  /* It's fully valid */
1251  relation->rd_isvalid = true;
1252 
1253 #if RECOVER_RELATION_BUILD_MEMORY
1254  /* Return to caller's context, and blow away the temporary context */
1255  MemoryContextSwitchTo(oldcxt);
1256  MemoryContextDelete(tmpcxt);
1257 #endif
1258 
1259  return relation;
1260 }
1261 
1262 /*
1263  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1264  *
1265  * Note: at the physical level, relations in the pg_global tablespace must
1266  * be treated as shared, even if relisshared isn't set. Hence we do not
1267  * look at relisshared here.
1268  */
1269 static void
1271 {
1272  /* these relations kinds never have storage */
1273  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1274  return;
1275 
1276  if (relation->rd_rel->reltablespace)
1277  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1278  else
1279  relation->rd_node.spcNode = MyDatabaseTableSpace;
1280  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1281  relation->rd_node.dbNode = InvalidOid;
1282  else
1283  relation->rd_node.dbNode = MyDatabaseId;
1284 
1285  if (relation->rd_rel->relfilenode)
1286  {
1287  /*
1288  * Even if we are using a decoding snapshot that doesn't represent the
1289  * current state of the catalog we need to make sure the filenode
1290  * points to the current file since the older file will be gone (or
1291  * truncated). The new file will still contain older rows so lookups
1292  * in them will work correctly. This wouldn't work correctly if
1293  * rewrites were allowed to change the schema in an incompatible way,
1294  * but those are prevented both on catalog tables and on user tables
1295  * declared as additional catalog tables.
1296  */
1299  && IsTransactionState())
1300  {
1301  HeapTuple phys_tuple;
1302  Form_pg_class physrel;
1303 
1304  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1305  RelationGetRelid(relation) != ClassOidIndexId,
1306  true);
1307  if (!HeapTupleIsValid(phys_tuple))
1308  elog(ERROR, "could not find pg_class entry for %u",
1309  RelationGetRelid(relation));
1310  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1311 
1312  relation->rd_rel->reltablespace = physrel->reltablespace;
1313  relation->rd_rel->relfilenode = physrel->relfilenode;
1314  heap_freetuple(phys_tuple);
1315  }
1316 
1317  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1318  }
1319  else
1320  {
1321  /* Consult the relation mapper */
1322  relation->rd_node.relNode =
1323  RelationMapOidToFilenode(relation->rd_id,
1324  relation->rd_rel->relisshared);
1325  if (!OidIsValid(relation->rd_node.relNode))
1326  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1327  RelationGetRelationName(relation), relation->rd_id);
1328  }
1329 }
1330 
1331 /*
1332  * Fill in the IndexAmRoutine for an index relation.
1333  *
1334  * relation's rd_amhandler and rd_indexcxt must be valid already.
1335  */
1336 static void
1338 {
1339  IndexAmRoutine *cached,
1340  *tmp;
1341 
1342  /*
1343  * Call the amhandler in current, short-lived memory context, just in case
1344  * it leaks anything (it probably won't, but let's be paranoid).
1345  */
1346  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1347 
1348  /* OK, now transfer the data into relation's rd_indexcxt. */
1349  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1350  sizeof(IndexAmRoutine));
1351  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1352  relation->rd_indam = cached;
1353 
1354  pfree(tmp);
1355 }
1356 
1357 /*
1358  * Initialize index-access-method support data for an index relation
1359  */
1360 void
1362 {
1363  HeapTuple tuple;
1364  Form_pg_am aform;
1365  Datum indcollDatum;
1366  Datum indclassDatum;
1367  Datum indoptionDatum;
1368  bool isnull;
1369  oidvector *indcoll;
1370  oidvector *indclass;
1371  int2vector *indoption;
1372  MemoryContext indexcxt;
1373  MemoryContext oldcontext;
1374  int indnatts;
1375  int indnkeyatts;
1376  uint16 amsupport;
1377 
1378  /*
1379  * Make a copy of the pg_index entry for the index. Since pg_index
1380  * contains variable-length and possibly-null fields, we have to do this
1381  * honestly rather than just treating it as a Form_pg_index struct.
1382  */
1383  tuple = SearchSysCache1(INDEXRELID,
1384  ObjectIdGetDatum(RelationGetRelid(relation)));
1385  if (!HeapTupleIsValid(tuple))
1386  elog(ERROR, "cache lookup failed for index %u",
1387  RelationGetRelid(relation));
1389  relation->rd_indextuple = heap_copytuple(tuple);
1390  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1391  MemoryContextSwitchTo(oldcontext);
1392  ReleaseSysCache(tuple);
1393 
1394  /*
1395  * Look up the index's access method, save the OID of its handler function
1396  */
1397  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1398  if (!HeapTupleIsValid(tuple))
1399  elog(ERROR, "cache lookup failed for access method %u",
1400  relation->rd_rel->relam);
1401  aform = (Form_pg_am) GETSTRUCT(tuple);
1402  relation->rd_amhandler = aform->amhandler;
1403  ReleaseSysCache(tuple);
1404 
1405  indnatts = RelationGetNumberOfAttributes(relation);
1406  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1407  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1408  RelationGetRelid(relation));
1409  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1410 
1411  /*
1412  * Make the private context to hold index access info. The reason we need
1413  * a context, and not just a couple of pallocs, is so that we won't leak
1414  * any subsidiary info attached to fmgr lookup records.
1415  */
1417  "index info",
1419  relation->rd_indexcxt = indexcxt;
1421  RelationGetRelationName(relation));
1422 
1423  /*
1424  * Now we can fetch the index AM's API struct
1425  */
1426  InitIndexAmRoutine(relation);
1427 
1428  /*
1429  * Allocate arrays to hold data. Opclasses are not used for included
1430  * columns, so allocate them for indnkeyatts only.
1431  */
1432  relation->rd_opfamily = (Oid *)
1433  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1434  relation->rd_opcintype = (Oid *)
1435  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1436 
1437  amsupport = relation->rd_indam->amsupport;
1438  if (amsupport > 0)
1439  {
1440  int nsupport = indnatts * amsupport;
1441 
1442  relation->rd_support = (RegProcedure *)
1443  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1444  relation->rd_supportinfo = (FmgrInfo *)
1445  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1446  }
1447  else
1448  {
1449  relation->rd_support = NULL;
1450  relation->rd_supportinfo = NULL;
1451  }
1452 
1453  relation->rd_indcollation = (Oid *)
1454  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1455 
1456  relation->rd_indoption = (int16 *)
1457  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1458 
1459  /*
1460  * indcollation cannot be referenced directly through the C struct,
1461  * because it comes after the variable-width indkey field. Must extract
1462  * the datum the hard way...
1463  */
1464  indcollDatum = fastgetattr(relation->rd_indextuple,
1465  Anum_pg_index_indcollation,
1467  &isnull);
1468  Assert(!isnull);
1469  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1470  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1471 
1472  /*
1473  * indclass cannot be referenced directly through the C struct, because it
1474  * comes after the variable-width indkey field. Must extract the datum
1475  * the hard way...
1476  */
1477  indclassDatum = fastgetattr(relation->rd_indextuple,
1478  Anum_pg_index_indclass,
1480  &isnull);
1481  Assert(!isnull);
1482  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1483 
1484  /*
1485  * Fill the support procedure OID array, as well as the info about
1486  * opfamilies and opclass input types. (aminfo and supportinfo are left
1487  * as zeroes, and are filled on-the-fly when used)
1488  */
1489  IndexSupportInitialize(indclass, relation->rd_support,
1490  relation->rd_opfamily, relation->rd_opcintype,
1491  amsupport, indnkeyatts);
1492 
1493  /*
1494  * Similarly extract indoption and copy it to the cache entry
1495  */
1496  indoptionDatum = fastgetattr(relation->rd_indextuple,
1497  Anum_pg_index_indoption,
1499  &isnull);
1500  Assert(!isnull);
1501  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1502  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1503 
1504  /*
1505  * expressions, predicate, exclusion caches will be filled later
1506  */
1507  relation->rd_indexprs = NIL;
1508  relation->rd_indpred = NIL;
1509  relation->rd_exclops = NULL;
1510  relation->rd_exclprocs = NULL;
1511  relation->rd_exclstrats = NULL;
1512  relation->rd_amcache = NULL;
1513 }
1514 
1515 /*
1516  * IndexSupportInitialize
1517  * Initializes an index's cached opclass information,
1518  * given the index's pg_index.indclass entry.
1519  *
1520  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1521  * which are arrays allocated by the caller.
1522  *
1523  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1524  * indicate the size of the arrays it has allocated --- but in practice these
1525  * numbers must always match those obtainable from the system catalog entries
1526  * for the index and access method.
1527  */
1528 static void
1530  RegProcedure *indexSupport,
1531  Oid *opFamily,
1532  Oid *opcInType,
1533  StrategyNumber maxSupportNumber,
1534  AttrNumber maxAttributeNumber)
1535 {
1536  int attIndex;
1537 
1538  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1539  {
1540  OpClassCacheEnt *opcentry;
1541 
1542  if (!OidIsValid(indclass->values[attIndex]))
1543  elog(ERROR, "bogus pg_index tuple");
1544 
1545  /* look up the info for this opclass, using a cache */
1546  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1547  maxSupportNumber);
1548 
1549  /* copy cached data into relcache entry */
1550  opFamily[attIndex] = opcentry->opcfamily;
1551  opcInType[attIndex] = opcentry->opcintype;
1552  if (maxSupportNumber > 0)
1553  memcpy(&indexSupport[attIndex * maxSupportNumber],
1554  opcentry->supportProcs,
1555  maxSupportNumber * sizeof(RegProcedure));
1556  }
1557 }
1558 
1559 /*
1560  * LookupOpclassInfo
1561  *
1562  * This routine maintains a per-opclass cache of the information needed
1563  * by IndexSupportInitialize(). This is more efficient than relying on
1564  * the catalog cache, because we can load all the info about a particular
1565  * opclass in a single indexscan of pg_amproc.
1566  *
1567  * The information from pg_am about expected range of support function
1568  * numbers is passed in, rather than being looked up, mainly because the
1569  * caller will have it already.
1570  *
1571  * Note there is no provision for flushing the cache. This is OK at the
1572  * moment because there is no way to ALTER any interesting properties of an
1573  * existing opclass --- all you can do is drop it, which will result in
1574  * a useless but harmless dead entry in the cache. To support altering
1575  * opclass membership (not the same as opfamily membership!), we'd need to
1576  * be able to flush this cache as well as the contents of relcache entries
1577  * for indexes.
1578  */
1579 static OpClassCacheEnt *
1580 LookupOpclassInfo(Oid operatorClassOid,
1581  StrategyNumber numSupport)
1582 {
1583  OpClassCacheEnt *opcentry;
1584  bool found;
1585  Relation rel;
1586  SysScanDesc scan;
1587  ScanKeyData skey[3];
1588  HeapTuple htup;
1589  bool indexOK;
1590 
1591  if (OpClassCache == NULL)
1592  {
1593  /* First time through: initialize the opclass cache */
1594  HASHCTL ctl;
1595 
1596  MemSet(&ctl, 0, sizeof(ctl));
1597  ctl.keysize = sizeof(Oid);
1598  ctl.entrysize = sizeof(OpClassCacheEnt);
1599  OpClassCache = hash_create("Operator class cache", 64,
1600  &ctl, HASH_ELEM | HASH_BLOBS);
1601 
1602  /* Also make sure CacheMemoryContext exists */
1603  if (!CacheMemoryContext)
1605  }
1606 
1607  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1608  (void *) &operatorClassOid,
1609  HASH_ENTER, &found);
1610 
1611  if (!found)
1612  {
1613  /* Need to allocate memory for new entry */
1614  opcentry->valid = false; /* until known OK */
1615  opcentry->numSupport = numSupport;
1616 
1617  if (numSupport > 0)
1618  opcentry->supportProcs = (RegProcedure *)
1620  numSupport * sizeof(RegProcedure));
1621  else
1622  opcentry->supportProcs = NULL;
1623  }
1624  else
1625  {
1626  Assert(numSupport == opcentry->numSupport);
1627  }
1628 
1629  /*
1630  * When testing for cache-flush hazards, we intentionally disable the
1631  * operator class cache and force reloading of the info on each call. This
1632  * is helpful because we want to test the case where a cache flush occurs
1633  * while we are loading the info, and it's very hard to provoke that if
1634  * this happens only once per opclass per backend.
1635  */
1636 #if defined(CLOBBER_CACHE_ALWAYS)
1637  opcentry->valid = false;
1638 #endif
1639 
1640  if (opcentry->valid)
1641  return opcentry;
1642 
1643  /*
1644  * Need to fill in new entry.
1645  *
1646  * To avoid infinite recursion during startup, force heap scans if we're
1647  * looking up info for the opclasses used by the indexes we would like to
1648  * reference here.
1649  */
1650  indexOK = criticalRelcachesBuilt ||
1651  (operatorClassOid != OID_BTREE_OPS_OID &&
1652  operatorClassOid != INT2_BTREE_OPS_OID);
1653 
1654  /*
1655  * We have to fetch the pg_opclass row to determine its opfamily and
1656  * opcintype, which are needed to look up related operators and functions.
1657  * It'd be convenient to use the syscache here, but that probably doesn't
1658  * work while bootstrapping.
1659  */
1660  ScanKeyInit(&skey[0],
1661  Anum_pg_opclass_oid,
1662  BTEqualStrategyNumber, F_OIDEQ,
1663  ObjectIdGetDatum(operatorClassOid));
1664  rel = table_open(OperatorClassRelationId, AccessShareLock);
1665  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1666  NULL, 1, skey);
1667 
1668  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1669  {
1670  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1671 
1672  opcentry->opcfamily = opclassform->opcfamily;
1673  opcentry->opcintype = opclassform->opcintype;
1674  }
1675  else
1676  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1677 
1678  systable_endscan(scan);
1680 
1681  /*
1682  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1683  * the default ones (those with lefttype = righttype = opcintype).
1684  */
1685  if (numSupport > 0)
1686  {
1687  ScanKeyInit(&skey[0],
1688  Anum_pg_amproc_amprocfamily,
1689  BTEqualStrategyNumber, F_OIDEQ,
1690  ObjectIdGetDatum(opcentry->opcfamily));
1691  ScanKeyInit(&skey[1],
1692  Anum_pg_amproc_amproclefttype,
1693  BTEqualStrategyNumber, F_OIDEQ,
1694  ObjectIdGetDatum(opcentry->opcintype));
1695  ScanKeyInit(&skey[2],
1696  Anum_pg_amproc_amprocrighttype,
1697  BTEqualStrategyNumber, F_OIDEQ,
1698  ObjectIdGetDatum(opcentry->opcintype));
1699  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1700  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1701  NULL, 3, skey);
1702 
1703  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1704  {
1705  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1706 
1707  if (amprocform->amprocnum <= 0 ||
1708  (StrategyNumber) amprocform->amprocnum > numSupport)
1709  elog(ERROR, "invalid amproc number %d for opclass %u",
1710  amprocform->amprocnum, operatorClassOid);
1711 
1712  opcentry->supportProcs[amprocform->amprocnum - 1] =
1713  amprocform->amproc;
1714  }
1715 
1716  systable_endscan(scan);
1718  }
1719 
1720  opcentry->valid = true;
1721  return opcentry;
1722 }
1723 
1724 /*
1725  * Fill in the TableAmRoutine for a relation
1726  *
1727  * relation's rd_amhandler must be valid already.
1728  */
1729 static void
1731 {
1732  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1733 }
1734 
1735 /*
1736  * Initialize table access method support for a table like relation
1737  */
1738 void
1740 {
1741  HeapTuple tuple;
1742  Form_pg_am aform;
1743 
1744  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1745  {
1746  /*
1747  * Sequences are currently accessed like heap tables, but it doesn't
1748  * seem prudent to show that in the catalog. So just overwrite it
1749  * here.
1750  */
1751  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1752  }
1753  else if (IsCatalogRelation(relation))
1754  {
1755  /*
1756  * Avoid doing a syscache lookup for catalog tables.
1757  */
1758  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1759  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1760  }
1761  else
1762  {
1763  /*
1764  * Look up the table access method, save the OID of its handler
1765  * function.
1766  */
1767  Assert(relation->rd_rel->relam != InvalidOid);
1768  tuple = SearchSysCache1(AMOID,
1769  ObjectIdGetDatum(relation->rd_rel->relam));
1770  if (!HeapTupleIsValid(tuple))
1771  elog(ERROR, "cache lookup failed for access method %u",
1772  relation->rd_rel->relam);
1773  aform = (Form_pg_am) GETSTRUCT(tuple);
1774  relation->rd_amhandler = aform->amhandler;
1775  ReleaseSysCache(tuple);
1776  }
1777 
1778  /*
1779  * Now we can fetch the table AM's API struct
1780  */
1781  InitTableAmRoutine(relation);
1782 }
1783 
1784 /*
1785  * formrdesc
1786  *
1787  * This is a special cut-down version of RelationBuildDesc(),
1788  * used while initializing the relcache.
1789  * The relation descriptor is built just from the supplied parameters,
1790  * without actually looking at any system table entries. We cheat
1791  * quite a lot since we only need to work for a few basic system
1792  * catalogs.
1793  *
1794  * The catalogs this is used for can't have constraints (except attnotnull),
1795  * default values, rules, or triggers, since we don't cope with any of that.
1796  * (Well, actually, this only matters for properties that need to be valid
1797  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1798  * these properties matter then...)
1799  *
1800  * NOTE: we assume we are already switched into CacheMemoryContext.
1801  */
1802 static void
1803 formrdesc(const char *relationName, Oid relationReltype,
1804  bool isshared,
1805  int natts, const FormData_pg_attribute *attrs)
1806 {
1807  Relation relation;
1808  int i;
1809  bool has_not_null;
1810 
1811  /*
1812  * allocate new relation desc, clear all fields of reldesc
1813  */
1814  relation = (Relation) palloc0(sizeof(RelationData));
1815 
1816  /* make sure relation is marked as having no open file yet */
1817  relation->rd_smgr = NULL;
1818 
1819  /*
1820  * initialize reference count: 1 because it is nailed in cache
1821  */
1822  relation->rd_refcnt = 1;
1823 
1824  /*
1825  * all entries built with this routine are nailed-in-cache; none are for
1826  * new or temp relations.
1827  */
1828  relation->rd_isnailed = true;
1831  relation->rd_backend = InvalidBackendId;
1832  relation->rd_islocaltemp = false;
1833 
1834  /*
1835  * initialize relation tuple form
1836  *
1837  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1838  * get us launched. RelationCacheInitializePhase3() will read the real
1839  * data from pg_class and replace what we've done here. Note in
1840  * particular that relowner is left as zero; this cues
1841  * RelationCacheInitializePhase3 that the real data isn't there yet.
1842  */
1844 
1845  namestrcpy(&relation->rd_rel->relname, relationName);
1846  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1847  relation->rd_rel->reltype = relationReltype;
1848 
1849  /*
1850  * It's important to distinguish between shared and non-shared relations,
1851  * even at bootstrap time, to make sure we know where they are stored.
1852  */
1853  relation->rd_rel->relisshared = isshared;
1854  if (isshared)
1855  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1856 
1857  /* formrdesc is used only for permanent relations */
1858  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1859 
1860  /* ... and they're always populated, too */
1861  relation->rd_rel->relispopulated = true;
1862 
1863  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1864  relation->rd_rel->relpages = 0;
1865  relation->rd_rel->reltuples = 0;
1866  relation->rd_rel->relallvisible = 0;
1867  relation->rd_rel->relkind = RELKIND_RELATION;
1868  relation->rd_rel->relnatts = (int16) natts;
1869  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1870 
1871  /*
1872  * initialize attribute tuple form
1873  *
1874  * Unlike the case with the relation tuple, this data had better be right
1875  * because it will never be replaced. The data comes from
1876  * src/include/catalog/ headers via genbki.pl.
1877  */
1878  relation->rd_att = CreateTemplateTupleDesc(natts);
1879  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1880 
1881  relation->rd_att->tdtypeid = relationReltype;
1882  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1883 
1884  /*
1885  * initialize tuple desc info
1886  */
1887  has_not_null = false;
1888  for (i = 0; i < natts; i++)
1889  {
1890  memcpy(TupleDescAttr(relation->rd_att, i),
1891  &attrs[i],
1893  has_not_null |= attrs[i].attnotnull;
1894  /* make sure attcacheoff is valid */
1895  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1896  }
1897 
1898  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1899  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1900 
1901  /* mark not-null status */
1902  if (has_not_null)
1903  {
1904  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1905 
1906  constr->has_not_null = true;
1907  relation->rd_att->constr = constr;
1908  }
1909 
1910  /*
1911  * initialize relation id from info in att array (my, this is ugly)
1912  */
1913  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1914 
1915  /*
1916  * All relations made with formrdesc are mapped. This is necessarily so
1917  * because there is no other way to know what filenode they currently
1918  * have. In bootstrap mode, add them to the initial relation mapper data,
1919  * specifying that the initial filenode is the same as the OID.
1920  */
1921  relation->rd_rel->relfilenode = InvalidOid;
1924  RelationGetRelid(relation),
1925  isshared, true);
1926 
1927  /*
1928  * initialize the relation lock manager information
1929  */
1930  RelationInitLockInfo(relation); /* see lmgr.c */
1931 
1932  /*
1933  * initialize physical addressing information for the relation
1934  */
1935  RelationInitPhysicalAddr(relation);
1936 
1937  /*
1938  * initialize the table am handler
1939  */
1940  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1941  relation->rd_tableam = GetHeapamTableAmRoutine();
1942 
1943  /*
1944  * initialize the rel-has-index flag, using hardwired knowledge
1945  */
1947  {
1948  /* In bootstrap mode, we have no indexes */
1949  relation->rd_rel->relhasindex = false;
1950  }
1951  else
1952  {
1953  /* Otherwise, all the rels formrdesc is used for have indexes */
1954  relation->rd_rel->relhasindex = true;
1955  }
1956 
1957  /*
1958  * add new reldesc to relcache
1959  */
1960  RelationCacheInsert(relation, false);
1961 
1962  /* It's fully valid */
1963  relation->rd_isvalid = true;
1964 }
1965 
1966 
1967 /* ----------------------------------------------------------------
1968  * Relation Descriptor Lookup Interface
1969  * ----------------------------------------------------------------
1970  */
1971 
1972 /*
1973  * RelationIdGetRelation
1974  *
1975  * Lookup a reldesc by OID; make one if not already in cache.
1976  *
1977  * Returns NULL if no pg_class row could be found for the given relid
1978  * (suggesting we are trying to access a just-deleted relation).
1979  * Any other error is reported via elog.
1980  *
1981  * NB: caller should already have at least AccessShareLock on the
1982  * relation ID, else there are nasty race conditions.
1983  *
1984  * NB: relation ref count is incremented, or set to 1 if new entry.
1985  * Caller should eventually decrement count. (Usually,
1986  * that happens by calling RelationClose().)
1987  */
1988 Relation
1990 {
1991  Relation rd;
1992 
1993  /* Make sure we're in an xact, even if this ends up being a cache hit */
1995 
1996  /*
1997  * first try to find reldesc in the cache
1998  */
1999  RelationIdCacheLookup(relationId, rd);
2000 
2001  if (RelationIsValid(rd))
2002  {
2004  /* revalidate cache entry if necessary */
2005  if (!rd->rd_isvalid)
2006  {
2007  /*
2008  * Indexes only have a limited number of possible schema changes,
2009  * and we don't want to use the full-blown procedure because it's
2010  * a headache for indexes that reload itself depends on.
2011  */
2012  if (rd->rd_rel->relkind == RELKIND_INDEX ||
2013  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2015  else
2016  RelationClearRelation(rd, true);
2017 
2018  /*
2019  * Normally entries need to be valid here, but before the relcache
2020  * has been initialized, not enough infrastructure exists to
2021  * perform pg_class lookups. The structure of such entries doesn't
2022  * change, but we still want to update the rd_rel entry. So
2023  * rd_isvalid = false is left in place for a later lookup.
2024  */
2025  Assert(rd->rd_isvalid ||
2027  }
2028  return rd;
2029  }
2030 
2031  /*
2032  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2033  * it.
2034  */
2035  rd = RelationBuildDesc(relationId, true);
2036  if (RelationIsValid(rd))
2038  return rd;
2039 }
2040 
2041 /* ----------------------------------------------------------------
2042  * cache invalidation support routines
2043  * ----------------------------------------------------------------
2044  */
2045 
2046 /*
2047  * RelationIncrementReferenceCount
2048  * Increments relation reference count.
2049  *
2050  * Note: bootstrap mode has its own weird ideas about relation refcount
2051  * behavior; we ought to fix it someday, but for now, just disable
2052  * reference count ownership tracking in bootstrap mode.
2053  */
2054 void
2056 {
2058  rel->rd_refcnt += 1;
2061 }
2062 
2063 /*
2064  * RelationDecrementReferenceCount
2065  * Decrements relation reference count.
2066  */
2067 void
2069 {
2070  Assert(rel->rd_refcnt > 0);
2071  rel->rd_refcnt -= 1;
2074 }
2075 
2076 /*
2077  * RelationClose - close an open relation
2078  *
2079  * Actually, we just decrement the refcount.
2080  *
2081  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2082  * will be freed as soon as their refcount goes to zero. In combination
2083  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2084  * to catch references to already-released relcache entries. It slows
2085  * things down quite a bit, however.
2086  */
2087 void
2089 {
2090  /* Note: no locking manipulations needed */
2092 
2093 #ifdef RELCACHE_FORCE_RELEASE
2094  if (RelationHasReferenceCountZero(relation) &&
2095  relation->rd_createSubid == InvalidSubTransactionId &&
2097  RelationClearRelation(relation, false);
2098 #endif
2099 }
2100 
2101 /*
2102  * RelationReloadIndexInfo - reload minimal information for an open index
2103  *
2104  * This function is used only for indexes. A relcache inval on an index
2105  * can mean that its pg_class or pg_index row changed. There are only
2106  * very limited changes that are allowed to an existing index's schema,
2107  * so we can update the relcache entry without a complete rebuild; which
2108  * is fortunate because we can't rebuild an index entry that is "nailed"
2109  * and/or in active use. We support full replacement of the pg_class row,
2110  * as well as updates of a few simple fields of the pg_index row.
2111  *
2112  * We can't necessarily reread the catalog rows right away; we might be
2113  * in a failed transaction when we receive the SI notification. If so,
2114  * RelationClearRelation just marks the entry as invalid by setting
2115  * rd_isvalid to false. This routine is called to fix the entry when it
2116  * is next needed.
2117  *
2118  * We assume that at the time we are called, we have at least AccessShareLock
2119  * on the target index. (Note: in the calls from RelationClearRelation,
2120  * this is legitimate because we know the rel has positive refcount.)
2121  *
2122  * If the target index is an index on pg_class or pg_index, we'd better have
2123  * previously gotten at least AccessShareLock on its underlying catalog,
2124  * else we are at risk of deadlock against someone trying to exclusive-lock
2125  * the heap and index in that order. This is ensured in current usage by
2126  * only applying this to indexes being opened or having positive refcount.
2127  */
2128 static void
2130 {
2131  bool indexOK;
2132  HeapTuple pg_class_tuple;
2133  Form_pg_class relp;
2134 
2135  /* Should be called only for invalidated indexes */
2136  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2137  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2138  !relation->rd_isvalid);
2139 
2140  /* Ensure it's closed at smgr level */
2141  RelationCloseSmgr(relation);
2142 
2143  /* Must free any AM cached data upon relcache flush */
2144  if (relation->rd_amcache)
2145  pfree(relation->rd_amcache);
2146  relation->rd_amcache = NULL;
2147 
2148  /*
2149  * If it's a shared index, we might be called before backend startup has
2150  * finished selecting a database, in which case we have no way to read
2151  * pg_class yet. However, a shared index can never have any significant
2152  * schema updates, so it's okay to ignore the invalidation signal. Just
2153  * mark it valid and return without doing anything more.
2154  */
2155  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2156  {
2157  relation->rd_isvalid = true;
2158  return;
2159  }
2160 
2161  /*
2162  * Read the pg_class row
2163  *
2164  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2165  * for pg_class_oid_index ...
2166  */
2167  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2168  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2169  if (!HeapTupleIsValid(pg_class_tuple))
2170  elog(ERROR, "could not find pg_class tuple for index %u",
2171  RelationGetRelid(relation));
2172  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2173  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2174  /* Reload reloptions in case they changed */
2175  if (relation->rd_options)
2176  pfree(relation->rd_options);
2177  RelationParseRelOptions(relation, pg_class_tuple);
2178  /* done with pg_class tuple */
2179  heap_freetuple(pg_class_tuple);
2180  /* We must recalculate physical address in case it changed */
2181  RelationInitPhysicalAddr(relation);
2182 
2183  /*
2184  * For a non-system index, there are fields of the pg_index row that are
2185  * allowed to change, so re-read that row and update the relcache entry.
2186  * Most of the info derived from pg_index (such as support function lookup
2187  * info) cannot change, and indeed the whole point of this routine is to
2188  * update the relcache entry without clobbering that data; so wholesale
2189  * replacement is not appropriate.
2190  */
2191  if (!IsSystemRelation(relation))
2192  {
2193  HeapTuple tuple;
2195 
2196  tuple = SearchSysCache1(INDEXRELID,
2197  ObjectIdGetDatum(RelationGetRelid(relation)));
2198  if (!HeapTupleIsValid(tuple))
2199  elog(ERROR, "cache lookup failed for index %u",
2200  RelationGetRelid(relation));
2201  index = (Form_pg_index) GETSTRUCT(tuple);
2202 
2203  /*
2204  * Basically, let's just copy all the bool fields. There are one or
2205  * two of these that can't actually change in the current code, but
2206  * it's not worth it to track exactly which ones they are. None of
2207  * the array fields are allowed to change, though.
2208  */
2209  relation->rd_index->indisunique = index->indisunique;
2210  relation->rd_index->indisprimary = index->indisprimary;
2211  relation->rd_index->indisexclusion = index->indisexclusion;
2212  relation->rd_index->indimmediate = index->indimmediate;
2213  relation->rd_index->indisclustered = index->indisclustered;
2214  relation->rd_index->indisvalid = index->indisvalid;
2215  relation->rd_index->indcheckxmin = index->indcheckxmin;
2216  relation->rd_index->indisready = index->indisready;
2217  relation->rd_index->indislive = index->indislive;
2218 
2219  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2221  HeapTupleHeaderGetXmin(tuple->t_data));
2222 
2223  ReleaseSysCache(tuple);
2224  }
2225 
2226  /* Okay, now it's valid again */
2227  relation->rd_isvalid = true;
2228 }
2229 
2230 /*
2231  * RelationReloadNailed - reload minimal information for nailed relations.
2232  *
2233  * The structure of a nailed relation can never change (which is good, because
2234  * we rely on knowing their structure to be able to read catalog content). But
2235  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2236  * accurate content for. Therefore those need to be reloaded after the arrival
2237  * of invalidations.
2238  */
2239 static void
2241 {
2242  Assert(relation->rd_isnailed);
2243 
2244  /*
2245  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2246  * mapping changed.
2247  */
2248  RelationInitPhysicalAddr(relation);
2249 
2250  /* flag as needing to be revalidated */
2251  relation->rd_isvalid = false;
2252 
2253  /*
2254  * Can only reread catalog contents if in a transaction. If the relation
2255  * is currently open (not counting the nailed refcount), do so
2256  * immediately. Otherwise we've already marked the entry as possibly
2257  * invalid, and it'll be fixed when next opened.
2258  */
2259  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2260  return;
2261 
2262  if (relation->rd_rel->relkind == RELKIND_INDEX)
2263  {
2264  /*
2265  * If it's a nailed-but-not-mapped index, then we need to re-read the
2266  * pg_class row to see if its relfilenode changed.
2267  */
2268  RelationReloadIndexInfo(relation);
2269  }
2270  else
2271  {
2272  /*
2273  * Reload a non-index entry. We can't easily do so if relcaches
2274  * aren't yet built, but that's fine because at that stage the
2275  * attributes that need to be current (like relfrozenxid) aren't yet
2276  * accessed. To ensure the entry will later be revalidated, we leave
2277  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2278  */
2280  {
2281  HeapTuple pg_class_tuple;
2282  Form_pg_class relp;
2283 
2284  /*
2285  * NB: Mark the entry as valid before starting to scan, to avoid
2286  * self-recursion when re-building pg_class.
2287  */
2288  relation->rd_isvalid = true;
2289 
2290  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2291  true, false);
2292  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2293  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2294  heap_freetuple(pg_class_tuple);
2295 
2296  /*
2297  * Again mark as valid, to protect against concurrently arriving
2298  * invalidations.
2299  */
2300  relation->rd_isvalid = true;
2301  }
2302  }
2303 }
2304 
2305 /*
2306  * RelationDestroyRelation
2307  *
2308  * Physically delete a relation cache entry and all subsidiary data.
2309  * Caller must already have unhooked the entry from the hash table.
2310  */
2311 static void
2312 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2313 {
2315 
2316  /*
2317  * Make sure smgr and lower levels close the relation's files, if they
2318  * weren't closed already. (This was probably done by caller, but let's
2319  * just be real sure.)
2320  */
2321  RelationCloseSmgr(relation);
2322 
2323  /*
2324  * Free all the subsidiary data structures of the relcache entry, then the
2325  * entry itself.
2326  */
2327  if (relation->rd_rel)
2328  pfree(relation->rd_rel);
2329  /* can't use DecrTupleDescRefCount here */
2330  Assert(relation->rd_att->tdrefcount > 0);
2331  if (--relation->rd_att->tdrefcount == 0)
2332  {
2333  /*
2334  * If we Rebuilt a relcache entry during a transaction then its
2335  * possible we did that because the TupDesc changed as the result of
2336  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2337  * possible someone copied that TupDesc, in which case the copy would
2338  * point to free'd memory. So if we rebuild an entry we keep the
2339  * TupDesc around until end of transaction, to be safe.
2340  */
2341  if (remember_tupdesc)
2343  else
2344  FreeTupleDesc(relation->rd_att);
2345  }
2346  FreeTriggerDesc(relation->trigdesc);
2347  list_free_deep(relation->rd_fkeylist);
2348  list_free(relation->rd_indexlist);
2349  bms_free(relation->rd_indexattr);
2350  bms_free(relation->rd_keyattr);
2351  bms_free(relation->rd_pkattr);
2352  bms_free(relation->rd_idattr);
2353  if (relation->rd_pubactions)
2354  pfree(relation->rd_pubactions);
2355  if (relation->rd_options)
2356  pfree(relation->rd_options);
2357  if (relation->rd_indextuple)
2358  pfree(relation->rd_indextuple);
2359  if (relation->rd_amcache)
2360  pfree(relation->rd_amcache);
2361  if (relation->rd_fdwroutine)
2362  pfree(relation->rd_fdwroutine);
2363  if (relation->rd_indexcxt)
2364  MemoryContextDelete(relation->rd_indexcxt);
2365  if (relation->rd_rulescxt)
2366  MemoryContextDelete(relation->rd_rulescxt);
2367  if (relation->rd_rsdesc)
2368  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2369  if (relation->rd_partkeycxt)
2371  if (relation->rd_pdcxt)
2372  MemoryContextDelete(relation->rd_pdcxt);
2373  if (relation->rd_partcheckcxt)
2375  pfree(relation);
2376 }
2377 
2378 /*
2379  * RelationClearRelation
2380  *
2381  * Physically blow away a relation cache entry, or reset it and rebuild
2382  * it from scratch (that is, from catalog entries). The latter path is
2383  * used when we are notified of a change to an open relation (one with
2384  * refcount > 0).
2385  *
2386  * NB: when rebuilding, we'd better hold some lock on the relation,
2387  * else the catalog data we need to read could be changing under us.
2388  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2389  * a sinval reset could happen while we're accessing the catalogs, and
2390  * the rel would get blown away underneath us by RelationCacheInvalidate
2391  * if it has zero refcnt.
2392  *
2393  * The "rebuild" parameter is redundant in current usage because it has
2394  * to match the relation's refcnt status, but we keep it as a crosscheck
2395  * that we're doing what the caller expects.
2396  */
2397 static void
2398 RelationClearRelation(Relation relation, bool rebuild)
2399 {
2400  /*
2401  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2402  * course it would be an equally bad idea to blow away one with nonzero
2403  * refcnt, since that would leave someone somewhere with a dangling
2404  * pointer. All callers are expected to have verified that this holds.
2405  */
2406  Assert(rebuild ?
2407  !RelationHasReferenceCountZero(relation) :
2408  RelationHasReferenceCountZero(relation));
2409 
2410  /*
2411  * Make sure smgr and lower levels close the relation's files, if they
2412  * weren't closed already. If the relation is not getting deleted, the
2413  * next smgr access should reopen the files automatically. This ensures
2414  * that the low-level file access state is updated after, say, a vacuum
2415  * truncation.
2416  */
2417  RelationCloseSmgr(relation);
2418 
2419  /* Free AM cached data, if any */
2420  if (relation->rd_amcache)
2421  pfree(relation->rd_amcache);
2422  relation->rd_amcache = NULL;
2423 
2424  /*
2425  * Treat nailed-in system relations separately, they always need to be
2426  * accessible, so we can't blow them away.
2427  */
2428  if (relation->rd_isnailed)
2429  {
2430  RelationReloadNailed(relation);
2431  return;
2432  }
2433 
2434  /*
2435  * Even non-system indexes should not be blown away if they are open and
2436  * have valid index support information. This avoids problems with active
2437  * use of the index support information. As with nailed indexes, we
2438  * re-read the pg_class row to handle possible physical relocation of the
2439  * index, and we check for pg_index updates too.
2440  */
2441  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2442  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2443  relation->rd_refcnt > 0 &&
2444  relation->rd_indexcxt != NULL)
2445  {
2446  relation->rd_isvalid = false; /* needs to be revalidated */
2447  if (IsTransactionState())
2448  RelationReloadIndexInfo(relation);
2449  return;
2450  }
2451 
2452  /* Mark it invalid until we've finished rebuild */
2453  relation->rd_isvalid = false;
2454 
2455  /*
2456  * If we're really done with the relcache entry, blow it away. But if
2457  * someone is still using it, reconstruct the whole deal without moving
2458  * the physical RelationData record (so that the someone's pointer is
2459  * still valid).
2460  */
2461  if (!rebuild)
2462  {
2463  /* Remove it from the hash table */
2464  RelationCacheDelete(relation);
2465 
2466  /* And release storage */
2467  RelationDestroyRelation(relation, false);
2468  }
2469  else if (!IsTransactionState())
2470  {
2471  /*
2472  * If we're not inside a valid transaction, we can't do any catalog
2473  * access so it's not possible to rebuild yet. Just exit, leaving
2474  * rd_isvalid = false so that the rebuild will occur when the entry is
2475  * next opened.
2476  *
2477  * Note: it's possible that we come here during subtransaction abort,
2478  * and the reason for wanting to rebuild is that the rel is open in
2479  * the outer transaction. In that case it might seem unsafe to not
2480  * rebuild immediately, since whatever code has the rel already open
2481  * will keep on using the relcache entry as-is. However, in such a
2482  * case the outer transaction should be holding a lock that's
2483  * sufficient to prevent any significant change in the rel's schema,
2484  * so the existing entry contents should be good enough for its
2485  * purposes; at worst we might be behind on statistics updates or the
2486  * like. (See also CheckTableNotInUse() and its callers.) These same
2487  * remarks also apply to the cases above where we exit without having
2488  * done RelationReloadIndexInfo() yet.
2489  */
2490  return;
2491  }
2492  else
2493  {
2494  /*
2495  * Our strategy for rebuilding an open relcache entry is to build a
2496  * new entry from scratch, swap its contents with the old entry, and
2497  * finally delete the new entry (along with any infrastructure swapped
2498  * over from the old entry). This is to avoid trouble in case an
2499  * error causes us to lose control partway through. The old entry
2500  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2501  * on next access. Meanwhile it's not any less valid than it was
2502  * before, so any code that might expect to continue accessing it
2503  * isn't hurt by the rebuild failure. (Consider for example a
2504  * subtransaction that ALTERs a table and then gets canceled partway
2505  * through the cache entry rebuild. The outer transaction should
2506  * still see the not-modified cache entry as valid.) The worst
2507  * consequence of an error is leaking the necessarily-unreferenced new
2508  * entry, and this shouldn't happen often enough for that to be a big
2509  * problem.
2510  *
2511  * When rebuilding an open relcache entry, we must preserve ref count,
2512  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2513  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2514  * rewrite-rule, partition key, and partition descriptor substructures
2515  * in place, because various places assume that these structures won't
2516  * move while they are working with an open relcache entry. (Note:
2517  * the refcount mechanism for tupledescs might someday allow us to
2518  * remove this hack for the tupledesc.)
2519  *
2520  * Note that this process does not touch CurrentResourceOwner; which
2521  * is good because whatever ref counts the entry may have do not
2522  * necessarily belong to that resource owner.
2523  */
2524  Relation newrel;
2525  Oid save_relid = RelationGetRelid(relation);
2526  bool keep_tupdesc;
2527  bool keep_rules;
2528  bool keep_policies;
2529  bool keep_partkey;
2530  bool keep_partdesc;
2531 
2532  /* Build temporary entry, but don't link it into hashtable */
2533  newrel = RelationBuildDesc(save_relid, false);
2534  if (newrel == NULL)
2535  {
2536  /*
2537  * We can validly get here, if we're using a historic snapshot in
2538  * which a relation, accessed from outside logical decoding, is
2539  * still invisible. In that case it's fine to just mark the
2540  * relation as invalid and return - it'll fully get reloaded by
2541  * the cache reset at the end of logical decoding (or at the next
2542  * access). During normal processing we don't want to ignore this
2543  * case as it shouldn't happen there, as explained below.
2544  */
2545  if (HistoricSnapshotActive())
2546  return;
2547 
2548  /*
2549  * This shouldn't happen as dropping a relation is intended to be
2550  * impossible if still referenced (cf. CheckTableNotInUse()). But
2551  * if we get here anyway, we can't just delete the relcache entry,
2552  * as it possibly could get accessed later (as e.g. the error
2553  * might get trapped and handled via a subtransaction rollback).
2554  */
2555  elog(ERROR, "relation %u deleted while still in use", save_relid);
2556  }
2557 
2558  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2559  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2560  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2561  /* partkey is immutable once set up, so we can always keep it */
2562  keep_partkey = (relation->rd_partkey != NULL);
2563  keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2564  relation->rd_partdesc,
2565  newrel->rd_partdesc);
2566 
2567  /*
2568  * Perform swapping of the relcache entry contents. Within this
2569  * process the old entry is momentarily invalid, so there *must* be no
2570  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2571  * all-in-line code for safety.
2572  *
2573  * Since the vast majority of fields should be swapped, our method is
2574  * to swap the whole structures and then re-swap those few fields we
2575  * didn't want swapped.
2576  */
2577 #define SWAPFIELD(fldtype, fldname) \
2578  do { \
2579  fldtype _tmp = newrel->fldname; \
2580  newrel->fldname = relation->fldname; \
2581  relation->fldname = _tmp; \
2582  } while (0)
2583 
2584  /* swap all Relation struct fields */
2585  {
2586  RelationData tmpstruct;
2587 
2588  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2589  memcpy(newrel, relation, sizeof(RelationData));
2590  memcpy(relation, &tmpstruct, sizeof(RelationData));
2591  }
2592 
2593  /* rd_smgr must not be swapped, due to back-links from smgr level */
2594  SWAPFIELD(SMgrRelation, rd_smgr);
2595  /* rd_refcnt must be preserved */
2596  SWAPFIELD(int, rd_refcnt);
2597  /* isnailed shouldn't change */
2598  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2599  /* creation sub-XIDs must be preserved */
2600  SWAPFIELD(SubTransactionId, rd_createSubid);
2601  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2602  /* un-swap rd_rel pointers, swap contents instead */
2603  SWAPFIELD(Form_pg_class, rd_rel);
2604  /* ... but actually, we don't have to update newrel->rd_rel */
2605  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2606  /* preserve old tupledesc, rules, policies if no logical change */
2607  if (keep_tupdesc)
2608  SWAPFIELD(TupleDesc, rd_att);
2609  if (keep_rules)
2610  {
2611  SWAPFIELD(RuleLock *, rd_rules);
2612  SWAPFIELD(MemoryContext, rd_rulescxt);
2613  }
2614  if (keep_policies)
2615  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2616  /* toast OID override must be preserved */
2617  SWAPFIELD(Oid, rd_toastoid);
2618  /* pgstat_info must be preserved */
2619  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2620  /* preserve old partitioning info if no logical change */
2621  if (keep_partkey)
2622  {
2623  SWAPFIELD(PartitionKey, rd_partkey);
2624  SWAPFIELD(MemoryContext, rd_partkeycxt);
2625  }
2626  if (keep_partdesc)
2627  {
2628  SWAPFIELD(PartitionDesc, rd_partdesc);
2629  SWAPFIELD(MemoryContext, rd_pdcxt);
2630  }
2631  else if (rebuild && newrel->rd_pdcxt != NULL)
2632  {
2633  /*
2634  * We are rebuilding a partitioned relation with a non-zero
2635  * reference count, so keep the old partition descriptor around,
2636  * in case there's a PartitionDirectory with a pointer to it.
2637  * Attach it to the new rd_pdcxt so that it gets cleaned up
2638  * eventually. In the case where the reference count is 0, this
2639  * code is not reached, which should be OK because in that case
2640  * there should be no PartitionDirectory with a pointer to the old
2641  * entry.
2642  *
2643  * Note that newrel and relation have already been swapped, so the
2644  * "old" partition descriptor is actually the one hanging off of
2645  * newrel.
2646  */
2647  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2648  newrel->rd_partdesc = NULL;
2649  newrel->rd_pdcxt = NULL;
2650  }
2651 
2652 #undef SWAPFIELD
2653 
2654  /* And now we can throw away the temporary entry */
2655  RelationDestroyRelation(newrel, !keep_tupdesc);
2656  }
2657 }
2658 
2659 /*
2660  * RelationFlushRelation
2661  *
2662  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2663  * This is used when we receive a cache invalidation event for the rel.
2664  */
2665 static void
2667 {
2668  if (relation->rd_createSubid != InvalidSubTransactionId ||
2670  {
2671  /*
2672  * New relcache entries are always rebuilt, not flushed; else we'd
2673  * forget the "new" status of the relation, which is a useful
2674  * optimization to have. Ditto for the new-relfilenode status.
2675  *
2676  * The rel could have zero refcnt here, so temporarily increment the
2677  * refcnt to ensure it's safe to rebuild it. We can assume that the
2678  * current transaction has some lock on the rel already.
2679  */
2681  RelationClearRelation(relation, true);
2683  }
2684  else
2685  {
2686  /*
2687  * Pre-existing rels can be dropped from the relcache if not open.
2688  */
2689  bool rebuild = !RelationHasReferenceCountZero(relation);
2690 
2691  RelationClearRelation(relation, rebuild);
2692  }
2693 }
2694 
2695 /*
2696  * RelationForgetRelation - unconditionally remove a relcache entry
2697  *
2698  * External interface for destroying a relcache entry when we
2699  * drop the relation.
2700  */
2701 void
2703 {
2704  Relation relation;
2705 
2706  RelationIdCacheLookup(rid, relation);
2707 
2708  if (!PointerIsValid(relation))
2709  return; /* not in cache, nothing to do */
2710 
2711  if (!RelationHasReferenceCountZero(relation))
2712  elog(ERROR, "relation %u is still open", rid);
2713 
2714  /* Unconditionally destroy the relcache entry */
2715  RelationClearRelation(relation, false);
2716 }
2717 
2718 /*
2719  * RelationCacheInvalidateEntry
2720  *
2721  * This routine is invoked for SI cache flush messages.
2722  *
2723  * Any relcache entry matching the relid must be flushed. (Note: caller has
2724  * already determined that the relid belongs to our database or is a shared
2725  * relation.)
2726  *
2727  * We used to skip local relations, on the grounds that they could
2728  * not be targets of cross-backend SI update messages; but it seems
2729  * safer to process them, so that our *own* SI update messages will
2730  * have the same effects during CommandCounterIncrement for both
2731  * local and nonlocal relations.
2732  */
2733 void
2735 {
2736  Relation relation;
2737 
2738  RelationIdCacheLookup(relationId, relation);
2739 
2740  if (PointerIsValid(relation))
2741  {
2743  RelationFlushRelation(relation);
2744  }
2745 }
2746 
2747 /*
2748  * RelationCacheInvalidate
2749  * Blow away cached relation descriptors that have zero reference counts,
2750  * and rebuild those with positive reference counts. Also reset the smgr
2751  * relation cache and re-read relation mapping data.
2752  *
2753  * This is currently used only to recover from SI message buffer overflow,
2754  * so we do not touch new-in-transaction relations; they cannot be targets
2755  * of cross-backend SI updates (and our own updates now go through a
2756  * separate linked list that isn't limited by the SI message buffer size).
2757  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2758  * since any invalidation of those would be a local event.
2759  *
2760  * We do this in two phases: the first pass deletes deletable items, and
2761  * the second one rebuilds the rebuildable items. This is essential for
2762  * safety, because hash_seq_search only copes with concurrent deletion of
2763  * the element it is currently visiting. If a second SI overflow were to
2764  * occur while we are walking the table, resulting in recursive entry to
2765  * this routine, we could crash because the inner invocation blows away
2766  * the entry next to be visited by the outer scan. But this way is OK,
2767  * because (a) during the first pass we won't process any more SI messages,
2768  * so hash_seq_search will complete safely; (b) during the second pass we
2769  * only hold onto pointers to nondeletable entries.
2770  *
2771  * The two-phase approach also makes it easy to update relfilenodes for
2772  * mapped relations before we do anything else, and to ensure that the
2773  * second pass processes nailed-in-cache items before other nondeletable
2774  * items. This should ensure that system catalogs are up to date before
2775  * we attempt to use them to reload information about other open relations.
2776  */
2777 void
2779 {
2781  RelIdCacheEnt *idhentry;
2782  Relation relation;
2783  List *rebuildFirstList = NIL;
2784  List *rebuildList = NIL;
2785  ListCell *l;
2786 
2787  /*
2788  * Reload relation mapping data before starting to reconstruct cache.
2789  */
2791 
2792  /* Phase 1 */
2793  hash_seq_init(&status, RelationIdCache);
2794 
2795  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2796  {
2797  relation = idhentry->reldesc;
2798 
2799  /* Must close all smgr references to avoid leaving dangling ptrs */
2800  RelationCloseSmgr(relation);
2801 
2802  /*
2803  * Ignore new relations; no other backend will manipulate them before
2804  * we commit. Likewise, before replacing a relation's relfilenode, we
2805  * shall have acquired AccessExclusiveLock and drained any applicable
2806  * pending invalidations.
2807  */
2808  if (relation->rd_createSubid != InvalidSubTransactionId ||
2810  continue;
2811 
2813 
2814  if (RelationHasReferenceCountZero(relation))
2815  {
2816  /* Delete this entry immediately */
2817  Assert(!relation->rd_isnailed);
2818  RelationClearRelation(relation, false);
2819  }
2820  else
2821  {
2822  /*
2823  * If it's a mapped relation, immediately update its rd_node in
2824  * case its relfilenode changed. We must do this during phase 1
2825  * in case the relation is consulted during rebuild of other
2826  * relcache entries in phase 2. It's safe since consulting the
2827  * map doesn't involve any access to relcache entries.
2828  */
2829  if (RelationIsMapped(relation))
2830  RelationInitPhysicalAddr(relation);
2831 
2832  /*
2833  * Add this entry to list of stuff to rebuild in second pass.
2834  * pg_class goes to the front of rebuildFirstList while
2835  * pg_class_oid_index goes to the back of rebuildFirstList, so
2836  * they are done first and second respectively. Other nailed
2837  * relations go to the front of rebuildList, so they'll be done
2838  * next in no particular order; and everything else goes to the
2839  * back of rebuildList.
2840  */
2841  if (RelationGetRelid(relation) == RelationRelationId)
2842  rebuildFirstList = lcons(relation, rebuildFirstList);
2843  else if (RelationGetRelid(relation) == ClassOidIndexId)
2844  rebuildFirstList = lappend(rebuildFirstList, relation);
2845  else if (relation->rd_isnailed)
2846  rebuildList = lcons(relation, rebuildList);
2847  else
2848  rebuildList = lappend(rebuildList, relation);
2849  }
2850  }
2851 
2852  /*
2853  * Now zap any remaining smgr cache entries. This must happen before we
2854  * start to rebuild entries, since that may involve catalog fetches which
2855  * will re-open catalog files.
2856  */
2857  smgrcloseall();
2858 
2859  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2860  foreach(l, rebuildFirstList)
2861  {
2862  relation = (Relation) lfirst(l);
2863  RelationClearRelation(relation, true);
2864  }
2865  list_free(rebuildFirstList);
2866  foreach(l, rebuildList)
2867  {
2868  relation = (Relation) lfirst(l);
2869  RelationClearRelation(relation, true);
2870  }
2871  list_free(rebuildList);
2872 }
2873 
2874 /*
2875  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2876  *
2877  * Needed in some cases where we are changing a relation's physical mapping.
2878  * The link will be automatically reopened on next use.
2879  */
2880 void
2882 {
2883  Relation relation;
2884 
2885  RelationIdCacheLookup(relationId, relation);
2886 
2887  if (!PointerIsValid(relation))
2888  return; /* not in cache, nothing to do */
2889 
2890  RelationCloseSmgr(relation);
2891 }
2892 
2893 static void
2895 {
2896  if (EOXactTupleDescArray == NULL)
2897  {
2898  MemoryContext oldcxt;
2899 
2901 
2902  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2905  MemoryContextSwitchTo(oldcxt);
2906  }
2908  {
2909  int32 newlen = EOXactTupleDescArrayLen * 2;
2910 
2912 
2913  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2914  newlen * sizeof(TupleDesc));
2915  EOXactTupleDescArrayLen = newlen;
2916  }
2917 
2918  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2919 }
2920 
2921 /*
2922  * AtEOXact_RelationCache
2923  *
2924  * Clean up the relcache at main-transaction commit or abort.
2925  *
2926  * Note: this must be called *before* processing invalidation messages.
2927  * In the case of abort, we don't want to try to rebuild any invalidated
2928  * cache entries (since we can't safely do database accesses). Therefore
2929  * we must reset refcnts before handling pending invalidations.
2930  *
2931  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2932  * ResourceOwner mechanism. This routine just does a debugging
2933  * cross-check that no pins remain. However, we also need to do special
2934  * cleanup when the current transaction created any relations or made use
2935  * of forced index lists.
2936  */
2937 void
2939 {
2941  RelIdCacheEnt *idhentry;
2942  int i;
2943 
2944  /*
2945  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2946  * listed in it. Otherwise fall back on a hash_seq_search scan.
2947  *
2948  * For simplicity, eoxact_list[] entries are not deleted till end of
2949  * top-level transaction, even though we could remove them at
2950  * subtransaction end in some cases, or remove relations from the list if
2951  * they are cleared for other reasons. Therefore we should expect the
2952  * case that list entries are not found in the hashtable; if not, there's
2953  * nothing to do for them.
2954  */
2956  {
2957  hash_seq_init(&status, RelationIdCache);
2958  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2959  {
2960  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2961  }
2962  }
2963  else
2964  {
2965  for (i = 0; i < eoxact_list_len; i++)
2966  {
2967  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2968  (void *) &eoxact_list[i],
2969  HASH_FIND,
2970  NULL);
2971  if (idhentry != NULL)
2972  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2973  }
2974  }
2975 
2976  if (EOXactTupleDescArrayLen > 0)
2977  {
2978  Assert(EOXactTupleDescArray != NULL);
2979  for (i = 0; i < NextEOXactTupleDescNum; i++)
2980  FreeTupleDesc(EOXactTupleDescArray[i]);
2981  pfree(EOXactTupleDescArray);
2982  EOXactTupleDescArray = NULL;
2983  }
2984 
2985  /* Now we're out of the transaction and can clear the lists */
2986  eoxact_list_len = 0;
2987  eoxact_list_overflowed = false;
2990 }
2991 
2992 /*
2993  * AtEOXact_cleanup
2994  *
2995  * Clean up a single rel at main-transaction commit or abort
2996  *
2997  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2998  * bother to prevent duplicate entries in eoxact_list[].
2999  */
3000 static void
3001 AtEOXact_cleanup(Relation relation, bool isCommit)
3002 {
3003  /*
3004  * The relcache entry's ref count should be back to its normal
3005  * not-in-a-transaction state: 0 unless it's nailed in cache.
3006  *
3007  * In bootstrap mode, this is NOT true, so don't check it --- the
3008  * bootstrap code expects relations to stay open across start/commit
3009  * transaction calls. (That seems bogus, but it's not worth fixing.)
3010  *
3011  * Note: ideally this check would be applied to every relcache entry, not
3012  * just those that have eoxact work to do. But it's not worth forcing a
3013  * scan of the whole relcache just for this. (Moreover, doing so would
3014  * mean that assert-enabled testing never tests the hash_search code path
3015  * above, which seems a bad idea.)
3016  */
3017 #ifdef USE_ASSERT_CHECKING
3019  {
3020  int expected_refcnt;
3021 
3022  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3023  Assert(relation->rd_refcnt == expected_refcnt);
3024  }
3025 #endif
3026 
3027  /*
3028  * Is it a relation created in the current transaction?
3029  *
3030  * During commit, reset the flag to zero, since we are now out of the
3031  * creating transaction. During abort, simply delete the relcache entry
3032  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3033  * new-ness of a new relation due to a forced cache flush, the entry will
3034  * get deleted anyway by shared-cache-inval processing of the aborted
3035  * pg_class insertion.)
3036  */
3037  if (relation->rd_createSubid != InvalidSubTransactionId)
3038  {
3039  if (isCommit)
3041  else if (RelationHasReferenceCountZero(relation))
3042  {
3043  RelationClearRelation(relation, false);
3044  return;
3045  }
3046  else
3047  {
3048  /*
3049  * Hmm, somewhere there's a (leaked?) reference to the relation.
3050  * We daren't remove the entry for fear of dereferencing a
3051  * dangling pointer later. Bleat, and mark it as not belonging to
3052  * the current transaction. Hopefully it'll get cleaned up
3053  * eventually. This must be just a WARNING to avoid
3054  * error-during-error-recovery loops.
3055  */
3057  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3058  RelationGetRelationName(relation));
3059  }
3060  }
3061 
3062  /*
3063  * Likewise, reset the hint about the relfilenode being new.
3064  */
3066 }
3067 
3068 /*
3069  * AtEOSubXact_RelationCache
3070  *
3071  * Clean up the relcache at sub-transaction commit or abort.
3072  *
3073  * Note: this must be called *before* processing invalidation messages.
3074  */
3075 void
3077  SubTransactionId parentSubid)
3078 {
3080  RelIdCacheEnt *idhentry;
3081  int i;
3082 
3083  /*
3084  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3085  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3086  * logic as in AtEOXact_RelationCache.
3087  */
3089  {
3090  hash_seq_init(&status, RelationIdCache);
3091  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3092  {
3093  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3094  mySubid, parentSubid);
3095  }
3096  }
3097  else
3098  {
3099  for (i = 0; i < eoxact_list_len; i++)
3100  {
3101  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3102  (void *) &eoxact_list[i],
3103  HASH_FIND,
3104  NULL);
3105  if (idhentry != NULL)
3106  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3107  mySubid, parentSubid);
3108  }
3109  }
3110 
3111  /* Don't reset the list; we still need more cleanup later */
3112 }
3113 
3114 /*
3115  * AtEOSubXact_cleanup
3116  *
3117  * Clean up a single rel at subtransaction commit or abort
3118  *
3119  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3120  * bother to prevent duplicate entries in eoxact_list[].
3121  */
3122 static void
3123 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3124  SubTransactionId mySubid, SubTransactionId parentSubid)
3125 {
3126  /*
3127  * Is it a relation created in the current subtransaction?
3128  *
3129  * During subcommit, mark it as belonging to the parent, instead. During
3130  * subabort, simply delete the relcache entry.
3131  */
3132  if (relation->rd_createSubid == mySubid)
3133  {
3134  if (isCommit)
3135  relation->rd_createSubid = parentSubid;
3136  else if (RelationHasReferenceCountZero(relation))
3137  {
3138  RelationClearRelation(relation, false);
3139  return;
3140  }
3141  else
3142  {
3143  /*
3144  * Hmm, somewhere there's a (leaked?) reference to the relation.
3145  * We daren't remove the entry for fear of dereferencing a
3146  * dangling pointer later. Bleat, and transfer it to the parent
3147  * subtransaction so we can try again later. This must be just a
3148  * WARNING to avoid error-during-error-recovery loops.
3149  */
3150  relation->rd_createSubid = parentSubid;
3151  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3152  RelationGetRelationName(relation));
3153  }
3154  }
3155 
3156  /*
3157  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3158  */
3159  if (relation->rd_newRelfilenodeSubid == mySubid)
3160  {
3161  if (isCommit)
3162  relation->rd_newRelfilenodeSubid = parentSubid;
3163  else
3165  }
3166 }
3167 
3168 
3169 /*
3170  * RelationBuildLocalRelation
3171  * Build a relcache entry for an about-to-be-created relation,
3172  * and enter it into the relcache.
3173  */
3174 Relation
3176  Oid relnamespace,
3177  TupleDesc tupDesc,
3178  Oid relid,
3179  Oid accessmtd,
3180  Oid relfilenode,
3181  Oid reltablespace,
3182  bool shared_relation,
3183  bool mapped_relation,
3184  char relpersistence,
3185  char relkind)
3186 {
3187  Relation rel;
3188  MemoryContext oldcxt;
3189  int natts = tupDesc->natts;
3190  int i;
3191  bool has_not_null;
3192  bool nailit;
3193 
3194  AssertArg(natts >= 0);
3195 
3196  /*
3197  * check for creation of a rel that must be nailed in cache.
3198  *
3199  * XXX this list had better match the relations specially handled in
3200  * RelationCacheInitializePhase2/3.
3201  */
3202  switch (relid)
3203  {
3204  case DatabaseRelationId:
3205  case AuthIdRelationId:
3206  case AuthMemRelationId:
3207  case RelationRelationId:
3208  case AttributeRelationId:
3209  case ProcedureRelationId:
3210  case TypeRelationId:
3211  nailit = true;
3212  break;
3213  default:
3214  nailit = false;
3215  break;
3216  }
3217 
3218  /*
3219  * check that hardwired list of shared rels matches what's in the
3220  * bootstrap .bki file. If you get a failure here during initdb, you
3221  * probably need to fix IsSharedRelation() to match whatever you've done
3222  * to the set of shared relations.
3223  */
3224  if (shared_relation != IsSharedRelation(relid))
3225  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3226  relname, relid);
3227 
3228  /* Shared relations had better be mapped, too */
3229  Assert(mapped_relation || !shared_relation);
3230 
3231  /*
3232  * switch to the cache context to create the relcache entry.
3233  */
3234  if (!CacheMemoryContext)
3236 
3238 
3239  /*
3240  * allocate a new relation descriptor and fill in basic state fields.
3241  */
3242  rel = (Relation) palloc0(sizeof(RelationData));
3243 
3244  /* make sure relation is marked as having no open file yet */
3245  rel->rd_smgr = NULL;
3246 
3247  /* mark it nailed if appropriate */
3248  rel->rd_isnailed = nailit;
3249 
3250  rel->rd_refcnt = nailit ? 1 : 0;
3251 
3252  /* it's being created in this transaction */
3255 
3256  /*
3257  * create a new tuple descriptor from the one passed in. We do this
3258  * partly to copy it into the cache context, and partly because the new
3259  * relation can't have any defaults or constraints yet; they have to be
3260  * added in later steps, because they require additions to multiple system
3261  * catalogs. We can copy attnotnull constraints here, however.
3262  */
3263  rel->rd_att = CreateTupleDescCopy(tupDesc);
3264  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3265  has_not_null = false;
3266  for (i = 0; i < natts; i++)
3267  {
3268  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3269  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3270 
3271  datt->attidentity = satt->attidentity;
3272  datt->attgenerated = satt->attgenerated;
3273  datt->attnotnull = satt->attnotnull;
3274  has_not_null |= satt->attnotnull;
3275  }
3276 
3277  if (has_not_null)
3278  {
3279  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3280 
3281  constr->has_not_null = true;
3282  rel->rd_att->constr = constr;
3283  }
3284 
3285  /*
3286  * initialize relation tuple form (caller may add/override data later)
3287  */
3289 
3290  namestrcpy(&rel->rd_rel->relname, relname);
3291  rel->rd_rel->relnamespace = relnamespace;
3292 
3293  rel->rd_rel->relkind = relkind;
3294  rel->rd_rel->relnatts = natts;
3295  rel->rd_rel->reltype = InvalidOid;
3296  /* needed when bootstrapping: */
3297  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3298 
3299  /* set up persistence and relcache fields dependent on it */
3300  rel->rd_rel->relpersistence = relpersistence;
3301  switch (relpersistence)
3302  {
3303  case RELPERSISTENCE_UNLOGGED:
3304  case RELPERSISTENCE_PERMANENT:
3306  rel->rd_islocaltemp = false;
3307  break;
3308  case RELPERSISTENCE_TEMP:
3309  Assert(isTempOrTempToastNamespace(relnamespace));
3311  rel->rd_islocaltemp = true;
3312  break;
3313  default:
3314  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3315  break;
3316  }
3317 
3318  /* if it's a materialized view, it's not populated initially */
3319  if (relkind == RELKIND_MATVIEW)
3320  rel->rd_rel->relispopulated = false;
3321  else
3322  rel->rd_rel->relispopulated = true;
3323 
3324  /* set replica identity -- system catalogs and non-tables don't have one */
3325  if (!IsCatalogNamespace(relnamespace) &&
3326  (relkind == RELKIND_RELATION ||
3327  relkind == RELKIND_MATVIEW ||
3328  relkind == RELKIND_PARTITIONED_TABLE))
3329  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3330  else
3331  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3332 
3333  /*
3334  * Insert relation physical and logical identifiers (OIDs) into the right
3335  * places. For a mapped relation, we set relfilenode to zero and rely on
3336  * RelationInitPhysicalAddr to consult the map.
3337  */
3338  rel->rd_rel->relisshared = shared_relation;
3339 
3340  RelationGetRelid(rel) = relid;
3341 
3342  for (i = 0; i < natts; i++)
3343  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3344 
3345  rel->rd_rel->reltablespace = reltablespace;
3346 
3347  if (mapped_relation)
3348  {
3349  rel->rd_rel->relfilenode = InvalidOid;
3350  /* Add it to the active mapping information */
3351  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3352  }
3353  else
3354  rel->rd_rel->relfilenode = relfilenode;
3355 
3356  RelationInitLockInfo(rel); /* see lmgr.c */
3357 
3359 
3360  rel->rd_rel->relam = accessmtd;
3361 
3362  if (relkind == RELKIND_RELATION ||
3363  relkind == RELKIND_SEQUENCE ||
3364  relkind == RELKIND_TOASTVALUE ||
3365  relkind == RELKIND_MATVIEW)
3367 
3368  /*
3369  * Okay to insert into the relcache hash table.
3370  *
3371  * Ordinarily, there should certainly not be an existing hash entry for
3372  * the same OID; but during bootstrap, when we create a "real" relcache
3373  * entry for one of the bootstrap relations, we'll be overwriting the
3374  * phony one created with formrdesc. So allow that to happen for nailed
3375  * rels.
3376  */
3377  RelationCacheInsert(rel, nailit);
3378 
3379  /*
3380  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3381  * can't do this before storing relid in it.
3382  */
3383  EOXactListAdd(rel);
3384 
3385  /*
3386  * done building relcache entry.
3387  */
3388  MemoryContextSwitchTo(oldcxt);
3389 
3390  /* It's fully valid */
3391  rel->rd_isvalid = true;
3392 
3393  /*
3394  * Caller expects us to pin the returned entry.
3395  */
3397 
3398  return rel;
3399 }
3400 
3401 
3402 /*
3403  * RelationSetNewRelfilenode
3404  *
3405  * Assign a new relfilenode (physical file name), and possibly a new
3406  * persistence setting, to the relation.
3407  *
3408  * This allows a full rewrite of the relation to be done with transactional
3409  * safety (since the filenode assignment can be rolled back). Note however
3410  * that there is no simple way to access the relation's old data for the
3411  * remainder of the current transaction. This limits the usefulness to cases
3412  * such as TRUNCATE or rebuilding an index from scratch.
3413  *
3414  * Caller must already hold exclusive lock on the relation.
3415  */
3416 void
3417 RelationSetNewRelfilenode(Relation relation, char persistence)
3418 {
3419  Oid newrelfilenode;
3420  Relation pg_class;
3421  HeapTuple tuple;
3422  Form_pg_class classform;
3423  MultiXactId minmulti = InvalidMultiXactId;
3424  TransactionId freezeXid = InvalidTransactionId;
3425  RelFileNode newrnode;
3426 
3427  /* Allocate a new relfilenode */
3428  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3429  persistence);
3430 
3431  /*
3432  * Get a writable copy of the pg_class tuple for the given relation.
3433  */
3434  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3435 
3436  tuple = SearchSysCacheCopy1(RELOID,
3437  ObjectIdGetDatum(RelationGetRelid(relation)));
3438  if (!HeapTupleIsValid(tuple))
3439  elog(ERROR, "could not find tuple for relation %u",
3440  RelationGetRelid(relation));
3441  classform = (Form_pg_class) GETSTRUCT(tuple);
3442 
3443  /*
3444  * Schedule unlinking of the old storage at transaction commit.
3445  */
3446  RelationDropStorage(relation);
3447 
3448  /*
3449  * Create storage for the main fork of the new relfilenode. If it's a
3450  * table-like object, call into the table AM to do so, which'll also
3451  * create the table's init fork if needed.
3452  *
3453  * NOTE: If relevant for the AM, any conflict in relfilenode value will be
3454  * caught here, if GetNewRelFileNode messes up for any reason.
3455  */
3456  newrnode = relation->rd_node;
3457  newrnode.relNode = newrelfilenode;
3458 
3459  switch (relation->rd_rel->relkind)
3460  {
3461  case RELKIND_INDEX:
3462  case RELKIND_SEQUENCE:
3463  {
3464  /* handle these directly, at least for now */
3465  SMgrRelation srel;
3466 
3467  srel = RelationCreateStorage(newrnode, persistence);
3468  smgrclose(srel);
3469  }
3470  break;
3471 
3472  case RELKIND_RELATION:
3473  case RELKIND_TOASTVALUE:
3474  case RELKIND_MATVIEW:
3475  table_relation_set_new_filenode(relation, &newrnode,
3476  persistence,
3477  &freezeXid, &minmulti);
3478  break;
3479 
3480  default:
3481  /* we shouldn't be called for anything else */
3482  elog(ERROR, "relation \"%s\" does not have storage",
3483  RelationGetRelationName(relation));
3484  break;
3485  }
3486 
3487  /*
3488  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3489  * change; instead we have to send the update to the relation mapper.
3490  *
3491  * For mapped indexes, we don't actually change the pg_class entry at all;
3492  * this is essential when reindexing pg_class itself. That leaves us with
3493  * possibly-inaccurate values of relpages etc, but those will be fixed up
3494  * later.
3495  */
3496  if (RelationIsMapped(relation))
3497  {
3498  /* This case is only supported for indexes */
3499  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3500 
3501  /* Since we're not updating pg_class, these had better not change */
3502  Assert(classform->relfrozenxid == freezeXid);
3503  Assert(classform->relminmxid == minmulti);
3504  Assert(classform->relpersistence == persistence);
3505 
3506  /*
3507  * In some code paths it's possible that the tuple update we'd
3508  * otherwise do here is the only thing that would assign an XID for
3509  * the current transaction. However, we must have an XID to delete
3510  * files, so make sure one is assigned.
3511  */
3512  (void) GetCurrentTransactionId();
3513 
3514  /* Do the deed */
3516  newrelfilenode,
3517  relation->rd_rel->relisshared,
3518  false);
3519 
3520  /* Since we're not updating pg_class, must trigger inval manually */
3521  CacheInvalidateRelcache(relation);
3522  }
3523  else
3524  {
3525  /* Normal case, update the pg_class entry */
3526  classform->relfilenode = newrelfilenode;
3527 
3528  /* relpages etc. never change for sequences */
3529  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3530  {
3531  classform->relpages = 0; /* it's empty until further notice */
3532  classform->reltuples = 0;
3533  classform->relallvisible = 0;
3534  }
3535  classform->relfrozenxid = freezeXid;
3536  classform->relminmxid = minmulti;
3537  classform->relpersistence = persistence;
3538 
3539  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3540  }
3541 
3542  heap_freetuple(tuple);
3543 
3544  table_close(pg_class, RowExclusiveLock);
3545 
3546  /*
3547  * Make the pg_class row change or relation map change visible. This will
3548  * cause the relcache entry to get updated, too.
3549  */
3551 
3552  /*
3553  * Mark the rel as having been given a new relfilenode in the current
3554  * (sub) transaction. This is a hint that can be used to optimize later
3555  * operations on the rel in the same transaction.
3556  */
3558 
3559  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3560  EOXactListAdd(relation);
3561 }
3562 
3563 
3564 /*
3565  * RelationCacheInitialize
3566  *
3567  * This initializes the relation descriptor cache. At the time
3568  * that this is invoked, we can't do database access yet (mainly
3569  * because the transaction subsystem is not up); all we are doing
3570  * is making an empty cache hashtable. This must be done before
3571  * starting the initialization transaction, because otherwise
3572  * AtEOXact_RelationCache would crash if that transaction aborts
3573  * before we can get the relcache set up.
3574  */
3575 
3576 #define INITRELCACHESIZE 400
3577 
3578 void
3580 {
3581  HASHCTL ctl;
3582 
3583  /*
3584  * make sure cache memory context exists
3585  */
3586  if (!CacheMemoryContext)
3588 
3589  /*
3590  * create hashtable that indexes the relcache
3591  */
3592  MemSet(&ctl, 0, sizeof(ctl));
3593  ctl.keysize = sizeof(Oid);
3594  ctl.entrysize = sizeof(RelIdCacheEnt);
3595  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3596  &ctl, HASH_ELEM | HASH_BLOBS);
3597 
3598  /*
3599  * relation mapper needs to be initialized too
3600  */
3602 }
3603 
3604 /*
3605  * RelationCacheInitializePhase2
3606  *
3607  * This is called to prepare for access to shared catalogs during startup.
3608  * We must at least set up nailed reldescs for pg_database, pg_authid,
3609  * pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
3610  * for their indexes, too. We attempt to load this information from the
3611  * shared relcache init file. If that's missing or broken, just make
3612  * phony entries for the catalogs themselves.
3613  * RelationCacheInitializePhase3 will clean up as needed.
3614  */
3615 void
3617 {
3618  MemoryContext oldcxt;
3619 
3620  /*
3621  * relation mapper needs initialized too
3622  */
3624 
3625  /*
3626  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3627  * nothing.
3628  */
3630  return;
3631 
3632  /*
3633  * switch to cache memory context
3634  */
3636 
3637  /*
3638  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3639  * the cache with pre-made descriptors for the critical shared catalogs.
3640  */
3641  if (!load_relcache_init_file(true))
3642  {
3643  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3644  Natts_pg_database, Desc_pg_database);
3645  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3646  Natts_pg_authid, Desc_pg_authid);
3647  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3648  Natts_pg_auth_members, Desc_pg_auth_members);
3649  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3650  Natts_pg_shseclabel, Desc_pg_shseclabel);
3651  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3652  Natts_pg_subscription, Desc_pg_subscription);
3653 
3654 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3655  }
3656 
3657  MemoryContextSwitchTo(oldcxt);
3658 }
3659 
3660 /*
3661  * RelationCacheInitializePhase3
3662  *
3663  * This is called as soon as the catcache and transaction system
3664  * are functional and we have determined MyDatabaseId. At this point
3665  * we can actually read data from the database's system catalogs.
3666  * We first try to read pre-computed relcache entries from the local
3667  * relcache init file. If that's missing or broken, make phony entries
3668  * for the minimum set of nailed-in-cache relations. Then (unless
3669  * bootstrapping) make sure we have entries for the critical system
3670  * indexes. Once we've done all this, we have enough infrastructure to
3671  * open any system catalog or use any catcache. The last step is to
3672  * rewrite the cache files if needed.
3673  */
3674 void
3676 {
3678  RelIdCacheEnt *idhentry;
3679  MemoryContext oldcxt;
3680  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3681 
3682  /*
3683  * relation mapper needs initialized too
3684  */
3686 
3687  /*
3688  * switch to cache memory context
3689  */
3691 
3692  /*
3693  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3694  * the cache with pre-made descriptors for the critical "nailed-in" system
3695  * catalogs.
3696  */
3697  if (IsBootstrapProcessingMode() ||
3698  !load_relcache_init_file(false))
3699  {
3700  needNewCacheFile = true;
3701 
3702  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3703  Natts_pg_class, Desc_pg_class);
3704  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3705  Natts_pg_attribute, Desc_pg_attribute);
3706  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3707  Natts_pg_proc, Desc_pg_proc);
3708  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3709  Natts_pg_type, Desc_pg_type);
3710 
3711 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3712  }
3713 
3714  MemoryContextSwitchTo(oldcxt);
3715 
3716  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3718  return;
3719 
3720  /*
3721  * If we didn't get the critical system indexes loaded into relcache, do
3722  * so now. These are critical because the catcache and/or opclass cache
3723  * depend on them for fetches done during relcache load. Thus, we have an
3724  * infinite-recursion problem. We can break the recursion by doing
3725  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3726  * performance, we only want to do that until we have the critical indexes
3727  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3728  * decide whether to do heapscan or indexscan at the key spots, and we set
3729  * it true after we've loaded the critical indexes.
3730  *
3731  * The critical indexes are marked as "nailed in cache", partly to make it
3732  * easy for load_relcache_init_file to count them, but mainly because we
3733  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3734  * true. (NOTE: perhaps it would be possible to reload them by
3735  * temporarily setting criticalRelcachesBuilt to false again. For now,
3736  * though, we just nail 'em in.)
3737  *
3738  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3739  * in the same way as the others, because the critical catalogs don't
3740  * (currently) have any rules or triggers, and so these indexes can be
3741  * rebuilt without inducing recursion. However they are used during
3742  * relcache load when a rel does have rules or triggers, so we choose to
3743  * nail them for performance reasons.
3744  */
3746  {
3748  RelationRelationId);
3750  AttributeRelationId);
3752  IndexRelationId);
3754  OperatorClassRelationId);
3756  AccessMethodProcedureRelationId);
3758  RewriteRelationId);
3760  TriggerRelationId);
3761 
3762 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3763 
3764  criticalRelcachesBuilt = true;
3765  }
3766 
3767  /*
3768  * Process critical shared indexes too.
3769  *
3770  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3771  * initial lookup of MyDatabaseId, without which we'll never find any
3772  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3773  * database OID, so it instead depends on DatabaseOidIndexId. We also
3774  * need to nail up some indexes on pg_authid and pg_auth_members for use
3775  * during client authentication. SharedSecLabelObjectIndexId isn't
3776  * critical for the core system, but authentication hooks might be
3777  * interested in it.
3778  */
3780  {
3782  DatabaseRelationId);
3784  DatabaseRelationId);
3786  AuthIdRelationId);
3788  AuthIdRelationId);
3790  AuthMemRelationId);
3792  SharedSecLabelRelationId);
3793 
3794 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3795 
3797  }
3798 
3799  /*
3800  * Now, scan all the relcache entries and update anything that might be
3801  * wrong in the results from formrdesc or the relcache cache file. If we
3802  * faked up relcache entries using formrdesc, then read the real pg_class
3803  * rows and replace the fake entries with them. Also, if any of the
3804  * relcache entries have rules, triggers, or security policies, load that
3805  * info the hard way since it isn't recorded in the cache file.
3806  *
3807  * Whenever we access the catalogs to read data, there is a possibility of
3808  * a shared-inval cache flush causing relcache entries to be removed.
3809  * Since hash_seq_search only guarantees to still work after the *current*
3810  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3811  * We handle this by restarting the scan from scratch after each access.
3812  * This is theoretically O(N^2), but the number of entries that actually
3813  * need to be fixed is small enough that it doesn't matter.
3814  */
3815  hash_seq_init(&status, RelationIdCache);
3816 
3817  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3818  {
3819  Relation relation = idhentry->reldesc;
3820  bool restart = false;
3821 
3822  /*
3823  * Make sure *this* entry doesn't get flushed while we work with it.
3824  */
3826 
3827  /*
3828  * If it's a faked-up entry, read the real pg_class tuple.
3829  */
3830  if (relation->rd_rel->relowner == InvalidOid)
3831  {
3832  HeapTuple htup;
3833  Form_pg_class relp;
3834 
3835  htup = SearchSysCache1(RELOID,
3836  ObjectIdGetDatum(RelationGetRelid(relation)));
3837  if (!HeapTupleIsValid(htup))
3838  elog(FATAL, "cache lookup failed for relation %u",
3839  RelationGetRelid(relation));
3840  relp = (Form_pg_class) GETSTRUCT(htup);
3841 
3842  /*
3843  * Copy tuple to relation->rd_rel. (See notes in
3844  * AllocateRelationDesc())
3845  */
3846  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3847 
3848  /* Update rd_options while we have the tuple */
3849  if (relation->rd_options)
3850  pfree(relation->rd_options);
3851  RelationParseRelOptions(relation, htup);
3852 
3853  /*
3854  * Check the values in rd_att were set up correctly. (We cannot
3855  * just copy them over now: formrdesc must have set up the rd_att
3856  * data correctly to start with, because it may already have been
3857  * copied into one or more catcache entries.)
3858  */
3859  Assert(relation->rd_att->tdtypeid == relp->reltype);
3860  Assert(relation->rd_att->tdtypmod == -1);
3861 
3862  ReleaseSysCache(htup);
3863 
3864  /* relowner had better be OK now, else we'll loop forever */
3865  if (relation->rd_rel->relowner == InvalidOid)
3866  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3867  RelationGetRelationName(relation));
3868 
3869  restart = true;
3870  }
3871 
3872  /*
3873  * Fix data that isn't saved in relcache cache file.
3874  *
3875  * relhasrules or relhastriggers could possibly be wrong or out of
3876  * date. If we don't actually find any rules or triggers, clear the
3877  * local copy of the flag so that we don't get into an infinite loop
3878  * here. We don't make any attempt to fix the pg_class entry, though.
3879  */
3880  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3881  {
3882  RelationBuildRuleLock(relation);
3883  if (relation->rd_rules == NULL)
3884  relation->rd_rel->relhasrules = false;
3885  restart = true;
3886  }
3887  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3888  {
3889  RelationBuildTriggers(relation);
3890  if (relation->trigdesc == NULL)
3891  relation->rd_rel->relhastriggers = false;
3892  restart = true;
3893  }
3894 
3895  /*
3896  * Re-load the row security policies if the relation has them, since
3897  * they are not preserved in the cache. Note that we can never NOT
3898  * have a policy while relrowsecurity is true,
3899  * RelationBuildRowSecurity will create a single default-deny policy
3900  * if there is no policy defined in pg_policy.
3901  */
3902  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3903  {
3904  RelationBuildRowSecurity(relation);
3905 
3906  Assert(relation->rd_rsdesc != NULL);
3907  restart = true;
3908  }
3909 
3910  /*
3911  * Reload the partition key and descriptor for a partitioned table.
3912  */
3913  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3914  relation->rd_partkey == NULL)
3915  {
3916  RelationBuildPartitionKey(relation);
3917  Assert(relation->rd_partkey != NULL);
3918 
3919  restart = true;
3920  }
3921 
3922  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3923  relation->rd_partdesc == NULL)
3924  {
3925  RelationBuildPartitionDesc(relation);
3926  Assert(relation->rd_partdesc != NULL);
3927 
3928  restart = true;
3929  }
3930 
3931  if (relation->rd_tableam == NULL &&
3932  (relation->rd_rel->relkind == RELKIND_RELATION ||
3933  relation->rd_rel->relkind == RELKIND_SEQUENCE ||
3934  relation->rd_rel->relkind == RELKIND_TOASTVALUE ||
3935  relation->rd_rel->relkind == RELKIND_MATVIEW))
3936  {
3938  Assert(relation->rd_tableam != NULL);
3939 
3940  restart = true;
3941  }
3942 
3943  /* Release hold on the relation */
3945 
3946  /* Now, restart the hashtable scan if needed */
3947  if (restart)
3948  {
3949  hash_seq_term(&status);
3950  hash_seq_init(&status, RelationIdCache);
3951  }
3952  }
3953 
3954  /*
3955  * Lastly, write out new relcache cache files if needed. We don't bother
3956  * to distinguish cases where only one of the two needs an update.
3957  */
3958  if (needNewCacheFile)
3959  {
3960  /*
3961  * Force all the catcaches to finish initializing and thereby open the
3962  * catalogs and indexes they use. This will preload the relcache with
3963  * entries for all the most important system catalogs and indexes, so
3964  * that the init files will be most useful for future backends.
3965  */
3967 
3968  /* now write the files */
3970  write_relcache_init_file(false);
3971  }
3972 }
3973 
3974 /*
3975  * Load one critical system index into the relcache
3976  *
3977  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3978  * it belongs to.
3979  */
3980 static void
3981 load_critical_index(Oid indexoid, Oid heapoid)
3982 {
3983  Relation ird;
3984 
3985  /*
3986  * We must lock the underlying catalog before locking the index to avoid
3987  * deadlock, since RelationBuildDesc might well need to read the catalog,
3988  * and if anyone else is exclusive-locking this catalog and index they'll
3989  * be doing it in that order.
3990  */
3991  LockRelationOid(heapoid, AccessShareLock);
3992  LockRelationOid(indexoid, AccessShareLock);
3993  ird = RelationBuildDesc(indexoid, true);
3994  if (ird == NULL)
3995  elog(PANIC, "could not open critical system index %u", indexoid);
3996  ird->rd_isnailed = true;
3997  ird->rd_refcnt = 1;
4000 }
4001 
4002 /*
4003  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4004  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4005  *
4006  * We need this kluge because we have to be able to access non-fixed-width
4007  * fields of pg_class and pg_index before we have the standard catalog caches
4008  * available. We use predefined data that's set up in just the same way as
4009  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4010  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4011  * does it have a TupleConstr field. But it's good enough for the purpose of
4012  * extracting fields.
4013  */
4014 static TupleDesc
4016 {
4017  TupleDesc result;
4018  MemoryContext oldcxt;
4019  int i;
4020 
4022 
4023  result = CreateTemplateTupleDesc(natts);
4024  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4025  result->tdtypmod = -1;
4026 
4027  for (i = 0; i < natts; i++)
4028  {
4029  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4030  /* make sure attcacheoff is valid */
4031  TupleDescAttr(result, i)->attcacheoff = -1;
4032  }
4033 
4034  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4035  TupleDescAttr(result, 0)->attcacheoff = 0;
4036 
4037  /* Note: we don't bother to set up a TupleConstr entry */
4038 
4039  MemoryContextSwitchTo(oldcxt);
4040 
4041  return result;
4042 }
4043 
4044 static TupleDesc
4046 {
4047  static TupleDesc pgclassdesc = NULL;
4048 
4049  /* Already done? */
4050  if (pgclassdesc == NULL)
4051  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4052  Desc_pg_class);
4053 
4054  return pgclassdesc;
4055 }
4056 
4057 static TupleDesc
4059 {
4060  static TupleDesc pgindexdesc = NULL;
4061 
4062  /* Already done? */
4063  if (pgindexdesc == NULL)
4064  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4065  Desc_pg_index);
4066 
4067  return pgindexdesc;
4068 }
4069 
4070 /*
4071  * Load any default attribute value definitions for the relation.
4072  */
4073 static void
4075 {
4076  AttrDefault *attrdef = relation->rd_att->constr->defval;
4077  int ndef = relation->rd_att->constr->num_defval;
4078  Relation adrel;
4079  SysScanDesc adscan;
4080  ScanKeyData skey;
4081  HeapTuple htup;
4082  Datum val;
4083  bool isnull;
4084  int found;
4085  int i;
4086 
4087  ScanKeyInit(&skey,
4088  Anum_pg_attrdef_adrelid,
4089  BTEqualStrategyNumber, F_OIDEQ,
4090  ObjectIdGetDatum(RelationGetRelid(relation)));
4091 
4092  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4093  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4094  NULL, 1, &skey);
4095  found = 0;
4096 
4097  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4098  {
4099  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4100  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4101 
4102  for (i = 0; i < ndef; i++)
4103  {
4104  if (adform->adnum != attrdef[i].adnum)
4105  continue;
4106  if (attrdef[i].adbin != NULL)
4107  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4108  NameStr(attr->attname),
4109  RelationGetRelationName(relation));
4110  else
4111  found++;
4112 
4113  val = fastgetattr(htup,
4114  Anum_pg_attrdef_adbin,
4115  adrel->rd_att, &isnull);
4116  if (isnull)
4117  elog(WARNING, "null adbin for attr %s of rel %s",
4118  NameStr(attr->attname),
4119  RelationGetRelationName(relation));
4120  else
4121  {
4122  /* detoast and convert to cstring in caller's context */
4123  char *s = TextDatumGetCString(val);
4124 
4126  pfree(s);
4127  }
4128  break;
4129  }
4130 
4131  if (i >= ndef)
4132  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4133  adform->adnum, RelationGetRelationName(relation));
4134  }
4135 
4136  systable_endscan(adscan);
4137  table_close(adrel, AccessShareLock);
4138 }
4139 
4140 /*
4141  * Load any check constraints for the relation.
4142  */
4143 static void
4145 {
4146  ConstrCheck *check = relation->rd_att->constr->check;
4147  int ncheck = relation->rd_att->constr->num_check;
4148  Relation conrel;
4149  SysScanDesc conscan;
4150  ScanKeyData skey[1];
4151  HeapTuple htup;
4152  int found = 0;
4153 
4154  ScanKeyInit(&skey[0],
4155  Anum_pg_constraint_conrelid,
4156  BTEqualStrategyNumber, F_OIDEQ,
4157  ObjectIdGetDatum(RelationGetRelid(relation)));
4158 
4159  conrel = table_open(ConstraintRelationId, AccessShareLock);
4160  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4161  NULL, 1, skey);
4162 
4163  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4164  {
4166  Datum val;
4167  bool isnull;
4168  char *s;
4169 
4170  /* We want check constraints only */
4171  if (conform->contype != CONSTRAINT_CHECK)
4172  continue;
4173 
4174  if (found >= ncheck)
4175  elog(ERROR, "unexpected constraint record found for rel %s",
4176  RelationGetRelationName(relation));
4177 
4178  check[found].ccvalid = conform->convalidated;
4179  check[found].ccnoinherit = conform->connoinherit;
4181  NameStr(conform->conname));
4182 
4183  /* Grab and test conbin is actually set */
4184  val = fastgetattr(htup,
4185  Anum_pg_constraint_conbin,
4186  conrel->rd_att, &isnull);
4187  if (isnull)
4188  elog(ERROR, "null conbin for rel %s",
4189  RelationGetRelationName(relation));
4190 
4191  /* detoast and convert to cstring in caller's context */
4192  s = TextDatumGetCString(val);
4193  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4194  pfree(s);
4195 
4196  found++;
4197  }
4198 
4199  systable_endscan(conscan);
4200  table_close(conrel, AccessShareLock);
4201 
4202  if (found != ncheck)
4203  elog(ERROR, "%d constraint record(s) missing for rel %s",
4204  ncheck - found, RelationGetRelationName(relation));
4205 
4206  /* Sort the records so that CHECKs are applied in a deterministic order */
4207  if (ncheck > 1)
4208  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4209 }
4210 
4211 /*
4212  * qsort comparator to sort ConstrCheck entries by name
4213  */
4214 static int
4215 CheckConstraintCmp(const void *a, const void *b)
4216 {
4217  const ConstrCheck *ca = (const ConstrCheck *) a;
4218  const ConstrCheck *cb = (const ConstrCheck *) b;
4219 
4220  return strcmp(ca->ccname, cb->ccname);
4221 }
4222 
4223 /*
4224  * RelationGetFKeyList -- get a list of foreign key info for the relation
4225  *
4226  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4227  * the given relation. This data is a direct copy of relevant fields from
4228  * pg_constraint. The list items are in no particular order.
4229  *
4230  * CAUTION: the returned list is part of the relcache's data, and could
4231  * vanish in a relcache entry reset. Callers must inspect or copy it
4232  * before doing anything that might trigger a cache flush, such as
4233  * system catalog accesses. copyObject() can be used if desired.
4234  * (We define it this way because current callers want to filter and
4235  * modify the list entries anyway, so copying would be a waste of time.)
4236  */
4237 List *
4239 {
4240  List *result;
4241  Relation conrel;
4242  SysScanDesc conscan;
4243  ScanKeyData skey;
4244  HeapTuple htup;
4245  List *oldlist;
4246  MemoryContext oldcxt;
4247 
4248  /* Quick exit if we already computed the list. */
4249  if (relation->rd_fkeyvalid)
4250  return relation->rd_fkeylist;
4251 
4252  /* Fast path: non-partitioned tables without triggers can't have FKs */
4253  if (!relation->rd_rel->relhastriggers &&
4254  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4255  return NIL;
4256 
4257  /*
4258  * We build the list we intend to return (in the caller's context) while
4259  * doing the scan. After successfully completing the scan, we copy that
4260  * list into the relcache entry. This avoids cache-context memory leakage
4261  * if we get some sort of error partway through.
4262  */
4263  result = NIL;
4264 
4265  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4266  ScanKeyInit(&skey,
4267  Anum_pg_constraint_conrelid,
4268  BTEqualStrategyNumber, F_OIDEQ,
4269  ObjectIdGetDatum(RelationGetRelid(relation)));
4270 
4271  conrel = table_open(ConstraintRelationId, AccessShareLock);
4272  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4273  NULL, 1, &skey);
4274 
4275  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4276  {
4277  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4278  ForeignKeyCacheInfo *info;
4279 
4280  /* consider only foreign keys */
4281  if (constraint->contype != CONSTRAINT_FOREIGN)
4282  continue;
4283 
4284  info = makeNode(ForeignKeyCacheInfo);
4285  info->conoid = constraint->oid;
4286  info->conrelid = constraint->conrelid;
4287  info->confrelid = constraint->confrelid;
4288 
4289  DeconstructFkConstraintRow(htup, &info->nkeys,
4290  info->conkey,
4291  info->confkey,
4292  info->conpfeqop,
4293  NULL, NULL);
4294 
4295  /* Add FK's node to the result list */
4296  result = lappend(result, info);
4297  }
4298 
4299  systable_endscan(conscan);
4300  table_close(conrel, AccessShareLock);
4301 
4302  /* Now save a copy of the completed list in the relcache entry. */
4304  oldlist = relation->rd_fkeylist;
4305  relation->rd_fkeylist = copyObject(result);
4306  relation->rd_fkeyvalid = true;
4307  MemoryContextSwitchTo(oldcxt);
4308 
4309  /* Don't leak the old list, if there is one */
4310  list_free_deep(oldlist);
4311 
4312  return result;
4313 }
4314 
4315 /*
4316  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4317  *
4318  * The index list is created only if someone requests it. We scan pg_index
4319  * to find relevant indexes, and add the list to the relcache entry so that
4320  * we won't have to compute it again. Note that shared cache inval of a
4321  * relcache entry will delete the old list and set rd_indexvalid to false,
4322  * so that we must recompute the index list on next request. This handles
4323  * creation or deletion of an index.
4324  *
4325  * Indexes that are marked not indislive are omitted from the returned list.
4326  * Such indexes are expected to be dropped momentarily, and should not be
4327  * touched at all by any caller of this function.
4328  *
4329  * The returned list is guaranteed to be sorted in order by OID. This is
4330  * needed by the executor, since for index types that we obtain exclusive
4331  * locks on when updating the index, all backends must lock the indexes in
4332  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4333  * consistent ordering would do, but ordering by OID is easy.
4334  *
4335  * Since shared cache inval causes the relcache's copy of the list to go away,
4336  * we return a copy of the list palloc'd in the caller's context. The caller
4337  * may list_free() the returned list after scanning it. This is necessary
4338  * since the caller will typically be doing syscache lookups on the relevant
4339  * indexes, and syscache lookup could cause SI messages to be processed!
4340  *
4341  * In exactly the same way, we update rd_pkindex, which is the OID of the
4342  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4343  * which is the pg_class OID of an index to be used as the relation's
4344  * replication identity index, or InvalidOid if there is no such index.
4345  */
4346 List *
4348 {
4349  Relation indrel;
4350  SysScanDesc indscan;
4351  ScanKeyData skey;
4352  HeapTuple htup;
4353  List *result;
4354  List *oldlist;
4355  char replident = relation->rd_rel->relreplident;
4356  Oid pkeyIndex = InvalidOid;
4357  Oid candidateIndex = InvalidOid;
4358  MemoryContext oldcxt;
4359 
4360  /* Quick exit if we already computed the list. */
4361  if (relation->rd_indexvalid)
4362  return list_copy(relation->rd_indexlist);
4363 
4364  /*
4365  * We build the list we intend to return (in the caller's context) while
4366  * doing the scan. After successfully completing the scan, we copy that
4367  * list into the relcache entry. This avoids cache-context memory leakage
4368  * if we get some sort of error partway through.
4369  */
4370  result = NIL;
4371 
4372  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4373  ScanKeyInit(&skey,
4374  Anum_pg_index_indrelid,
4375  BTEqualStrategyNumber, F_OIDEQ,
4376  ObjectIdGetDatum(RelationGetRelid(relation)));
4377 
4378  indrel = table_open(IndexRelationId, AccessShareLock);
4379  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4380  NULL, 1, &skey);
4381 
4382  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4383  {
4385 
4386  /*
4387  * Ignore any indexes that are currently being dropped. This will
4388  * prevent them from being searched, inserted into, or considered in
4389  * HOT-safety decisions. It's unsafe to touch such an index at all
4390  * since its catalog entries could disappear at any instant.
4391  */
4392  if (!index->indislive)
4393  continue;
4394 
4395  /* add index's OID to result list */
4396  result = lappend_oid(result, index->indexrelid);
4397 
4398  /*
4399  * Invalid, non-unique, non-immediate or predicate indexes aren't
4400  * interesting for either oid indexes or replication identity indexes,
4401  * so don't check them.
4402  */
4403  if (!index->indisvalid || !index->indisunique ||
4404  !index->indimmediate ||
4405  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4406  continue;
4407 
4408  /* remember primary key index if any */
4409  if (index->indisprimary)
4410  pkeyIndex = index->indexrelid;
4411 
4412  /* remember explicitly chosen replica index */
4413  if (index->indisreplident)
4414  candidateIndex = index->indexrelid;
4415  }
4416 
4417  systable_endscan(indscan);
4418 
4419  table_close(indrel, AccessShareLock);
4420 
4421  /* Sort the result list into OID order, per API spec. */
4422  list_sort(result, list_oid_cmp);
4423 
4424  /* Now save a copy of the completed list in the relcache entry. */
4426  oldlist = relation->rd_indexlist;
4427  relation->rd_indexlist = list_copy(result);
4428  relation->rd_pkindex = pkeyIndex;
4429  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4430  relation->rd_replidindex = pkeyIndex;
4431  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4432  relation->rd_replidindex = candidateIndex;
4433  else
4434  relation->rd_replidindex = InvalidOid;
4435  relation->rd_indexvalid = true;
4436  MemoryContextSwitchTo(oldcxt);
4437 
4438  /* Don't leak the old list, if there is one */
4439  list_free(oldlist);
4440 
4441  return result;
4442 }
4443 
4444 /*
4445  * RelationGetStatExtList
4446  * get a list of OIDs of statistics objects on this relation
4447  *
4448  * The statistics list is created only if someone requests it, in a way
4449  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4450  * relevant statistics, and add the list to the relcache entry so that we
4451  * won't have to compute it again. Note that shared cache inval of a
4452  * relcache entry will delete the old list and set rd_statvalid to 0,
4453  * so that we must recompute the statistics list on next request. This
4454  * handles creation or deletion of a statistics object.
4455  *
4456  * The returned list is guaranteed to be sorted in order by OID, although
4457  * this is not currently needed.
4458  *
4459  * Since shared cache inval causes the relcache's copy of the list to go away,
4460  * we return a copy of the list palloc'd in the caller's context. The caller
4461  * may list_free() the returned list after scanning it. This is necessary
4462  * since the caller will typically be doing syscache lookups on the relevant
4463  * statistics, and syscache lookup could cause SI messages to be processed!
4464  */
4465 List *
4467 {
4468  Relation indrel;
4469  SysScanDesc indscan;
4470  ScanKeyData skey;
4471  HeapTuple htup;
4472  List *result;
4473  List *oldlist;
4474  MemoryContext oldcxt;
4475 
4476  /* Quick exit if we already computed the list. */
4477  if (relation->rd_statvalid != 0)
4478  return list_copy(relation->rd_statlist);
4479 
4480  /*
4481  * We build the list we intend to return (in the caller's context) while
4482  * doing the scan. After successfully completing the scan, we copy that
4483  * list into the relcache entry. This avoids cache-context memory leakage
4484  * if we get some sort of error partway through.
4485  */
4486  result = NIL;
4487 
4488  /*
4489  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4490  * rel.
4491  */
4492  ScanKeyInit(&skey,
4493  Anum_pg_statistic_ext_stxrelid,
4494  BTEqualStrategyNumber, F_OIDEQ,
4495  ObjectIdGetDatum(RelationGetRelid(relation)));
4496 
4497  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4498  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4499  NULL, 1, &skey);
4500 
4501  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4502  {
4503  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4504 
4505  result = lappend_oid(result, oid);
4506  }
4507 
4508  systable_endscan(indscan);
4509 
4510  table_close(indrel, AccessShareLock);
4511 
4512  /* Sort the result list into OID order, per API spec. */
4513  list_sort(result, list_oid_cmp);
4514 
4515  /* Now save a copy of the completed list in the relcache entry. */
4517  oldlist = relation->rd_statlist;
4518  relation->rd_statlist = list_copy(result);
4519 
4520  relation->rd_statvalid = true;
4521  MemoryContextSwitchTo(oldcxt);
4522 
4523  /* Don't leak the old list, if there is one */
4524  list_free(oldlist);
4525 
4526  return result;
4527 }
4528 
4529 /*
4530  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4531  *
4532  * Returns InvalidOid if there is no such index.
4533  */
4534 Oid
4536 {
4537  List *ilist;
4538 
4539  if (!relation->rd_indexvalid)
4540  {
4541  /* RelationGetIndexList does the heavy lifting. */
4542  ilist = RelationGetIndexList(relation);
4543  list_free(ilist);
4544  Assert(relation->rd_indexvalid);
4545  }
4546 
4547  return relation->rd_pkindex;
4548 }
4549 
4550 /*
4551  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4552  *
4553  * Returns InvalidOid if there is no such index.
4554  */
4555 Oid
4557 {
4558  List *ilist;
4559 
4560  if (!relation->rd_indexvalid)
4561  {
4562  /* RelationGetIndexList does the heavy lifting. */
4563  ilist = RelationGetIndexList(relation);
4564  list_free(ilist);
4565  Assert(relation->rd_indexvalid);
4566  }
4567 
4568  return relation->rd_replidindex;
4569 }
4570 
4571 /*
4572  * RelationGetIndexExpressions -- get the index expressions for an index
4573  *
4574  * We cache the result of transforming pg_index.indexprs into a node tree.
4575  * If the rel is not an index or has no expressional columns, we return NIL.
4576  * Otherwise, the returned tree is copied into the caller's memory context.
4577  * (We don't want to return a pointer to the relcache copy, since it could
4578  * disappear due to relcache invalidation.)
4579  */
4580 List *
4582 {
4583  List *result;
4584  Datum exprsDatum;
4585  bool isnull;
4586  char *exprsString;
4587  MemoryContext oldcxt;
4588 
4589  /* Quick exit if we already computed the result. */
4590  if (relation->rd_indexprs)
4591  return copyObject(relation->rd_indexprs);
4592 
4593  /* Quick exit if there is nothing to do. */
4594  if (relation->rd_indextuple == NULL ||
4595  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4596  return NIL;
4597 
4598  /*
4599  * We build the tree we intend to return in the caller's context. After
4600  * successfully completing the work, we copy it into the relcache entry.
4601  * This avoids problems if we get some sort of error partway through.
4602  */
4603  exprsDatum = heap_getattr(relation->rd_indextuple,
4604  Anum_pg_index_indexprs,
4606  &isnull);
4607  Assert(!isnull);
4608  exprsString = TextDatumGetCString(exprsDatum);
4609  result = (List *) stringToNode(exprsString);
4610  pfree(exprsString);
4611 
4612  /*
4613  * Run the expressions through eval_const_expressions. This is not just an
4614  * optimization, but is necessary, because the planner will be comparing
4615  * them to similarly-processed qual clauses, and may fail to detect valid
4616  * matches without this. We must not use canonicalize_qual, however,
4617  * since these aren't qual expressions.
4618  */
4619  result = (List *) eval_const_expressions(NULL, (Node *) result);
4620 
4621  /* May as well fix opfuncids too */
4622  fix_opfuncids((Node *) result);
4623 
4624  /* Now save a copy of the completed tree in the relcache entry. */
4625  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4626  relation->rd_indexprs = copyObject(result);
4627  MemoryContextSwitchTo(oldcxt);
4628 
4629  return result;
4630 }
4631 
4632 /*
4633  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
4634  *
4635  * Return a list of dummy expressions (just Const nodes) with the same
4636  * types/typmods/collations as the index's real expressions. This is
4637  * useful in situations where we don't want to run any user-defined code.
4638  */
4639 List *
4641 {
4642  List *result;
4643  Datum exprsDatum;
4644  bool isnull;
4645  char *exprsString;
4646  List *rawExprs;
4647  ListCell *lc;
4648 
4649  /* Quick exit if there is nothing to do. */
4650  if (relation->rd_indextuple == NULL ||
4651  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4652  return NIL;
4653 
4654  /* Extract raw node tree(s) from index tuple. */
4655  exprsDatum = heap_getattr(relation->rd_indextuple,
4656  Anum_pg_index_indexprs,
4658  &isnull);
4659  Assert(!isnull);
4660  exprsString = TextDatumGetCString(exprsDatum);
4661  rawExprs = (List *) stringToNode(exprsString);
4662  pfree(exprsString);
4663 
4664  /* Construct null Consts; the typlen and typbyval are arbitrary. */
4665  result = NIL;
4666  foreach(lc, rawExprs)
4667  {
4668  Node *rawExpr = (Node *) lfirst(lc);
4669 
4670  result = lappend(result,
4671  makeConst(exprType(rawExpr),
4672  exprTypmod(rawExpr),
4673  exprCollation(rawExpr),
4674  1,
4675  (Datum) 0,
4676  true,
4677  true));
4678  }
4679 
4680  return result;
4681 }
4682 
4683 /*
4684  * RelationGetIndexPredicate -- get the index predicate for an index
4685  *
4686  * We cache the result of transforming pg_index.indpred into an implicit-AND
4687  * node tree (suitable for use in planning).
4688  * If the rel is not an index or has no predicate, we return NIL.
4689  * Otherwise, the returned tree is copied into the caller's memory context.
4690  * (We don't want to return a pointer to the relcache copy, since it could
4691  * disappear due to relcache invalidation.)
4692  */
4693 List *
4695 {
4696  List *result;
4697  Datum predDatum;
4698  bool isnull;
4699  char *predString;
4700  MemoryContext oldcxt;
4701 
4702  /* Quick exit if we already computed the result. */
4703  if (relation->rd_indpred)
4704  return copyObject(relation->rd_indpred);
4705 
4706  /* Quick exit if there is nothing to do. */
4707  if (relation->rd_indextuple == NULL ||
4708  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4709  return NIL;
4710 
4711  /*
4712  * We build the tree we intend to return in the caller's context. After
4713  * successfully completing the work, we copy it into the relcache entry.
4714  * This avoids problems if we get some sort of error partway through.
4715  */
4716  predDatum = heap_getattr(relation->rd_indextuple,
4717  Anum_pg_index_indpred,
4719  &isnull);
4720  Assert(!isnull);
4721  predString = TextDatumGetCString(predDatum);
4722  result = (List *) stringToNode(predString);
4723  pfree(predString);
4724 
4725  /*
4726  * Run the expression through const-simplification and canonicalization.
4727  * This is not just an optimization, but is necessary, because the planner
4728  * will be comparing it to similarly-processed qual clauses, and may fail
4729  * to detect valid matches without this. This must match the processing
4730  * done to qual clauses in preprocess_expression()! (We can skip the
4731  * stuff involving subqueries, however, since we don't allow any in index
4732  * predicates.)
4733  */
4734  result = (List *) eval_const_expressions(NULL, (Node *) result);
4735 
4736  result = (List *) canonicalize_qual((Expr *) result, false);
4737 
4738  /* Also convert to implicit-AND format */
4739  result = make_ands_implicit((Expr *) result);
4740 
4741  /* May as well fix opfuncids too */
4742  fix_opfuncids((Node *) result);
4743 
4744  /* Now save a copy of the completed tree in the relcache entry. */
4745  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4746  relation->rd_indpred = copyObject(result);
4747  MemoryContextSwitchTo(oldcxt);
4748 
4749  return result;
4750 }
4751 
4752 /*
4753  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4754  *
4755  * The result has a bit set for each attribute used anywhere in the index
4756  * definitions of all the indexes on this relation. (This includes not only
4757  * simple index keys, but attributes used in expressions and partial-index
4758  * predicates.)
4759  *
4760  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4761  * for all potential foreign key columns, or for all columns in the configured
4762  * replica identity index is returned.
4763  *
4764  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4765  * we can include system attributes (e.g., OID) in the bitmap representation.
4766  *
4767  * Caller had better hold at least RowExclusiveLock on the target relation
4768  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4769  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4770  * that lock level doesn't guarantee a stable set of indexes, so we have to
4771  * be prepared to retry here in case of a change in the set of indexes.
4772  *
4773  * The returned result is palloc'd in the caller's memory context and should
4774  * be bms_free'd when not needed anymore.
4775  */
4776 Bitmapset *
4778 {
4779  Bitmapset *indexattrs; /* indexed columns */
4780  Bitmapset *uindexattrs; /* columns in unique indexes */
4781  Bitmapset *pkindexattrs; /* columns in the primary index */
4782  Bitmapset *idindexattrs; /* columns in the replica identity */
4783  List *indexoidlist;
4784  List *newindexoidlist;
4785  Oid relpkindex;
4786  Oid relreplindex;
4787  ListCell *l;
4788  MemoryContext oldcxt;
4789 
4790  /* Quick exit if we already computed the result. */
4791  if (relation->rd_indexattr != NULL)
4792  {
4793  switch (attrKind)
4794  {
4795  case INDEX_ATTR_BITMAP_ALL:
4796  return bms_copy(relation->rd_indexattr);
4797  case INDEX_ATTR_BITMAP_KEY:
4798  return bms_copy(relation->rd_keyattr);
4800  return bms_copy(relation->rd_pkattr);
4802  return bms_copy(relation->rd_idattr);
4803  default:
4804  elog(ERROR, "unknown attrKind %u", attrKind);
4805  }
4806  }
4807 
4808  /* Fast path if definitely no indexes */
4809  if (!RelationGetForm(relation)->relhasindex)
4810  return NULL;
4811 
4812  /*
4813  * Get cached list of index OIDs. If we have to start over, we do so here.
4814  */
4815 restart:
4816  indexoidlist = RelationGetIndexList(relation);
4817 
4818  /* Fall out if no indexes (but relhasindex was set) */
4819  if (indexoidlist == NIL)
4820  return NULL;
4821 
4822  /*
4823  * Copy the rd_pkindex and rd_replidindex values computed by
4824  * RelationGetIndexList before proceeding. This is needed because a
4825  * relcache flush could occur inside index_open below, resetting the
4826  * fields managed by RelationGetIndexList. We need to do the work with
4827  * stable values of these fields.
4828  */
4829  relpkindex = relation->rd_pkindex;
4830  relreplindex = relation->rd_replidindex;
4831 
4832  /*
4833  * For each index, add referenced attributes to indexattrs.
4834  *
4835  * Note: we consider all indexes returned by RelationGetIndexList, even if
4836  * they are not indisready or indisvalid. This is important because an
4837  * index for which CREATE INDEX CONCURRENTLY has just started must be
4838  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4839  * CONCURRENTLY is far enough along that we should ignore the index, it
4840  * won't be returned at all by RelationGetIndexList.
4841  */
4842  indexattrs = NULL;
4843  uindexattrs = NULL;
4844  pkindexattrs = NULL;
4845  idindexattrs = NULL;
4846  foreach(l, indexoidlist)
4847  {
4848  Oid indexOid = lfirst_oid(l);
4849  Relation indexDesc;
4850  Datum datum;
4851  bool isnull;
4852  Node *indexExpressions;
4853  Node *indexPredicate;
4854  int i;
4855  bool isKey; /* candidate key */
4856  bool isPK; /* primary key */
4857  bool isIDKey; /* replica identity index */
4858 
4859  indexDesc = index_open(indexOid, AccessShareLock);
4860 
4861  /*
4862  * Extract index expressions and index predicate. Note: Don't use
4863  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
4864  * those might run constant expressions evaluation, which needs a
4865  * snapshot, which we might not have here. (Also, it's probably more
4866  * sound to collect the bitmaps before any transformations that might
4867  * eliminate columns, but the practical impact of this is limited.)
4868  */
4869 
4870  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
4871  GetPgIndexDescriptor(), &isnull);
4872  if (!isnull)
4873  indexExpressions = stringToNode(TextDatumGetCString(datum));
4874  else
4875  indexExpressions = NULL;
4876 
4877  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
4878  GetPgIndexDescriptor(), &isnull);
4879  if (!isnull)
4880  indexPredicate = stringToNode(TextDatumGetCString(datum));
4881  else
4882  indexPredicate = NULL;
4883 
4884  /* Can this index be referenced by a foreign key? */
4885  isKey = indexDesc->rd_index->indisunique &&
4886  indexExpressions == NULL &&
4887  indexPredicate == NULL;
4888 
4889  /* Is this a primary key? */
4890  isPK = (indexOid == relpkindex);
4891 
4892  /* Is this index the configured (or default) replica identity? */
4893  isIDKey = (indexOid == relreplindex);
4894 
4895  /* Collect simple attribute references */
4896  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
4897  {
4898  int attrnum = indexDesc->rd_index->indkey.values[i];
4899 
4900  /*
4901  * Since we have covering indexes with non-key columns, we must
4902  * handle them accurately here. non-key columns must be added into
4903  * indexattrs, since they are in index, and HOT-update shouldn't
4904  * miss them. Obviously, non-key columns couldn't be referenced by
4905  * foreign key or identity key. Hence we do not include them into
4906  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
4907  */
4908  if (attrnum != 0)
4909  {
4910  indexattrs = bms_add_member(indexattrs,
4912 
4913  if (isKey && i < indexDesc->rd_index->indnkeyatts)
4914  uindexattrs = bms_add_member(uindexattrs,
4916 
4917  if (isPK && i < indexDesc->rd_index->indnkeyatts)
4918  pkindexattrs = bms_add_member(pkindexattrs,
4920 
4921  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
4922  idindexattrs = bms_add_member(idindexattrs,
4924  }
4925  }
4926 
4927  /* Collect all attributes used in expressions, too */
4928  pull_varattnos(indexExpressions, 1, &indexattrs);
4929 
4930  /* Collect all attributes in the index predicate, too */
4931  pull_varattnos(indexPredicate, 1, &indexattrs);
4932 
4933  index_close(indexDesc, AccessShareLock);
4934  }
4935 
4936  /*
4937  * During one of the index_opens in the above loop, we might have received
4938  * a relcache flush event on this relcache entry, which might have been
4939  * signaling a change in the rel's index list. If so, we'd better start
4940  * over to ensure we deliver up-to-date attribute bitmaps.
4941  */
4942  newindexoidlist = RelationGetIndexList(relation);
4943  if (equal(indexoidlist, newindexoidlist) &&
4944  relpkindex == relation->rd_pkindex &&
4945  relreplindex == relation->rd_replidindex)
4946  {
4947  /* Still the same index set, so proceed */
4948  list_free(newindexoidlist);
4949  list_free(indexoidlist);
4950  }
4951  else
4952  {
4953  /* Gotta do it over ... might as well not leak memory */
4954  list_free(newindexoidlist);
4955  list_free(indexoidlist);
4956  bms_free(uindexattrs);
4957  bms_free(pkindexattrs);
4958  bms_free(idindexattrs);
4959  bms_free(indexattrs);
4960 
4961  goto restart;
4962  }
4963 
4964  /* Don't leak the old values of these bitmaps, if any */
4965  bms_free(relation->rd_indexattr);
4966  relation->rd_indexattr = NULL;
4967  bms_free(relation->rd_keyattr);
4968  relation->rd_keyattr = NULL;
4969  bms_free(relation->rd_pkattr);
4970  relation->rd_pkattr = NULL;
4971  bms_free(relation->rd_idattr);
4972  relation->rd_idattr = NULL;
4973 
4974  /*
4975  * Now save copies of the bitmaps in the relcache entry. We intentionally
4976  * set rd_indexattr last, because that's the one that signals validity of
4977  * the values; if we run out of memory before making that copy, we won't
4978  * leave the relcache entry looking like the other ones are valid but
4979  * empty.
4980  */
4982  relation->rd_keyattr = bms_copy(uindexattrs);
4983  relation->rd_pkattr = bms_copy(pkindexattrs);
4984  relation->rd_idattr = bms_copy(idindexattrs);
4985  relation->rd_indexattr = bms_copy(indexattrs);
4986  MemoryContextSwitchTo(oldcxt);
4987 
4988  /* We return our original working copy for caller to play with */
4989  switch (attrKind)
4990  {
4991  case INDEX_ATTR_BITMAP_ALL:
4992  return indexattrs;
4993  case INDEX_ATTR_BITMAP_KEY:
4994  return uindexattrs;
4996  return pkindexattrs;
4998  return idindexattrs;
4999  default:
5000  elog(ERROR, "unknown attrKind %u", attrKind);
5001  return NULL;
5002  }
5003 }
5004 
5005 /*
5006  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5007  *
5008  * This should be called only for an index that is known to have an
5009  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5010  * context) of the exclusion operator OIDs, their underlying functions'
5011  * OIDs, and their strategy numbers in the index's opclasses. We cache
5012  * all this information since it requires a fair amount of work to get.
5013  */
5014 void
5016  Oid **operators,
5017  Oid **procs,
5018  uint16 **strategies)
5019 {
5020  int indnkeyatts;
5021  Oid *ops;
5022  Oid *funcs;
5023  uint16 *strats;
5024  Relation conrel;
5025  SysScanDesc conscan;
5026  ScanKeyData skey[1];
5027  HeapTuple htup;
5028  bool found;
5029  MemoryContext oldcxt;
5030  int i;
5031 
5032  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5033 
5034  /* Allocate result space in caller context */
5035  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5036  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5037  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5038 
5039  /* Quick exit if we have the data cached already */
5040  if (indexRelation->rd_exclstrats != NULL)
5041  {
5042  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5043  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5044  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5045  return;
5046  }
5047 
5048  /*
5049  * Search pg_constraint for the constraint associated with the index. To
5050  * make this not too painfully slow, we use the index on conrelid; that
5051  * will hold the parent relation's OID not the index's own OID.
5052  *
5053  * Note: if we wanted to rely on the constraint name matching the index's
5054  * name, we could just do a direct lookup using pg_constraint's unique
5055  * index. For the moment it doesn't seem worth requiring that.
5056  */
5057  ScanKeyInit(&skey[0],
5058  Anum_pg_constraint_conrelid,
5059  BTEqualStrategyNumber, F_OIDEQ,
5060  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5061 
5062  conrel = table_open(ConstraintRelationId, AccessShareLock);
5063  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5064  NULL, 1, skey);
5065  found = false;
5066 
5067  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5068  {
5070  Datum val;
5071  bool isnull;
5072  ArrayType *arr;
5073  int nelem;
5074 
5075  /* We want the exclusion constraint owning the index */
5076  if (conform->contype != CONSTRAINT_EXCLUSION ||
5077  conform->conindid != RelationGetRelid(indexRelation))
5078  continue;
5079 
5080  /* There should be only one */
5081  if (found)
5082  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5083  RelationGetRelationName(indexRelation));
5084  found = true;
5085 
5086  /* Extract the operator OIDS from conexclop */
5087  val = fastgetattr(htup,
5088  Anum_pg_constraint_conexclop,
5089  conrel->rd_att, &isnull);
5090  if (isnull)
5091  elog(ERROR, "null conexclop for rel %s",
5092  RelationGetRelationName(indexRelation));
5093 
5094  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5095  nelem = ARR_DIMS(arr)[0];
5096  if (ARR_NDIM(arr) != 1 ||
5097  nelem != indnkeyatts ||
5098  ARR_HASNULL(arr) ||
5099  ARR_ELEMTYPE(arr) != OIDOID)
5100  elog(ERROR, "conexclop is not a 1-D Oid array");
5101 
5102  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5103  }
5104 
5105  systable_endscan(conscan);
5106  table_close(conrel, AccessShareLock);
5107 
5108  if (!found)
5109  elog(ERROR, "exclusion constraint record missing for rel %s",
5110  RelationGetRelationName(indexRelation));
5111 
5112  /* We need the func OIDs and strategy numbers too */
5113  for (i = 0; i < indnkeyatts; i++)
5114  {
5115  funcs[i] = get_opcode(ops[i]);
5116  strats[i] = get_op_opfamily_strategy(ops[i],
5117  indexRelation->rd_opfamily[i]);
5118  /* shouldn't fail, since it was checked at index creation */
5119  if (strats[i] == InvalidStrategy)
5120  elog(ERROR, "could not find strategy for operator %u in family %u",
5121  ops[i], indexRelation->rd_opfamily[i]);
5122  }
5123 
5124  /* Save a copy of the results in the relcache entry. */
5125  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5126  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5127  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5128  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5129  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5130  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5131  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5132  MemoryContextSwitchTo(oldcxt);
5133 }
5134 
5135 /*
5136  * Get publication actions for the given relation.
5137  */
5138 struct PublicationActions *
5140 {
5141  List *puboids;
5142  ListCell *lc;
5143  MemoryContext oldcxt;
5144  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5145 
5146  /*
5147  * If not publishable, it publishes no actions. (pgoutput_change() will
5148  * ignore it.)
5149  */
5150  if (!is_publishable_relation(relation))
5151  return pubactions;
5152 
5153  if (relation->rd_pubactions)
5154  return memcpy(pubactions, relation->rd_pubactions,
5155  sizeof(PublicationActions));
5156 
5157  /* Fetch the publication membership info. */
5158  puboids = GetRelationPublications(RelationGetRelid(relation));
5159  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5160 
5161  foreach(lc, puboids)
5162  {
5163  Oid pubid = lfirst_oid(lc);
5164  HeapTuple tup;
5165  Form_pg_publication pubform;
5166 
5168 
5169  if (!HeapTupleIsValid(tup))
5170  elog(ERROR, "cache lookup failed for publication %u", pubid);
5171 
5172  pubform = (Form_pg_publication) GETSTRUCT(tup);
5173 
5174  pubactions->pubinsert |= pubform->pubinsert;
5175  pubactions->pubupdate |= pubform->pubupdate;
5176  pubactions->pubdelete |= pubform->pubdelete;
5177  pubactions->pubtruncate |= pubform->pubtruncate;
5178 
5179  ReleaseSysCache(tup);
5180 
5181  /*
5182  * If we know everything is replicated, there is no point to check for
5183  * other publications.
5184  */
5185  if (pubactions->pubinsert && pubactions->pubupdate &&
5186  pubactions->pubdelete && pubactions->pubtruncate)
5187  break;
5188  }
5189 
5190  if (relation->rd_pubactions)
5191  {
5192  pfree(relation->rd_pubactions);
5193  relation->rd_pubactions = NULL;
5194  }
5195 
5196  /* Now save copy of the actions in the relcache entry. */
5198  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5199  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5200  MemoryContextSwitchTo(oldcxt);
5201 
5202  return pubactions;
5203 }
5204 
5205 /*
5206  * Routines to support ereport() reports of relation-related errors
5207  *
5208  * These could have been put into elog.c, but it seems like a module layering
5209  * violation to have elog.c calling relcache or syscache stuff --- and we
5210  * definitely don't want elog.h including rel.h. So we put them here.
5211  */
5212 
5213 /*
5214  * errtable --- stores schema_name and table_name of a table
5215  * within the current errordata.
5216  */
5217 int
5219 {
5223 
5224  return 0; /* return value does not matter */
5225 }
5226 
5227 /*
5228  * errtablecol --- stores schema_name, table_name and column_name
5229  * of a table column within the current errordata.
5230  *
5231  * The column is specified by attribute number --- for most callers, this is
5232  * easier and less error-prone than getting the column name for themselves.
5233  */
5234 int
5236 {
5238  const char *colname;
5239 
5240  /* Use reldesc if it's a user attribute, else consult the catalogs */
5241  if (attnum > 0 && attnum <= reldesc->natts)
5242  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5243  else
5244  colname = get_attname(RelationGetRelid(rel), attnum, false);
5245 
5246  return errtablecolname(rel, colname);
5247 }
5248 
5249 /*
5250  * errtablecolname --- stores schema_name, table_name and column_name
5251  * of a table column within the current errordata, where the column name is
5252  * given directly rather than extracted from the relation's catalog data.
5253  *
5254  * Don't use this directly unless errtablecol() is inconvenient for some
5255  * reason. This might possibly be needed during intermediate states in ALTER
5256  * TABLE, for instance.
5257  */
5258 int
5259 errtablecolname(Relation rel, const char *colname)
5260 {
5261  errtable(rel);
5263 
5264  return 0; /* return value does not matter */
5265 }
5266 
5267 /*
5268  * errtableconstraint --- stores schema_name, table_name and constraint_name
5269  * of a table-related constraint within the current errordata.
5270  */
5271 int
5272 errtableconstraint(Relation rel, const char *conname)
5273 {
5274  errtable(rel);
5276 
5277  return 0; /* return value does not matter */
5278 }
5279 
5280 
5281 /*
5282  * load_relcache_init_file, write_relcache_init_file
5283  *
5284  * In late 1992, we started regularly having databases with more than
5285  * a thousand classes in them. With this number of classes, it became
5286  * critical to do indexed lookups on the system catalogs.
5287  *
5288  * Bootstrapping these lookups is very hard. We want to be able to
5289  * use an index on pg_attribute, for example, but in order to do so,
5290  * we must have read pg_attribute for the attributes in the index,
5291  * which implies that we need to use the index.
5292  *
5293  * In order to get around the problem, we do the following:
5294  *
5295  * + When the database system is initialized (at initdb time), we
5296  * don't use indexes. We do sequential scans.
5297  *
5298  * + When the backend is started up in normal mode, we load an image
5299  * of the appropriate relation descriptors, in internal format,
5300  * from an initialization file in the data/base/... directory.
5301  *
5302  * + If the initialization file isn't there, then we create the
5303  * relation descriptors using sequential scans and write 'em to
5304  * the initialization file for use by subsequent backends.
5305  *
5306  * As of Postgres 9.0, there is one local initialization file in each
5307  * database, plus one shared initialization file for shared catalogs.
5308  *
5309  * We could dispense with the initialization files and just build the
5310  * critical reldescs the hard way on every backend startup, but that
5311  * slows down backend startup noticeably.
5312  *
5313  * We can in fact go further, and save more relcache entries than
5314  * just the ones that are absolutely critical; this allows us to speed
5315  * up backend startup by not having to build such entries the hard way.
5316  * Presently, all the catalog and index entries that are referred to
5317  * by catcaches are stored in the initialization files.
5318  *
5319  * The same mechanism that detects when catcache and relcache entries
5320  * need to be invalidated (due to catalog updates) also arranges to
5321  * unlink the initialization files when the contents may be out of date.
5322  * The files will then be rebuilt during the next backend startup.
5323  */
5324 
5325 /*
5326  * load_relcache_init_file -- attempt to load cache from the shared
5327  * or local cache init file
5328  *
5329  * If successful, return true and set criticalRelcachesBuilt or
5330  * criticalSharedRelcachesBuilt to true.
5331  * If not successful, return false.
5332  *
5333  * NOTE: we assume we are already switched into CacheMemoryContext.
5334  */
5335 static bool
5337 {
5338  FILE *fp;
5339  char initfilename[MAXPGPATH];
5340  Relation *rels;
5341  int relno,
5342  num_rels,
5343  max_rels,
5344  nailed_rels,
5345  nailed_indexes,
5346  magic;
5347  int i;
5348 
5349  if (shared)
5350  snprintf(initfilename, sizeof(initfilename), "global/%s",
5352  else
5353  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5355 
5356  fp = AllocateFile(initfilename, PG_BINARY_R);
5357  if (fp == NULL)
5358  return false;
5359 
5360  /*
5361  * Read the index relcache entries from the file. Note we will not enter
5362  * any of them into the cache if the read fails partway through; this
5363  * helps to guard against broken init files.
5364  */
5365  max_rels = 100;
5366  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5367  num_rels = 0;
5368  nailed_rels = nailed_indexes = 0;
5369 
5370  /* check for correct magic number (compatible version) */
5371  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5372  goto read_failed;
5373  if (magic != RELCACHE_INIT_FILEMAGIC)
5374  goto read_failed;
5375 
5376  for (relno = 0;; relno++)
5377  {
5378  Size len;
5379  size_t nread;
5380  Relation rel;
5381  Form_pg_class relform;
5382  bool has_not_null;
5383 
5384  /* first read the relation descriptor length */
5385  nread = fread(&len, 1, sizeof(len), fp);
5386  if (nread != sizeof(len))
5387  {
5388  if (nread == 0)
5389  break; /* end of file */
5390  goto read_failed;
5391  }
5392 
5393  /* safety check for incompatible relcache layout */
5394  if (len != sizeof(RelationData))
5395  goto read_failed;
5396 
5397  /* allocate another relcache header */
5398  if (num_rels >= max_rels)
5399  {
5400  max_rels *= 2;
5401  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5402  }
5403 
5404  rel = rels[num_rels++] = (Relation) palloc(len);
5405 
5406  /* then, read the Relation structure */
5407  if (fread(rel, 1, len, fp) != len)
5408  goto read_failed;
5409 
5410  /* next read the relation tuple form */
5411  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5412  goto read_failed;
5413 
5414  relform = (Form_pg_class) palloc(len);
5415  if (fread(relform, 1, len, fp) != len)
5416  goto read_failed;
5417 
5418  rel->rd_rel = relform;
5419 
5420  /* initialize attribute tuple forms */
5421  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5422  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5423 
5424  rel->rd_att->tdtypeid = relform->reltype;
5425  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5426 
5427  /* next read all the attribute tuple form data entries */
5428  has_not_null = false;
5429  for (i = 0; i < relform->relnatts; i++)
5430  {
5431  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5432 
5433  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5434  goto read_failed;
5435  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5436  goto read_failed;
5437  if (fread(attr, 1, len, fp) != len)
5438  goto read_failed;
5439 
5440  has_not_null |= attr->attnotnull;
5441  }
5442 
5443  /* next read the access method specific field */
5444  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5445  goto read_failed;
5446  if (len > 0)
5447  {
5448  rel->rd_options = palloc(len);
5449  if (fread(rel->rd_options, 1, len, fp) != len)
5450  goto read_failed;
5451  if (len != VARSIZE(rel->rd_options))
5452  goto read_failed; /* sanity check */
5453  }
5454  else
5455  {
5456  rel->rd_options = NULL;
5457  }
5458 
5459  /* mark not-null status */
5460  if (has_not_null)
5461  {
5462  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5463 
5464  constr->has_not_null = true;
5465  rel->rd_att->constr = constr;
5466  }
5467 
5468  /*
5469  * If it's an index, there's more to do. Note we explicitly ignore
5470  * partitioned indexes here.
5471  */
5472  if (rel->rd_rel->relkind == RELKIND_INDEX)
5473  {
5474  MemoryContext indexcxt;
5475  Oid *opfamily;
5476  Oid *opcintype;
5477  RegProcedure *support;
5478  int nsupport;
5479  int16 *indoption;
5480  Oid *indcollation;
5481 
5482  /* Count nailed indexes to ensure we have 'em all */
5483  if (rel->rd_isnailed)
5484  nailed_indexes++;
5485 
5486  /* next, read the pg_index tuple */
5487  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5488  goto read_failed;
5489 
5490  rel->rd_indextuple = (HeapTuple) palloc(len);
5491  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5492  goto read_failed;
5493 
5494  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5495  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5497 
5498  /*
5499  * prepare index info context --- parameters should match
5500  * RelationInitIndexAccessInfo
5501  */
5503  "index info",
5505  rel->rd_indexcxt = indexcxt;
5508 
5509  /*
5510  * Now we can fetch the index AM's API struct. (We can't store
5511  * that in the init file, since it contains function pointers that
5512  * might vary across server executions. Fortunately, it should be
5513  * safe to call the amhandler even while bootstrapping indexes.)
5514  */
5515  InitIndexAmRoutine(rel);
5516 
5517  /* next, read the vector of opfamily OIDs */
5518  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5519  goto read_failed;
5520 
5521  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5522  if (fread(opfamily, 1, len, fp) != len)
5523  goto read_failed;
5524 
5525  rel->rd_opfamily = opfamily;
5526 
5527  /* next, read the vector of opcintype OIDs */
5528  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5529  goto read_failed;
5530 
5531  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5532  if (fread(opcintype, 1, len, fp) != len)
5533  goto read_failed;
5534 
5535  rel->rd_opcintype = opcintype;
5536 
5537  /* next, read the vector of support procedure OIDs */
5538  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5539  goto read_failed;
5540  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5541  if (fread(support, 1, len, fp) != len)
5542  goto read_failed;
5543 
5544  rel->rd_support = support;
5545 
5546  /* next, read the vector of collation OIDs */
5547  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5548  goto read_failed;
5549 
5550  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5551  if (fread(indcollation, 1, len, fp) != len)
5552  goto read_failed;
5553 
5554  rel->rd_indcollation = indcollation;
5555 
5556  /* finally, read the vector of indoption values */
5557  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5558  goto read_failed;
5559 
5560  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5561  if (fread(indoption, 1, len, fp) != len)
5562  goto read_failed;
5563 
5564  rel->rd_indoption = indoption;
5565 
5566  /* set up zeroed fmgr-info vector */
5567  nsupport = relform->relnatts * rel->rd_indam->amsupport;
5568  rel->rd_supportinfo = (FmgrInfo *)
5569  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5570  }
5571  else
5572  {
5573  /* Count nailed rels to ensure we have 'em all */
5574  if (rel->rd_isnailed)
5575  nailed_rels++;
5576 
5577  /* Load table AM data */
5578  if (rel->rd_rel->relkind == RELKIND_RELATION ||
5579  rel->rd_rel->relkind == RELKIND_SEQUENCE ||
5580  rel->rd_rel->relkind == RELKIND_TOASTVALUE ||
5581  rel->rd_rel->relkind == RELKIND_MATVIEW)
5583 
5584  Assert(rel->rd_index == NULL);
5585  Assert(rel->rd_indextuple == NULL);
5586  Assert(rel->rd_indexcxt == NULL);
5587  Assert(rel->rd_indam == NULL);
5588  Assert(rel->rd_opfamily == NULL);
5589  Assert(rel->rd_opcintype == NULL);
5590  Assert(rel->rd_support == NULL);
5591  Assert(rel->rd_supportinfo == NULL);
5592  Assert(rel->rd_indoption == NULL);
5593  Assert(rel->rd_indcollation == NULL);
5594  }
5595 
5596  /*
5597  * Rules and triggers are not saved (mainly because the internal
5598  * format is complex and subject to change). They must be rebuilt if
5599  * needed by RelationCacheInitializePhase3. This is not expected to
5600  * be a big performance hit since few system catalogs have such. Ditto
5601  * for RLS policy data, partition info, index expressions, predicates,
5602  * exclusion info, and FDW info.
5603  */
5604  rel->rd_rules = NULL;
5605  rel->rd_rulescxt = NULL;
5606  rel->trigdesc = NULL;
5607  rel->rd_rsdesc = NULL;
5608  rel->rd_partkey = NULL;
5609  rel->rd_partkeycxt = NULL;
5610  rel->rd_partdesc = NULL;
5611  rel->rd_pdcxt = NULL;
5612  rel->rd_partcheck = NIL;
5613  rel->rd_partcheckvalid = false;
5614  rel->rd_partcheckcxt = NULL;
5615  rel->rd_indexprs = NIL;
5616  rel->rd_indpred = NIL;
5617  rel->rd_exclops = NULL;
5618  rel->rd_exclprocs = NULL;
5619  rel->rd_exclstrats = NULL;
5620  rel->rd_fdwroutine = NULL;
5621 
5622  /*
5623  * Reset transient-state fields in the relcache entry
5624  */
5625  rel->rd_smgr = NULL;
5626  if (rel->rd_isnailed)
5627  rel->rd_refcnt = 1;
5628  else
5629  rel->rd_refcnt = 0;
5630  rel->rd_indexvalid = false;
5631  rel->rd_indexlist = NIL;
5632  rel->rd_pkindex = InvalidOid;
5633  rel->rd_replidindex = InvalidOid;
5634  rel->rd_indexattr = NULL;
5635  rel->rd_keyattr = NULL;
5636  rel->rd_pkattr = NULL;
5637  rel->rd_idattr = NULL;
5638  rel->rd_pubactions = NULL;
5639  rel->rd_statvalid = false;
5640  rel->rd_statlist = NIL;
5641  rel->rd_fkeyvalid = false;
5642  rel->rd_fkeylist = NIL;
5645  rel->rd_amcache = NULL;
5646  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5647 
5648  /*
5649  * Recompute lock and physical addressing info. This is needed in
5650  * case the pg_internal.init file was copied from some other database
5651  * by CREATE DATABASE.
5652  */
5653  RelationInitLockInfo(rel);
5655  }
5656 
5657  /*
5658  * We reached the end of the init file without apparent problem. Did we
5659  * get the right number of nailed items? This is a useful crosscheck in
5660  * case the set of critical rels or indexes changes. However, that should
5661  * not happen in a normally-running system, so let's bleat if it does.
5662  *
5663  * For the shared init file, we're called before client authentication is
5664  * done, which means that elog(WARNING) will go only to the postmaster
5665  * log, where it's easily missed. To ensure that developers notice bad
5666  * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5667  * an Assert(false) there.
5668  */
5669  if (shared)
5670  {
5671  if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5672  nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5673  {
5674  elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5675  nailed_rels, nailed_indexes,
5677  /* Make sure we get developers' attention about this */
5678  Assert(false);
5679  /* In production builds, recover by bootstrapping the relcache */
5680  goto read_failed;
5681  }
5682  }
5683  else
5684  {
5685  if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5686  nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5687  {
5688  elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5689  nailed_rels, nailed_indexes,
5691  /* We don't need an Assert() in this case */
5692  goto read_failed;
5693  }
5694  }
5695 
5696  /*
5697  * OK, all appears well.
5698  *
5699  * Now insert all the new relcache entries into the cache.
5700  */
5701  for (relno = 0; relno < num_rels; relno++)
5702  {
5703  RelationCacheInsert(rels[relno], false);
5704  }
5705 
5706  pfree(rels);
5707  FreeFile(fp);
5708 
5709  if (shared)
5711  else
5712  criticalRelcachesBuilt = true;
5713  return true;
5714 
5715  /*
5716  * init file is broken, so do it the hard way. We don't bother trying to
5717  * free the clutter we just allocated; it's not in the relcache so it
5718  * won't hurt.
5719  */
5720 read_failed:
5721  pfree(rels);
5722  FreeFile(fp);
5723 
5724  return false;
5725 }
5726 
5727 /*
5728  * Write out a new initialization file with the current contents
5729  * of the relcache (either shared rels or local rels, as indicated).
 *
 * The "shared" flag tested in the body selects between the two cases: it
 * chooses the "global/" file-name prefix and restricts the entries written
 * to those whose relisshared matches it.
 *
 * Sequence of operations:
 *	- return immediately if relcacheInvalsReceived is already nonzero;
 *	- write a magic number, followed by one group of length-prefixed items
 *	  (see write_item) per qualifying RelationIdCache entry, into a
 *	  temporary file;
 *	- while holding RelCacheInitLock exclusively, rename the temporary
 *	  file to the final name if relcacheInvalsReceived is still zero,
 *	  otherwise unlink it.
 *
 * Error behavior: failure to create the temporary file is reported as a
 * WARNING and the function returns; a failed write or FreeFile() raises
 * FATAL; a failed rename() just removes the temporary file.
 *
 * NOTE(review): this copy of the function has gaps -- the embedded line
 * numbers skip 5732, 5738, 5757, 5759, 5764, 5766, 5779, 5835, 5851-5852,
 * 5871 and 5899.  Among other things, the line naming the function and its
 * parameter list, and the declaration of "status", are not visible here.
 * Confirm against the complete file before relying on the details.
5730  */
5731 static void
5733 {
5734  FILE *fp;
 /* the data is written under tempfilename and later renamed to finalfilename */
5735  char tempfilename[MAXPGPATH];
5736  char finalfilename[MAXPGPATH];
5737  int magic;
5739  RelIdCacheEnt *idhentry;
5740  int i;
5741 
5742  /*
5743  * If we have already received any relcache inval events, there's no
5744  * chance of succeeding so we may as well skip the whole thing.
5745  */
5746  if (relcacheInvalsReceived != 0L)
5747  return;
5748 
5749  /*
5750  * We must write a temporary file and rename it into place. Otherwise,
5751  * another backend starting at about the same time might crash trying to
5752  * read the partially-complete file.
5753  */
5754  if (shared)
5755  {
 /*
  * Shared case: both names live under "global/".  The temp name has an
  * extra ".%d" suffix.  NOTE(review): the argument lines of both snprintf
  * calls are missing from this listing.
  */
5756  snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5758  snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5760  }
5761  else
5762  {
 /*
  * Local case: names start with a "%s/" prefix, presumably the database
  * directory -- the argument lines are missing here too; TODO confirm.
  */
5763  snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5765  snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5767  }
5768 
5769  unlink(tempfilename); /* in case it exists w/wrong permissions */
5770 
5771  fp = AllocateFile(tempfilename, PG_BINARY_W);
5772  if (fp == NULL)
5773  {
5774  /*
5775  * We used to consider this a fatal error, but we might as well
5776  * continue with backend startup ...
5777  */
 /* NOTE(review): one line of this ereport call (5779) is missing here */
5778  ereport(WARNING,
5780  errmsg("could not create relation-cache initialization file \"%s\": %m",
5781  tempfilename),
5782  errdetail("Continuing anyway, but there's something wrong.")));
5783  return;
5784  }
5785 
5786  /*
5787  * Write a magic number to serve as a file version identifier. We can
5788  * change the magic number whenever the relcache layout changes.
5789  */
5790  magic = RELCACHE_INIT_FILEMAGIC;
 /* unlike the items below, the magic number is written without a length prefix */
5791  if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5792  elog(FATAL, "could not write init file");
5793 
5794  /*
5795  * Write all the appropriate reldescs (in no particular order).
5796  */
5797  hash_seq_init(&status, RelationIdCache);
5798 
5799  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5800  {
5801  Relation rel = idhentry->reldesc;
 /* relform->relnatts sizes every per-attribute array written below */
5802  Form_pg_class relform = rel->rd_rel;
5803 
5804  /* ignore if not correct group */
5805  if (relform->relisshared != shared)
5806  continue;
5807 
5808  /*
5809  * Ignore if not supposed to be in init file. We can allow any shared
5810  * relation that's been loaded so far to be in the shared init file,
5811  * but unshared relations must be ones that should be in the local
5812  * file per RelationIdIsInInitFile. (Note: if you want to change the
5813  * criterion for rels to be kept in the init file, see also inval.c.
5814  * The reason for filtering here is to be sure that we don't put
5815  * anything into the local init file for which a relcache inval would
5816  * not cause invalidation of that init file.)
5817  */
5818  if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5819  {
5820  /* Nailed rels had better get stored. */
5821  Assert(!rel->rd_isnailed);
5822  continue;
5823  }
5824 
5825  /* first write the relcache entry proper */
5826  write_item(rel, sizeof(RelationData), fp);
5827 
5828  /* next write the relation tuple form */
5829  write_item(relform, CLASS_TUPLE_SIZE, fp);
5830 
5831  /* next, do all the attribute tuple form data entries */
5832  for (i = 0; i < relform->relnatts; i++)
5833  {
 /* NOTE(review): the size/stream arguments of this call (line 5835) are missing here */
5834  write_item(TupleDescAttr(rel->rd_att, i),
5836  }
5837 
5838  /* next, do the access method specific field */
 /*
  * rd_options may be NULL; in that case a zero length is written and
  * write_item() is handed a NULL data pointer.
  */
5839  write_item(rel->rd_options,
5840  (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
5841  fp);
5842 
5843  /*
5844  * If it's an index, there's more to do. Note we explicitly ignore
5845  * partitioned indexes here.
5846  */
5847  if (rel->rd_rel->relkind == RELKIND_INDEX)
5848  {
5849  /* write the pg_index tuple */
5850  /* we assume this was created by heap_copytuple! */
 /* NOTE(review): the start of this write_item call (lines 5851-5852) is missing here */
5853  fp);
5854 
5855  /* next, write the vector of opfamily OIDs */
5856  write_item(rel->rd_opfamily,
5857  relform->relnatts * sizeof(Oid),
5858  fp);
5859 
5860  /* next, write the vector of opcintype OIDs */
5861  write_item(rel->rd_opcintype,
5862  relform->relnatts * sizeof(Oid),
5863  fp);
5864 
5865  /* next, write the vector of support procedure OIDs */
 /* sized as relnatts * amsupport entries of RegProcedure */
5866  write_item(rel->rd_support,
5867  relform->relnatts * (rel->rd_indam->amsupport * sizeof(RegProcedure)),
5868  fp);
5869 
5870  /* next, write the vector of collation OIDs */
 /* NOTE(review): the first line of this write_item call (5871) is missing here */
5872  relform->relnatts * sizeof(Oid),
5873  fp);
5874 
5875  /* finally, write the vector of indoption values */
5876  write_item(rel->rd_indoption,
5877  relform->relnatts * sizeof(int16),
5878  fp);
5879  }
5880  }
5881 
 /* a nonzero result from FreeFile() is treated as a failed write */
5882  if (FreeFile(fp))
5883  elog(FATAL, "could not write init file");
5884 
5885  /*
5886  * Now we have to check whether the data we've so painstakingly
5887  * accumulated is already obsolete due to someone else's just-committed
5888  * catalog changes. If so, we just delete the temp file and leave it to
5889  * the next backend to try again. (Our own relcache entries will be
5890  * updated by SI message processing, but we can't be sure whether what we
5891  * wrote out was up-to-date.)
5892  *
5893  * This mustn't run concurrently with the code that unlinks an init file
5894  * and sends SI messages, so grab a serialization lock for the duration.
5895  */
5896  LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5897 
5898  /* Make sure we have seen all incoming SI messages */
 /* NOTE(review): the statement this comment introduces (line 5899) is missing from this listing */
5900 
5901  /*
5902  * If we have received any SI relcache invals since backend start, assume
5903  * we may have written out-of-date data.
5904  */
5905  if (relcacheInvalsReceived == 0L)
5906  {
5907  /*
5908  * OK, rename the temp file to its final name, deleting any
5909  * previously-existing init file.
5910  *
5911  * Note: a failure here is possible under Cygwin, if some other
5912  * backend is holding open an unlinked-but-not-yet-gone init file. So
5913  * treat this as a noncritical failure; just remove the useless temp
5914  * file on failure.
5915  */
5916  if (rename(tempfilename, finalfilename) < 0)
5917  unlink(tempfilename);
5918  }
5919  else
5920  {
5921  /* Delete the already-obsolete temp file */
5922  unlink(tempfilename);
5923  }
5924 
 /* the lock is released on both the rename and the unlink path */
5925  LWLockRelease(RelCacheInitLock);
5926 }
5927 
5928 /* write a chunk of data preceded by its length */
5929 static void
5930 write_item(const void *data, Size len, FILE *fp)
5931 {
5932  if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
5933  elog(FATAL, "could not write init file");
5934  if (fwrite(data, 1, len, fp) != len)
5935  elog(FATAL, "could not write init file");
5936 }
5937 
5938 /*
5939  * Determine whether a given relation (identified by OID) is one of the ones
5940  * we should store in a relcache init file.
5941  *
5942  * We must cache all nailed rels, and for efficiency we should cache every rel
5943  * that supports a syscache. The former set is almost but not quite a subset
5944  * of the latter. The special cases are relations where
5945  * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5946  * which do not support any syscache.
5947  */
5948 bool
5950 {
5951  if (relationId == SharedSecLabelRelationId ||
5952  relationId == TriggerRelidNameIndexId ||
5953  relationId == DatabaseNameIndexId ||
5954  relationId == SharedSecLabelObjectIndexId)
5955  {
5956  /*
5957  * If this Assert fails, we don't need the applicable special case
5958  * anymore.
5959  */
5960  Assert(!RelationSupportsSysCache(relationId));
5961  return true;
5962<