PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/hash.h"
34 #include "access/htup_details.h"
35 #include "access/multixact.h"
36 #include "access/nbtree.h"
37 #include "access/reloptions.h"
38 #include "access/sysattr.h"
39 #include "access/xact.h"
40 #include "access/xlog.h"
41 #include "catalog/catalog.h"
42 #include "catalog/index.h"
43 #include "catalog/indexing.h"
44 #include "catalog/namespace.h"
45 #include "catalog/partition.h"
46 #include "catalog/pg_am.h"
47 #include "catalog/pg_amproc.h"
48 #include "catalog/pg_attrdef.h"
49 #include "catalog/pg_authid.h"
51 #include "catalog/pg_constraint.h"
52 #include "catalog/pg_database.h"
53 #include "catalog/pg_namespace.h"
54 #include "catalog/pg_opclass.h"
56 #include "catalog/pg_proc.h"
57 #include "catalog/pg_publication.h"
58 #include "catalog/pg_rewrite.h"
59 #include "catalog/pg_shseclabel.h"
62 #include "catalog/pg_tablespace.h"
63 #include "catalog/pg_trigger.h"
64 #include "catalog/pg_type.h"
65 #include "catalog/schemapg.h"
66 #include "catalog/storage.h"
67 #include "commands/policy.h"
68 #include "commands/trigger.h"
69 #include "miscadmin.h"
70 #include "nodes/nodeFuncs.h"
71 #include "optimizer/clauses.h"
72 #include "optimizer/prep.h"
73 #include "optimizer/var.h"
74 #include "rewrite/rewriteDefine.h"
75 #include "rewrite/rowsecurity.h"
76 #include "storage/lmgr.h"
77 #include "storage/smgr.h"
78 #include "utils/array.h"
79 #include "utils/builtins.h"
80 #include "utils/fmgroids.h"
81 #include "utils/inval.h"
82 #include "utils/lsyscache.h"
83 #include "utils/memutils.h"
84 #include "utils/relmapper.h"
85 #include "utils/resowner_private.h"
86 #include "utils/snapmgr.h"
87 #include "utils/syscache.h"
88 #include "utils/tqual.h"
89 
90 
91 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
92 
93 /*
94  * hardcoded tuple descriptors, contents generated by genbki.pl
95  */
/*
 * One array per catalog: each holds Natts_<catalog> FormData_pg_attribute
 * entries and is initialized from the matching Schema_<catalog> macro.
 * All of them are static const, i.e. file-local and read-only.
 */
96 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
97 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
98 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
99 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
100 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
101 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
102 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
103 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
104 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
105 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
106 
107 /*
108  * Hash tables that index the relation cache
109  *
110  * We used to index the cache by both name and OID, but now there
111  * is only an index by OID.
112  */
/*
 * Entry type of the relation-OID hash table.
 *
 * NOTE(review): listing lines 115-116 (the struct members) are not present
 * in this view.  The lookup macros later in this file read and write a
 * member named "reldesc" and pass the address of a relation's rd_id as the
 * hash key, so the entry presumably holds a key plus that pointer -- confirm
 * against the full source.
 */
113 typedef struct relidcacheent
114 {
117 } RelIdCacheEnt;
118 
120 
121 /*
122  * This flag is false until we have prepared the critical relcache entries
123  * that are needed to do indexscans on the tables read by relcache building.
124  */
126 
127 /*
128  * This flag is false until we have prepared the critical relcache entries
129  * for shared catalogs (which are the tables needed for login).
130  */
132 
133 /*
134  * This counter counts relcache inval events received since backend startup
135  * (but only for rels that are actually in cache). Presently, we use it only
136  * to detect whether data about to be written by write_relcache_init_file()
137  * might already be obsolete.
138  */
139 static long relcacheInvalsReceived = 0L;
140 
141 /*
142  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
143  * cleanup work. This list intentionally has limited size; if it overflows,
144  * we fall back to scanning the whole hashtable. There is no value in a very
145  * large list because (1) at some point, a hash_seq_search scan is faster than
146  * retail lookups, and (2) the value of this is to reduce EOXact work for
147  * short transactions, which can't have dirtied all that many tables anyway.
148  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
149  * cleanup processing must be idempotent.
150  */
151 #define MAX_EOXACT_LIST 32
/*
 * NOTE(review): listing line 152 is not present in this view; it presumably
 * declares the eoxact_list[] array that the macro below writes to.
 */
153 static int eoxact_list_len = 0;
154 static bool eoxact_list_overflowed = false;
155 
/*
 * EOXactListAdd(rel): append (rel)->rd_id to eoxact_list[] while fewer than
 * MAX_EOXACT_LIST entries are stored; once the list is full, only set
 * eoxact_list_overflowed and store nothing.
 */
156 #define EOXactListAdd(rel) \
157  do { \
158  if (eoxact_list_len < MAX_EOXACT_LIST) \
159  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
160  else \
161  eoxact_list_overflowed = true; \
162  } while (0)
163 
164 /*
165  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
166  * cleanup work. The array expands as needed; there is no hashtable because
167  * we don't need to access individual items except at EOXact.
168  */
/*
 * NOTE(review): listing line 169 is not present in this view; it presumably
 * declares EOXactTupleDescArray itself.  Both counters below start at zero.
 */
170 static int NextEOXactTupleDescNum = 0;
171 static int EOXactTupleDescArrayLen = 0;
172 
173 /*
174  * macros to manipulate the lookup hashtable
175  */
/*
 * RelationCacheInsert(RELATION, replace_allowed)
 *
 * Enter RELATION into RelationIdCache, keyed by the address of its rd_id
 * (hash_search with HASH_ENTER).
 *
 * If no entry existed, the new entry simply points at RELATION.  If one
 * already existed, replace_allowed is asserted, the entry is repointed at
 * RELATION, and the previous descriptor is then either destroyed (when its
 * reference count is zero) or, outside bootstrap processing mode, reported
 * with a WARNING as leaked.  In bootstrap mode a still-referenced old entry
 * is dropped silently.
 *
 * RELATION appears more than once in the expansion, so pass an expression
 * without side effects.
 */
176 #define RelationCacheInsert(RELATION, replace_allowed) \
177 do { \
178  RelIdCacheEnt *hentry; bool found; \
179  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
180  (void *) &((RELATION)->rd_id), \
181  HASH_ENTER, &found); \
182  if (found) \
183  { \
184  /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
185  Relation _old_rel = hentry->reldesc; \
186  Assert(replace_allowed); \
187  hentry->reldesc = (RELATION); \
188  if (RelationHasReferenceCountZero(_old_rel)) \
189  RelationDestroyRelation(_old_rel, false); \
190  else if (!IsBootstrapProcessingMode()) \
191  elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
192  RelationGetRelationName(_old_rel)); \
193  } \
194  else \
195  hentry->reldesc = (RELATION); \
196 } while(0)
197 
/*
 * RelationIdCacheLookup(ID, RELATION)
 *
 * Look ID up in RelationIdCache (hash_search with HASH_FIND) and assign the
 * result to RELATION: the entry's reldesc when found, NULL otherwise.
 *
 * ID must be an lvalue because its address is taken; RELATION must be
 * assignable because the macro stores into it.
 */
198 #define RelationIdCacheLookup(ID, RELATION) \
199 do { \
200  RelIdCacheEnt *hentry; \
201  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
202  (void *) &(ID), \
203  HASH_FIND, NULL); \
204  if (hentry) \
205  RELATION = hentry->reldesc; \
206  else \
207  RELATION = NULL; \
208 } while(0)
209 
/*
 * RelationCacheDelete(RELATION)
 *
 * Remove the RelationIdCache entry keyed by RELATION's rd_id (hash_search
 * with HASH_REMOVE).  If no such entry existed, emit a WARNING that names
 * the OID; the macro does not raise an error in that case.  The relation
 * descriptor itself is not freed here.
 */
210 #define RelationCacheDelete(RELATION) \
211 do { \
212  RelIdCacheEnt *hentry; \
213  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
214  (void *) &((RELATION)->rd_id), \
215  HASH_REMOVE, NULL); \
216  if (hentry == NULL) \
217  elog(WARNING, "failed to delete relcache entry for OID %u", \
218  (RELATION)->rd_id); \
219 } while(0)
220 
221 
222 /*
223  * Special cache for opclass-related information
224  *
225  * Note: only default support procs get cached, ie, those with
226  * lefttype = righttype = opcintype.
227  */
/*
 * One cached opclass.  opclassoid is the hash lookup key; "valid" is set
 * only after the entry has been filled in successfully, so readers have to
 * check it before trusting the other fields.
 *
 * NOTE(review): listing line 236 (the closing brace and typedef name) is not
 * present in this view; the prototype of LookupOpclassInfo in this file
 * names the type OpClassCacheEnt.
 */
228 typedef struct opclasscacheent
229 {
230  Oid opclassoid; /* lookup key: OID of opclass */
231  bool valid; /* set true after successful fill-in */
232  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
233  Oid opcfamily; /* OID of opclass's family */
234  Oid opcintype; /* OID of opclass's declared input type */
235  RegProcedure *supportProcs; /* OIDs of support procedures */
237 
/* Hash table of opclass cache entries; NULL until it has been created. */
238 static HTAB *OpClassCache = NULL;
239 
240 
241 /* non-export function prototypes */
/*
 * Every function declared here is static, i.e. private to this file.
 *
 * NOTE(review): listing lines 248 and 261 are not present in this view, so
 * at least two prototypes are not shown below.
 */
242 
/* destroying, clearing and flushing relcache entries */
243 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
244 static void RelationClearRelation(Relation relation, bool rebuild);
245 
246 static void RelationReloadIndexInfo(Relation relation);
247 static void RelationFlushRelation(Relation relation);
/* end-of-(sub)transaction cleanup and relcache init file I/O */
249 static void AtEOXact_cleanup(Relation relation, bool isCommit);
250 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
251  SubTransactionId mySubid, SubTransactionId parentSubid);
252 static bool load_relcache_init_file(bool shared);
253 static void write_relcache_init_file(bool shared);
254 static void write_item(const void *data, Size len, FILE *fp);
255 
256 static void formrdesc(const char *relationName, Oid relationReltype,
257  bool isshared, bool hasoids,
258  int natts, const FormData_pg_attribute *attrs);
259 
/* helpers that build the pieces of a relation descriptor */
260 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
262 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
263 static void RelationBuildTupleDesc(Relation relation);
264 static void RelationBuildPartitionKey(Relation relation);
265 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
266 static void RelationInitPhysicalAddr(Relation relation);
267 static void load_critical_index(Oid indexoid, Oid heapoid);
268 static TupleDesc GetPgClassDescriptor(void);
269 static TupleDesc GetPgIndexDescriptor(void);
270 static void AttrDefaultFetch(Relation relation);
271 static void CheckConstraintFetch(Relation relation);
272 static int CheckConstraintCmp(const void *a, const void *b);
273 static List *insert_ordered_oid(List *list, Oid datum);
274 static void InitIndexAmRoutine(Relation relation);
275 static void IndexSupportInitialize(oidvector *indclass,
276  RegProcedure *indexSupport,
277  Oid *opFamily,
278  Oid *opcInType,
279  StrategyNumber maxSupportNumber,
280  AttrNumber maxAttributeNumber);
281 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
282  StrategyNumber numSupport);
283 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
284 static void unlink_initfile(const char *initfilename);
285 static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
286  PartitionDesc partdesc2);
287 
288 
289 /*
290  * ScanPgRelation
291  *
292  * This is used by RelationBuildDesc to find a pg_class
293  * tuple matching targetRelId. The caller must hold at least
294  * AccessShareLock on the target relid to prevent concurrent-update
295  * scenarios; it isn't guaranteed that all scans used to build the
296  * relcache entry will use the same snapshot. If, for example,
297  * an attribute were to be added after scanning pg_class and before
298  * scanning pg_attribute, relnatts wouldn't match.
299  *
300  * NB: the returned tuple has been copied into palloc'd storage
301  * and must eventually be freed with heap_freetuple.
302  */
/*
 * Fetch the pg_class row for targetRelId and hand back a palloc'd copy.
 *
 * targetRelId: OID used as the argument of the equality scan key.
 * indexOK: an index scan is requested from systable_beginscan() only when
 *     this is true AND criticalRelcachesBuilt is true.
 * force_non_historic: selects which branch assigns "snapshot" below.
 *
 * Returns the heap_copytuple() copy when a row was found; otherwise the
 * (invalid) result of systable_getnext() is returned unchanged.  Exits with
 * FATAL if MyDatabaseId is not yet a valid OID.
 */
303 static HeapTuple
304 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
305 {
306  HeapTuple pg_class_tuple;
307  Relation pg_class_desc;
308  SysScanDesc pg_class_scan;
309  ScanKeyData key[1];
310  Snapshot snapshot;
311 
312  /*
313  * If something goes wrong during backend startup, we might find ourselves
314  * trying to read pg_class before we've selected a database. That ain't
315  * gonna work, so bail out with a useful error message. If this happens,
316  * it probably means a relcache entry that needs to be nailed isn't.
317  */
318  if (!OidIsValid(MyDatabaseId))
319  elog(FATAL, "cannot read pg_class without having selected a database");
320 
321  /*
322  * form a scan key
323  */
324  ScanKeyInit(&key[0],
    /* NOTE(review): listing line 325 (the argument that belongs here) is not present in this view */
326  BTEqualStrategyNumber, F_OIDEQ,
327  ObjectIdGetDatum(targetRelId));
328 
329  /*
330  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
331  * built the critical relcache entries (this includes initdb and startup
332  * without a pg_internal.init file). The caller can also force a heap
333  * scan by setting indexOK == false.
334  */
335  pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
336 
337  /*
338  * The caller might need a tuple that's newer than the one the historic
339  * snapshot sees; currently the only case that requires this is looking up
340  * the relfilenode of non-mapped system relations during decoding.
341  */
342  if (force_non_historic)
    /* NOTE(review): listing lines 343 and 345 (the two assignments to "snapshot") are not present in this view */
344  else
346 
347  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
348  indexOK && criticalRelcachesBuilt,
349  snapshot,
350  1, key);
351 
    /* only the first matching row is fetched; the scan is not continued */
352  pg_class_tuple = systable_getnext(pg_class_scan);
353 
354  /*
355  * Must copy tuple before releasing buffer.
356  */
357  if (HeapTupleIsValid(pg_class_tuple))
358  pg_class_tuple = heap_copytuple(pg_class_tuple);
359 
360  /* all done */
361  systable_endscan(pg_class_scan);
362  heap_close(pg_class_desc, AccessShareLock);
363 
364  return pg_class_tuple;
365 }
366 
367 /*
368  * AllocateRelationDesc
369  *
370  * This is used to allocate memory for a new relation descriptor
371  * and initialize the rd_rel field from the given pg_class tuple.
372  */
/*
 * Build a fresh, mostly empty relation descriptor from a pg_class row image.
 *
 * A zeroed RelationData is allocated; CLASS_TUPLE_SIZE bytes are copied from
 * relp into separately palloc'd storage that becomes rd_rel; rd_att is a
 * template tuple descriptor sized from relnatts/relhasoids, with tdrefcount
 * set to 1.  Returns the new descriptor.
 *
 * NOTE(review): listing line 374 (function name and parameter list, which
 * presumably declares "relp") and line 381 (presumably the statement that
 * saves the previous memory context in oldcxt) are not present in this view.
 */
373 static Relation
375 {
376  Relation relation;
377  MemoryContext oldcxt;
378  Form_pg_class relationForm;
379 
380  /* Relcache entries must live in CacheMemoryContext */
382 
383  /*
384  * allocate and zero space for new relation descriptor
385  */
386  relation = (Relation) palloc0(sizeof(RelationData));
387 
388  /* make sure relation is marked as having no open file yet */
389  relation->rd_smgr = NULL;
390 
391  /*
392  * Copy the relation tuple form
393  *
394  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
395  * variable-length fields (relacl, reloptions) are NOT stored in the
396  * relcache --- there'd be little point in it, since we don't copy the
397  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
398  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
399  * it from the syscache if you need it. The same goes for the original
400  * form of reloptions (however, we do store the parsed form of reloptions
401  * in rd_options).
402  */
403  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
404 
405  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
406 
407  /* initialize relation tuple form */
408  relation->rd_rel = relationForm;
409 
410  /* and allocate attribute tuple form storage */
411  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
412  relationForm->relhasoids);
413  /* which we mark as a reference-counted tupdesc */
414  relation->rd_att->tdrefcount = 1;
415 
    /* switch back to the context saved in oldcxt */
416  MemoryContextSwitchTo(oldcxt);
417 
418  return relation;
419 }
420 
421 /*
422  * RelationParseRelOptions
423  * Convert pg_class.reloptions into pre-parsed rd_options
424  *
425  * tuple is the real pg_class tuple (not rd_rel!) for relation
426  *
427  * Note: rd_rel and (if an index) rd_amroutine must be valid already
428  */
/*
 * Set relation->rd_options from the reloptions stored in a pg_class tuple.
 *
 * Per the prototype earlier in this file the parameters are
 * (Relation relation, HeapTuple tuple).
 *
 * rd_options is reset to NULL first and stays NULL when the relkind is not
 * one of the cases listed in the switch, or when extractRelOptions() returns
 * NULL.  Otherwise the parsed bytea is copied into rd_options and the
 * original is pfree'd.
 *
 * NOTE(review): listing lines 430 (function name/parameters), 444 (between
 * the last visible case label and "break"), 456 (an extractRelOptions
 * argument) and 468 (start of the statement that allocates rd_options) are
 * not present in this view.
 */
429 static void
431 {
432  bytea *options;
433 
434  relation->rd_options = NULL;
435 
436  /* Fall out if relkind should not have options */
437  switch (relation->rd_rel->relkind)
438  {
439  case RELKIND_RELATION:
440  case RELKIND_TOASTVALUE:
441  case RELKIND_INDEX:
442  case RELKIND_VIEW:
443  case RELKIND_MATVIEW:
445  break;
446  default:
447  return;
448  }
449 
450  /*
451  * Fetch reloptions from tuple; have to use a hardwired descriptor because
452  * we might not have any other for pg_class yet (consider executing this
453  * code for pg_class itself)
454  */
    /* the amoptions callback is passed only for indexes; every other relkind passes NULL */
455  options = extractRelOptions(tuple,
457  relation->rd_rel->relkind == RELKIND_INDEX ?
458  relation->rd_amroutine->amoptions : NULL);
459 
460  /*
461  * Copy parsed data into CacheMemoryContext. To guard against the
462  * possibility of leaks in the reloptions code, we want to do the actual
463  * parsing in the caller's memory context and copy the results into
464  * CacheMemoryContext after the fact.
465  */
466  if (options)
467  {
    /* the copy is VARSIZE(options) bytes long, i.e. the whole varlena */
469  VARSIZE(options));
470  memcpy(relation->rd_options, options, VARSIZE(options));
471  pfree(options);
472  }
473 }
474 
475 /*
476  * RelationBuildTupleDesc
477  *
478  * Form the relation's tuple descriptor from information in
479  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
480  */
/*
 * Fill relation->rd_att from the relation's pg_attribute rows.
 *
 * Per the prototype earlier in this file the parameter is (Relation relation).
 *
 * Steps: copy tdtypeid/tdhasoid from rd_rel; scan pg_attribute with a
 * two-key scan; store each row at slot attnum-1 of rd_att; note NOT NULL and
 * DEFAULT columns.  Afterwards either attach the TupleConstr (defaults are
 * loaded by AttrDefaultFetch, checks by CheckConstraintFetch) or free it and
 * leave rd_att->constr NULL when no constraint info is needed.
 *
 * Raises ERROR for an attnum outside 1..relnatts, and when fewer than
 * relnatts rows were found.
 *
 * NOTE(review): listing lines 482, 498, 508, 510, 512, 523, 524, 546, 556
 * and 624 are not present in this view (function name/parameters, the start
 * of the constr allocation, scan-key arguments, systable_beginscan
 * arguments, the memcpy length, and the starts of two allocation calls).
 */
481 static void
483 {
484  HeapTuple pg_attribute_tuple;
485  Relation pg_attribute_desc;
486  SysScanDesc pg_attribute_scan;
487  ScanKeyData skey[2];
488  int need;
489  TupleConstr *constr;
490  AttrDefault *attrdef = NULL;
491  int ndef = 0;
492 
493  /* copy some fields from pg_class row to rd_att */
494  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
495  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
496  relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
497 
    /* tail of the statement that allocates constr; its first line is missing from this listing */
499  sizeof(TupleConstr));
500  constr->has_not_null = false;
501 
502  /*
503  * Form a scan key that selects only user attributes (attnum > 0).
504  * (Eliminating system attribute rows at the index level is lots faster
505  * than fetching them.)
506  */
507  ScanKeyInit(&skey[0],
509  BTEqualStrategyNumber, F_OIDEQ,
511  ScanKeyInit(&skey[1],
513  BTGreaterStrategyNumber, F_INT2GT,
514  Int16GetDatum(0));
515 
516  /*
517  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
518  * built the critical relcache entries (this includes initdb and startup
519  * without a pg_internal.init file).
520  */
521  pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
522  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
525  NULL,
526  2, skey);
527 
528  /*
529  * add attribute data to relation->rd_att
530  */
    /* "need" counts down the attribute rows still expected */
531  need = relation->rd_rel->relnatts;
532 
533  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
534  {
535  Form_pg_attribute attp;
536 
537  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
538 
    /* reject rows whose attnum cannot index rd_att */
539  if (attp->attnum <= 0 ||
540  attp->attnum > relation->rd_rel->relnatts)
541  elog(ERROR, "invalid attribute number %d for %s",
542  attp->attnum, RelationGetRelationName(relation));
543 
    /* slot index is attnum - 1; the length argument is on the missing listing line 546 */
544  memcpy(TupleDescAttr(relation->rd_att, attp->attnum - 1),
545  attp,
547 
548  /* Update constraint/default info */
549  if (attp->attnotnull)
550  constr->has_not_null = true;
551 
552  if (attp->atthasdef)
553  {
    /* allocated lazily on the first default seen, with room for relnatts entries */
554  if (attrdef == NULL)
555  attrdef = (AttrDefault *)
557  relation->rd_rel->relnatts *
558  sizeof(AttrDefault));
    /* adbin stays NULL here; AttrDefaultFetch() is called further down once all rows are read */
559  attrdef[ndef].adnum = attp->attnum;
560  attrdef[ndef].adbin = NULL;
561  ndef++;
562  }
    /* stop as soon as all relnatts attributes have been seen */
563  need--;
564  if (need == 0)
565  break;
566  }
567 
568  /*
569  * end the scan and close the attribute relation
570  */
571  systable_endscan(pg_attribute_scan);
572  heap_close(pg_attribute_desc, AccessShareLock);
573 
574  if (need != 0)
575  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
576  need, RelationGetRelid(relation));
577 
578  /*
579  * The attcacheoff values we read from pg_attribute should all be -1
580  * ("unknown"). Verify this if assert checking is on. They will be
581  * computed when and if needed during tuple access.
582  */
583 #ifdef USE_ASSERT_CHECKING
584  {
585  int i;
586 
587  for (i = 0; i < relation->rd_rel->relnatts; i++)
588  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
589  }
590 #endif
591 
592  /*
593  * However, we can easily set the attcacheoff value for the first
594  * attribute: it must be zero. This eliminates the need for special cases
595  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
596  */
597  if (relation->rd_rel->relnatts > 0)
598  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
599 
600  /*
601  * Set up constraint/default info
602  */
603  if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
604  {
605  relation->rd_att->constr = constr;
606 
607  if (ndef > 0) /* DEFAULTs */
608  {
    /* shrink the array to the number of defaults actually found */
609  if (ndef < relation->rd_rel->relnatts)
610  constr->defval = (AttrDefault *)
611  repalloc(attrdef, ndef * sizeof(AttrDefault));
612  else
613  constr->defval = attrdef;
614  constr->num_defval = ndef;
615  AttrDefaultFetch(relation);
616  }
617  else
618  constr->num_defval = 0;
619 
620  if (relation->rd_rel->relchecks > 0) /* CHECKs */
621  {
622  constr->num_check = relation->rd_rel->relchecks;
    /* array of num_check ConstrCheck entries; the allocation call itself is on the missing line 624 */
623  constr->check = (ConstrCheck *)
625  constr->num_check * sizeof(ConstrCheck));
626  CheckConstraintFetch(relation);
627  }
628  else
629  constr->num_check = 0;
630  }
631  else
632  {
    /* nothing needs the TupleConstr: release it and leave constr NULL */
633  pfree(constr);
634  relation->rd_att->constr = NULL;
635  }
636 }
637 
638 /*
639  * RelationBuildRuleLock
640  *
641  * Form the relation's rewrite rules from information in
642  * the pg_rewrite system catalog.
643  *
644  * Note: The rule parsetrees are potentially very complex node structures.
645  * To allow these trees to be freed when the relcache entry is flushed,
646  * we make a private memory context to hold the RuleLock information for
647  * each relcache entry that has associated rules. The context is used
648  * just for rule info, not for any other subsidiary data of the relcache
649  * entry, because that keeps the update logic in RelationClearRelation()
650  * manageable. The other subsidiary data structures are simple enough
651  * to be easy to free explicitly, anyway.
652  */
/*
 * Load the relation's rewrite rules into relation->rd_rules.
 *
 * All rule data (the RewriteRule structs, the parsed action and qual trees,
 * the pointer array and the RuleLock) is allocated in rulescxt, which is
 * stored in relation->rd_rulescxt.  If the scan finds no rules, rd_rules and
 * rd_rulescxt are reset to NULL and rulescxt is deleted.
 *
 * NOTE(review): listing lines 654, 671, 673, 674, 690, 692, 702, 705, 733
 * and 744 are not present in this view (function name/parameters, creation
 * of rulescxt, scan-key arguments, the statement opening rewrite_desc, the
 * index argument of systable_beginscan, and the attribute-number arguments
 * of both heap_getattr calls).
 */
653 static void
655 {
656  MemoryContext rulescxt;
657  MemoryContext oldcxt;
658  HeapTuple rewrite_tuple;
659  Relation rewrite_desc;
660  TupleDesc rewrite_tupdesc;
661  SysScanDesc rewrite_scan;
662  ScanKeyData key;
663  RuleLock *rulelock;
664  int numlocks;
665  RewriteRule **rules;
666  int maxlocks;
667 
668  /*
669  * Make the private context. Assume it'll not contain much data.
670  */
672  RelationGetRelationName(relation),
675  relation->rd_rulescxt = rulescxt;
676 
677  /*
678  * allocate an array to hold the rewrite rules (the array is extended if
679  * necessary)
680  */
681  maxlocks = 4;
682  rules = (RewriteRule **)
683  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
684  numlocks = 0;
685 
686  /*
687  * form a scan key
688  */
689  ScanKeyInit(&key,
691  BTEqualStrategyNumber, F_OIDEQ,
693 
694  /*
695  * open pg_rewrite and begin a scan
696  *
697  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
698  * be reading the rules in name order, except possibly during
699  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
700  * ensures that rules will be fired in name order.
701  */
703  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
704  rewrite_scan = systable_beginscan(rewrite_desc,
706  true, NULL,
707  1, &key);
708 
709  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
710  {
711  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
712  bool isnull;
713  Datum rule_datum;
714  char *rule_str;
715  RewriteRule *rule;
716 
717  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
718  sizeof(RewriteRule));
719 
720  rule->ruleId = HeapTupleGetOid(rewrite_tuple);
721 
    /* ev_type is a character; subtracting '0' gives its offset from the digit zero */
722  rule->event = rewrite_form->ev_type - '0';
723  rule->enabled = rewrite_form->ev_enabled;
724  rule->isInstead = rewrite_form->is_instead;
725 
726  /*
727  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
728  * rule strings are often large enough to be toasted. To avoid
729  * leaking memory in the caller's context, do the detoasting here so
730  * we can free the detoasted version.
731  */
732  rule_datum = heap_getattr(rewrite_tuple,
734  rewrite_tupdesc,
735  &isnull);
736  Assert(!isnull);
737  rule_str = TextDatumGetCString(rule_datum);
    /* only the node tree is built inside rulescxt; the C string is freed right after */
738  oldcxt = MemoryContextSwitchTo(rulescxt);
739  rule->actions = (List *) stringToNode(rule_str);
740  MemoryContextSwitchTo(oldcxt);
741  pfree(rule_str);
742 
743  rule_datum = heap_getattr(rewrite_tuple,
745  rewrite_tupdesc,
746  &isnull);
747  Assert(!isnull);
748  rule_str = TextDatumGetCString(rule_datum);
749  oldcxt = MemoryContextSwitchTo(rulescxt);
750  rule->qual = (Node *) stringToNode(rule_str);
751  MemoryContextSwitchTo(oldcxt);
752  pfree(rule_str);
753 
754  /*
755  * We want the rule's table references to be checked as though by the
756  * table owner, not the user referencing the rule. Therefore, scan
757  * through the rule's actions and set the checkAsUser field on all
758  * rtable entries. We have to look at the qual as well, in case it
759  * contains sublinks.
760  *
761  * The reason for doing this when the rule is loaded, rather than when
762  * it is stored, is that otherwise ALTER TABLE OWNER would have to
763  * grovel through stored rules to update checkAsUser fields. Scanning
764  * the rule tree during load is relatively cheap (compared to
765  * constructing it in the first place), so we do it here.
766  */
767  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
768  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
769 
    /* the pointer array is full: double its capacity before appending */
770  if (numlocks >= maxlocks)
771  {
772  maxlocks *= 2;
773  rules = (RewriteRule **)
774  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
775  }
776  rules[numlocks++] = rule;
777  }
778 
779  /*
780  * end the scan and close the rewrite relation
781  */
782  systable_endscan(rewrite_scan);
783  heap_close(rewrite_desc, AccessShareLock);
784 
785  /*
786  * there might not be any rules (if relhasrules is out-of-date)
787  */
788  if (numlocks == 0)
789  {
    /* deleting rulescxt also releases the "rules" array allocated in it */
790  relation->rd_rules = NULL;
791  relation->rd_rulescxt = NULL;
792  MemoryContextDelete(rulescxt);
793  return;
794  }
795 
796  /*
797  * form a RuleLock and insert into relation
798  */
799  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
800  rulelock->numLocks = numlocks;
801  rulelock->rules = rules;
802 
803  relation->rd_rules = rulelock;
804 }
805 
806 /*
807  * RelationBuildPartitionKey
808  * Build and attach to relcache partition key data of relation
809  *
810  * Partitioning key data is a complex structure; to avoid complicated logic to
811  * free individual elements whenever the relcache entry is flushed, we give it
812  * its own memory context, child of CacheMemoryContext, which can easily be
813  * deleted on its own. To avoid leaking memory in that context in case of an
814  * error partway through this function, the context is initially created as a
815  * child of CurTransactionContext and only re-parented to CacheMemoryContext
816  * at the end, when no further errors are possible. Also, we don't make this
817  * context the current context except in very brief code sections, out of fear
818  * that some of our callees allocate memory on their own which would be leaked
819  * permanently.
820  */
/*
 * Build the relation's PartitionKeyData and attach it to the relcache entry.
 *
 * Per the prototype earlier in this file the parameter is (Relation relation).
 *
 * The key is built from the PARTRELID syscache row.  If that row is not
 * found, the function returns without touching the relation.  Otherwise it
 * fills the per-column arrays (opfamily, opcintype, support function,
 * collation, type info) and finally sets relation->rd_partkeycxt and
 * relation->rd_partkey.  Raises ERROR if an opclass lookup fails or the
 * opfamily lacks the required support function.
 *
 * NOTE(review): listing lines 822, 824, 839, 848, 850, 851, 871, 877, 883,
 * 928, 960, 985 and 999 are not present in this view (function
 * name/parameters, the declaration of "form", the syscache key argument,
 * creation of partkeycxt, the attribute-number arguments of the three
 * SysCacheGetAttr calls, the tail of the procnum expression, the condition
 * of the access-method-name ternary, the start of the call that fills
 * parttyplen/parttypbyval/parttypalign, and the re-parenting statement).
 */
821 static void
823 {
825  HeapTuple tuple;
826  bool isnull;
827  int i;
828  PartitionKey key;
829  AttrNumber *attrs;
830  oidvector *opclass;
831  oidvector *collation;
832  ListCell *partexprs_item;
833  Datum datum;
834  MemoryContext partkeycxt,
835  oldcxt;
836  int16 procnum;
837 
838  tuple = SearchSysCache1(PARTRELID,
840 
841  /*
842  * The following happens when we have created our pg_class entry but not
843  * the pg_partitioned_table entry yet.
844  */
845  if (!HeapTupleIsValid(tuple))
846  return;
847 
849  RelationGetRelationName(relation),
852 
    /* the key struct is zero-filled, so partexprs starts out as an empty (NULL) list */
853  key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
854  sizeof(PartitionKeyData));
855 
856  /* Fixed-length attributes */
857  form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
858  key->strategy = form->partstrat;
859  key->partnatts = form->partnatts;
860 
861  /*
862  * We can rely on the first variable-length attribute being mapped to the
863  * relevant field of the catalog's C struct, because all previous
864  * attributes are non-nullable and fixed-length.
865  */
866  attrs = form->partattrs.values;
867 
868  /* But use the hard way to retrieve further variable-length attributes */
869  /* Operator class */
870  datum = SysCacheGetAttr(PARTRELID, tuple,
872  Assert(!isnull);
873  opclass = (oidvector *) DatumGetPointer(datum);
874 
875  /* Collation */
876  datum = SysCacheGetAttr(PARTRELID, tuple,
878  Assert(!isnull);
879  collation = (oidvector *) DatumGetPointer(datum);
880 
881  /* Expressions */
882  datum = SysCacheGetAttr(PARTRELID, tuple,
884  if (!isnull)
885  {
886  char *exprString;
887  Node *expr;
888 
    /* parsing and simplification run in the caller's context; only the final copy goes into partkeycxt */
889  exprString = TextDatumGetCString(datum);
890  expr = stringToNode(exprString);
891  pfree(exprString);
892 
893  /*
894  * Run the expressions through const-simplification since the planner
895  * will be comparing them to similarly-processed qual clause operands,
896  * and may fail to detect valid matches without this step; fix
897  * opfuncids while at it. We don't need to bother with
898  * canonicalize_qual() though, because partition expressions are not
899  * full-fledged qualification clauses.
900  */
901  expr = eval_const_expressions(NULL, expr);
902  fix_opfuncids(expr);
903 
904  oldcxt = MemoryContextSwitchTo(partkeycxt);
905  key->partexprs = (List *) copyObject(expr);
906  MemoryContextSwitchTo(oldcxt);
907  }
908 
    /* all per-column arrays have partnatts entries and are zero-filled */
909  oldcxt = MemoryContextSwitchTo(partkeycxt);
910  key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
911  key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
912  key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
913  key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
914 
915  key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
916 
917  /* Gather type and collation info as well */
918  key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
919  key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
920  key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
921  key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
922  key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
923  key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
924  MemoryContextSwitchTo(oldcxt);
925 
926  /* determine support function number to search for */
927  procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
929 
930  /* Copy partattrs and fill other per-attribute info */
    /*
     * NOTE(review): the copy length uses sizeof(int16) while the destination
     * was allocated with sizeof(AttrNumber); the two agree only if
     * AttrNumber is a 16-bit type -- confirm, or use sizeof(AttrNumber).
     */
931  memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
932  partexprs_item = list_head(key->partexprs);
933  for (i = 0; i < key->partnatts; i++)
934  {
935  AttrNumber attno = key->partattrs[i];
936  HeapTuple opclasstup;
937  Form_pg_opclass opclassform;
938  Oid funcid;
939 
940  /* Collect opfamily information */
941  opclasstup = SearchSysCache1(CLAOID,
942  ObjectIdGetDatum(opclass->values[i]));
943  if (!HeapTupleIsValid(opclasstup))
944  elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
945 
946  opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
947  key->partopfamily[i] = opclassform->opcfamily;
948  key->partopcintype[i] = opclassform->opcintype;
949 
950  /* Get a support function for the specified opfamily and datatypes */
951  funcid = get_opfamily_proc(opclassform->opcfamily,
952  opclassform->opcintype,
953  opclassform->opcintype,
954  procnum);
955  if (!OidIsValid(funcid))
956  ereport(ERROR,
957  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
958  errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
959  NameStr(opclassform->opcname),
961  "hash" : "btree",
962  procnum,
963  format_type_be(opclassform->opcintype))));
964 
965  fmgr_info(funcid, &key->partsupfunc[i]);
966 
967  /* Collation */
968  key->partcollation[i] = collation->values[i];
969 
970  /* Collect type information */
    /* a nonzero attno names a table column; zero means this key column is an expression */
971  if (attno != 0)
972  {
973  Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
974 
975  key->parttypid[i] = att->atttypid;
976  key->parttypmod[i] = att->atttypmod;
977  key->parttypcoll[i] = att->attcollation;
978  }
979  else
980  {
    /* NOTE(review): partexprs_item is not advanced in any line visible here -- confirm against the full source */
981  key->parttypid[i] = exprType(lfirst(partexprs_item));
982  key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
983  key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
984  }
986  &key->parttyplen[i],
987  &key->parttypbyval[i],
988  &key->parttypalign[i]);
989 
990  ReleaseSysCache(opclasstup);
991  }
992 
993  ReleaseSysCache(tuple);
994 
995  /*
996  * Success --- reparent our context and make the relcache point to the
997  * newly constructed key
998  */
1000  relation->rd_partkeycxt = partkeycxt;
1001  relation->rd_partkey = key;
1002 }
1003 
1004 /*
1005  * equalRuleLocks
1006  *
1007  * Determine whether two RuleLocks are equivalent
1008  *
1009  * Probably this should be in the rules code someplace...
1010  */
1011 static bool
1013 {
1014  int i;
1015 
1016  /*
1017  * As of 7.3 we assume the rule ordering is repeatable, because
1018  * RelationBuildRuleLock should read 'em in a consistent order. So just
1019  * compare corresponding slots.
1020  */
1021  if (rlock1 != NULL)
1022  {
1023  if (rlock2 == NULL)
1024  return false;
1025  if (rlock1->numLocks != rlock2->numLocks)
1026  return false;
1027  for (i = 0; i < rlock1->numLocks; i++)
1028  {
1029  RewriteRule *rule1 = rlock1->rules[i];
1030  RewriteRule *rule2 = rlock2->rules[i];
1031 
1032  if (rule1->ruleId != rule2->ruleId)
1033  return false;
1034  if (rule1->event != rule2->event)
1035  return false;
1036  if (rule1->enabled != rule2->enabled)
1037  return false;
1038  if (rule1->isInstead != rule2->isInstead)
1039  return false;
1040  if (!equal(rule1->qual, rule2->qual))
1041  return false;
1042  if (!equal(rule1->actions, rule2->actions))
1043  return false;
1044  }
1045  }
1046  else if (rlock2 != NULL)
1047  return false;
1048  return true;
1049 }
1050 
1051 /*
1052  * equalPolicy
1053  *
1054  * Determine whether two policies are equivalent
1055  */
1056 static bool
1058 {
1059  int i;
1060  Oid *r1,
1061  *r2;
1062 
1063  if (policy1 != NULL)
1064  {
1065  if (policy2 == NULL)
1066  return false;
1067 
1068  if (policy1->polcmd != policy2->polcmd)
1069  return false;
1070  if (policy1->hassublinks != policy2->hassublinks)
1071  return false;
1072  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
1073  return false;
1074  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
1075  return false;
1076 
1077  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
1078  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
1079 
1080  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
1081  {
1082  if (r1[i] != r2[i])
1083  return false;
1084  }
1085 
1086  if (!equal(policy1->qual, policy2->qual))
1087  return false;
1088  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
1089  return false;
1090  }
1091  else if (policy2 != NULL)
1092  return false;
1093 
1094  return true;
1095 }
1096 
1097 /*
1098  * equalRSDesc
1099  *
1100  * Determine whether two RowSecurityDesc's are equivalent
1101  */
1102 static bool
1104 {
1105  ListCell *lc,
1106  *rc;
1107 
1108  if (rsdesc1 == NULL && rsdesc2 == NULL)
1109  return true;
1110 
1111  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1112  (rsdesc1 == NULL && rsdesc2 != NULL))
1113  return false;
1114 
1115  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1116  return false;
1117 
1118  /* RelationBuildRowSecurity should build policies in order */
1119  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1120  {
1123 
1124  if (!equalPolicy(l, r))
1125  return false;
1126  }
1127 
1128  return true;
1129 }
1130 
1131 /*
1132  * equalPartitionDescs
1133  * Compare two partition descriptors for logical equality
1134  */
1135 static bool
1137  PartitionDesc partdesc2)
1138 {
1139  int i;
1140 
1141  if (partdesc1 != NULL)
1142  {
1143  if (partdesc2 == NULL)
1144  return false;
1145  if (partdesc1->nparts != partdesc2->nparts)
1146  return false;
1147 
1148  Assert(key != NULL || partdesc1->nparts == 0);
1149 
1150  /*
1151  * Same oids? If the partitioning structure did not change, that is,
1152  * no partitions were added or removed to the relation, the oids array
1153  * should still match element-by-element.
1154  */
1155  for (i = 0; i < partdesc1->nparts; i++)
1156  {
1157  if (partdesc1->oids[i] != partdesc2->oids[i])
1158  return false;
1159  }
1160 
1161  /*
1162  * Now compare partition bound collections. The logic to iterate over
1163  * the collections is private to partition.c.
1164  */
1165  if (partdesc1->boundinfo != NULL)
1166  {
1167  if (partdesc2->boundinfo == NULL)
1168  return false;
1169 
1171  key->parttypbyval,
1172  partdesc1->boundinfo,
1173  partdesc2->boundinfo))
1174  return false;
1175  }
1176  else if (partdesc2->boundinfo != NULL)
1177  return false;
1178  }
1179  else if (partdesc2 != NULL)
1180  return false;
1181 
1182  return true;
1183 }
1184 
1185 /*
1186  * RelationBuildDesc
1187  *
1188  * Build a relation descriptor. The caller must hold at least
1189  * AccessShareLock on the target relid.
1190  *
1191  * The new descriptor is inserted into the hash table if insertIt is true.
1192  *
1193  * Returns NULL if no pg_class row could be found for the given relid
1194  * (suggesting we are trying to access a just-deleted relation).
1195  * Any other error is reported via elog.
1196  */
1197 static Relation
1198 RelationBuildDesc(Oid targetRelId, bool insertIt)
1199 {
1200  Relation relation;
1201  Oid relid;
1202  HeapTuple pg_class_tuple;
1203  Form_pg_class relp;
1204 
1205  /*
1206  * find the tuple in pg_class corresponding to the given relation id
1207  */
1208  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1209 
1210  /*
1211  * if no such tuple exists, return NULL
1212  */
1213  if (!HeapTupleIsValid(pg_class_tuple))
1214  return NULL;
1215 
1216  /*
1217  * get information from the pg_class_tuple
1218  */
1219  relid = HeapTupleGetOid(pg_class_tuple);
1220  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1221  Assert(relid == targetRelId);
1222 
1223  /*
1224  * allocate storage for the relation descriptor, and copy pg_class_tuple
1225  * to relation->rd_rel.
1226  */
1227  relation = AllocateRelationDesc(relp);
1228 
1229  /*
1230  * initialize the relation's relation id (relation->rd_id)
1231  */
1232  RelationGetRelid(relation) = relid;
1233 
1234  /*
1235  * normal relations are not nailed into the cache; nor can a pre-existing
1236  * relation be new. It could be temp though. (Actually, it could be new
1237  * too, but it's okay to forget that fact if forced to flush the entry.)
1238  */
1239  relation->rd_refcnt = 0;
1240  relation->rd_isnailed = false;
1243  switch (relation->rd_rel->relpersistence)
1244  {
1247  relation->rd_backend = InvalidBackendId;
1248  relation->rd_islocaltemp = false;
1249  break;
1250  case RELPERSISTENCE_TEMP:
1251  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1252  {
1253  relation->rd_backend = BackendIdForTempRelations();
1254  relation->rd_islocaltemp = true;
1255  }
1256  else
1257  {
1258  /*
1259  * If it's a temp table, but not one of ours, we have to use
1260  * the slow, grotty method to figure out the owning backend.
1261  *
1262  * Note: it's possible that rd_backend gets set to MyBackendId
1263  * here, in case we are looking at a pg_class entry left over
1264  * from a crashed backend that coincidentally had the same
1265  * BackendId we're using. We should *not* consider such a
1266  * table to be "ours"; this is why we need the separate
1267  * rd_islocaltemp flag. The pg_class entry will get flushed
1268  * if/when we clean out the corresponding temp table namespace
1269  * in preparation for using it.
1270  */
1271  relation->rd_backend =
1272  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1273  Assert(relation->rd_backend != InvalidBackendId);
1274  relation->rd_islocaltemp = false;
1275  }
1276  break;
1277  default:
1278  elog(ERROR, "invalid relpersistence: %c",
1279  relation->rd_rel->relpersistence);
1280  break;
1281  }
1282 
1283  /*
1284  * initialize the tuple descriptor (relation->rd_att).
1285  */
1286  RelationBuildTupleDesc(relation);
1287 
1288  /*
1289  * Fetch rules and triggers that affect this relation
1290  */
1291  if (relation->rd_rel->relhasrules)
1292  RelationBuildRuleLock(relation);
1293  else
1294  {
1295  relation->rd_rules = NULL;
1296  relation->rd_rulescxt = NULL;
1297  }
1298 
1299  if (relation->rd_rel->relhastriggers)
1300  RelationBuildTriggers(relation);
1301  else
1302  relation->trigdesc = NULL;
1303 
1304  if (relation->rd_rel->relrowsecurity)
1305  RelationBuildRowSecurity(relation);
1306  else
1307  relation->rd_rsdesc = NULL;
1308 
1309  /* foreign key data is not loaded till asked for */
1310  relation->rd_fkeylist = NIL;
1311  relation->rd_fkeyvalid = false;
1312 
1313  /* if a partitioned table, initialize key and partition descriptor info */
1314  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1315  {
1316  RelationBuildPartitionKey(relation);
1317  RelationBuildPartitionDesc(relation);
1318  }
1319  else
1320  {
1321  relation->rd_partkeycxt = NULL;
1322  relation->rd_partkey = NULL;
1323  relation->rd_partdesc = NULL;
1324  relation->rd_pdcxt = NULL;
1325  }
1326 
1327  /*
1328  * if it's an index, initialize index-related information
1329  */
1330  if (OidIsValid(relation->rd_rel->relam))
1331  RelationInitIndexAccessInfo(relation);
1332 
1333  /* extract reloptions if any */
1334  RelationParseRelOptions(relation, pg_class_tuple);
1335 
1336  /*
1337  * initialize the relation lock manager information
1338  */
1339  RelationInitLockInfo(relation); /* see lmgr.c */
1340 
1341  /*
1342  * initialize physical addressing information for the relation
1343  */
1344  RelationInitPhysicalAddr(relation);
1345 
1346  /* make sure relation is marked as having no open file yet */
1347  relation->rd_smgr = NULL;
1348 
1349  /*
1350  * now we can free the memory allocated for pg_class_tuple
1351  */
1352  heap_freetuple(pg_class_tuple);
1353 
1354  /*
1355  * Insert newly created relation into relcache hash table, if requested.
1356  *
1357  * There is one scenario in which we might find a hashtable entry already
1358  * present, even though our caller failed to find it: if the relation is a
1359  * system catalog or index that's used during relcache load, we might have
1360  * recursively created the same relcache entry during the preceding steps.
1361  * So allow RelationCacheInsert to delete any already-present relcache
1362  * entry for the same OID. The already-present entry should have refcount
1363  * zero (else somebody forgot to close it); in the event that it doesn't,
1364  * we'll elog a WARNING and leak the already-present entry.
1365  */
1366  if (insertIt)
1367  RelationCacheInsert(relation, true);
1368 
1369  /* It's fully valid */
1370  relation->rd_isvalid = true;
1371 
1372  return relation;
1373 }
1374 
1375 /*
1376  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1377  *
1378  * Note: at the physical level, relations in the pg_global tablespace must
1379  * be treated as shared, even if relisshared isn't set. Hence we do not
1380  * look at relisshared here.
1381  */
1382 static void
1384 {
1385  if (relation->rd_rel->reltablespace)
1386  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1387  else
1388  relation->rd_node.spcNode = MyDatabaseTableSpace;
1389  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1390  relation->rd_node.dbNode = InvalidOid;
1391  else
1392  relation->rd_node.dbNode = MyDatabaseId;
1393 
1394  if (relation->rd_rel->relfilenode)
1395  {
1396  /*
1397  * Even if we are using a decoding snapshot that doesn't represent the
1398  * current state of the catalog we need to make sure the filenode
1399  * points to the current file since the older file will be gone (or
1400  * truncated). The new file will still contain older rows so lookups
1401  * in them will work correctly. This wouldn't work correctly if
1402  * rewrites were allowed to change the schema in an incompatible way,
1403  * but those are prevented both on catalog tables and on user tables
1404  * declared as additional catalog tables.
1405  */
1408  && IsTransactionState())
1409  {
1410  HeapTuple phys_tuple;
1411  Form_pg_class physrel;
1412 
1413  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1414  RelationGetRelid(relation) != ClassOidIndexId,
1415  true);
1416  if (!HeapTupleIsValid(phys_tuple))
1417  elog(ERROR, "could not find pg_class entry for %u",
1418  RelationGetRelid(relation));
1419  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1420 
1421  relation->rd_rel->reltablespace = physrel->reltablespace;
1422  relation->rd_rel->relfilenode = physrel->relfilenode;
1423  heap_freetuple(phys_tuple);
1424  }
1425 
1426  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1427  }
1428  else
1429  {
1430  /* Consult the relation mapper */
1431  relation->rd_node.relNode =
1432  RelationMapOidToFilenode(relation->rd_id,
1433  relation->rd_rel->relisshared);
1434  if (!OidIsValid(relation->rd_node.relNode))
1435  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1436  RelationGetRelationName(relation), relation->rd_id);
1437  }
1438 }
1439 
1440 /*
1441  * Fill in the IndexAmRoutine for an index relation.
1442  *
1443  * relation's rd_amhandler and rd_indexcxt must be valid already.
1444  */
1445 static void
1447 {
1448  IndexAmRoutine *cached,
1449  *tmp;
1450 
1451  /*
1452  * Call the amhandler in current, short-lived memory context, just in case
1453  * it leaks anything (it probably won't, but let's be paranoid).
1454  */
1455  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1456 
1457  /* OK, now transfer the data into relation's rd_indexcxt. */
1458  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1459  sizeof(IndexAmRoutine));
1460  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1461  relation->rd_amroutine = cached;
1462 
1463  pfree(tmp);
1464 }
1465 
1466 /*
1467  * Initialize index-access-method support data for an index relation
1468  */
1469 void
1471 {
1472  HeapTuple tuple;
1473  Form_pg_am aform;
1474  Datum indcollDatum;
1475  Datum indclassDatum;
1476  Datum indoptionDatum;
1477  bool isnull;
1478  oidvector *indcoll;
1479  oidvector *indclass;
1480  int2vector *indoption;
1481  MemoryContext indexcxt;
1482  MemoryContext oldcontext;
1483  int natts;
1484  uint16 amsupport;
1485 
1486  /*
1487  * Make a copy of the pg_index entry for the index. Since pg_index
1488  * contains variable-length and possibly-null fields, we have to do this
1489  * honestly rather than just treating it as a Form_pg_index struct.
1490  */
1491  tuple = SearchSysCache1(INDEXRELID,
1492  ObjectIdGetDatum(RelationGetRelid(relation)));
1493  if (!HeapTupleIsValid(tuple))
1494  elog(ERROR, "cache lookup failed for index %u",
1495  RelationGetRelid(relation));
1497  relation->rd_indextuple = heap_copytuple(tuple);
1498  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1499  MemoryContextSwitchTo(oldcontext);
1500  ReleaseSysCache(tuple);
1501 
1502  /*
1503  * Look up the index's access method, save the OID of its handler function
1504  */
1505  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1506  if (!HeapTupleIsValid(tuple))
1507  elog(ERROR, "cache lookup failed for access method %u",
1508  relation->rd_rel->relam);
1509  aform = (Form_pg_am) GETSTRUCT(tuple);
1510  relation->rd_amhandler = aform->amhandler;
1511  ReleaseSysCache(tuple);
1512 
1513  natts = relation->rd_rel->relnatts;
1514  if (natts != relation->rd_index->indnatts)
1515  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1516  RelationGetRelid(relation));
1517 
1518  /*
1519  * Make the private context to hold index access info. The reason we need
1520  * a context, and not just a couple of pallocs, is so that we won't leak
1521  * any subsidiary info attached to fmgr lookup records.
1522  */
1524  RelationGetRelationName(relation),
1527  relation->rd_indexcxt = indexcxt;
1528 
1529  /*
1530  * Now we can fetch the index AM's API struct
1531  */
1532  InitIndexAmRoutine(relation);
1533 
1534  /*
1535  * Allocate arrays to hold data
1536  */
1537  relation->rd_opfamily = (Oid *)
1538  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1539  relation->rd_opcintype = (Oid *)
1540  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1541 
1542  amsupport = relation->rd_amroutine->amsupport;
1543  if (amsupport > 0)
1544  {
1545  int nsupport = natts * amsupport;
1546 
1547  relation->rd_support = (RegProcedure *)
1548  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1549  relation->rd_supportinfo = (FmgrInfo *)
1550  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1551  }
1552  else
1553  {
1554  relation->rd_support = NULL;
1555  relation->rd_supportinfo = NULL;
1556  }
1557 
1558  relation->rd_indcollation = (Oid *)
1559  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1560 
1561  relation->rd_indoption = (int16 *)
1562  MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1563 
1564  /*
1565  * indcollation cannot be referenced directly through the C struct,
1566  * because it comes after the variable-width indkey field. Must extract
1567  * the datum the hard way...
1568  */
1569  indcollDatum = fastgetattr(relation->rd_indextuple,
1572  &isnull);
1573  Assert(!isnull);
1574  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1575  memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1576 
1577  /*
1578  * indclass cannot be referenced directly through the C struct, because it
1579  * comes after the variable-width indkey field. Must extract the datum
1580  * the hard way...
1581  */
1582  indclassDatum = fastgetattr(relation->rd_indextuple,
1585  &isnull);
1586  Assert(!isnull);
1587  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1588 
1589  /*
1590  * Fill the support procedure OID array, as well as the info about
1591  * opfamilies and opclass input types. (aminfo and supportinfo are left
1592  * as zeroes, and are filled on-the-fly when used)
1593  */
1594  IndexSupportInitialize(indclass, relation->rd_support,
1595  relation->rd_opfamily, relation->rd_opcintype,
1596  amsupport, natts);
1597 
1598  /*
1599  * Similarly extract indoption and copy it to the cache entry
1600  */
1601  indoptionDatum = fastgetattr(relation->rd_indextuple,
1604  &isnull);
1605  Assert(!isnull);
1606  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1607  memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1608 
1609  /*
1610  * expressions, predicate, exclusion caches will be filled later
1611  */
1612  relation->rd_indexprs = NIL;
1613  relation->rd_indpred = NIL;
1614  relation->rd_exclops = NULL;
1615  relation->rd_exclprocs = NULL;
1616  relation->rd_exclstrats = NULL;
1617  relation->rd_amcache = NULL;
1618 }
1619 
1620 /*
1621  * IndexSupportInitialize
1622  * Initializes an index's cached opclass information,
1623  * given the index's pg_index.indclass entry.
1624  *
1625  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1626  * which are arrays allocated by the caller.
1627  *
1628  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1629  * indicate the size of the arrays it has allocated --- but in practice these
1630  * numbers must always match those obtainable from the system catalog entries
1631  * for the index and access method.
1632  */
1633 static void
1635  RegProcedure *indexSupport,
1636  Oid *opFamily,
1637  Oid *opcInType,
1638  StrategyNumber maxSupportNumber,
1639  AttrNumber maxAttributeNumber)
1640 {
1641  int attIndex;
1642 
1643  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1644  {
1645  OpClassCacheEnt *opcentry;
1646 
1647  if (!OidIsValid(indclass->values[attIndex]))
1648  elog(ERROR, "bogus pg_index tuple");
1649 
1650  /* look up the info for this opclass, using a cache */
1651  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1652  maxSupportNumber);
1653 
1654  /* copy cached data into relcache entry */
1655  opFamily[attIndex] = opcentry->opcfamily;
1656  opcInType[attIndex] = opcentry->opcintype;
1657  if (maxSupportNumber > 0)
1658  memcpy(&indexSupport[attIndex * maxSupportNumber],
1659  opcentry->supportProcs,
1660  maxSupportNumber * sizeof(RegProcedure));
1661  }
1662 }
1663 
1664 /*
1665  * LookupOpclassInfo
1666  *
1667  * This routine maintains a per-opclass cache of the information needed
1668  * by IndexSupportInitialize(). This is more efficient than relying on
1669  * the catalog cache, because we can load all the info about a particular
1670  * opclass in a single indexscan of pg_amproc.
1671  *
1672  * The information from pg_am about expected range of support function
1673  * numbers is passed in, rather than being looked up, mainly because the
1674  * caller will have it already.
1675  *
1676  * Note there is no provision for flushing the cache. This is OK at the
1677  * moment because there is no way to ALTER any interesting properties of an
1678  * existing opclass --- all you can do is drop it, which will result in
1679  * a useless but harmless dead entry in the cache. To support altering
1680  * opclass membership (not the same as opfamily membership!), we'd need to
1681  * be able to flush this cache as well as the contents of relcache entries
1682  * for indexes.
1683  */
1684 static OpClassCacheEnt *
1685 LookupOpclassInfo(Oid operatorClassOid,
1686  StrategyNumber numSupport)
1687 {
1688  OpClassCacheEnt *opcentry;
1689  bool found;
1690  Relation rel;
1691  SysScanDesc scan;
1692  ScanKeyData skey[3];
1693  HeapTuple htup;
1694  bool indexOK;
1695 
1696  if (OpClassCache == NULL)
1697  {
1698  /* First time through: initialize the opclass cache */
1699  HASHCTL ctl;
1700 
1701  MemSet(&ctl, 0, sizeof(ctl));
1702  ctl.keysize = sizeof(Oid);
1703  ctl.entrysize = sizeof(OpClassCacheEnt);
1704  OpClassCache = hash_create("Operator class cache", 64,
1705  &ctl, HASH_ELEM | HASH_BLOBS);
1706 
1707  /* Also make sure CacheMemoryContext exists */
1708  if (!CacheMemoryContext)
1710  }
1711 
1712  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1713  (void *) &operatorClassOid,
1714  HASH_ENTER, &found);
1715 
1716  if (!found)
1717  {
1718  /* Need to allocate memory for new entry */
1719  opcentry->valid = false; /* until known OK */
1720  opcentry->numSupport = numSupport;
1721 
1722  if (numSupport > 0)
1723  opcentry->supportProcs = (RegProcedure *)
1725  numSupport * sizeof(RegProcedure));
1726  else
1727  opcentry->supportProcs = NULL;
1728  }
1729  else
1730  {
1731  Assert(numSupport == opcentry->numSupport);
1732  }
1733 
1734  /*
1735  * When testing for cache-flush hazards, we intentionally disable the
1736  * operator class cache and force reloading of the info on each call. This
1737  * is helpful because we want to test the case where a cache flush occurs
1738  * while we are loading the info, and it's very hard to provoke that if
1739  * this happens only once per opclass per backend.
1740  */
1741 #if defined(CLOBBER_CACHE_ALWAYS)
1742  opcentry->valid = false;
1743 #endif
1744 
1745  if (opcentry->valid)
1746  return opcentry;
1747 
1748  /*
1749  * Need to fill in new entry.
1750  *
1751  * To avoid infinite recursion during startup, force heap scans if we're
1752  * looking up info for the opclasses used by the indexes we would like to
1753  * reference here.
1754  */
1755  indexOK = criticalRelcachesBuilt ||
1756  (operatorClassOid != OID_BTREE_OPS_OID &&
1757  operatorClassOid != INT2_BTREE_OPS_OID);
1758 
1759  /*
1760  * We have to fetch the pg_opclass row to determine its opfamily and
1761  * opcintype, which are needed to look up related operators and functions.
1762  * It'd be convenient to use the syscache here, but that probably doesn't
1763  * work while bootstrapping.
1764  */
1765  ScanKeyInit(&skey[0],
1767  BTEqualStrategyNumber, F_OIDEQ,
1768  ObjectIdGetDatum(operatorClassOid));
1770  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1771  NULL, 1, skey);
1772 
1773  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1774  {
1775  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1776 
1777  opcentry->opcfamily = opclassform->opcfamily;
1778  opcentry->opcintype = opclassform->opcintype;
1779  }
1780  else
1781  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1782 
1783  systable_endscan(scan);
1785 
1786  /*
1787  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1788  * the default ones (those with lefttype = righttype = opcintype).
1789  */
1790  if (numSupport > 0)
1791  {
1792  ScanKeyInit(&skey[0],
1794  BTEqualStrategyNumber, F_OIDEQ,
1795  ObjectIdGetDatum(opcentry->opcfamily));
1796  ScanKeyInit(&skey[1],
1798  BTEqualStrategyNumber, F_OIDEQ,
1799  ObjectIdGetDatum(opcentry->opcintype));
1800  ScanKeyInit(&skey[2],
1802  BTEqualStrategyNumber, F_OIDEQ,
1803  ObjectIdGetDatum(opcentry->opcintype));
1805  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1806  NULL, 3, skey);
1807 
1808  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1809  {
1810  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1811 
1812  if (amprocform->amprocnum <= 0 ||
1813  (StrategyNumber) amprocform->amprocnum > numSupport)
1814  elog(ERROR, "invalid amproc number %d for opclass %u",
1815  amprocform->amprocnum, operatorClassOid);
1816 
1817  opcentry->supportProcs[amprocform->amprocnum - 1] =
1818  amprocform->amproc;
1819  }
1820 
1821  systable_endscan(scan);
1823  }
1824 
1825  opcentry->valid = true;
1826  return opcentry;
1827 }
1828 
1829 
1830 /*
1831  * formrdesc
1832  *
1833  * This is a special cut-down version of RelationBuildDesc(),
1834  * used while initializing the relcache.
1835  * The relation descriptor is built just from the supplied parameters,
1836  * without actually looking at any system table entries. We cheat
1837  * quite a lot since we only need to work for a few basic system
1838  * catalogs.
1839  *
1840  * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1841  * pg_shseclabel, pg_class, pg_attribute, pg_proc, and pg_type
1842  * (see RelationCacheInitializePhase2/3).
1843  *
1844  * Note that these catalogs can't have constraints (except attnotnull),
1845  * default values, rules, or triggers, since we don't cope with any of that.
1846  * (Well, actually, this only matters for properties that need to be valid
1847  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1848  * these properties matter then...)
1849  *
1850  * NOTE: we assume we are already switched into CacheMemoryContext.
1851  */
1852 static void
1853 formrdesc(const char *relationName, Oid relationReltype,
1854  bool isshared, bool hasoids,
1855  int natts, const FormData_pg_attribute *attrs)
1856 {
1857  Relation relation;
1858  int i;
1859  bool has_not_null;
1860 
1861  /*
1862  * allocate new relation desc, clear all fields of reldesc
1863  */
1864  relation = (Relation) palloc0(sizeof(RelationData));
1865 
1866  /* make sure relation is marked as having no open file yet */
1867  relation->rd_smgr = NULL;
1868 
1869  /*
1870  * initialize reference count: 1 because it is nailed in cache
1871  */
1872  relation->rd_refcnt = 1;
1873 
1874  /*
1875  * all entries built with this routine are nailed-in-cache; none are for
1876  * new or temp relations.
1877  */
1878  relation->rd_isnailed = true;
1881  relation->rd_backend = InvalidBackendId;
1882  relation->rd_islocaltemp = false;
1883 
1884  /*
1885  * initialize relation tuple form
1886  *
1887  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1888  * get us launched. RelationCacheInitializePhase3() will read the real
1889  * data from pg_class and replace what we've done here. Note in
1890  * particular that relowner is left as zero; this cues
1891  * RelationCacheInitializePhase3 that the real data isn't there yet.
1892  */
1894 
1895  namestrcpy(&relation->rd_rel->relname, relationName);
1896  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1897  relation->rd_rel->reltype = relationReltype;
1898 
1899  /*
1900  * It's important to distinguish between shared and non-shared relations,
1901  * even at bootstrap time, to make sure we know where they are stored.
1902  */
1903  relation->rd_rel->relisshared = isshared;
1904  if (isshared)
1905  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1906 
1907  /* formrdesc is used only for permanent relations */
1908  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1909 
1910  /* ... and they're always populated, too */
1911  relation->rd_rel->relispopulated = true;
1912 
1913  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1914  relation->rd_rel->relpages = 0;
1915  relation->rd_rel->reltuples = 0;
1916  relation->rd_rel->relallvisible = 0;
1917  relation->rd_rel->relkind = RELKIND_RELATION;
1918  relation->rd_rel->relhasoids = hasoids;
1919  relation->rd_rel->relnatts = (int16) natts;
1920 
1921  /*
1922  * initialize attribute tuple form
1923  *
1924  * Unlike the case with the relation tuple, this data had better be right
1925  * because it will never be replaced. The data comes from
1926  * src/include/catalog/ headers via genbki.pl.
1927  */
1928  relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1929  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1930 
1931  relation->rd_att->tdtypeid = relationReltype;
1932  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1933 
1934  /*
1935  * initialize tuple desc info
1936  */
1937  has_not_null = false;
1938  for (i = 0; i < natts; i++)
1939  {
1940  memcpy(TupleDescAttr(relation->rd_att, i),
1941  &attrs[i],
1943  has_not_null |= attrs[i].attnotnull;
1944  /* make sure attcacheoff is valid */
1945  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1946  }
1947 
1948  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1949  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1950 
1951  /* mark not-null status */
1952  if (has_not_null)
1953  {
1954  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1955 
1956  constr->has_not_null = true;
1957  relation->rd_att->constr = constr;
1958  }
1959 
1960  /*
1961  * initialize relation id from info in att array (my, this is ugly)
1962  */
1963  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1964 
1965  /*
1966  * All relations made with formrdesc are mapped. This is necessarily so
1967  * because there is no other way to know what filenode they currently
1968  * have. In bootstrap mode, add them to the initial relation mapper data,
1969  * specifying that the initial filenode is the same as the OID.
1970  */
1971  relation->rd_rel->relfilenode = InvalidOid;
1974  RelationGetRelid(relation),
1975  isshared, true);
1976 
1977  /*
1978  * initialize the relation lock manager information
1979  */
1980  RelationInitLockInfo(relation); /* see lmgr.c */
1981 
1982  /*
1983  * initialize physical addressing information for the relation
1984  */
1985  RelationInitPhysicalAddr(relation);
1986 
1987  /*
1988  * initialize the rel-has-index flag, using hardwired knowledge
1989  */
1991  {
1992  /* In bootstrap mode, we have no indexes */
1993  relation->rd_rel->relhasindex = false;
1994  }
1995  else
1996  {
1997  /* Otherwise, all the rels formrdesc is used for have indexes */
1998  relation->rd_rel->relhasindex = true;
1999  }
2000 
2001  /*
2002  * add new reldesc to relcache
2003  */
2004  RelationCacheInsert(relation, false);
2005 
2006  /* It's fully valid */
2007  relation->rd_isvalid = true;
2008 }
2009 
2010 
/* ----------------------------------------------------------------
 *				 Relation Descriptor Lookup Interface
 * ----------------------------------------------------------------
 */
2015 
2016 /*
2017  * RelationIdGetRelation
2018  *
2019  * Lookup a reldesc by OID; make one if not already in cache.
2020  *
2021  * Returns NULL if no pg_class row could be found for the given relid
2022  * (suggesting we are trying to access a just-deleted relation).
2023  * Any other error is reported via elog.
2024  *
2025  * NB: caller should already have at least AccessShareLock on the
2026  * relation ID, else there are nasty race conditions.
2027  *
2028  * NB: relation ref count is incremented, or set to 1 if new entry.
2029  * Caller should eventually decrement count. (Usually,
2030  * that happens by calling RelationClose().)
2031  */
2032 Relation
2034 {
2035  Relation rd;
2036 
2037  /* Make sure we're in an xact, even if this ends up being a cache hit */
2039 
2040  /*
2041  * first try to find reldesc in the cache
2042  */
2043  RelationIdCacheLookup(relationId, rd);
2044 
2045  if (RelationIsValid(rd))
2046  {
2048  /* revalidate cache entry if necessary */
2049  if (!rd->rd_isvalid)
2050  {
2051  /*
2052  * Indexes only have a limited number of possible schema changes,
2053  * and we don't want to use the full-blown procedure because it's
2054  * a headache for indexes that reload itself depends on.
2055  */
2056  if (rd->rd_rel->relkind == RELKIND_INDEX)
2058  else
2059  RelationClearRelation(rd, true);
2060  Assert(rd->rd_isvalid);
2061  }
2062  return rd;
2063  }
2064 
2065  /*
2066  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2067  * it.
2068  */
2069  rd = RelationBuildDesc(relationId, true);
2070  if (RelationIsValid(rd))
2072  return rd;
2073 }
2074 
2075 /* ----------------------------------------------------------------
2076  * cache invalidation support routines
2077  * ----------------------------------------------------------------
2078  */
2079 
2080 /*
2081  * RelationIncrementReferenceCount
2082  * Increments relation reference count.
2083  *
2084  * Note: bootstrap mode has its own weird ideas about relation refcount
2085  * behavior; we ought to fix it someday, but for now, just disable
2086  * reference count ownership tracking in bootstrap mode.
2087  */
2088 void
2090 {
2092  rel->rd_refcnt += 1;
2095 }
2096 
2097 /*
2098  * RelationDecrementReferenceCount
2099  * Decrements relation reference count.
2100  */
2101 void
2103 {
2104  Assert(rel->rd_refcnt > 0);
2105  rel->rd_refcnt -= 1;
2108 }
2109 
2110 /*
2111  * RelationClose - close an open relation
2112  *
2113  * Actually, we just decrement the refcount.
2114  *
2115  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2116  * will be freed as soon as their refcount goes to zero. In combination
2117  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2118  * to catch references to already-released relcache entries. It slows
2119  * things down quite a bit, however.
2120  */
2121 void
2123 {
2124  /* Note: no locking manipulations needed */
2126 
2127 #ifdef RELCACHE_FORCE_RELEASE
2128  if (RelationHasReferenceCountZero(relation) &&
2129  relation->rd_createSubid == InvalidSubTransactionId &&
2131  RelationClearRelation(relation, false);
2132 #endif
2133 }
2134 
2135 /*
2136  * RelationReloadIndexInfo - reload minimal information for an open index
2137  *
2138  * This function is used only for indexes. A relcache inval on an index
2139  * can mean that its pg_class or pg_index row changed. There are only
2140  * very limited changes that are allowed to an existing index's schema,
2141  * so we can update the relcache entry without a complete rebuild; which
2142  * is fortunate because we can't rebuild an index entry that is "nailed"
2143  * and/or in active use. We support full replacement of the pg_class row,
2144  * as well as updates of a few simple fields of the pg_index row.
2145  *
2146  * We can't necessarily reread the catalog rows right away; we might be
2147  * in a failed transaction when we receive the SI notification. If so,
2148  * RelationClearRelation just marks the entry as invalid by setting
2149  * rd_isvalid to false. This routine is called to fix the entry when it
2150  * is next needed.
2151  *
2152  * We assume that at the time we are called, we have at least AccessShareLock
2153  * on the target index. (Note: in the calls from RelationClearRelation,
2154  * this is legitimate because we know the rel has positive refcount.)
2155  *
2156  * If the target index is an index on pg_class or pg_index, we'd better have
2157  * previously gotten at least AccessShareLock on its underlying catalog,
2158  * else we are at risk of deadlock against someone trying to exclusive-lock
2159  * the heap and index in that order. This is ensured in current usage by
2160  * only applying this to indexes being opened or having positive refcount.
2161  */
2162 static void
2164 {
2165  bool indexOK;
2166  HeapTuple pg_class_tuple;
2167  Form_pg_class relp;
2168 
2169  /* Should be called only for invalidated indexes */
2170  Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
2171  !relation->rd_isvalid);
2172 
2173  /* Ensure it's closed at smgr level */
2174  RelationCloseSmgr(relation);
2175 
2176  /* Must free any AM cached data upon relcache flush */
2177  if (relation->rd_amcache)
2178  pfree(relation->rd_amcache);
2179  relation->rd_amcache = NULL;
2180 
2181  /*
2182  * If it's a shared index, we might be called before backend startup has
2183  * finished selecting a database, in which case we have no way to read
2184  * pg_class yet. However, a shared index can never have any significant
2185  * schema updates, so it's okay to ignore the invalidation signal. Just
2186  * mark it valid and return without doing anything more.
2187  */
2188  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2189  {
2190  relation->rd_isvalid = true;
2191  return;
2192  }
2193 
2194  /*
2195  * Read the pg_class row
2196  *
2197  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2198  * for pg_class_oid_index ...
2199  */
2200  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2201  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2202  if (!HeapTupleIsValid(pg_class_tuple))
2203  elog(ERROR, "could not find pg_class tuple for index %u",
2204  RelationGetRelid(relation));
2205  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2206  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2207  /* Reload reloptions in case they changed */
2208  if (relation->rd_options)
2209  pfree(relation->rd_options);
2210  RelationParseRelOptions(relation, pg_class_tuple);
2211  /* done with pg_class tuple */
2212  heap_freetuple(pg_class_tuple);
2213  /* We must recalculate physical address in case it changed */
2214  RelationInitPhysicalAddr(relation);
2215 
2216  /*
2217  * For a non-system index, there are fields of the pg_index row that are
2218  * allowed to change, so re-read that row and update the relcache entry.
2219  * Most of the info derived from pg_index (such as support function lookup
2220  * info) cannot change, and indeed the whole point of this routine is to
2221  * update the relcache entry without clobbering that data; so wholesale
2222  * replacement is not appropriate.
2223  */
2224  if (!IsSystemRelation(relation))
2225  {
2226  HeapTuple tuple;
2228 
2229  tuple = SearchSysCache1(INDEXRELID,
2230  ObjectIdGetDatum(RelationGetRelid(relation)));
2231  if (!HeapTupleIsValid(tuple))
2232  elog(ERROR, "cache lookup failed for index %u",
2233  RelationGetRelid(relation));
2234  index = (Form_pg_index) GETSTRUCT(tuple);
2235 
2236  /*
2237  * Basically, let's just copy all the bool fields. There are one or
2238  * two of these that can't actually change in the current code, but
2239  * it's not worth it to track exactly which ones they are. None of
2240  * the array fields are allowed to change, though.
2241  */
2242  relation->rd_index->indisunique = index->indisunique;
2243  relation->rd_index->indisprimary = index->indisprimary;
2244  relation->rd_index->indisexclusion = index->indisexclusion;
2245  relation->rd_index->indimmediate = index->indimmediate;
2246  relation->rd_index->indisclustered = index->indisclustered;
2247  relation->rd_index->indisvalid = index->indisvalid;
2248  relation->rd_index->indcheckxmin = index->indcheckxmin;
2249  relation->rd_index->indisready = index->indisready;
2250  relation->rd_index->indislive = index->indislive;
2251 
2252  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2254  HeapTupleHeaderGetXmin(tuple->t_data));
2255 
2256  ReleaseSysCache(tuple);
2257  }
2258 
2259  /* Okay, now it's valid again */
2260  relation->rd_isvalid = true;
2261 }
2262 
2263 /*
2264  * RelationDestroyRelation
2265  *
2266  * Physically delete a relation cache entry and all subsidiary data.
2267  * Caller must already have unhooked the entry from the hash table.
2268  */
2269 static void
2270 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2271 {
2273 
2274  /*
2275  * Make sure smgr and lower levels close the relation's files, if they
2276  * weren't closed already. (This was probably done by caller, but let's
2277  * just be real sure.)
2278  */
2279  RelationCloseSmgr(relation);
2280 
2281  /*
2282  * Free all the subsidiary data structures of the relcache entry, then the
2283  * entry itself.
2284  */
2285  if (relation->rd_rel)
2286  pfree(relation->rd_rel);
2287  /* can't use DecrTupleDescRefCount here */
2288  Assert(relation->rd_att->tdrefcount > 0);
2289  if (--relation->rd_att->tdrefcount == 0)
2290  {
2291  /*
2292  * If we Rebuilt a relcache entry during a transaction then its
2293  * possible we did that because the TupDesc changed as the result of
2294  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2295  * possible someone copied that TupDesc, in which case the copy would
2296  * point to free'd memory. So if we rebuild an entry we keep the
2297  * TupDesc around until end of transaction, to be safe.
2298  */
2299  if (remember_tupdesc)
2301  else
2302  FreeTupleDesc(relation->rd_att);
2303  }
2304  FreeTriggerDesc(relation->trigdesc);
2305  list_free_deep(relation->rd_fkeylist);
2306  list_free(relation->rd_indexlist);
2307  bms_free(relation->rd_indexattr);
2308  bms_free(relation->rd_keyattr);
2309  bms_free(relation->rd_pkattr);
2310  bms_free(relation->rd_idattr);
2311  if (relation->rd_pubactions)
2312  pfree(relation->rd_pubactions);
2313  if (relation->rd_options)
2314  pfree(relation->rd_options);
2315  if (relation->rd_indextuple)
2316  pfree(relation->rd_indextuple);
2317  if (relation->rd_indexcxt)
2318  MemoryContextDelete(relation->rd_indexcxt);
2319  if (relation->rd_rulescxt)
2320  MemoryContextDelete(relation->rd_rulescxt);
2321  if (relation->rd_rsdesc)
2322  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2323  if (relation->rd_partkeycxt)
2325  if (relation->rd_pdcxt)
2326  MemoryContextDelete(relation->rd_pdcxt);
2327  if (relation->rd_partcheck)
2328  pfree(relation->rd_partcheck);
2329  if (relation->rd_fdwroutine)
2330  pfree(relation->rd_fdwroutine);
2331  pfree(relation);
2332 }
2333 
2334 /*
2335  * RelationClearRelation
2336  *
2337  * Physically blow away a relation cache entry, or reset it and rebuild
2338  * it from scratch (that is, from catalog entries). The latter path is
2339  * used when we are notified of a change to an open relation (one with
2340  * refcount > 0).
2341  *
2342  * NB: when rebuilding, we'd better hold some lock on the relation,
2343  * else the catalog data we need to read could be changing under us.
2344  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2345  * an sinval reset could happen while we're accessing the catalogs, and
2346  * the rel would get blown away underneath us by RelationCacheInvalidate
2347  * if it has zero refcnt.
2348  *
2349  * The "rebuild" parameter is redundant in current usage because it has
2350  * to match the relation's refcnt status, but we keep it as a crosscheck
2351  * that we're doing what the caller expects.
2352  */
2353 static void
2354 RelationClearRelation(Relation relation, bool rebuild)
2355 {
2356  /*
2357  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2358  * course it would be an equally bad idea to blow away one with nonzero
2359  * refcnt, since that would leave someone somewhere with a dangling
2360  * pointer. All callers are expected to have verified that this holds.
2361  */
2362  Assert(rebuild ?
2363  !RelationHasReferenceCountZero(relation) :
2364  RelationHasReferenceCountZero(relation));
2365 
2366  /*
2367  * Make sure smgr and lower levels close the relation's files, if they
2368  * weren't closed already. If the relation is not getting deleted, the
2369  * next smgr access should reopen the files automatically. This ensures
2370  * that the low-level file access state is updated after, say, a vacuum
2371  * truncation.
2372  */
2373  RelationCloseSmgr(relation);
2374 
2375  /*
2376  * Never, never ever blow away a nailed-in system relation, because we'd
2377  * be unable to recover. However, we must redo RelationInitPhysicalAddr
2378  * in case it is a mapped relation whose mapping changed.
2379  *
2380  * If it's a nailed-but-not-mapped index, then we need to re-read the
2381  * pg_class row to see if its relfilenode changed. We do that immediately
2382  * if we're inside a valid transaction and the relation is open (not
2383  * counting the nailed refcount). Otherwise just mark the entry as
2384  * possibly invalid, and it'll be fixed when next opened.
2385  */
2386  if (relation->rd_isnailed)
2387  {
2388  RelationInitPhysicalAddr(relation);
2389 
2390  if (relation->rd_rel->relkind == RELKIND_INDEX)
2391  {
2392  relation->rd_isvalid = false; /* needs to be revalidated */
2393  if (relation->rd_refcnt > 1 && IsTransactionState())
2394  RelationReloadIndexInfo(relation);
2395  }
2396  return;
2397  }
2398 
2399  /*
2400  * Even non-system indexes should not be blown away if they are open and
2401  * have valid index support information. This avoids problems with active
2402  * use of the index support information. As with nailed indexes, we
2403  * re-read the pg_class row to handle possible physical relocation of the
2404  * index, and we check for pg_index updates too.
2405  */
2406  if (relation->rd_rel->relkind == RELKIND_INDEX &&
2407  relation->rd_refcnt > 0 &&
2408  relation->rd_indexcxt != NULL)
2409  {
2410  relation->rd_isvalid = false; /* needs to be revalidated */
2411  if (IsTransactionState())
2412  RelationReloadIndexInfo(relation);
2413  return;
2414  }
2415 
2416  /* Mark it invalid until we've finished rebuild */
2417  relation->rd_isvalid = false;
2418 
2419  /*
2420  * If we're really done with the relcache entry, blow it away. But if
2421  * someone is still using it, reconstruct the whole deal without moving
2422  * the physical RelationData record (so that the someone's pointer is
2423  * still valid).
2424  */
2425  if (!rebuild)
2426  {
2427  /* Remove it from the hash table */
2428  RelationCacheDelete(relation);
2429 
2430  /* And release storage */
2431  RelationDestroyRelation(relation, false);
2432  }
2433  else if (!IsTransactionState())
2434  {
2435  /*
2436  * If we're not inside a valid transaction, we can't do any catalog
2437  * access so it's not possible to rebuild yet. Just exit, leaving
2438  * rd_isvalid = false so that the rebuild will occur when the entry is
2439  * next opened.
2440  *
2441  * Note: it's possible that we come here during subtransaction abort,
2442  * and the reason for wanting to rebuild is that the rel is open in
2443  * the outer transaction. In that case it might seem unsafe to not
2444  * rebuild immediately, since whatever code has the rel already open
2445  * will keep on using the relcache entry as-is. However, in such a
2446  * case the outer transaction should be holding a lock that's
2447  * sufficient to prevent any significant change in the rel's schema,
2448  * so the existing entry contents should be good enough for its
2449  * purposes; at worst we might be behind on statistics updates or the
2450  * like. (See also CheckTableNotInUse() and its callers.) These same
2451  * remarks also apply to the cases above where we exit without having
2452  * done RelationReloadIndexInfo() yet.
2453  */
2454  return;
2455  }
2456  else
2457  {
2458  /*
2459  * Our strategy for rebuilding an open relcache entry is to build a
2460  * new entry from scratch, swap its contents with the old entry, and
2461  * finally delete the new entry (along with any infrastructure swapped
2462  * over from the old entry). This is to avoid trouble in case an
2463  * error causes us to lose control partway through. The old entry
2464  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2465  * on next access. Meanwhile it's not any less valid than it was
2466  * before, so any code that might expect to continue accessing it
2467  * isn't hurt by the rebuild failure. (Consider for example a
2468  * subtransaction that ALTERs a table and then gets canceled partway
2469  * through the cache entry rebuild. The outer transaction should
2470  * still see the not-modified cache entry as valid.) The worst
2471  * consequence of an error is leaking the necessarily-unreferenced new
2472  * entry, and this shouldn't happen often enough for that to be a big
2473  * problem.
2474  *
2475  * When rebuilding an open relcache entry, we must preserve ref count,
2476  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2477  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2478  * rewrite-rule, partition key, and partition descriptor substructures
2479  * in place, because various places assume that these structures won't
2480  * move while they are working with an open relcache entry. (Note:
2481  * the refcount mechanism for tupledescs might someday allow us to
2482  * remove this hack for the tupledesc.)
2483  *
2484  * Note that this process does not touch CurrentResourceOwner; which
2485  * is good because whatever ref counts the entry may have do not
2486  * necessarily belong to that resource owner.
2487  */
2488  Relation newrel;
2489  Oid save_relid = RelationGetRelid(relation);
2490  bool keep_tupdesc;
2491  bool keep_rules;
2492  bool keep_policies;
2493  bool keep_partkey;
2494  bool keep_partdesc;
2495 
2496  /* Build temporary entry, but don't link it into hashtable */
2497  newrel = RelationBuildDesc(save_relid, false);
2498  if (newrel == NULL)
2499  {
2500  /*
2501  * We can validly get here, if we're using a historic snapshot in
2502  * which a relation, accessed from outside logical decoding, is
2503  * still invisible. In that case it's fine to just mark the
2504  * relation as invalid and return - it'll fully get reloaded by
2505  * the cache reset at the end of logical decoding (or at the next
2506  * access). During normal processing we don't want to ignore this
2507  * case as it shouldn't happen there, as explained below.
2508  */
2509  if (HistoricSnapshotActive())
2510  return;
2511 
2512  /*
2513  * This shouldn't happen as dropping a relation is intended to be
2514  * impossible if still referenced (cf. CheckTableNotInUse()). But
2515  * if we get here anyway, we can't just delete the relcache entry,
2516  * as it possibly could get accessed later (as e.g. the error
2517  * might get trapped and handled via a subtransaction rollback).
2518  */
2519  elog(ERROR, "relation %u deleted while still in use", save_relid);
2520  }
2521 
2522  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2523  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2524  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2525  keep_partkey = (relation->rd_partkey != NULL);
2526  keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2527  relation->rd_partdesc,
2528  newrel->rd_partdesc);
2529 
2530  /*
2531  * Perform swapping of the relcache entry contents. Within this
2532  * process the old entry is momentarily invalid, so there *must* be no
2533  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2534  * all-in-line code for safety.
2535  *
2536  * Since the vast majority of fields should be swapped, our method is
2537  * to swap the whole structures and then re-swap those few fields we
2538  * didn't want swapped.
2539  */
2540 #define SWAPFIELD(fldtype, fldname) \
2541  do { \
2542  fldtype _tmp = newrel->fldname; \
2543  newrel->fldname = relation->fldname; \
2544  relation->fldname = _tmp; \
2545  } while (0)
2546 
2547  /* swap all Relation struct fields */
2548  {
2549  RelationData tmpstruct;
2550 
2551  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2552  memcpy(newrel, relation, sizeof(RelationData));
2553  memcpy(relation, &tmpstruct, sizeof(RelationData));
2554  }
2555 
2556  /* rd_smgr must not be swapped, due to back-links from smgr level */
2557  SWAPFIELD(SMgrRelation, rd_smgr);
2558  /* rd_refcnt must be preserved */
2559  SWAPFIELD(int, rd_refcnt);
2560  /* isnailed shouldn't change */
2561  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2562  /* creation sub-XIDs must be preserved */
2563  SWAPFIELD(SubTransactionId, rd_createSubid);
2564  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2565  /* un-swap rd_rel pointers, swap contents instead */
2566  SWAPFIELD(Form_pg_class, rd_rel);
2567  /* ... but actually, we don't have to update newrel->rd_rel */
2568  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2569  /* preserve old tupledesc and rules if no logical change */
2570  if (keep_tupdesc)
2571  SWAPFIELD(TupleDesc, rd_att);
2572  if (keep_rules)
2573  {
2574  SWAPFIELD(RuleLock *, rd_rules);
2575  SWAPFIELD(MemoryContext, rd_rulescxt);
2576  }
2577  if (keep_policies)
2578  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2579  /* toast OID override must be preserved */
2580  SWAPFIELD(Oid, rd_toastoid);
2581  /* pgstat_info must be preserved */
2582  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2583  /* partition key must be preserved, if we have one */
2584  if (keep_partkey)
2585  {
2586  SWAPFIELD(PartitionKey, rd_partkey);
2587  SWAPFIELD(MemoryContext, rd_partkeycxt);
2588  }
2589  /* preserve old partdesc if no logical change */
2590  if (keep_partdesc)
2591  {
2592  SWAPFIELD(PartitionDesc, rd_partdesc);
2593  SWAPFIELD(MemoryContext, rd_pdcxt);
2594  }
2595 
2596 #undef SWAPFIELD
2597 
2598  /* And now we can throw away the temporary entry */
2599  RelationDestroyRelation(newrel, !keep_tupdesc);
2600  }
2601 }
2602 
2603 /*
2604  * RelationFlushRelation
2605  *
2606  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2607  * This is used when we receive a cache invalidation event for the rel.
2608  */
2609 static void
2611 {
2612  if (relation->rd_createSubid != InvalidSubTransactionId ||
2614  {
2615  /*
2616  * New relcache entries are always rebuilt, not flushed; else we'd
2617  * forget the "new" status of the relation, which is a useful
2618  * optimization to have. Ditto for the new-relfilenode status.
2619  *
2620  * The rel could have zero refcnt here, so temporarily increment the
2621  * refcnt to ensure it's safe to rebuild it. We can assume that the
2622  * current transaction has some lock on the rel already.
2623  */
2625  RelationClearRelation(relation, true);
2627  }
2628  else
2629  {
2630  /*
2631  * Pre-existing rels can be dropped from the relcache if not open.
2632  */
2633  bool rebuild = !RelationHasReferenceCountZero(relation);
2634 
2635  RelationClearRelation(relation, rebuild);
2636  }
2637 }
2638 
2639 /*
2640  * RelationForgetRelation - unconditionally remove a relcache entry
2641  *
2642  * External interface for destroying a relcache entry when we
2643  * drop the relation.
2644  */
2645 void
2647 {
2648  Relation relation;
2649 
2650  RelationIdCacheLookup(rid, relation);
2651 
2652  if (!PointerIsValid(relation))
2653  return; /* not in cache, nothing to do */
2654 
2655  if (!RelationHasReferenceCountZero(relation))
2656  elog(ERROR, "relation %u is still open", rid);
2657 
2658  /* Unconditionally destroy the relcache entry */
2659  RelationClearRelation(relation, false);
2660 }
2661 
2662 /*
2663  * RelationCacheInvalidateEntry
2664  *
2665  * This routine is invoked for SI cache flush messages.
2666  *
2667  * Any relcache entry matching the relid must be flushed. (Note: caller has
2668  * already determined that the relid belongs to our database or is a shared
2669  * relation.)
2670  *
2671  * We used to skip local relations, on the grounds that they could
2672  * not be targets of cross-backend SI update messages; but it seems
2673  * safer to process them, so that our *own* SI update messages will
2674  * have the same effects during CommandCounterIncrement for both
2675  * local and nonlocal relations.
2676  */
2677 void
2679 {
2680  Relation relation;
2681 
2682  RelationIdCacheLookup(relationId, relation);
2683 
2684  if (PointerIsValid(relation))
2685  {
2687  RelationFlushRelation(relation);
2688  }
2689 }
2690 
2691 /*
2692  * RelationCacheInvalidate
2693  * Blow away cached relation descriptors that have zero reference counts,
2694  * and rebuild those with positive reference counts. Also reset the smgr
2695  * relation cache and re-read relation mapping data.
2696  *
2697  * This is currently used only to recover from SI message buffer overflow,
2698  * so we do not touch new-in-transaction relations; they cannot be targets
2699  * of cross-backend SI updates (and our own updates now go through a
2700  * separate linked list that isn't limited by the SI message buffer size).
2701  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2702  * since any invalidation of those would be a local event.
2703  *
2704  * We do this in two phases: the first pass deletes deletable items, and
2705  * the second one rebuilds the rebuildable items. This is essential for
2706  * safety, because hash_seq_search only copes with concurrent deletion of
2707  * the element it is currently visiting. If a second SI overflow were to
2708  * occur while we are walking the table, resulting in recursive entry to
2709  * this routine, we could crash because the inner invocation blows away
2710  * the entry next to be visited by the outer scan. But this way is OK,
2711  * because (a) during the first pass we won't process any more SI messages,
2712  * so hash_seq_search will complete safely; (b) during the second pass we
2713  * only hold onto pointers to nondeletable entries.
2714  *
2715  * The two-phase approach also makes it easy to update relfilenodes for
2716  * mapped relations before we do anything else, and to ensure that the
2717  * second pass processes nailed-in-cache items before other nondeletable
2718  * items. This should ensure that system catalogs are up to date before
2719  * we attempt to use them to reload information about other open relations.
2720  */
2721 void
2723 {
2725  RelIdCacheEnt *idhentry;
2726  Relation relation;
2727  List *rebuildFirstList = NIL;
2728  List *rebuildList = NIL;
2729  ListCell *l;
2730 
2731  /*
2732  * Reload relation mapping data before starting to reconstruct cache.
2733  */
2735 
2736  /* Phase 1 */
2737  hash_seq_init(&status, RelationIdCache);
2738 
2739  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2740  {
2741  relation = idhentry->reldesc;
2742 
2743  /* Must close all smgr references to avoid leaving dangling ptrs */
2744  RelationCloseSmgr(relation);
2745 
2746  /*
2747  * Ignore new relations; no other backend will manipulate them before
2748  * we commit. Likewise, before replacing a relation's relfilenode, we
2749  * shall have acquired AccessExclusiveLock and drained any applicable
2750  * pending invalidations.
2751  */
2752  if (relation->rd_createSubid != InvalidSubTransactionId ||
2754  continue;
2755 
2757 
2758  if (RelationHasReferenceCountZero(relation))
2759  {
2760  /* Delete this entry immediately */
2761  Assert(!relation->rd_isnailed);
2762  RelationClearRelation(relation, false);
2763  }
2764  else
2765  {
2766  /*
2767  * If it's a mapped relation, immediately update its rd_node in
2768  * case its relfilenode changed. We must do this during phase 1
2769  * in case the relation is consulted during rebuild of other
2770  * relcache entries in phase 2. It's safe since consulting the
2771  * map doesn't involve any access to relcache entries.
2772  */
2773  if (RelationIsMapped(relation))
2774  RelationInitPhysicalAddr(relation);
2775 
2776  /*
2777  * Add this entry to list of stuff to rebuild in second pass.
2778  * pg_class goes to the front of rebuildFirstList while
2779  * pg_class_oid_index goes to the back of rebuildFirstList, so
2780  * they are done first and second respectively. Other nailed
2781  * relations go to the front of rebuildList, so they'll be done
2782  * next in no particular order; and everything else goes to the
2783  * back of rebuildList.
2784  */
2785  if (RelationGetRelid(relation) == RelationRelationId)
2786  rebuildFirstList = lcons(relation, rebuildFirstList);
2787  else if (RelationGetRelid(relation) == ClassOidIndexId)
2788  rebuildFirstList = lappend(rebuildFirstList, relation);
2789  else if (relation->rd_isnailed)
2790  rebuildList = lcons(relation, rebuildList);
2791  else
2792  rebuildList = lappend(rebuildList, relation);
2793  }
2794  }
2795 
2796  /*
2797  * Now zap any remaining smgr cache entries. This must happen before we
2798  * start to rebuild entries, since that may involve catalog fetches which
2799  * will re-open catalog files.
2800  */
2801  smgrcloseall();
2802 
2803  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2804  foreach(l, rebuildFirstList)
2805  {
2806  relation = (Relation) lfirst(l);
2807  RelationClearRelation(relation, true);
2808  }
2809  list_free(rebuildFirstList);
2810  foreach(l, rebuildList)
2811  {
2812  relation = (Relation) lfirst(l);
2813  RelationClearRelation(relation, true);
2814  }
2815  list_free(rebuildList);
2816 }
2817 
2818 /*
2819  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2820  *
2821  * Needed in some cases where we are changing a relation's physical mapping.
2822  * The link will be automatically reopened on next use.
2823  */
2824 void
2826 {
2827  Relation relation;
2828 
2829  RelationIdCacheLookup(relationId, relation);
2830 
2831  if (!PointerIsValid(relation))
2832  return; /* not in cache, nothing to do */
2833 
2834  RelationCloseSmgr(relation);
2835 }
2836 
2837 static void
2839 {
2840  if (EOXactTupleDescArray == NULL)
2841  {
2842  MemoryContext oldcxt;
2843 
2845 
2846  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2849  MemoryContextSwitchTo(oldcxt);
2850  }
2852  {
2853  int32 newlen = EOXactTupleDescArrayLen * 2;
2854 
2856 
2857  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2858  newlen * sizeof(TupleDesc));
2859  EOXactTupleDescArrayLen = newlen;
2860  }
2861 
2862  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2863 }
2864 
2865 /*
2866  * AtEOXact_RelationCache
2867  *
2868  * Clean up the relcache at main-transaction commit or abort.
2869  *
2870  * Note: this must be called *before* processing invalidation messages.
2871  * In the case of abort, we don't want to try to rebuild any invalidated
2872  * cache entries (since we can't safely do database accesses). Therefore
2873  * we must reset refcnts before handling pending invalidations.
2874  *
2875  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2876  * ResourceOwner mechanism. This routine just does a debugging
2877  * cross-check that no pins remain. However, we also need to do special
2878  * cleanup when the current transaction created any relations or made use
2879  * of forced index lists.
2880  */
2881 void
2883 {
2885  RelIdCacheEnt *idhentry;
2886  int i;
2887 
2888  /*
2889  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2890  * listed in it. Otherwise fall back on a hash_seq_search scan.
2891  *
2892  * For simplicity, eoxact_list[] entries are not deleted till end of
2893  * top-level transaction, even though we could remove them at
2894  * subtransaction end in some cases, or remove relations from the list if
2895  * they are cleared for other reasons. Therefore we should expect the
2896  * case that list entries are not found in the hashtable; if not, there's
2897  * nothing to do for them.
2898  */
2900  {
2901  hash_seq_init(&status, RelationIdCache);
2902  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2903  {
2904  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2905  }
2906  }
2907  else
2908  {
2909  for (i = 0; i < eoxact_list_len; i++)
2910  {
2911  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2912  (void *) &eoxact_list[i],
2913  HASH_FIND,
2914  NULL);
2915  if (idhentry != NULL)
2916  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2917  }
2918  }
2919 
2920  if (EOXactTupleDescArrayLen > 0)
2921  {
2922  Assert(EOXactTupleDescArray != NULL);
2923  for (i = 0; i < NextEOXactTupleDescNum; i++)
2924  FreeTupleDesc(EOXactTupleDescArray[i]);
2925  pfree(EOXactTupleDescArray);
2926  EOXactTupleDescArray = NULL;
2927  }
2928 
2929  /* Now we're out of the transaction and can clear the lists */
2930  eoxact_list_len = 0;
2931  eoxact_list_overflowed = false;
2934 }
2935 
2936 /*
2937  * AtEOXact_cleanup
2938  *
2939  * Clean up a single rel at main-transaction commit or abort
2940  *
2941  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2942  * bother to prevent duplicate entries in eoxact_list[].
2943  */
2944 static void
2945 AtEOXact_cleanup(Relation relation, bool isCommit)
2946 {
2947  /*
2948  * The relcache entry's ref count should be back to its normal
2949  * not-in-a-transaction state: 0 unless it's nailed in cache.
2950  *
2951  * In bootstrap mode, this is NOT true, so don't check it --- the
2952  * bootstrap code expects relations to stay open across start/commit
2953  * transaction calls. (That seems bogus, but it's not worth fixing.)
2954  *
2955  * Note: ideally this check would be applied to every relcache entry, not
2956  * just those that have eoxact work to do. But it's not worth forcing a
2957  * scan of the whole relcache just for this. (Moreover, doing so would
2958  * mean that assert-enabled testing never tests the hash_search code path
2959  * above, which seems a bad idea.)
2960  */
2961 #ifdef USE_ASSERT_CHECKING
2963  {
2964  int expected_refcnt;
2965 
2966  expected_refcnt = relation->rd_isnailed ? 1 : 0;
2967  Assert(relation->rd_refcnt == expected_refcnt);
2968  }
2969 #endif
2970 
2971  /*
2972  * Is it a relation created in the current transaction?
2973  *
2974  * During commit, reset the flag to zero, since we are now out of the
2975  * creating transaction. During abort, simply delete the relcache entry
2976  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
2977  * new-ness of a new relation due to a forced cache flush, the entry will
2978  * get deleted anyway by shared-cache-inval processing of the aborted
2979  * pg_class insertion.)
2980  */
2981  if (relation->rd_createSubid != InvalidSubTransactionId)
2982  {
2983  if (isCommit)
2985  else if (RelationHasReferenceCountZero(relation))
2986  {
2987  RelationClearRelation(relation, false);
2988  return;
2989  }
2990  else
2991  {
2992  /*
2993  * Hmm, somewhere there's a (leaked?) reference to the relation.
2994  * We daren't remove the entry for fear of dereferencing a
2995  * dangling pointer later. Bleat, and mark it as not belonging to
2996  * the current transaction. Hopefully it'll get cleaned up
2997  * eventually. This must be just a WARNING to avoid
2998  * error-during-error-recovery loops.
2999  */
3001  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3002  RelationGetRelationName(relation));
3003  }
3004  }
3005 
3006  /*
3007  * Likewise, reset the hint about the relfilenode being new.
3008  */
3010 
3011  /*
3012  * Flush any temporary index list.
3013  */
3014  if (relation->rd_indexvalid == 2)
3015  {
3016  list_free(relation->rd_indexlist);
3017  relation->rd_indexlist = NIL;
3018  relation->rd_oidindex = InvalidOid;
3019  relation->rd_pkindex = InvalidOid;
3020  relation->rd_replidindex = InvalidOid;
3021  relation->rd_indexvalid = 0;
3022  }
3023 }
3024 
3025 /*
3026  * AtEOSubXact_RelationCache
3027  *
3028  * Clean up the relcache at sub-transaction commit or abort.
3029  *
3030  * Note: this must be called *before* processing invalidation messages.
3031  */
3032 void
3034  SubTransactionId parentSubid)
3035 {
3037  RelIdCacheEnt *idhentry;
3038  int i;
3039 
3040  /*
3041  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3042  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3043  * logic as in AtEOXact_RelationCache.
3044  */
3046  {
3047  hash_seq_init(&status, RelationIdCache);
3048  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3049  {
3050  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3051  mySubid, parentSubid);
3052  }
3053  }
3054  else
3055  {
3056  for (i = 0; i < eoxact_list_len; i++)
3057  {
3058  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3059  (void *) &eoxact_list[i],
3060  HASH_FIND,
3061  NULL);
3062  if (idhentry != NULL)
3063  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3064  mySubid, parentSubid);
3065  }
3066  }
3067 
3068  /* Don't reset the list; we still need more cleanup later */
3069 }
3070 
3071 /*
3072  * AtEOSubXact_cleanup
3073  *
3074  * Clean up a single rel at subtransaction commit or abort
3075  *
3076  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3077  * bother to prevent duplicate entries in eoxact_list[].
3078  */
3079 static void
3080 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3081  SubTransactionId mySubid, SubTransactionId parentSubid)
3082 {
3083  /*
3084  * Is it a relation created in the current subtransaction?
3085  *
3086  * During subcommit, mark it as belonging to the parent, instead. During
3087  * subabort, simply delete the relcache entry.
3088  */
3089  if (relation->rd_createSubid == mySubid)
3090  {
3091  if (isCommit)
3092  relation->rd_createSubid = parentSubid;
3093  else if (RelationHasReferenceCountZero(relation))
3094  {
3095  RelationClearRelation(relation, false);
3096  return;
3097  }
3098  else
3099  {
3100  /*
3101  * Hmm, somewhere there's a (leaked?) reference to the relation.
3102  * We daren't remove the entry for fear of dereferencing a
3103  * dangling pointer later. Bleat, and transfer it to the parent
3104  * subtransaction so we can try again later. This must be just a
3105  * WARNING to avoid error-during-error-recovery loops.
3106  */
3107  relation->rd_createSubid = parentSubid;
3108  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3109  RelationGetRelationName(relation));
3110  }
3111  }
3112 
3113  /*
3114  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3115  */
3116  if (relation->rd_newRelfilenodeSubid == mySubid)
3117  {
3118  if (isCommit)
3119  relation->rd_newRelfilenodeSubid = parentSubid;
3120  else
3122  }
3123 
3124  /*
3125  * Flush any temporary index list.
3126  */
3127  if (relation->rd_indexvalid == 2)
3128  {
3129  list_free(relation->rd_indexlist);
3130  relation->rd_indexlist = NIL;
3131  relation->rd_oidindex = InvalidOid;
3132  relation->rd_pkindex = InvalidOid;
3133  relation->rd_replidindex = InvalidOid;
3134  relation->rd_indexvalid = 0;
3135  }
3136 }
3137 
3138 
3139 /*
3140  * RelationBuildLocalRelation
3141  * Build a relcache entry for an about-to-be-created relation,
3142  * and enter it into the relcache.
3143  */
3144 Relation
3145 RelationBuildLocalRelation(const char *relname,
3146  Oid relnamespace,
3147  TupleDesc tupDesc,
3148  Oid relid,
3149  Oid relfilenode,
3150  Oid reltablespace,
3151  bool shared_relation,
3152  bool mapped_relation,
3153  char relpersistence,
3154  char relkind)
3155 {
3156  Relation rel;
3157  MemoryContext oldcxt;
3158  int natts = tupDesc->natts;
3159  int i;
3160  bool has_not_null;
3161  bool nailit;
3162 
3163  AssertArg(natts >= 0);
3164 
3165  /*
3166  * check for creation of a rel that must be nailed in cache.
3167  *
3168  * XXX this list had better match the relations specially handled in
3169  * RelationCacheInitializePhase2/3.
3170  */
3171  switch (relid)
3172  {
3173  case DatabaseRelationId:
3174  case AuthIdRelationId:
3175  case AuthMemRelationId:
3176  case RelationRelationId:
3177  case AttributeRelationId:
3178  case ProcedureRelationId:
3179  case TypeRelationId:
3180  nailit = true;
3181  break;
3182  default:
3183  nailit = false;
3184  break;
3185  }
3186 
3187  /*
3188  * check that hardwired list of shared rels matches what's in the
3189  * bootstrap .bki file. If you get a failure here during initdb, you
3190  * probably need to fix IsSharedRelation() to match whatever you've done
3191  * to the set of shared relations.
3192  */
3193  if (shared_relation != IsSharedRelation(relid))
3194  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3195  relname, relid);
3196 
3197  /* Shared relations had better be mapped, too */
3198  Assert(mapped_relation || !shared_relation);
3199 
3200  /*
3201  * switch to the cache context to create the relcache entry.
3202  */
3203  if (!CacheMemoryContext)
3205 
3207 
3208  /*
3209  * allocate a new relation descriptor and fill in basic state fields.
3210  */
3211  rel = (Relation) palloc0(sizeof(RelationData));
3212 
3213  /* make sure relation is marked as having no open file yet */
3214  rel->rd_smgr = NULL;
3215 
3216  /* mark it nailed if appropriate */
3217  rel->rd_isnailed = nailit;
3218 
3219  rel->rd_refcnt = nailit ? 1 : 0;
3220 
3221  /* it's being created in this transaction */
3224 
3225  /*
3226  * create a new tuple descriptor from the one passed in. We do this
3227  * partly to copy it into the cache context, and partly because the new
3228  * relation can't have any defaults or constraints yet; they have to be
3229  * added in later steps, because they require additions to multiple system
3230  * catalogs. We can copy attnotnull constraints here, however.
3231  */
3232  rel->rd_att = CreateTupleDescCopy(tupDesc);
3233  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3234  has_not_null = false;
3235  for (i = 0; i < natts; i++)
3236  {
3237  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3238  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3239 
3240  datt->attidentity = satt->attidentity;
3241  datt->attnotnull = satt->attnotnull;
3242  has_not_null |= satt->attnotnull;
3243  }
3244 
3245  if (has_not_null)
3246  {
3247  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3248 
3249  constr->has_not_null = true;
3250  rel->rd_att->constr = constr;
3251  }
3252 
3253  /*
3254  * initialize relation tuple form (caller may add/override data later)
3255  */
3257 
3258  namestrcpy(&rel->rd_rel->relname, relname);
3259  rel->rd_rel->relnamespace = relnamespace;
3260 
3261  rel->rd_rel->relkind = relkind;
3262  rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
3263  rel->rd_rel->relnatts = natts;
3264  rel->rd_rel->reltype = InvalidOid;
3265  /* needed when bootstrapping: */
3266  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3267 
3268  /* set up persistence and relcache fields dependent on it */
3269  rel->rd_rel->relpersistence = relpersistence;
3270  switch (relpersistence)
3271  {
3275  rel->rd_islocaltemp = false;
3276  break;
3277  case RELPERSISTENCE_TEMP:
3278  Assert(isTempOrTempToastNamespace(relnamespace));
3280  rel->rd_islocaltemp = true;
3281  break;
3282  default:
3283  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3284  break;
3285  }
3286 
3287  /* if it's a materialized view, it's not populated initially */
3288  if (relkind == RELKIND_MATVIEW)
3289  rel->rd_rel->relispopulated = false;
3290  else
3291  rel->rd_rel->relispopulated = true;
3292 
3293  /* system relations and non-table objects don't have one */
3294  if (!IsSystemNamespace(relnamespace) &&
3295  (relkind == RELKIND_RELATION ||
3296  relkind == RELKIND_MATVIEW ||
3297  relkind == RELKIND_PARTITIONED_TABLE))
3298  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3299  else
3300  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3301 
3302  /*
3303  * Insert relation physical and logical identifiers (OIDs) into the right
3304  * places. For a mapped relation, we set relfilenode to zero and rely on
3305  * RelationInitPhysicalAddr to consult the map.
3306  */
3307  rel->rd_rel->relisshared = shared_relation;
3308 
3309  RelationGetRelid(rel) = relid;
3310 
3311  for (i = 0; i < natts; i++)
3312  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3313 
3314  rel->rd_rel->reltablespace = reltablespace;
3315 
3316  if (mapped_relation)
3317  {
3318  rel->rd_rel->relfilenode = InvalidOid;
3319  /* Add it to the active mapping information */
3320  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3321  }
3322  else
3323  rel->rd_rel->relfilenode = relfilenode;
3324 
3325  RelationInitLockInfo(rel); /* see lmgr.c */
3326 
3328 
3329  /*
3330  * Okay to insert into the relcache hash table.
3331  *
3332  * Ordinarily, there should certainly not be an existing hash entry for
3333  * the same OID; but during bootstrap, when we create a "real" relcache
3334  * entry for one of the bootstrap relations, we'll be overwriting the
3335  * phony one created with formrdesc. So allow that to happen for nailed
3336  * rels.
3337  */
3338  RelationCacheInsert(rel, nailit);
3339 
3340  /*
3341  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3342  * can't do this before storing relid in it.
3343  */
3344  EOXactListAdd(rel);
3345 
3346  /*
3347  * done building relcache entry.
3348  */
3349  MemoryContextSwitchTo(oldcxt);
3350 
3351  /* It's fully valid */
3352  rel->rd_isvalid = true;
3353 
3354  /*
3355  * Caller expects us to pin the returned entry.
3356  */
3358 
3359  return rel;
3360 }
3361 
3362 
3363 /*
3364  * RelationSetNewRelfilenode
3365  *
3366  * Assign a new relfilenode (physical file name) to the relation.
3367  *
3368  * This allows a full rewrite of the relation to be done with transactional
3369  * safety (since the filenode assignment can be rolled back). Note however
3370  * that there is no simple way to access the relation's old data for the
3371  * remainder of the current transaction. This limits the usefulness to cases
3372  * such as TRUNCATE or rebuilding an index from scratch.
3373  *
3374  * Caller must already hold exclusive lock on the relation.
3375  *
3376  * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
3377  * must be passed for indexes and sequences). This should be a lower bound on
3378  * the XIDs that will be put into the new relation contents.
3379  *
3380  * The new filenode's persistence is set to the given value. This is useful
3381  * for the cases that are changing the relation's persistence; other callers
3382  * need to pass the original relpersistence value.
3383  */
3384 void
3385 RelationSetNewRelfilenode(Relation relation, char persistence,
3386  TransactionId freezeXid, MultiXactId minmulti)
3387 {
3388  Oid newrelfilenode;
3389  RelFileNodeBackend newrnode;
3390  Relation pg_class;
3391  HeapTuple tuple;
3392  Form_pg_class classform;
3393 
3394  /* Indexes, sequences must have Invalid frozenxid; other rels must not */
3395  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
3396  relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
3397  freezeXid == InvalidTransactionId :
3398  TransactionIdIsNormal(freezeXid));
3399  Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
3400 
3401  /* Allocate a new relfilenode */
3402  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3403  persistence);
3404 
3405  /*
3406  * Get a writable copy of the pg_class tuple for the given relation.
3407  */
3409 
3410  tuple = SearchSysCacheCopy1(RELOID,
3411  ObjectIdGetDatum(RelationGetRelid(relation)));
3412  if (!HeapTupleIsValid(tuple))
3413  elog(ERROR, "could not find tuple for relation %u",
3414  RelationGetRelid(relation));
3415  classform = (Form_pg_class) GETSTRUCT(tuple);
3416 
3417  /*
3418  * Create storage for the main fork of the new relfilenode.
3419  *
3420  * NOTE: any conflict in relfilenode value will be caught here, if
3421  * GetNewRelFileNode messes up for any reason.
3422  */
3423  newrnode.node = relation->rd_node;
3424  newrnode.node.relNode = newrelfilenode;
3425  newrnode.backend = relation->rd_backend;
3426  RelationCreateStorage(newrnode.node, persistence);
3427  smgrclosenode(newrnode);
3428 
3429  /*
3430  * Schedule unlinking of the old storage at transaction commit.
3431  */
3432  RelationDropStorage(relation);
3433 
3434  /*
3435  * Now update the pg_class row. However, if we're dealing with a mapped
3436  * index, pg_class.relfilenode doesn't change; instead we have to send the
3437  * update to the relation mapper.
3438  */
3439  if (RelationIsMapped(relation))
3441  newrelfilenode,
3442  relation->rd_rel->relisshared,
3443  false);
3444  else
3445  classform->relfilenode = newrelfilenode;
3446 
3447  /* These changes are safe even for a mapped relation */
3448  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3449  {
3450  classform->relpages = 0; /* it's empty until further notice */
3451  classform->reltuples = 0;
3452  classform->relallvisible = 0;
3453  }
3454  classform->relfrozenxid = freezeXid;
3455  classform->relminmxid = minmulti;
3456  classform->relpersistence = persistence;
3457 
3458  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3459 
3460  heap_freetuple(tuple);
3461 
3462  heap_close(pg_class, RowExclusiveLock);
3463 
3464  /*
3465  * Make the pg_class row change visible, as well as the relation map
3466  * change if any. This will cause the relcache entry to get updated, too.
3467  */
3469 
3470  /*
3471  * Mark the rel as having been given a new relfilenode in the current
3472  * (sub) transaction. This is a hint that can be used to optimize later
3473  * operations on the rel in the same transaction.
3474  */
3476 
3477  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3478  EOXactListAdd(relation);
3479 }
3480 
3481 
3482 /*
3483  * RelationCacheInitialize
3484  *
3485  * This initializes the relation descriptor cache. At the time
3486  * that this is invoked, we can't do database access yet (mainly
3487  * because the transaction subsystem is not up); all we are doing
3488  * is making an empty cache hashtable. This must be done before
3489  * starting the initialization transaction, because otherwise
3490  * AtEOXact_RelationCache would crash if that transaction aborts
3491  * before we can get the relcache set up.
3492  */
3493 
3494 #define INITRELCACHESIZE 400
3495 
3496 void
3498 {
3499  HASHCTL ctl;
3500 
3501  /*
3502  * make sure cache memory context exists
3503  */
3504  if (!CacheMemoryContext)
3506 
3507  /*
3508  * create hashtable that indexes the relcache
3509  */
3510  MemSet(&ctl, 0, sizeof(ctl));
3511  ctl.keysize = sizeof(Oid);
3512  ctl.entrysize = sizeof(RelIdCacheEnt);
3513  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3514  &ctl, HASH_ELEM | HASH_BLOBS);
3515 
3516  /*
3517  * relation mapper needs to be initialized too
3518  */
3520 }
3521 
3522 /*
3523  * RelationCacheInitializePhase2
3524  *
3525  * This is called to prepare for access to shared catalogs during startup.
3526  * We must at least set up nailed reldescs for pg_database, pg_authid,
3527  * pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
3528  * for their indexes, too. We attempt to load this information from the
3529  * shared relcache init file. If that's missing or broken, just make
3530  * phony entries for the catalogs themselves.
3531  * RelationCacheInitializePhase3 will clean up as needed.
3532  */
3533 void
3535 {
3536  MemoryContext oldcxt;
3537 
3538  /*
3539  * relation mapper needs initialized too
3540  */
3542 
3543  /*
3544  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3545  * nothing.
3546  */
3548  return;
3549 
3550  /*
3551  * switch to cache memory context
3552  */
3554 
3555  /*
3556  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3557  * the cache with pre-made descriptors for the critical shared catalogs.
3558  */
3559  if (!load_relcache_init_file(true))
3560  {
3561  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3563  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3565  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3567  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3569  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3571 
3572 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3573  }
3574 
3575  MemoryContextSwitchTo(oldcxt);
3576 }
3577 
3578 /*
3579  * RelationCacheInitializePhase3
3580  *
3581  * This is called as soon as the catcache and transaction system
3582  * are functional and we have determined MyDatabaseId. At this point
3583  * we can actually read data from the database's system catalogs.
3584  * We first try to read pre-computed relcache entries from the local
3585  * relcache init file. If that's missing or broken, make phony entries
3586  * for the minimum set of nailed-in-cache relations. Then (unless
3587  * bootstrapping) make sure we have entries for the critical system
3588  * indexes. Once we've done all this, we have enough infrastructure to
3589  * open any system catalog or use any catcache. The last step is to
3590  * rewrite the cache files if needed.
3591  */
3592 void
3594 {
3596  RelIdCacheEnt *idhentry;
3597  MemoryContext oldcxt;
3598  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3599 
3600  /*
3601  * relation mapper needs initialized too
3602  */
3604 
3605  /*
3606  * switch to cache memory context
3607  */
3609 
3610  /*
3611  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3612  * the cache with pre-made descriptors for the critical "nailed-in" system
3613  * catalogs.
3614  */
3615  if (IsBootstrapProcessingMode() ||
3616  !load_relcache_init_file(false))
3617  {
3618  needNewCacheFile = true;
3619 
3620  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3621  true, Natts_pg_class, Desc_pg_class);
3622  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3624  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3625  true, Natts_pg_proc, Desc_pg_proc);
3626  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3627  true, Natts_pg_type, Desc_pg_type);
3628 
3629 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3630  }
3631 
3632  MemoryContextSwitchTo(oldcxt);
3633 
3634  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3636  return;
3637 
3638  /*
3639  * If we didn't get the critical system indexes loaded into relcache, do
3640  * so now. These are critical because the catcache and/or opclass cache
3641  * depend on them for fetches done during relcache load. Thus, we have an
3642  * infinite-recursion problem. We can break the recursion by doing
3643  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3644  * performance, we only want to do that until we have the critical indexes
3645  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3646  * decide whether to do heapscan or indexscan at the key spots, and we set
3647  * it true after we've loaded the critical indexes.
3648  *
3649  * The critical indexes are marked as "nailed in cache", partly to make it
3650  * easy for load_relcache_init_file to count them, but mainly because we
3651  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3652  * true. (NOTE: perhaps it would be possible to reload them by
3653  * temporarily setting criticalRelcachesBuilt to false again. For now,
3654  * though, we just nail 'em in.)
3655  *
3656  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3657  * in the same way as the others, because the critical catalogs don't
3658  * (currently) have any rules or triggers, and so these indexes can be
3659  * rebuilt without inducing recursion. However they are used during
3660  * relcache load when a rel does have rules or triggers, so we choose to
3661  * nail them for performance reasons.
3662  */
3664  {
3670  IndexRelationId);
3679 
3680 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3681 
3682  criticalRelcachesBuilt = true;
3683  }
3684 
3685  /*
3686  * Process critical shared indexes too.
3687  *
3688  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3689  * initial lookup of MyDatabaseId, without which we'll never find any
3690  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3691  * database OID, so it instead depends on DatabaseOidIndexId. We also
3692  * need to nail up some indexes on pg_authid and pg_auth_members for use
3693  * during client authentication. SharedSecLabelObjectIndexId isn't
3694  * critical for the core system, but authentication hooks might be
3695  * interested in it.
3696  */
3698  {
3711 
3712 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3713 
3715  }
3716 
3717  /*
3718  * Now, scan all the relcache entries and update anything that might be
3719  * wrong in the results from formrdesc or the relcache cache file. If we
3720  * faked up relcache entries using formrdesc, then read the real pg_class
3721  * rows and replace the fake entries with them. Also, if any of the
3722  * relcache entries have rules, triggers, or security policies, load that
3723  * info the hard way since it isn't recorded in the cache file.
3724  *
3725  * Whenever we access the catalogs to read data, there is a possibility of
3726  * a shared-inval cache flush causing relcache entries to be removed.
3727  * Since hash_seq_search only guarantees to still work after the *current*
3728  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3729  * We handle this by restarting the scan from scratch after each access.
3730  * This is theoretically O(N^2), but the number of entries that actually
3731  * need to be fixed is small enough that it doesn't matter.
3732  */
3733  hash_seq_init(&status, RelationIdCache);
3734 
3735  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3736  {
3737  Relation relation = idhentry->reldesc;
3738  bool restart = false;
3739 
3740  /*
3741  * Make sure *this* entry doesn't get flushed while we work with it.
3742  */
3744 
3745  /*
3746  * If it's a faked-up entry, read the real pg_class tuple.
3747  */
3748  if (relation->rd_rel->relowner == InvalidOid)
3749  {
3750  HeapTuple htup;
3751  Form_pg_class relp;
3752 
3753  htup = SearchSysCache1(RELOID,
3754  ObjectIdGetDatum(RelationGetRelid(relation)));
3755  if (!HeapTupleIsValid(htup))
3756  elog(FATAL, "cache lookup failed for relation %u",
3757  RelationGetRelid(relation));
3758  relp = (Form_pg_class) GETSTRUCT(htup);
3759 
3760  /*
3761  * Copy tuple to relation->rd_rel. (See notes in
3762  * AllocateRelationDesc())
3763  */
3764  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3765 
3766  /* Update rd_options while we have the tuple */
3767  if (relation->rd_options)
3768  pfree(relation->rd_options);
3769  RelationParseRelOptions(relation, htup);
3770 
3771  /*
3772  * Check the values in rd_att were set up correctly. (We cannot
3773  * just copy them over now: formrdesc must have set up the rd_att
3774  * data correctly to start with, because it may already have been
3775  * copied into one or more catcache entries.)
3776  */
3777  Assert(relation->rd_att->tdtypeid == relp->reltype);
3778  Assert(relation->rd_att->tdtypmod == -1);
3779  Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3780 
3781  ReleaseSysCache(htup);
3782 
3783  /* relowner had better be OK now, else we'll loop forever */
3784  if (relation->rd_rel->relowner == InvalidOid)
3785  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3786  RelationGetRelationName(relation));
3787 
3788  restart = true;
3789  }
3790 
3791  /*
3792  * Fix data that isn't saved in relcache cache file.
3793  *
3794  * relhasrules or relhastriggers could possibly be wrong or out of
3795  * date. If we don't actually find any rules or triggers, clear the
3796  * local copy of the flag so that we don't get into an infinite loop
3797  * here. We don't make any attempt to fix the pg_class entry, though.
3798  */
3799  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3800  {
3801  RelationBuildRuleLock(relation);
3802  if (relation->rd_rules == NULL)
3803  relation->rd_rel->relhasrules = false;
3804  restart = true;
3805  }
3806  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3807  {
3808  RelationBuildTriggers(relation);
3809  if (relation->trigdesc == NULL)
3810  relation->rd_rel->relhastriggers = false;
3811  restart = true;
3812  }
3813 
3814  /*
3815  * Re-load the row security policies if the relation has them, since
3816  * they are not preserved in the cache. Note that we can never NOT
3817  * have a policy while relrowsecurity is true,
3818  * RelationBuildRowSecurity will create a single default-deny policy
3819  * if there is no policy defined in pg_policy.
3820  */
3821  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3822  {
3823  RelationBuildRowSecurity(relation);
3824 
3825  Assert(relation->rd_rsdesc != NULL);
3826  restart = true;
3827  }
3828 
3829  /*
3830  * Reload the partition key and descriptor for a partitioned table.
3831  */
3832  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3833  relation->rd_partkey == NULL)
3834  {
3835  RelationBuildPartitionKey(relation);
3836  Assert(relation->rd_partkey != NULL);
3837 
3838  restart = true;
3839  }
3840 
3841  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3842  relation->rd_partdesc == NULL)
3843  {
3844  RelationBuildPartitionDesc(relation);
3845  Assert(relation->rd_partdesc != NULL);
3846 
3847  restart = true;
3848  }
3849 
3850  /* Release hold on the relation */
3852 
3853  /* Now, restart the hashtable scan if needed */
3854  if (restart)
3855  {
3856  hash_seq_term(&status);
3857  hash_seq_init(&status, RelationIdCache);
3858  }
3859  }
3860 
3861  /*
3862  * Lastly, write out new relcache cache files if needed. We don't bother
3863  * to distinguish cases where only one of the two needs an update.
3864  */
3865  if (needNewCacheFile)
3866  {
3867  /*
3868  * Force all the catcaches to finish initializing and thereby open the
3869  * catalogs and indexes they use. This will preload the relcache with
3870  * entries for all the most important system catalogs and indexes, so
3871  * that the init files will be most useful for future backends.
3872  */
3874 
3875  /* now write the files */
3877  write_relcache_init_file(false);
3878  }
3879 }
3880 
3881 /*
3882  * Load one critical system index into the relcache
3883  *
3884  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3885  * it belongs to.
3886  */
3887 static void
3888 load_critical_index(Oid indexoid, Oid heapoid)
3889 {
3890  Relation ird;
3891 
3892  /*
3893  * We must lock the underlying catalog before locking the index to avoid
3894  * deadlock, since RelationBuildDesc might well need to read the catalog,
3895  * and if anyone else is exclusive-locking this catalog and index they'll
3896  * be doing it in that order.
3897  */
3898  LockRelationOid(heapoid, AccessShareLock);
3899  LockRelationOid(indexoid, AccessShareLock);
3900  ird = RelationBuildDesc(indexoid, true);
3901  if (ird == NULL)
3902  elog(PANIC, "could not open critical system index %u", indexoid);
3903  ird->rd_isnailed = true;
3904  ird->rd_refcnt = 1;
3907 }
3908 
3909 /*
3910  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3911  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3912  *
3913  * We need this kluge because we have to be able to access non-fixed-width
3914  * fields of pg_class and pg_index before we have the standard catalog caches
3915  * available. We use predefined data that's set up in just the same way as
3916  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3917  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3918  * does it have a TupleConstr field. But it's good enough for the purpose of
3919  * extracting fields.
3920  */
3921 static TupleDesc
3923  bool hasoids)
3924 {
3925  TupleDesc result;
3926  MemoryContext oldcxt;
3927  int i;
3928 
3930 
3931  result = CreateTemplateTupleDesc(natts, hasoids);
3932  result->tdtypeid = RECORDOID; /* not right, but we don't care */
3933  result->tdtypmod = -1;
3934 
3935  for (i = 0; i < natts; i++)
3936  {
3937  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3938  /* make sure attcacheoff is valid */
3939  TupleDescAttr(result, i)->attcacheoff = -1;
3940  }
3941 
3942  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3943  TupleDescAttr(result, 0)->attcacheoff = 0;
3944 
3945  /* Note: we don't bother to set up a TupleConstr entry */
3946 
3947  MemoryContextSwitchTo(oldcxt);
3948 
3949  return result;
3950 }
3951 
3952 static TupleDesc
3954 {
3955  static TupleDesc pgclassdesc = NULL;
3956 
3957  /* Already done? */
3958  if (pgclassdesc == NULL)
3960  Desc_pg_class,
3961  true);
3962 
3963  return pgclassdesc;
3964 }
3965 
3966 static TupleDesc
3968 {
3969  static TupleDesc pgindexdesc = NULL;
3970 
3971  /* Already done? */
3972  if (pgindexdesc == NULL)
3974  Desc_pg_index,
3975  false);
3976 
3977  return pgindexdesc;
3978 }
3979 
3980 /*
3981  * Load any default attribute value definitions for the relation.
3982  */
3983 static void
3985 {
3986  AttrDefault *attrdef = relation->rd_att->constr->defval;
3987  int ndef = relation->rd_att->constr->num_defval;
3988  Relation adrel;
3989  SysScanDesc adscan;
3990  ScanKeyData skey;
3991  HeapTuple htup;
3992  Datum val;
3993  bool isnull;
3994  int found;
3995  int i;
3996 
3997  ScanKeyInit(&skey,
3999  BTEqualStrategyNumber, F_OIDEQ,
4000  ObjectIdGetDatum(RelationGetRelid(relation)));
4001 
4003  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4004  NULL, 1, &skey);
4005  found = 0;
4006 
4007  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4008  {
4009  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4010  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4011 
4012  for (i = 0; i < ndef; i++)
4013  {
4014  if (adform->adnum != attrdef[i].adnum)
4015  continue;
4016  if (attrdef[i].adbin != NULL)
4017  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4018  NameStr(attr->attname),
4019  RelationGetRelationName(relation));
4020  else
4021  found++;
4022 
4023  val = fastgetattr(htup,
4025  adrel->rd_att, &isnull);
4026  if (isnull)
4027  elog(WARNING, "null adbin for attr %s of rel %s",
4028  NameStr(attr->attname),
4029  RelationGetRelationName(relation));
4030  else
4031  {
4032  /* detoast and convert to cstring in caller's context */
4033  char *s = TextDatumGetCString(val);
4034 
4036  pfree(s);
4037  }
4038  break;
4039  }
4040 
4041  if (i >= ndef)
4042  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4043  adform->adnum, RelationGetRelationName(relation));
4044  }
4045 
4046  systable_endscan(adscan);
4047  heap_close(adrel, AccessShareLock);
4048 
4049  if (found != ndef)
4050  elog(WARNING, "%d attrdef record(s) missing for rel %s",
4051  ndef - found, RelationGetRelationName(relation));
4052 }
4053 
4054 /*
4055  * Load any check constraints for the relation.
4056  */
4057 static void
4059 {
4060  ConstrCheck *check = relation->rd_att->constr->check;
4061  int ncheck = relation->rd_att->constr->num_check;
4062  Relation conrel;
4063  SysScanDesc conscan;
4064  ScanKeyData skey[1];
4065  HeapTuple htup;
4066  int found = 0;
4067 
4068  ScanKeyInit(&skey[0],
4070  BTEqualStrategyNumber, F_OIDEQ,
4071  ObjectIdGetDatum(RelationGetRelid(relation)));
4072 
4074  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4075  NULL, 1, skey);
4076 
4077  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4078  {
4080  Datum val;
4081  bool isnull;
4082  char *s;
4083 
4084  /* We want check constraints only */
4085  if (conform->contype != CONSTRAINT_CHECK)
4086  continue;
4087 
4088  if (found >= ncheck)
4089  elog(ERROR, "unexpected constraint record found for rel %s",
4090  RelationGetRelationName(relation));
4091 
4092  check[found].ccvalid = conform->convalidated;
4093  check[found].ccnoinherit = conform->connoinherit;
4095  NameStr(conform->conname));
4096 
4097  /* Grab and test conbin is actually set */
4098  val = fastgetattr(htup,
4100  conrel->rd_att, &isnull);
4101  if (isnull)
4102  elog(ERROR, "null conbin for rel %s",
4103  RelationGetRelationName(relation));
4104 
4105  /* detoast and convert to cstring in caller's context */
4106  s = TextDatumGetCString(val);
4107  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4108  pfree(s);
4109 
4110  found++;
4111  }
4112 
4113  systable_endscan(conscan);
4114  heap_close(conrel, AccessShareLock);
4115 
4116  if (found != ncheck)
4117  elog(ERROR, "%d constraint record(s) missing for rel %s",
4118  ncheck - found, RelationGetRelationName(relation));
4119 
4120  /* Sort the records so that CHECKs are applied in a deterministic order */
4121  if (ncheck > 1)
4122  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4123 }
4124 
4125 /*
4126  * qsort comparator to sort ConstrCheck entries by name
4127  */
4128 static int
4129 CheckConstraintCmp(const void *a, const void *b)
4130 {
4131  const ConstrCheck *ca = (const ConstrCheck *) a;
4132  const ConstrCheck *cb = (const ConstrCheck *) b;
4133 
4134  return strcmp(ca->ccname, cb->ccname);
4135 }
4136 
4137 /*
4138  * RelationGetFKeyList -- get a list of foreign key info for the relation
4139  *
4140  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4141  * the given relation. This data is a direct copy of relevant fields from
4142  * pg_constraint. The list items are in no particular order.
4143  *
4144  * CAUTION: the returned list is part of the relcache's data, and could
4145  * vanish in a relcache entry reset. Callers must inspect or copy it
4146  * before doing anything that might trigger a cache flush, such as
4147  * system catalog accesses. copyObject() can be used if desired.
4148  * (We define it this way because current callers want to filter and
4149  * modify the list entries anyway, so copying would be a waste of time.)
4150  */
4151 List *
4153 {
4154  List *result;
4155  Relation conrel;
4156  SysScanDesc conscan;
4157  ScanKeyData skey;
4158  HeapTuple htup;
4159  List *oldlist;
4160  MemoryContext oldcxt;
4161 
4162  /* Quick exit if we already computed the list. */
4163  if (relation->rd_fkeyvalid)
4164  return relation->rd_fkeylist;
4165 
4166  /* Fast path: if it doesn't have any triggers, it can't have FKs */
4167  if (!relation->rd_rel->relhastriggers)
4168  return NIL;
4169 
4170  /*
4171  * We build the list we intend to return (in the caller's context) while
4172  * doing the scan. After successfully completing the scan, we copy that
4173  * list into the relcache entry. This avoids cache-context memory leakage
4174  * if we get some sort of error partway through.
4175  */
4176  result = NIL;
4177 
4178  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4179  ScanKeyInit(&skey,
4181  BTEqualStrategyNumber, F_OIDEQ,
4182  ObjectIdGetDatum(RelationGetRelid(relation)));
4183 
4185  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4186  NULL, 1, &skey);
4187 
4188  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4189  {
4190  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4191  ForeignKeyCacheInfo *info;
4192  Datum adatum;
4193  bool isnull;
4194  ArrayType *arr;
4195  int nelem;
4196 
4197  /* consider only foreign keys */
4198  if (constraint->contype != CONSTRAINT_FOREIGN)
4199  continue;
4200 
4201  info = makeNode(ForeignKeyCacheInfo);
4202  info->conrelid = constraint->conrelid;
4203  info->confrelid = constraint->confrelid;
4204 
4205  /* Extract data from conkey field */
4206  adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
4207  conrel->rd_att, &isnull);
4208  if (isnull)
4209  elog(ERROR, "null conkey for rel %s",
4210  RelationGetRelationName(relation));
4211 
4212  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4213  nelem = ARR_DIMS(arr)[0];
4214  if (ARR_NDIM(arr) != 1 ||
4215  nelem < 1 ||
4216  nelem > INDEX_MAX_KEYS ||
4217  ARR_HASNULL(arr) ||
4218  ARR_ELEMTYPE(arr) != INT2OID)
4219  elog(ERROR, "conkey is not a 1-D smallint array");
4220 
4221  info->nkeys = nelem;
4222  memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4223 
4224  /* Likewise for confkey */
4225  adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
4226  conrel->rd_att, &isnull);
4227  if (isnull)
4228  elog(ERROR, "null confkey for rel %s",
4229  RelationGetRelationName(relation));
4230 
4231  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4232  nelem = ARR_DIMS(arr)[0];
4233  if (ARR_NDIM(arr) != 1 ||
4234  nelem != info->nkeys ||
4235  ARR_HASNULL(arr) ||
4236  ARR_ELEMTYPE(arr) != INT2OID)
4237  elog(ERROR, "confkey is not a 1-D smallint array");
4238 
4239  memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4240 
4241  /* Likewise for conpfeqop */
4243  conrel->rd_att, &isnull);
4244  if (isnull)
4245  elog(ERROR, "null conpfeqop for rel %s",
4246  RelationGetRelationName(relation));
4247 
4248  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4249  nelem = ARR_DIMS(arr)[0];
4250  if (ARR_NDIM(arr) != 1 ||
4251  nelem != info->nkeys ||
4252  ARR_HASNULL(arr) ||
4253  ARR_ELEMTYPE(arr) != OIDOID)
4254  elog(ERROR, "conpfeqop is not a 1-D OID array");
4255 
4256  memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
4257 
4258  /* Add FK's node to the result list */
4259  result = lappend(result, info);
4260  }
4261 
4262  systable_endscan(conscan);
4263  heap_close(conrel, AccessShareLock);
4264 
4265  /* Now save a copy of the completed list in the relcache entry. */
4267  oldlist = relation->rd_fkeylist;
4268  relation->rd_fkeylist = copyObject(result);
4269  relation->rd_fkeyvalid = true;
4270  MemoryContextSwitchTo(oldcxt);
4271 
4272  /* Don't leak the old list, if there is one */
4273  list_free_deep(oldlist);
4274 
4275  return result;
4276 }
4277 
4278 /*
4279  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4280  *
4281  * The index list is created only if someone requests it. We scan pg_index
4282  * to find relevant indexes, and add the list to the relcache entry so that
4283  * we won't have to compute it again. Note that shared cache inval of a
4284  * relcache entry will delete the old list and set rd_indexvalid to 0,
4285  * so that we must recompute the index list on next request. This handles
4286  * creation or deletion of an index.
4287  *
4288  * Indexes that are marked not IndexIsLive are omitted from the returned list.
4289  * Such indexes are expected to be dropped momentarily, and should not be
4290  * touched at all by any caller of this function.
4291  *
4292  * The returned list is guaranteed to be sorted in order by OID. This is
4293  * needed by the executor, since for index types that we obtain exclusive
4294  * locks on when updating the index, all backends must lock the indexes in
4295  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4296  * consistent ordering would do, but ordering by OID is easy.
4297  *
4298  * Since shared cache inval causes the relcache's copy of the list to go away,
4299  * we return a copy of the list palloc'd in the caller's context. The caller
4300  * may list_free() the returned list after scanning it. This is necessary
4301  * since the caller will typically be doing syscache lookups on the relevant
4302  * indexes, and syscache lookup could cause SI messages to be processed!
4303  *
4304  * We also update rd_oidindex, which this module treats as effectively part
4305  * of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
4306  * it is the pg_class OID of a unique index on OID when the relation has one,
4307  * and InvalidOid if there is no such index.
4308  *
4309  * In exactly the same way, we update rd_pkindex, which is the OID of the
4310  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4311  * which is the pg_class OID of an index to be used as the relation's
4312  * replication identity index, or InvalidOid if there is no such index.
4313  */
4314 List *
4316 {
4317  Relation indrel;
4318  SysScanDesc indscan;
4319  ScanKeyData skey;
4320  HeapTuple htup;
4321  List *result;
4322  List *oldlist;
4323  char replident = relation->rd_rel->relreplident;
4324  Oid oidIndex = InvalidOid;
4325  Oid pkeyIndex = InvalidOid;
4326  Oid candidateIndex = InvalidOid;
4327  MemoryContext oldcxt;
4328 
4329  /* Quick exit if we already computed the list. */
4330  if (relation->rd_indexvalid != 0)
4331  return list_copy(relation->rd_indexlist);
4332 
4333  /*
4334  * We build the list we intend to return (in the caller's context) while
4335  * doing the scan. After successfully completing the scan, we copy that
4336  * list into the relcache entry. This avoids cache-context memory leakage
4337  * if we get some sort of error partway through.
4338  */
4339  result = NIL;
4340  oidIndex = InvalidOid;
4341 
4342  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4343  ScanKeyInit(&skey,
4345  BTEqualStrategyNumber, F_OIDEQ,
4346  ObjectIdGetDatum(RelationGetRelid(relation)));
4347 
4349  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4350  NULL, 1, &skey);
4351 
4352  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4353  {
4355  Datum indclassDatum;
4356  oidvector *indclass;
4357  bool isnull;
4358 
4359  /*
4360  * Ignore any indexes that are currently being dropped. This will
4361  * prevent them from being searched, inserted into, or considered in
4362  * HOT-safety decisions. It's unsafe to touch such an index at all
4363  * since its catalog entries could disappear at any instant.
4364  */
4365  if (!IndexIsLive(index))
4366  continue;
4367 
4368  /* Add index's OID to result list in the proper order */
4369  result = insert_ordered_oid(result, index->indexrelid);
4370 
4371  /*
4372  * indclass cannot be referenced directly through the C struct,
4373  * because it comes after the variable-width indkey field. Must
4374  * extract the datum the hard way...
4375  */
4376  indclassDatum = heap_getattr(htup,
4379  &isnull);
4380  Assert(!isnull);
4381  indclass = (oidvector *) DatumGetPointer(indclassDatum);
4382 
4383  /*
4384  * Invalid, non-unique, non-immediate or predicate indexes aren't
4385  * interesting for either oid indexes or replication identity indexes,
4386  * so don't check them.
4387  */
4388  if (!IndexIsValid(index) || !index->indisunique ||
4389  !index->indimmediate ||
4391  continue;
4392 
4393  /* Check to see if is a usable btree index on OID */
4394  if (index->indnatts == 1 &&
4395  index->indkey.values[0] == ObjectIdAttributeNumber &&
4396  indclass->values[0] == OID_BTREE_OPS_OID)
4397  oidIndex = index->indexrelid;
4398 
4399  /* remember primary key index if any */
4400  if (index->indisprimary)
4401  pkeyIndex = index->indexrelid;
4402 
4403  /* remember explicitly chosen replica index */
4404  if (index->indisreplident)
4405  candidateIndex = index->indexrelid;
4406  }
4407 
4408  systable_endscan(indscan);
4409 
4410  heap_close(indrel, AccessShareLock);
4411 
4412  /* Now save a copy of the completed list in the relcache entry. */
4414  oldlist = relation->rd_indexlist;
4415  relation->rd_indexlist = list_copy(result);
4416  relation->rd_oidindex = oidIndex;
4417  relation->rd_pkindex = pkeyIndex;
4418  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4419  relation->rd_replidindex = pkeyIndex;
4420  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4421  relation->rd_replidindex = candidateIndex;
4422  else
4423  relation->rd_replidindex = InvalidOid;
4424  relation->rd_indexvalid = 1;
4425  MemoryContextSwitchTo(oldcxt);
4426 
4427  /* Don't leak the old list, if there is one */
4428  list_free(oldlist);
4429 
4430  return result;
4431 }
4432 
4433 /*
4434  * RelationGetStatExtList
4435  * get a list of OIDs of statistics objects on this relation
4436  *
4437  * The statistics list is created only if someone requests it, in a way
4438  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4439  * relevant statistics, and add the list to the relcache entry so that we
4440  * won't have to compute it again. Note that shared cache inval of a
4441  * relcache entry will delete the old list and set rd_statvalid to 0,
4442  * so that we must recompute the statistics list on next request. This
4443  * handles creation or deletion of a statistics object.
4444  *
4445  * The returned list is guaranteed to be sorted in order by OID, although
4446  * this is not currently needed.
4447  *
4448  * Since shared cache inval causes the relcache's copy of the list to go away,
4449  * we return a copy of the list palloc'd in the caller's context. The caller
4450  * may list_free() the returned list after scanning it. This is necessary
4451  * since the caller will typically be doing syscache lookups on the relevant
4452  * statistics, and syscache lookup could cause SI messages to be processed!
4453  */
4454 List *
4456 {
4457  Relation indrel;
4458  SysScanDesc indscan;
4459  ScanKeyData skey;
4460  HeapTuple htup;
4461  List *result;
4462  List *oldlist;
4463  MemoryContext oldcxt;
4464 
4465  /* Quick exit if we already computed the list. */
4466  if (relation->rd_statvalid != 0)
4467  return list_copy(relation->rd_statlist);
4468 
4469  /*
4470  * We build the list we intend to return (in the caller's context) while
4471  * doing the scan. After successfully completing the scan, we copy that
4472  * list into the relcache entry. This avoids cache-context memory leakage
4473  * if we get some sort of error partway through.
4474  */
4475  result = NIL;
4476 
4477  /*
4478  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4479  * rel.
4480  */
4481  ScanKeyInit(&skey,
4483  BTEqualStrategyNumber, F_OIDEQ,
4484  ObjectIdGetDatum(RelationGetRelid(relation)));
4485 
4487  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4488  NULL, 1, &skey);
4489 
4490  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4491  result = insert_ordered_oid(result, HeapTupleGetOid(htup));
4492 
4493  systable_endscan(indscan);
4494 
4495  heap_close(indrel, AccessShareLock);
4496 
4497  /* Now save a copy of the completed list in the relcache entry. */
4499  oldlist = relation->rd_statlist;
4500  relation->rd_statlist = list_copy(result);
4501 
4502  relation->rd_statvalid = true;
4503  MemoryContextSwitchTo(oldcxt);
4504 
4505  /* Don't leak the old list, if there is one */
4506  list_free(oldlist);
4507 
4508  return result;
4509 }
4510 
4511 /*
4512  * insert_ordered_oid
4513  * Insert a new Oid into a sorted list of Oids, preserving ordering
4514  *
4515  * Building the ordered list this way is O(N^2), but with a pretty small
4516  * constant, so for the number of entries we expect it will probably be
4517  * faster than trying to apply qsort(). Most tables don't have very many
4518  * indexes...
4519  */
4520 static List *
4522 {
4523  ListCell *prev;
4524 
4525  /* Does the datum belong at the front? */
4526  if (list == NIL || datum < linitial_oid(list))
4527  return lcons_oid(datum, list);
4528  /* No, so find the entry it belongs after */
4529  prev = list_head(list);
4530  for (;;)
4531  {
4532  ListCell *curr = lnext(prev);
4533 
4534  if (curr == NULL || datum < lfirst_oid(curr))
4535  break; /* it belongs after 'prev', before 'curr' */
4536 
4537  prev = curr;
4538  }
4539  /* Insert datum into list after 'prev' */
4540  lappend_cell_oid(list, prev, datum);
4541  return list;
4542 }
4543 
4544 /*
4545  * RelationSetIndexList -- externally force the index list contents
4546  *
4547  * This is used to temporarily override what we think the set of valid
4548  * indexes is (including the presence or absence of an OID index).
4549  * The forcing will be valid only until transaction commit or abort.
4550  *
4551  * This should only be applied to nailed relations, because in a non-nailed
4552  * relation the hacked index list could be lost at any time due to SI
4553  * messages. In practice it is only used on pg_class (see REINDEX).
4554  *
4555  * It is up to the caller to make sure the given list is correctly ordered.
4556  *
4557  * We deliberately do not change rd_indexattr here: even when operating
4558  * with a temporary partial index list, HOT-update decisions must be made
4559  * correctly with respect to the full index set. It is up to the caller
4560  * to ensure that a correct rd_indexattr set has been cached before first
4561  * calling RelationSetIndexList; else a subsequent inquiry might cause a
4562  * wrong rd_indexattr set to get computed and cached. Likewise, we do not
4563  * touch rd_keyattr, rd_pkattr or rd_idattr.
4564  */
4565 void
4566 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
4567 {
4568  MemoryContext oldcxt;
4569 
4570  Assert(relation->rd_isnailed);
4571  /* Copy the list into the cache context (could fail for lack of mem) */
4573  indexIds = list_copy(indexIds);
4574  MemoryContextSwitchTo(oldcxt);
4575  /* Okay to replace old list */
4576  list_free(relation->rd_indexlist);
4577  relation->rd_indexlist = indexIds;
4578  relation->rd_oidindex = oidIndex;
4579 
4580  /*
4581  * For the moment, assume the target rel hasn't got a pk or replica index.
4582  * We'll load them on demand in the API that wraps access to them.
4583  */
4584  relation->rd_pkindex = InvalidOid;
4585  relation->rd_replidindex = InvalidOid;
4586  relation->rd_indexvalid = 2; /* mark list as forced */
4587  /* Flag relation as needing eoxact cleanup (to reset the list) */
4588  EOXactListAdd(relation);
4589 }
4590 
4591 /*
4592  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
4593  *
4594  * Returns InvalidOid if there is no such index.
4595  */
4596 Oid
4598 {
4599  List *ilist;
4600 
4601  /*
4602  * If relation doesn't have OIDs at all, caller is probably confused. (We
4603  * could just silently return InvalidOid, but it seems better to throw an
4604  * assertion.)
4605  */
4606  Assert(relation->rd_rel->relhasoids);
4607 
4608  if (relation->rd_indexvalid == 0)
4609  {
4610  /* RelationGetIndexList does the heavy lifting. */
4611  ilist = RelationGetIndexList(relation);
4612  list_free(ilist);
4613  Assert(relation->rd_indexvalid != 0);
4614  }
4615 
4616  return relation->rd_oidindex;
4617 }
4618 
4619 /*
4620  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4621  *
4622  * Returns InvalidOid if there is no such index.
4623  */
4624 Oid
4626 {
4627  List *ilist;
4628 
4629  if (relation->rd_indexvalid == 0)
4630  {
4631  /* RelationGetIndexList does the heavy lifting. */
4632  ilist = RelationGetIndexList(relation);
4633  list_free(ilist);
4634  Assert(relation->rd_indexvalid != 0);
4635  }
4636 
4637  return relation->rd_pkindex;
4638 }
4639 
4640 /*
4641  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4642  *
4643  * Returns InvalidOid if there is no such index.
4644  */
4645 Oid
4647 {
4648  List *ilist;
4649 
4650  if (relation->rd_indexvalid == 0)
4651  {
4652  /* RelationGetIndexList does the heavy lifting. */
4653  ilist = RelationGetIndexList(relation);
4654  list_free(ilist);
4655  Assert(relation->rd_indexvalid != 0);
4656  }
4657 
4658  return relation->rd_replidindex;
4659 }
4660 
4661 /*
4662  * RelationGetIndexExpressions -- get the index expressions for an index
4663  *
4664  * We cache the result of transforming pg_index.indexprs into a node tree.
4665  * If the rel is not an index or has no expressional columns, we return NIL.
4666  * Otherwise, the returned tree is copied into the caller's memory context.
4667  * (We don't want to return a pointer to the relcache copy, since it could
4668  * disappear due to relcache invalidation.)
4669  */
4670 List *
4672 {
4673  List *result;
4674  Datum exprsDatum;
4675  bool isnull;
4676  char *exprsString;
4677  MemoryContext oldcxt;
4678 
4679  /* Quick exit if we already computed the result. */
4680  if (relation->rd_indexprs)
4681  return copyObject(relation->rd_indexprs);
4682 
4683  /* Quick exit if there is nothing to do. */
4684  if (relation->rd_indextuple == NULL ||
4686  return NIL;
4687 
4688  /*
4689  * We build the tree we intend to return in the caller's context. After
4690  * successfully completing the work, we copy it into the relcache entry.
4691  * This avoids problems if we get some sort of error partway through.
4692  */
4693  exprsDatum = heap_getattr(relation->rd_indextuple,
4696  &isnull);
4697  Assert(!isnull);
4698  exprsString = TextDatumGetCString(exprsDatum);
4699  result = (List *) stringToNode(exprsString);
4700  pfree(exprsString);
4701 
4702  /*
4703  * Run the expressions through eval_const_expressions. This is not just an
4704  * optimization, but is necessary, because the planner will be comparing
4705  * them to similarly-processed qual clauses, and may fail to detect valid
4706  * matches without this. We don't bother with canonicalize_qual, however.
4707  */
4708  result = (List *) eval_const_expressions(NULL, (Node *) result);
4709 
4710  result = (List *) canonicalize_qual((Expr *) result);
4711 
4712  /* May as well fix opfuncids too */
4713  fix_opfuncids((Node *) result);
4714 
4715  /* Now save a copy of the completed tree in the relcache entry. */
4716  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4717  relation->rd_indexprs = copyObject(result);
4718  MemoryContextSwitchTo(oldcxt);
4719 
4720  return result;
4721 }
4722 
4723 /*
4724  * RelationGetIndexPredicate -- get the index predicate for an index
4725  *
4726  * We cache the result of transforming pg_index.indpred into an implicit-AND
4727  * node tree (suitable for use in planning).
4728  * If the rel is not an index or has no predicate, we return NIL.
4729  * Otherwise, the returned tree is copied into the caller's memory context.
4730  * (We don't want to return a pointer to the relcache copy, since it could
4731  * disappear due to relcache invalidation.)
4732  */
4733 List *
4735 {
4736  List *result;
4737  Datum predDatum;
4738  bool isnull;
4739  char *predString;
4740  MemoryContext oldcxt;
4741 
4742  /* Quick exit if we already computed the result. */
4743  if (relation->rd_indpred)
4744  return copyObject(relation->rd_indpred);
4745 
4746  /* Quick exit if there is nothing to do. */
4747  if (relation->rd_indextuple == NULL ||
4749  return NIL;
4750 
4751  /*
4752  * We build the tree we intend to return in the caller's context. After
4753  * successfully completing the work, we copy it into the relcache entry.
4754  * This avoids problems if we get some sort of error partway through.
4755  */
4756  predDatum = heap_getattr(relation->rd_indextuple,
4759  &isnull);
4760  Assert(!isnull);
4761  predString = TextDatumGetCString(predDatum);
4762  result = (List *) stringToNode(predString);
4763  pfree(predString);
4764 
4765  /*
4766  * Run the expression through const-simplification and canonicalization.
4767  * This is not just an optimization, but is necessary, because the planner
4768  * will be comparing it to similarly-processed qual clauses, and may fail
4769  * to detect valid matches without this. This must match the processing
4770  * done to qual clauses in preprocess_expression()! (We can skip the
4771  * stuff involving subqueries, however, since we don't allow any in index
4772  * predicates.)
4773  */
4774  result = (List *) eval_const_expressions(NULL, (Node *) result);
4775 
4776  result = (List *) canonicalize_qual((Expr *) result);
4777 
4778  /* Also convert to implicit-AND format */
4779  result = make_ands_implicit((Expr *) result);
4780 
4781  /* May as well fix opfuncids too */
4782  fix_opfuncids((Node *) result);
4783 
4784  /* Now save a copy of the completed tree in the relcache entry. */
4785  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4786  relation->rd_indpred = copyObject(result);
4787  MemoryContextSwitchTo(oldcxt);
4788 
4789  return result;
4790 }
4791 
4792 /*
4793  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4794  *
4795  * The result has a bit set for each attribute used anywhere in the index
4796  * definitions of all the indexes on this relation. (This includes not only
4797  * simple index keys, but attributes used in expressions and partial-index
4798  * predicates.)
4799  *
4800  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4801  * for all potential foreign key columns, or for all columns in the configured
4802  * replica identity index is returned.
4803  *
4804  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4805  * we can include system attributes (e.g., OID) in the bitmap representation.
4806  *
4807  * Caller had better hold at least RowExclusiveLock on the target relation
4808  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4809  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4810  * that lock level doesn't guarantee a stable set of indexes, so we have to
4811  * be prepared to retry here in case of a change in the set of indexes.
4812  *
4813  * The returned result is palloc'd in the caller's memory context and should
4814  * be bms_free'd when not needed anymore.
4815  */
4816 Bitmapset *
 /*
  * NOTE(review): this listing jumps from 4816 to 4818, so the line carrying
  * the function name and parameter list is not visible here; "relation" and
  * "attrKind" used below are presumably declared on it. Similar gaps occur
  * further down (4839, 4841, 4920, 4924, 4928, 4932, 4990, 5004, 5006); the
  * statements on those lines are not reproduced in this extract.
  */
4818 {
4819  Bitmapset *indexattrs; /* indexed columns */
4820  Bitmapset *uindexattrs; /* columns in unique indexes */
4821  Bitmapset *pkindexattrs; /* columns in the primary index */
4822  Bitmapset *idindexattrs; /* columns in the replica identity */
4823  List *indexoidlist;
4824  List *newindexoidlist;
4825  Oid relpkindex;
4826  Oid relreplindex;
4827  ListCell *l;
4828  MemoryContext oldcxt;
4829 
4830  /* Quick exit if we already computed the result. */
 /*
  * rd_indexattr doubles as the validity flag for all four cached bitmaps (it
  * is stored last, further down). The cached sets are never handed out
  * directly: each caller receives its own copy.
  */
4831  if (relation->rd_indexattr != NULL)
4832  {
4833  switch (attrKind)
4834  {
4835  case INDEX_ATTR_BITMAP_ALL:
4836  return bms_copy(relation->rd_indexattr);
4837  case INDEX_ATTR_BITMAP_KEY:
4838  return bms_copy(relation->rd_keyattr);
4840  return bms_copy(relation->rd_pkattr);
4842  return bms_copy(relation->rd_idattr);
4843  default:
4844  elog(ERROR, "unknown attrKind %u", attrKind);
4845  }
4846  }
4847 
4848  /* Fast path if definitely no indexes */
4849  if (!RelationGetForm(relation)->relhasindex)
4850  return NULL;
4851 
4852  /*
4853  * Get cached list of index OIDs. If we have to start over, we do so here.
4854  */
4855 restart:
4856  indexoidlist = RelationGetIndexList(relation);
4857 
4858  /* Fall out if no indexes (but relhasindex was set) */
4859  if (indexoidlist == NIL)
4860  return NULL;
4861 
4862  /*
4863  * Copy the rd_pkindex and rd_replidindex values computed by
4864  * RelationGetIndexList before proceeding. This is needed because a
4865  * relcache flush could occur inside index_open below, resetting the
4866  * fields managed by RelationGetIndexList. We need to do the work with
4867  * stable values of these fields.
4868  */
4869  relpkindex = relation->rd_pkindex;
4870  relreplindex = relation->rd_replidindex;
4871 
4872  /*
4873  * For each index, add referenced attributes to indexattrs.
4874  *
4875  * Note: we consider all indexes returned by RelationGetIndexList, even if
4876  * they are not indisready or indisvalid. This is important because an
4877  * index for which CREATE INDEX CONCURRENTLY has just started must be
4878  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4879  * CONCURRENTLY is far enough along that we should ignore the index, it
4880  * won't be returned at all by RelationGetIndexList.
4881  */
 /* The working bitmaps are rebuilt from scratch on every pass through "restart" */
4882  indexattrs = NULL;
4883  uindexattrs = NULL;
4884  pkindexattrs = NULL;
4885  idindexattrs = NULL;
4886  foreach(l, indexoidlist)
4887  {
4888  Oid indexOid = lfirst_oid(l);
4889  Relation indexDesc;
4890  IndexInfo *indexInfo;
4891  int i;
4892  bool isKey; /* candidate key */
4893  bool isPK; /* primary key */
4894  bool isIDKey; /* replica identity index */
4895 
4896  indexDesc = index_open(indexOid, AccessShareLock);
4897 
4898  /* Extract index key information from the index's pg_index row */
4899  indexInfo = BuildIndexInfo(indexDesc);
4900 
4901  /* Can this index be referenced by a foreign key? */
 /* i.e. unique, with neither expression columns nor a predicate */
4902  isKey = indexInfo->ii_Unique &&
4903  indexInfo->ii_Expressions == NIL &&
4904  indexInfo->ii_Predicate == NIL;
4905 
4906  /* Is this a primary key? */
4907  isPK = (indexOid == relpkindex);
4908 
4909  /* Is this index the configured (or default) replica identity? */
4910  isIDKey = (indexOid == relreplindex);
4911 
4912  /* Collect simple attribute references */
4913  for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
4914  {
4915  int attrnum = indexInfo->ii_KeyAttrNumbers[i];
4916 
 /* a zero entry is skipped here; expression columns are picked up by pull_varattnos below */
4917  if (attrnum != 0)
4918  {
4919  indexattrs = bms_add_member(indexattrs,
4921 
4922  if (isKey)
4923  uindexattrs = bms_add_member(uindexattrs,
4925 
4926  if (isPK)
4927  pkindexattrs = bms_add_member(pkindexattrs,
4929 
4930  if (isIDKey)
4931  idindexattrs = bms_add_member(idindexattrs,
4933  }
4934  }
4935 
4936  /* Collect all attributes used in expressions, too */
4937  pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
4938 
4939  /* Collect all attributes in the index predicate, too */
4940  pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
4941 
4942  index_close(indexDesc, AccessShareLock);
4943  }
4944 
4945  /*
4946  * During one of the index_opens in the above loop, we might have received
4947  * a relcache flush event on this relcache entry, which might have been
4948  * signaling a change in the rel's index list. If so, we'd better start
4949  * over to ensure we deliver up-to-date attribute bitmaps.
4950  */
4951  newindexoidlist = RelationGetIndexList(relation);
4952  if (equal(indexoidlist, newindexoidlist) &&
4953  relpkindex == relation->rd_pkindex &&
4954  relreplindex == relation->rd_replidindex)
4955  {
4956  /* Still the same index set, so proceed */
4957  list_free(newindexoidlist);
4958  list_free(indexoidlist);
4959  }
4960  else
4961  {
4962  /* Gotta do it over ... might as well not leak memory */
4963  list_free(newindexoidlist);
4964  list_free(indexoidlist);
4965  bms_free(uindexattrs);
4966  bms_free(pkindexattrs);
4967  bms_free(idindexattrs);
4968  bms_free(indexattrs);
4969 
4970  goto restart;
4971  }
4972 
4973  /* Don't leak the old values of these bitmaps, if any */
4974  bms_free(relation->rd_indexattr);
4975  relation->rd_indexattr = NULL;
4976  bms_free(relation->rd_keyattr);
4977  relation->rd_keyattr = NULL;
4978  bms_free(relation->rd_pkattr);
4979  relation->rd_pkattr = NULL;
4980  bms_free(relation->rd_idattr);
4981  relation->rd_idattr = NULL;
4982 
4983  /*
4984  * Now save copies of the bitmaps in the relcache entry. We intentionally
4985  * set rd_indexattr last, because that's the one that signals validity of
4986  * the values; if we run out of memory before making that copy, we won't
4987  * leave the relcache entry looking like the other ones are valid but
4988  * empty.
4989  */
4991  relation->rd_keyattr = bms_copy(uindexattrs);
4992  relation->rd_pkattr = bms_copy(pkindexattrs);
4993  relation->rd_idattr = bms_copy(idindexattrs);
4994  relation->rd_indexattr = bms_copy(indexattrs);
4995  MemoryContextSwitchTo(oldcxt);
4996 
4997  /* We return our original working copy for caller to play with */
4998  switch (attrKind)
4999  {
5000  case INDEX_ATTR_BITMAP_ALL:
5001  return indexattrs;
5002  case INDEX_ATTR_BITMAP_KEY:
5003  return uindexattrs;
 /*
  * Hand back the working bitmap like the sibling cases do. Returning a
  * fresh copy of rd_pkattr instead would leave pkindexattrs unfreed in the
  * caller's context and cost an extra allocation for the same contents.
  */
5005  return pkindexattrs;
5007  return idindexattrs;
5008  default:
5009  elog(ERROR, "unknown attrKind %u", attrKind);
5010  return NULL;
5011  }
5012 }
5013 
5014 /*
5015  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5016  *
5017  * This should be called only for an index that is known to have an
5018  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5019  * context) of the exclusion operator OIDs, their underlying functions'
5020  * OIDs, and their strategy numbers in the index's opclasses. We cache
5021  * all this information since it requires a fair amount of work to get.
5022  */
5023 void
5025  Oid **operators,
5026  Oid **procs,
5027  uint16 **strategies)
5028 {
 /*
  * NOTE(review): listing lines 5024, 5061, 5065, 5072 and 5091 are absent
  * from this extract. "indexRelation" and "conform" are used below without a
  * visible declaration, and "conrel" is used without a visible assignment;
  * presumably those live on the missing lines.
  */
5029  int ncols = indexRelation->rd_rel->relnatts;
5030  Oid *ops;
5031  Oid *funcs;
5032  uint16 *strats;
5033  Relation conrel;
5034  SysScanDesc conscan;
5035  ScanKeyData skey[1];
5036  HeapTuple htup;
5037  bool found;
5038  MemoryContext oldcxt;
5039  int i;
5040 
5041  /* Allocate result space in caller context */
 /* Done before the cache check so that both paths fill the same output arrays */
5042  *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
5043  *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
5044  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
5045 
5046  /* Quick exit if we have the data cached already */
 /* rd_exclstrats is the field tested to decide whether the three cached arrays exist */
5047  if (indexRelation->rd_exclstrats != NULL)
5048  {
5049  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
5050  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
5051  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
5052  return;
5053  }
5054 
5055  /*
5056  * Search pg_constraint for the constraint associated with the index. To
5057  * make this not too painfully slow, we use the index on conrelid; that
5058  * will hold the parent relation's OID not the index's own OID.
5059  */
5060  ScanKeyInit(&skey[0],
5062  BTEqualStrategyNumber, F_OIDEQ,
5063  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5064 
5066  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
5067  NULL, 1, skey);
5068  found = false;
5069 
 /* Walk every constraint row returned by the scan and pick out the one that owns this index */
5070  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5071  {
5073  Datum val;
5074  bool isnull;
5075  ArrayType *arr;
5076  int nelem;
5077 
5078  /* We want the exclusion constraint owning the index */
5079  if (conform->contype != CONSTRAINT_EXCLUSION ||
5080  conform->conindid != RelationGetRelid(indexRelation))
5081  continue;
5082 
5083  /* There should be only one */
5084  if (found)
5085  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5086  RelationGetRelationName(indexRelation));
5087  found = true;
5088 
5089  /* Extract the operator OIDs from conexclop */
5090  val = fastgetattr(htup,
5092  conrel->rd_att, &isnull);
5093  if (isnull)
5094  elog(ERROR, "null conexclop for rel %s",
5095  RelationGetRelationName(indexRelation));
5096 
5097  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
 /*
  * NOTE(review): the first dimension is read before ARR_NDIM is checked
  * below; confirm that reading it is harmless for a zero-dimensional array.
  */
5098  nelem = ARR_DIMS(arr)[0];
5099  if (ARR_NDIM(arr) != 1 ||
5100  nelem != ncols ||
5101  ARR_HASNULL(arr) ||
5102  ARR_ELEMTYPE(arr) != OIDOID)
5103  elog(ERROR, "conexclop is not a 1-D Oid array");
5104 
 /* The checks above guarantee exactly ncols non-null Oid elements */
5105  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
5106  }
5107 
5108  systable_endscan(conscan);
5109  heap_close(conrel, AccessShareLock);
5110 
5111  if (!found)
5112  elog(ERROR, "exclusion constraint record missing for rel %s",
5113  RelationGetRelationName(indexRelation));
5114 
5115  /* We need the func OIDs and strategy numbers too */
5116  for (i = 0; i < ncols; i++)
5117  {
5118  funcs[i] = get_opcode(ops[i]);
5119  strats[i] = get_op_opfamily_strategy(ops[i],
5120  indexRelation->rd_opfamily[i]);
5121  /* shouldn't fail, since it was checked at index creation */
5122  if (strats[i] == InvalidStrategy)
5123  elog(ERROR, "could not find strategy for operator %u in family %u",
5124  ops[i], indexRelation->rd_opfamily[i]);
5125  }
5126 
5127  /* Save a copy of the results in the relcache entry. */
 /* The cached arrays are allocated in the index's own context (rd_indexcxt), not the caller's */
5128  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5129  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
5130  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
5131  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
5132  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
5133  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
5134  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
5135  MemoryContextSwitchTo(oldcxt);
5136 }
5137 
5138 /*
5139  * Get publication actions for the given relation.
5140  */
5141 struct PublicationActions *
 /*
  * NOTE(review): listing lines 5142, 5163 and 5192 are absent from this
  * extract. The function name and its "relation" parameter, the assignment
  * of "tup" and the assignment of "oldcxt" are therefore not visible here.
  */
5143 {
5144  List *puboids;
5145  ListCell *lc;
5146  MemoryContext oldcxt;
 /* Zero-filled, so the three flags OR-ed together in the loop below start out false */
5147  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5148 
 /* Cached result available: fill the fresh struct from it and return that (memcpy returns its destination) */
5149  if (relation->rd_pubactions)
5150  return memcpy(pubactions, relation->rd_pubactions,
5151  sizeof(PublicationActions));
5152 
5153  /* Fetch the publication membership info. */
5154  puboids = GetRelationPublications(RelationGetRelid(relation));
5155  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5156 
 /* OR together the insert/update/delete flags of every publication in the list */
5157  foreach(lc, puboids)
5158  {
5159  Oid pubid = lfirst_oid(lc);
5160  HeapTuple tup;
5161  Form_pg_publication pubform;
5162 
5164 
5165  if (!HeapTupleIsValid(tup))
5166  elog(ERROR, "cache lookup failed for publication %u", pubid);
5167 
5168  pubform = (Form_pg_publication) GETSTRUCT(tup);
5169 
5170  pubactions->pubinsert |= pubform->pubinsert;
5171  pubactions->pubupdate |= pubform->pubupdate;
5172  pubactions->pubdelete |= pubform->pubdelete;
5173 
5174  ReleaseSysCache(tup);
5175 
5176  /*
5177  * If we know everything is replicated, there is no point to check for
5178  * other publications.
5179  */
5180  if (pubactions->pubinsert && pubactions->pubupdate &&
5181  pubactions->pubdelete)
5182  break;
5183  }
5184 
 /*
  * NOTE(review): rd_pubactions was NULL when tested at the top of this path;
  * presumably it can have been filled in meanwhile by work done inside the
  * loop above -- confirm. Either way, any existing value is released before
  * the new one is stored.
  */
5185  if (relation->rd_pubactions)
5186  {
5187  pfree(relation->rd_pubactions);
5188  relation->rd_pubactions = NULL;
5189  }
5190 
5191  /* Now save copy of the actions in the relcache entry. */
5193  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5194  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5195  MemoryContextSwitchTo(oldcxt);
5196 
 /* The caller gets the working struct; the relcache entry keeps its own separate copy */
5197  return pubactions;
5198 }
5199 
5200 /*
5201  * Routines to support ereport() reports of relation-related errors
5202  *
5203  * These could have been put into elog.c, but it seems like a module layering
5204  * violation to have elog.c calling relcache or syscache stuff --- and we
5205  * definitely don't want elog.h including rel.h. So we put them here.
5206  */
5207 
5208 /*
5209  * errtable --- stores schema_name and table_name of a table
5210  * within the current errordata.
5211  */
5212 int
5214 {
 /*
  * NOTE(review): the listing jumps from 5212 to 5214 and from 5214 to 5218,
  * so the line with the function name and parameters, and the statements
  * that do the actual work, are not visible in this extract.
  */
5218 
5219  return 0; /* return value does not matter */
5220 }
5221 
5222 /*
5223  * errtablecol --- stores schema_name, table_name and column_name
5224  * of a table column within the current errordata.
5225  *
5226  * The column is specified by attribute number --- for most callers, this is
5227  * easier and less error-prone than getting the column name for themselves.
5228  */
5229 int
5230 errtablecol(Relation rel, int attnum)
5231 {
 /* NOTE(review): listing line 5232 is absent; "reldesc" used below is presumably declared there */
5233  const char *colname;
5234 
5235  /* Use reldesc if it's a user attribute, else consult the catalogs */
 /* attnum is 1-based here, hence the attnum - 1 when indexing the descriptor */
5236  if (attnum > 0 && attnum <= reldesc->natts)
5237  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5238  else
5239  colname = get_relid_attribute_name(RelationGetRelid(rel), attnum);
5240 
 /* Delegate to the name-based variant and pass its result straight through */
5241  return errtablecolname(rel, colname);
5242 }
5243 
5244 /*
5245  * errtablecolname --- stores schema_name, table_name and column_name
5246  * of a table column within the current errordata, where the column name is
5247  * given directly rather than extracted from the relation's catalog data.
5248  *
5249  * Don't use this directly unless errtablecol() is inconvenient for some
5250  * reason. This might possibly be needed during intermediate states in ALTER
5251  * TABLE, for instance.
5252  */
5253 int
5254 errtablecolname(Relation rel, const char *colname)
5255 {
5256  errtable(rel); /* errtable() is documented as storing schema_name and table_name */
 /*
  * NOTE(review): listing line 5257 is absent; going by the header comment,
  * the column name is presumably recorded there. "colname" is not otherwise
  * used in the visible lines.
  */
5258 
5259  return 0; /* return value does not matter */
5260 }
5261 
5262 /*
5263  * errtableconstraint --- stores schema_name, table_name and constraint_name
5264  * of a table-related constraint within the current errordata.
5265  */
5266 int
5267 errtableconstraint(Relation rel, const char *conname)
5268 {
5269  errtable(rel); /* errtable() is documented as storing schema_name and table_name */
 /*
  * NOTE(review): listing line 5270 is absent; going by the header comment,
  * the constraint name is presumably recorded there. "conname" is not
  * otherwise used in the visible lines.
  */
5271 
5272  return 0; /* return value does not matter */
5273 }
5274 
5275 
5276 /*
5277  * load_relcache_init_file, write_relcache_init_file
5278  *
5279  * In late 1992, we started regularly having databases with more than
5280  * a thousand classes in them. With this number of classes, it became
5281  * critical to do indexed lookups on the system catalogs.
5282  *
5283  * Bootstrapping these lookups is very hard. We want to be able to
5284  * use an index on pg_attribute, for example, but in order to do so,
5285  * we must have read pg_attribute for the attributes in the index,
5286  * which implies that we need to use the index.
5287  *
5288  * In order to get around the problem, we do the following:
5289  *
5290  * + When the database system is initialized (at initdb time), we
5291  * don't use indexes. We do sequential scans.
5292  *
5293  * + When the backend is started up in normal mode, we load an image
5294  * of the appropriate relation descriptors, in internal format,
5295  * from an initialization file in the data/base/... directory.
5296  *
5297  * + If the initialization file isn't there, then we create the
5298  * relation descriptors using sequential scans and write 'em to
5299  * the initialization file for use by subsequent backends.
5300  *
5301  * As of Postgres 9.0, there is one local initialization file in each
5302  * database, plus one shared initialization file for shared catalogs.
5303  *
5304  * We could dispense with the initialization files and just build the
5305  * critical reldescs the hard way on every backend startup, but that
5306  * slows down backend startup noticeably.
5307  *
5308  * We can in fact go further, and save more relcache entries than
5309  * just the ones that are absolutely critical; this allows us to speed
5310  * up backend startup by not having to build such entries the hard way.
5311  * Presently, all the catalog and index entries that are referred to
5312  * by catcaches are stored in the initialization files.
5313  *
5314  * The same mechanism that detects when catcache and relcache entries
5315  * need to be invalidated (due to catalog updates) also arranges to
5316  * unlink the initialization files when the contents may be out of date.
5317  * The files will then be rebuilt during the next backend startup.
5318  */
5319 
5320 /*
5321  * load_relcache_init_file -- attempt to load cache from the shared
5322  * or local cache init file
5323  *
5324  * If successful, return true and set criticalRelcachesBuilt or
5325  * criticalSharedRelcachesBuilt to true.
5326  * If not successful, return false.
5327  *
5328  * NOTE: we assume we are already switched into CacheMemoryContext.
5329  */
5330 static bool
 /*
  * NOTE(review): this extract is missing listing lines 5331, 5346, 5349,
  * 5489, 5496-5499, 5628-5629, 5639, 5661, 5675 and 5695. The function name
  * and its "shared" parameter, the trailing arguments of several calls, and
  * a few whole statements are therefore not visible below.
  */
5332 {
5333  FILE *fp;
5334  char initfilename[MAXPGPATH];
5335  Relation *rels;
5336  int relno,
5337  num_rels,
5338  max_rels,
5339  nailed_rels,
5340  nailed_indexes,
5341  magic;
5342  int i;
5343 
5344  if (shared)
5345  snprintf(initfilename, sizeof(initfilename), "global/%s",
5347  else
5348  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5350 
 /* A file that cannot be opened is simply reported as "not loaded" */
5351  fp = AllocateFile(initfilename, PG_BINARY_R);
5352  if (fp == NULL)
5353  return false;
5354 
5355  /*
5356  * Read the index relcache entries from the file. Note we will not enter
5357  * any of them into the cache if the read fails partway through; this
5358  * helps to guard against broken init files.
5359  */
5360  max_rels = 100;
5361  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5362  num_rels = 0;
5363  nailed_rels = nailed_indexes = 0;
5364 
5365  /* check for correct magic number (compatible version) */
5366  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5367  goto read_failed;
5368  if (magic != RELCACHE_INIT_FILEMAGIC)
5369  goto read_failed;
5370 
 /* One iteration per stored relation; each item in the file is a length followed by that many bytes */
5371  for (relno = 0;; relno++)
5372  {
5373  Size len;
5374  size_t nread;
5375  Relation rel;
5376  Form_pg_class relform;
5377  bool has_not_null;
5378 
5379  /* first read the relation descriptor length */
5380  nread = fread(&len, 1, sizeof(len), fp);
5381  if (nread != sizeof(len))
5382  {
5383  if (nread == 0)
5384  break; /* end of file */
5385  goto read_failed;
5386  }
5387 
5388  /* safety check for incompatible relcache layout */
5389  if (len != sizeof(RelationData))
5390  goto read_failed;
5391 
5392  /* allocate another relcache header */
5393  if (num_rels >= max_rels)
5394  {
5395  max_rels *= 2; /* pointer array grows by doubling */
5396  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5397  }
5398 
5399  rel = rels[num_rels++] = (Relation) palloc(len);
5400 
5401  /* then, read the Relation structure */
5402  if (fread(rel, 1, len, fp) != len)
5403  goto read_failed;
5404 
5405  /* next read the relation tuple form */
5406  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5407  goto read_failed;
5408 
 /*
  * NOTE(review): unlike the RelationData and attribute lengths, this length
  * (and the vector lengths read further down) is used as an allocation size
  * without being compared against an expected value; confirm that is
  * acceptable for a file this code treats as possibly broken.
  */
5409  relform = (Form_pg_class) palloc(len);
5410  if (fread(relform, 1, len, fp) != len)
5411  goto read_failed;
5412 
5413  rel->rd_rel = relform;
5414 
5415  /* initialize attribute tuple forms */
5416  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
5417  relform->relhasoids);
5418  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5419 
5420  rel->rd_att->tdtypeid = relform->reltype;
5421  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5422 
5423  /* next read all the attribute tuple form data entries */
5424  has_not_null = false;
5425  for (i = 0; i < relform->relnatts; i++)
5426  {
5427  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5428 
5429  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5430  goto read_failed;
5431  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5432  goto read_failed;
5433  if (fread(attr, 1, len, fp) != len)
5434  goto read_failed;
5435 
 /* remember whether any column is NOT NULL; used to build the constraint struct below */
5436  has_not_null |= attr->attnotnull;
5437  }
5438 
5439  /* next read the access method specific field */
5440  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5441  goto read_failed;
5442  if (len > 0)
5443  {
5444  rel->rd_options = palloc(len);
5445  if (fread(rel->rd_options, 1, len, fp) != len)
5446  goto read_failed;
5447  if (len != VARSIZE(rel->rd_options))
5448  goto read_failed; /* sanity check */
5449  }
5450  else
5451  {
5452  rel->rd_options = NULL;
5453  }
5454 
5455  /* mark not-null status */
5456  if (has_not_null)
5457  {
5458  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5459 
5460  constr->has_not_null = true;
5461  rel->rd_att->constr = constr;
5462  }
5463 
5464  /* If it's an index, there's more to do */
5465  if (rel->rd_rel->relkind == RELKIND_INDEX)
5466  {
5467  MemoryContext indexcxt;
5468  Oid *opfamily;
5469  Oid *opcintype;
5470  RegProcedure *support;
5471  int nsupport;
5472  int16 *indoption;
5473  Oid *indcollation;
5474 
5475  /* Count nailed indexes to ensure we have 'em all */
5476  if (rel->rd_isnailed)
5477  nailed_indexes++;
5478 
5479  /* next, read the pg_index tuple */
5480  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5481  goto read_failed;
5482 
5483  rel->rd_indextuple = (HeapTuple) palloc(len);
5484  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5485  goto read_failed;
5486 
5487  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5488  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5490 
5491  /*
5492  * prepare index info context --- parameters should match
5493  * RelationInitIndexAccessInfo
5494  */
5495  indexcxt =
5500  rel->rd_indexcxt = indexcxt;
5501 
5502  /*
5503  * Now we can fetch the index AM's API struct. (We can't store
5504  * that in the init file, since it contains function pointers that
5505  * might vary across server executions. Fortunately, it should be
5506  * safe to call the amhandler even while bootstrapping indexes.)
5507  */
5508  InitIndexAmRoutine(rel);
5509 
 /* The five per-index vectors below are all allocated in indexcxt rather than the current context */
5510  /* next, read the vector of opfamily OIDs */
5511  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5512  goto read_failed;
5513 
5514  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5515  if (fread(opfamily, 1, len, fp) != len)
5516  goto read_failed;
5517 
5518  rel->rd_opfamily = opfamily;
5519 
5520  /* next, read the vector of opcintype OIDs */
5521  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5522  goto read_failed;
5523 
5524  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5525  if (fread(opcintype, 1, len, fp) != len)
5526  goto read_failed;
5527 
5528  rel->rd_opcintype = opcintype;
5529 
5530  /* next, read the vector of support procedure OIDs */
5531  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5532  goto read_failed;
5533  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5534  if (fread(support, 1, len, fp) != len)
5535  goto read_failed;
5536 
5537  rel->rd_support = support;
5538 
5539  /* next, read the vector of collation OIDs */
5540  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5541  goto read_failed;
5542 
5543  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5544  if (fread(indcollation, 1, len, fp) != len)
5545  goto read_failed;
5546 
5547  rel->rd_indcollation = indcollation;
5548 
5549  /* finally, read the vector of indoption values */
5550  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5551  goto read_failed;
5552 
5553  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5554  if (fread(indoption, 1, len, fp) != len)
5555  goto read_failed;
5556 
5557  rel->rd_indoption = indoption;
5558 
5559  /* set up zeroed fmgr-info vector */
 /* sized as (number of index columns) x (the AM's amsupport count); not read from the file */
5560  nsupport = relform->relnatts * rel->rd_amroutine->amsupport;
5561  rel->rd_supportinfo = (FmgrInfo *)
5562  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5563  }
5564  else
5565  {
5566  /* Count nailed rels to ensure we have 'em all */
5567  if (rel->rd_isnailed)
5568  nailed_rels++;
5569 
 /* A non-index entry is expected to carry no index-only state in the stored struct */
5570  Assert(rel->rd_index == NULL);
5571  Assert(rel->rd_indextuple == NULL);
5572  Assert(rel->rd_indexcxt == NULL);
5573  Assert(rel->rd_amroutine == NULL);
5574  Assert(rel->rd_opfamily == NULL);
5575  Assert(rel->rd_opcintype == NULL);
5576  Assert(rel->rd_support == NULL);
5577  Assert(rel->rd_supportinfo == NULL);
5578  Assert(rel->rd_indoption == NULL);
5579  Assert(rel->rd_indcollation == NULL);
5580  }
5581 
5582  /*
5583  * Rules and triggers are not saved (mainly because the internal
5584  * format is complex and subject to change). They must be rebuilt if
5585  * needed by RelationCacheInitializePhase3. This is not expected to
5586  * be a big performance hit since few system catalogs have such. Ditto
5587  * for RLS policy data, index expressions, predicates, exclusion info,
5588  * and FDW info.
5589  */
5590  rel->rd_rules = NULL;
5591  rel->rd_rulescxt = NULL;
5592  rel->trigdesc = NULL;
5593  rel->rd_rsdesc = NULL;
5594  rel->rd_partkeycxt = NULL;
5595  rel->rd_partkey = NULL;
5596  rel->rd_pdcxt = NULL;
5597  rel->rd_partdesc = NULL;
5598  rel->rd_partcheck = NIL;
5599  rel->rd_indexprs = NIL;
5600  rel->rd_indpred = NIL;
5601  rel->rd_exclops = NULL;
5602  rel->rd_exclprocs = NULL;
5603  rel->rd_exclstrats = NULL;
5604  rel->rd_fdwroutine = NULL;
5605 
5606  /*
5607  * Reset transient-state fields in the relcache entry
5608  */
5609  rel->rd_smgr = NULL;
 /* nailed entries start with a reference count of 1, all others with 0 */
5610  if (rel->rd_isnailed)
5611  rel->rd_refcnt = 1;
5612  else
5613  rel->rd_refcnt = 0;
5614  rel->rd_indexvalid = 0;
5615  rel->rd_fkeylist = NIL;
5616  rel->rd_fkeyvalid = false;
5617  rel->rd_indexlist = NIL;
5618  rel->rd_oidindex = InvalidOid;
5619  rel->rd_pkindex = InvalidOid;
5620  rel->rd_replidindex = InvalidOid;
5621  rel->rd_indexattr = NULL;
5622  rel->rd_keyattr = NULL;
5623  rel->rd_pkattr = NULL;
5624  rel->rd_idattr = NULL;
5625  rel->rd_pubactions = NULL;
5626  rel->rd_statvalid = false;
5627  rel->rd_statlist = NIL;
5630  rel->rd_amcache = NULL;
5631  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5632 
5633  /*
5634  * Recompute lock and physical addressing info. This is needed in
5635  * case the pg_internal.init file was copied from some other database
5636  * by CREATE DATABASE.
5637  */
5638  RelationInitLockInfo(rel);
5640  }
5641 
5642  /*
5643  * We reached the end of the init file without apparent problem. Did we
5644  * get the right number of nailed items? This is a useful crosscheck in
5645  * case the set of critical rels or indexes changes. However, that should
5646  * not happen in a normally-running system, so let's bleat if it does.
5647  *
5648  * For the shared init file, we're called before client authentication is
5649  * done, which means that elog(WARNING) will go only to the postmaster
5650  * log, where it's easily missed. To ensure that developers notice bad
5651  * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5652  * an Assert(false) there.
5653  */
5654  if (shared)
5655  {
5656  if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5657  nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5658  {
5659  elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5660  nailed_rels, nailed_indexes,
5662  /* Make sure we get developers' attention about this */
5663  Assert(false);
5664  /* In production builds, recover by bootstrapping the relcache */
5665  goto read_failed;
5666  }
5667  }
5668  else
5669  {
5670  if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5671  nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5672  {
5673  elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5674  nailed_rels, nailed_indexes,
5676  /* We don't need an Assert() in this case */
5677  goto read_failed;
5678  }
5679  }
5680 
5681  /*
5682  * OK, all appears well.
5683  *
5684  * Now insert all the new relcache entries into the cache.
5685  */
 /* Nothing is entered into the cache before this point, so every failure path above leaves it untouched */
5686  for (relno = 0; relno < num_rels; relno++)
5687  {
5688  RelationCacheInsert(rels[relno], false);
5689  }
5690 
 /* Only the temporary pointer array is released; the entries themselves now belong to the cache */
5691  pfree(rels);
5692  FreeFile(fp);
5693 
5694  if (shared)
5696  else
5697  criticalRelcachesBuilt = true;
5698  return true;
5699 
5700  /*
5701  * init file is broken, so do it the hard way. We don't bother trying to
5702  * free the clutter we just allocated; it's not in the relcache so it
5703  * won't hurt.
5704  */
5705 read_failed:
 /* The pointer array and the file handle are released; the per-relation allocations are left behind as described above */
5706  pfree(rels);
5707  FreeFile(fp);
5708 
5709  return false;
5710 }
5711 
5712 /*
5713  * Write out a new initialization file with the current contents
5714  * of the relcache (either shared rels or local rels, as indicated).
5715  */
5716 static void
5718 {
5719  FILE *fp;
5720  char tempfilename[MAXPGPATH];
5721  char finalfilename[MAXPGPATH];
5722  int magic;
5724  RelIdCacheEnt *idhentry;
5725  int i;
5726 
5727  /*
5728  * If we have already received any relcache inval events, there's no
5729  * chance of succeeding so we may as well skip the whole thing.
5730  */
5731  if (relcacheInvalsReceived != 0L)
5732  return;
5733 
5734  /*
5735  * We must write a temporary file and rename it into place. Otherwise,
5736  * another backend starting at about the same time might crash trying to
5737  * read the partially-complete file.
5738  */
5739  if (shared)
5740  {
5741  snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5743  snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5745  }
5746  else
5747  {
5748  snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5750  snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5752  }
5753 
5754  unlink(tempfilename); /* in case it exists w/wrong permissions */
5755 
5756  fp = AllocateFile(tempfilename, PG_BINARY_W);
5757  if (fp == NULL)
5758  {
5759  /*
5760  * We used to consider this a fatal error, but we might as well
5761  * continue with backend startup ...
5762  */
5763  ereport(WARNING,
5765  errmsg("could not create relation-cache initialization file \"%s\": %m",
5766  tempfilename),
5767  errdetail("Continuing anyway, but there's something wrong.")));
5768  return;
5769  }
5770 
5771  /*
5772  * Write a magic number to serve as a file version identifier. We can
5773  * change the magic number whenever the relcache layout changes.
5774  */
5775  magic = RELCACHE_INIT_FILEMAGIC;
5776  if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5777  elog(FATAL, "could not write init file");
5778 
5779  /*
5780  * Write all the appropriate reldescs (in no particular order).
5781  */
5782  hash_seq_init(&status, RelationIdCache);
5783 
5784  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5785  {
5786  Relation rel = idhentry->reldesc;
5787  Form_pg_class relform = rel->rd_rel;
5788 
5789  /* ignore if not correct group */
5790  if (relform->relisshared != shared)
5791  continue;
5792 
5793  /*
5794  * Ignore if not supposed to be in init file. We can allow any shared
5795  * relation that's been loaded so far to be in the shared init file,
5796  * but unshared relations must be ones that should be in the local
5797  * file per RelationIdIsInInitFile. (Note: if you want to change the
5798  * criterion for rels to be kept in the init file, see also inval.c.
5799  * The reason for filtering here is to be sure that we don't put
5800  * anything into the local init file for which a relcache inval would
5801  * not cause invalidation of that init file.)
5802  */
5803  if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5804  {
5805  /* Nailed rels had better get stored. */
5806  Assert(!rel->rd_isnailed);
5807  continue;
5808  }
5809 
5810  /* first write the relcache entry proper */
5811  write_item(rel, sizeof(RelationData), fp);
5812 
5813  /* next write the relation tuple form */<