PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/hash.h"
34 #include "access/htup_details.h"
35 #include "access/multixact.h"
36 #include "access/nbtree.h"
37 #include "access/reloptions.h"
38 #include "access/sysattr.h"
39 #include "access/xact.h"
40 #include "access/xlog.h"
41 #include "catalog/catalog.h"
42 #include "catalog/index.h"
43 #include "catalog/indexing.h"
44 #include "catalog/namespace.h"
45 #include "catalog/partition.h"
46 #include "catalog/pg_am.h"
47 #include "catalog/pg_amproc.h"
48 #include "catalog/pg_attrdef.h"
49 #include "catalog/pg_authid.h"
51 #include "catalog/pg_constraint.h"
52 #include "catalog/pg_database.h"
53 #include "catalog/pg_namespace.h"
54 #include "catalog/pg_opclass.h"
56 #include "catalog/pg_proc.h"
57 #include "catalog/pg_publication.h"
58 #include "catalog/pg_rewrite.h"
59 #include "catalog/pg_shseclabel.h"
62 #include "catalog/pg_tablespace.h"
63 #include "catalog/pg_trigger.h"
64 #include "catalog/pg_type.h"
65 #include "catalog/schemapg.h"
66 #include "catalog/storage.h"
67 #include "commands/policy.h"
68 #include "commands/trigger.h"
69 #include "miscadmin.h"
70 #include "nodes/nodeFuncs.h"
71 #include "optimizer/clauses.h"
72 #include "optimizer/prep.h"
73 #include "optimizer/var.h"
74 #include "rewrite/rewriteDefine.h"
75 #include "rewrite/rowsecurity.h"
76 #include "storage/lmgr.h"
77 #include "storage/smgr.h"
78 #include "utils/array.h"
79 #include "utils/builtins.h"
80 #include "utils/fmgroids.h"
81 #include "utils/inval.h"
82 #include "utils/lsyscache.h"
83 #include "utils/memutils.h"
84 #include "utils/relmapper.h"
85 #include "utils/resowner_private.h"
86 #include "utils/snapmgr.h"
87 #include "utils/syscache.h"
88 #include "utils/tqual.h"
89 
90 
91 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
92 
93 /*
94  * Hardcoded tuple descriptors; contents generated by genbki.pl.
 *
 * Each array below is sized Natts_<catalog> and is initialized from the
 * matching Schema_<catalog> macro. All are file-local and const.
95  */
96 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
97 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
98 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
99 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
100 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
101 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
102 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
103 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
104 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
105 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
106 
107 /*
108  * Hash tables that index the relation cache
109  *
110  * We used to index the cache by both name and OID, but now there
111  * is only an index by OID.
 *
 * RelIdCacheEnt is the entry type that the lookup macros below cast
 * hash_search() results to.
112  */
113 typedef struct relidcacheent
114 {
 /*
  * NOTE(review): listing lines 115-116 (the member declarations) are
  * missing from this extract. The macros below read and write a member
  * named "reldesc"; restore the members from the real source.
  */
117 } RelIdCacheEnt;
118 
120 
121 /*
122  * This flag is false until we have prepared the critical relcache entries
123  * that are needed to do indexscans on the tables read by relcache building.
124  */
126 
127 /*
128  * This flag is false until we have prepared the critical relcache entries
129  * for shared catalogs (which are the tables needed for login).
130  */
132 
133 /*
134  * This counter counts relcache inval events received since backend startup
135  * (but only for rels that are actually in cache). Presently, we use it only
136  * to detect whether data about to be written by write_relcache_init_file()
137  * might already be obsolete.
 *
 * It starts at zero; none of the code visible in this extract modifies it.
138  */
139 static long relcacheInvalsReceived = 0L;
140 
141 /*
142  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
143  * cleanup work. This list intentionally has limited size; if it overflows,
144  * we fall back to scanning the whole hashtable. There is no value in a very
145  * large list because (1) at some point, a hash_seq_search scan is faster than
146  * retail lookups, and (2) the value of this is to reduce EOXact work for
147  * short transactions, which can't have dirtied all that many tables anyway.
148  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
149  * cleanup processing must be idempotent.
 *
 * NOTE(review): listing line 152 is missing from this extract; presumably it
 * declares the eoxact_list[] array itself (MAX_EOXACT_LIST entries) --
 * confirm against the real source.
150  */
151 #define MAX_EOXACT_LIST 32
153 static int eoxact_list_len = 0;
154 static bool eoxact_list_overflowed = false;
155 
/*
 * EOXactListAdd
 *
 * Append rel's rd_id to eoxact_list[] while fewer than MAX_EOXACT_LIST
 * entries are in use; once the list is full, only set
 * eoxact_list_overflowed instead of storing the OID.
 */
156 #define EOXactListAdd(rel) \
157  do { \
158  if (eoxact_list_len < MAX_EOXACT_LIST) \
159  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
160  else \
161  eoxact_list_overflowed = true; \
162  } while (0)
163 
164 /*
165  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
166  * cleanup work. The array expands as needed; there is no hashtable because
167  * we don't need to access individual items except at EOXact.
 *
 * NOTE(review): listing line 169 is missing from this extract; presumably it
 * declares EOXactTupleDescArray itself. Judging by their names, the two
 * counters below are the next free slot and the allocated length of that
 * array -- confirm against the real source.
168  */
170 static int NextEOXactTupleDescNum = 0;
171 static int EOXactTupleDescArrayLen = 0;
172 
173 /*
174  * macros to manipulate the lookup hashtable
175  */
/*
 * RelationCacheInsert
 *
 * Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * If the OID is not yet present, the new hash entry simply points at
 * RELATION.  If an entry already exists, replace_allowed must be true
 * (asserted); the entry is redirected to RELATION and the displaced
 * descriptor is destroyed when its reference count is zero.  Otherwise a
 * WARNING about the leaked entry is emitted, except in bootstrap mode.
 * See comments in RelationBuildDesc and RelationBuildLocalRelation.
 */
#define RelationCacheInsert(RELATION, replace_allowed) \
do { \
    bool _already_present; \
    RelIdCacheEnt *_slot = (RelIdCacheEnt *) \
        hash_search(RelationIdCache, \
                    (void *) &((RELATION)->rd_id), \
                    HASH_ENTER, &_already_present); \
    if (!_already_present) \
        _slot->reldesc = (RELATION); \
    else \
    { \
        Relation _displaced = _slot->reldesc; \
        Assert(replace_allowed); \
        _slot->reldesc = (RELATION); \
        if (RelationHasReferenceCountZero(_displaced)) \
            RelationDestroyRelation(_displaced, false); \
        else if (!IsBootstrapProcessingMode()) \
            elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
                 RelationGetRelationName(_displaced)); \
    } \
} while(0)
197 
/*
 * RelationIdCacheLookup
 *
 * Probe RelationIdCache for ID.  RELATION (an lvalue) receives the cached
 * descriptor when an entry exists and NULL otherwise.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
    RelIdCacheEnt *_slot = (RelIdCacheEnt *) \
        hash_search(RelationIdCache, \
                    (void *) &(ID), \
                    HASH_FIND, NULL); \
    RELATION = (_slot != NULL) ? _slot->reldesc : NULL; \
} while(0)
209 
/*
 * RelationCacheDelete
 *
 * Remove the RelationIdCache entry keyed by RELATION's rd_id.  A missing
 * entry is reported with a WARNING rather than treated as an error.
 */
#define RelationCacheDelete(RELATION) \
do { \
    if (hash_search(RelationIdCache, \
                    (void *) &((RELATION)->rd_id), \
                    HASH_REMOVE, NULL) == NULL) \
        elog(WARNING, "failed to delete relcache entry for OID %u", \
             (RELATION)->rd_id); \
} while(0)
220 
221 
222 /*
223  * Special cache for opclass-related information
224  *
225  * Note: only default support procs get cached, ie, those with
226  * lefttype = righttype = opcintype.
 *
 * NOTE(review): listing line 236 is missing from this extract; presumably
 * it closes the struct and names the typedef (the prototype of
 * LookupOpclassInfo below returns OpClassCacheEnt *) -- confirm.
227  */
228 typedef struct opclasscacheent
229 {
230  Oid opclassoid; /* lookup key: OID of opclass */
231  bool valid; /* set true after successful fill-in */
232  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
233  Oid opcfamily; /* OID of opclass's family */
234  Oid opcintype; /* OID of opclass's declared input type */
235  RegProcedure *supportProcs; /* OIDs of support procedures */
237 
/* The opclass cache hash table; NULL until it has been created. */
238 static HTAB *OpClassCache = NULL;
239 
240 
241 /* non-export function prototypes */
/*
 * Every function declared here is static, i.e. private to this file.
 *
 * NOTE(review): listing lines 248, 261 and 265 are missing from this
 * extract, so some prototypes are absent from the list below.
 */
242 
/* relcache entry teardown, rebuild and invalidation */
243 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
244 static void RelationClearRelation(Relation relation, bool rebuild);
245 
246 static void RelationReloadIndexInfo(Relation relation);
247 static void RelationFlushRelation(Relation relation);
249 static void AtEOXact_cleanup(Relation relation, bool isCommit);
250 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
251  SubTransactionId mySubid, SubTransactionId parentSubid);
/* relcache init file reading and writing */
252 static bool load_relcache_init_file(bool shared);
253 static void write_relcache_init_file(bool shared);
254 static void write_item(const void *data, Size len, FILE *fp);
255 
256 static void formrdesc(const char *relationName, Oid relationReltype,
257  bool isshared, bool hasoids,
258  int natts, const FormData_pg_attribute *attrs);
259 
/* building a relcache entry from the system catalogs */
260 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
262 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
263 static void RelationBuildTupleDesc(Relation relation);
264 static void RelationBuildPartitionKey(Relation relation);
266 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
267 static void RelationInitPhysicalAddr(Relation relation);
268 static void load_critical_index(Oid indexoid, Oid heapoid);
269 static TupleDesc GetPgClassDescriptor(void);
270 static TupleDesc GetPgIndexDescriptor(void);
271 static void AttrDefaultFetch(Relation relation);
272 static void CheckConstraintFetch(Relation relation);
273 static int CheckConstraintCmp(const void *a, const void *b);
274 static List *insert_ordered_oid(List *list, Oid datum);
275 static void InitIndexAmRoutine(Relation relation);
276 static void IndexSupportInitialize(oidvector *indclass,
277  RegProcedure *indexSupport,
278  Oid *opFamily,
279  Oid *opcInType,
280  StrategyNumber maxSupportNumber,
281  AttrNumber maxAttributeNumber);
282 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
283  StrategyNumber numSupport);
284 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
285 static void unlink_initfile(const char *initfilename);
286 static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
287  PartitionDesc partdesc2);
288 
289 
290 /*
291  * ScanPgRelation
292  *
293  * This is used by RelationBuildDesc to find a pg_class
294  * tuple matching targetRelId. The caller must hold at least
295  * AccessShareLock on the target relid to prevent concurrent-update
296  * scenarios; it isn't guaranteed that all scans used to build the
297  * relcache entry will use the same snapshot. If, for example,
298  * an attribute were to be added after scanning pg_class and before
299  * scanning pg_attribute, relnatts wouldn't match.
300  *
 * targetRelId: OID the scan key is compared against (F_OIDEQ).
 * indexOK: pass false to force a heap scan; an index scan on ClassOidIndexId
 * is requested only when indexOK and criticalRelcachesBuilt are both true.
 * force_non_historic: selects which snapshot the scan uses (see the comment
 * in the body).
 *
 * Returns a copy of the first tuple the scan produces. If the scan produces
 * no valid tuple, the result of systable_getnext is returned unchanged, so
 * callers must test it with HeapTupleIsValid. Raises FATAL if no database
 * has been selected yet.
 *
301  * NB: the returned tuple has been copied into palloc'd storage
302  * and must eventually be freed with heap_freetuple.
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing line 326
 * (an argument of ScanKeyInit) and lines 344 and 346 (the statements under
 * "if (force_non_historic)" and its "else", which presumably assign
 * "snapshot") are missing; restore them from the real source.
303  */
304 static HeapTuple
305 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
306 {
307  HeapTuple pg_class_tuple;
308  Relation pg_class_desc;
309  SysScanDesc pg_class_scan;
310  ScanKeyData key[1];
311  Snapshot snapshot;
312 
313  /*
314  * If something goes wrong during backend startup, we might find ourselves
315  * trying to read pg_class before we've selected a database. That ain't
316  * gonna work, so bail out with a useful error message. If this happens,
317  * it probably means a relcache entry that needs to be nailed isn't.
318  */
319  if (!OidIsValid(MyDatabaseId))
320  elog(FATAL, "cannot read pg_class without having selected a database");
321 
322  /*
323  * form a scan key
324  */
325  ScanKeyInit(&key[0],
327  BTEqualStrategyNumber, F_OIDEQ,
328  ObjectIdGetDatum(targetRelId));
329 
330  /*
331  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
332  * built the critical relcache entries (this includes initdb and startup
333  * without a pg_internal.init file). The caller can also force a heap
334  * scan by setting indexOK == false.
335  */
336  pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
337 
338  /*
339  * The caller might need a tuple that's newer than the one the historic
340  * snapshot would return; currently the only case that requires this is
341  * looking up the relfilenode of non-mapped system relations during decoding.
342  */
343  if (force_non_historic)
345  else
347 
348  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
349  indexOK && criticalRelcachesBuilt,
350  snapshot,
351  1, key);
352 
 /* Only the first match is fetched; the scan is not continued. */
353  pg_class_tuple = systable_getnext(pg_class_scan);
354 
355  /*
356  * Must copy tuple before releasing buffer.
357  */
358  if (HeapTupleIsValid(pg_class_tuple))
359  pg_class_tuple = heap_copytuple(pg_class_tuple);
360 
361  /* all done */
362  systable_endscan(pg_class_scan);
363  heap_close(pg_class_desc, AccessShareLock);
364 
365  return pg_class_tuple;
366 }
367 
368 /*
369  * AllocateRelationDesc
370  *
371  * This is used to allocate memory for a new relation descriptor
372  * and initialize the rd_rel field from the given pg_class tuple.
 *
 * The returned descriptor is zero-filled except for:
 * rd_rel - a private copy of the first CLASS_TUPLE_SIZE bytes of relp
 * rd_att - a template tuple descriptor sized by relnatts/relhasoids, with
 * tdrefcount set to 1
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing line 375
 * (the function name and parameter list; the body reads a parameter called
 * "relp") and line 382 (presumably the statement that sets "oldcxt" by
 * switching into CacheMemoryContext) are missing; restore them from the
 * real source.
373  */
374 static Relation
376 {
377  Relation relation;
378  MemoryContext oldcxt;
379  Form_pg_class relationForm;
380 
381  /* Relcache entries must live in CacheMemoryContext */
383 
384  /*
385  * allocate and zero space for new relation descriptor
386  */
387  relation = (Relation) palloc0(sizeof(RelationData));
388 
389  /* make sure relation is marked as having no open file yet */
390  relation->rd_smgr = NULL;
391 
392  /*
393  * Copy the relation tuple form
394  *
395  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
396  * variable-length fields (relacl, reloptions) are NOT stored in the
397  * relcache --- there'd be little point in it, since we don't copy the
398  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
399  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
400  * it from the syscache if you need it. The same goes for the original
401  * form of reloptions (however, we do store the parsed form of reloptions
402  * in rd_options).
403  */
404  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
405 
406  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
407 
408  /* initialize relation tuple form */
409  relation->rd_rel = relationForm;
410 
411  /* and allocate attribute tuple form storage */
412  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
413  relationForm->relhasoids);
414  /* which we mark as a reference-counted tupdesc */
415  relation->rd_att->tdrefcount = 1;
416 
 /* restore the memory context that was current on entry */
417  MemoryContextSwitchTo(oldcxt);
418 
419  return relation;
420 }
421 
422 /*
423  * RelationParseRelOptions
424  * Convert pg_class.reloptions into pre-parsed rd_options
425  *
426  * tuple is the real pg_class tuple (not rd_rel!) for relation
427  *
428  * Note: rd_rel and (if an index) rd_amroutine must be valid already
 *
 * rd_options is always reset to NULL first. It stays NULL when the relkind
 * is not one of those listed in the switch below, or when extractRelOptions
 * returns NULL; otherwise it receives a copy of the parsed options and the
 * parser's own result is pfree'd.
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing line 431
 * (function name and parameters), line 445 (presumably one more "case"
 * label), line 457 (an argument of extractRelOptions) and line 469 (the
 * start of the statement that allocates rd_options) are missing; restore
 * them from the real source.
429  */
430 static void
432 {
433  bytea *options;
434 
435  relation->rd_options = NULL;
436 
437  /* Fall out if relkind should not have options */
438  switch (relation->rd_rel->relkind)
439  {
440  case RELKIND_RELATION:
441  case RELKIND_TOASTVALUE:
442  case RELKIND_INDEX:
443  case RELKIND_VIEW:
444  case RELKIND_MATVIEW:
446  break;
447  default:
448  return;
449  }
450 
451  /*
452  * Fetch reloptions from tuple; have to use a hardwired descriptor because
453  * we might not have any other for pg_class yet (consider executing this
454  * code for pg_class itself)
 *
 * The access method's amoptions callback is passed only for indexes; every
 * other relkind passes NULL.
455  */
456  options = extractRelOptions(tuple,
458  relation->rd_rel->relkind == RELKIND_INDEX ?
459  relation->rd_amroutine->amoptions : NULL);
460 
461  /*
462  * Copy parsed data into CacheMemoryContext. To guard against the
463  * possibility of leaks in the reloptions code, we want to do the actual
464  * parsing in the caller's memory context and copy the results into
465  * CacheMemoryContext after the fact.
466  */
467  if (options)
468  {
470  VARSIZE(options));
471  memcpy(relation->rd_options, options, VARSIZE(options));
472  pfree(options);
473  }
474 }
475 
476 /*
477  * RelationBuildTupleDesc
478  *
479  * Form the relation's tuple descriptor from information in
480  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
 *
 * On entry rd_rel must be filled in (reltype, relhasoids, relnatts and
 * relchecks are read) and rd_att must already be allocated. On return
 * rd_att holds one entry per pg_attribute row found, and rd_att->constr is
 * either NULL or a TupleConstr describing NOT NULL / DEFAULT / CHECK
 * information.
 *
 * Raises ERROR if a fetched attribute number lies outside 1..relnatts, or if
 * fewer than relnatts attribute rows were found.
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing lines
 * 483 (function name and parameters), 499 (start of the statement that
 * allocates "constr"), 509, 511 and 513 (ScanKeyInit arguments), 524 and 525
 * (systable_beginscan arguments), 547 (the size argument of the memcpy),
 * 557 (start of the attrdef allocation call) and 625 (start of the
 * constr->check allocation call) are missing; restore them from the real
 * source.
481  */
482 static void
484 {
485  HeapTuple pg_attribute_tuple;
486  Relation pg_attribute_desc;
487  SysScanDesc pg_attribute_scan;
488  ScanKeyData skey[2];
489  int need;
490  TupleConstr *constr;
491  AttrDefault *attrdef = NULL;
492  int ndef = 0;
493 
494  /* copy some fields from pg_class row to rd_att */
495  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
496  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
497  relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
498 
500  sizeof(TupleConstr));
501  constr->has_not_null = false;
502 
503  /*
504  * Form a scan key that selects only user attributes (attnum > 0).
505  * (Eliminating system attribute rows at the index level is lots faster
506  * than fetching them.)
507  */
508  ScanKeyInit(&skey[0],
510  BTEqualStrategyNumber, F_OIDEQ,
512  ScanKeyInit(&skey[1],
514  BTGreaterStrategyNumber, F_INT2GT,
515  Int16GetDatum(0));
516 
517  /*
518  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
519  * built the critical relcache entries (this includes initdb and startup
520  * without a pg_internal.init file).
521  */
522  pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
523  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
526  NULL,
527  2, skey);
528 
529  /*
530  * add attribute data to relation->rd_att
 *
 * "need" counts down the attributes still expected, starting at relnatts.
531  */
532  need = relation->rd_rel->relnatts;
533 
534  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
535  {
536  Form_pg_attribute attp;
537 
538  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
539 
 /* the row is stored at index attnum - 1, so attnum must be in range */
540  if (attp->attnum <= 0 ||
541  attp->attnum > relation->rd_rel->relnatts)
542  elog(ERROR, "invalid attribute number %d for %s",
543  attp->attnum, RelationGetRelationName(relation));
544 
545  memcpy(TupleDescAttr(relation->rd_att, attp->attnum - 1),
546  attp,
548 
549  /* Update constraint/default info */
550  if (attp->attnotnull)
551  constr->has_not_null = true;
552 
553  if (attp->atthasdef)
554  {
 /* allocate the defaults array lazily, on the first default seen */
555  if (attrdef == NULL)
556  attrdef = (AttrDefault *)
558  relation->rd_rel->relnatts *
559  sizeof(AttrDefault));
 /* adbin is left NULL here; AttrDefaultFetch is called further down */
560  attrdef[ndef].adnum = attp->attnum;
561  attrdef[ndef].adbin = NULL;
562  ndef++;
563  }
 /* stop scanning once relnatts rows have been consumed */
564  need--;
565  if (need == 0)
566  break;
567  }
568 
569  /*
570  * end the scan and close the attribute relation
571  */
572  systable_endscan(pg_attribute_scan);
573  heap_close(pg_attribute_desc, AccessShareLock);
574 
575  if (need != 0)
576  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
577  need, RelationGetRelid(relation));
578 
579  /*
580  * The attcacheoff values we read from pg_attribute should all be -1
581  * ("unknown"). Verify this if assert checking is on. They will be
582  * computed when and if needed during tuple access.
583  */
584 #ifdef USE_ASSERT_CHECKING
585  {
586  int i;
587 
588  for (i = 0; i < relation->rd_rel->relnatts; i++)
589  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
590  }
591 #endif
592 
593  /*
594  * However, we can easily set the attcacheoff value for the first
595  * attribute: it must be zero. This eliminates the need for special cases
596  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
597  */
598  if (relation->rd_rel->relnatts > 0)
599  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
600 
601  /*
602  * Set up constraint/default info
 *
 * The TupleConstr is kept only if there is something to record; otherwise
 * it is freed and rd_att->constr is set to NULL.
603  */
604  if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
605  {
606  relation->rd_att->constr = constr;
607 
608  if (ndef > 0) /* DEFAULTs */
609  {
 /* shrink the array when fewer than relnatts defaults were found */
610  if (ndef < relation->rd_rel->relnatts)
611  constr->defval = (AttrDefault *)
612  repalloc(attrdef, ndef * sizeof(AttrDefault));
613  else
614  constr->defval = attrdef;
615  constr->num_defval = ndef;
616  AttrDefaultFetch(relation);
617  }
618  else
619  constr->num_defval = 0;
620 
621  if (relation->rd_rel->relchecks > 0) /* CHECKs */
622  {
623  constr->num_check = relation->rd_rel->relchecks;
624  constr->check = (ConstrCheck *)
626  constr->num_check * sizeof(ConstrCheck));
627  CheckConstraintFetch(relation);
628  }
629  else
630  constr->num_check = 0;
631  }
632  else
633  {
634  pfree(constr);
635  relation->rd_att->constr = NULL;
636  }
637 }
638 
639 /*
640  * RelationBuildRuleLock
641  *
642  * Form the relation's rewrite rules from information in
643  * the pg_rewrite system catalog.
644  *
645  * Note: The rule parsetrees are potentially very complex node structures.
646  * To allow these trees to be freed when the relcache entry is flushed,
647  * we make a private memory context to hold the RuleLock information for
648  * each relcache entry that has associated rules. The context is used
649  * just for rule info, not for any other subsidiary data of the relcache
650  * entry, because that keeps the update logic in RelationClearRelation()
651  * manageable. The other subsidiary data structures are simple enough
652  * to be easy to free explicitly, anyway.
 *
 * On return, rd_rules points at a RuleLock holding every rule found and
 * rd_rulescxt at the context that owns it; if no rules were found, both are
 * NULL and the context has been deleted.
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing lines
 * 655 (function name and parameters), 672 and 674 (parts of the call that
 * creates "rulescxt"), 690 and 692 (ScanKeyInit arguments), 702 (presumably
 * the statement that opens pg_rewrite into "rewrite_desc"), 705 (the index
 * argument of systable_beginscan), and 733 and 744 (the attribute-number
 * arguments of the two heap_getattr calls) are missing; restore them from
 * the real source.
653  */
654 static void
656 {
657  MemoryContext rulescxt;
658  MemoryContext oldcxt;
659  HeapTuple rewrite_tuple;
660  Relation rewrite_desc;
661  TupleDesc rewrite_tupdesc;
662  SysScanDesc rewrite_scan;
663  ScanKeyData key;
664  RuleLock *rulelock;
665  int numlocks;
666  RewriteRule **rules;
667  int maxlocks;
668 
669  /*
670  * Make the private context. Assume it'll not contain much data.
671  */
673  RelationGetRelationName(relation),
675  relation->rd_rulescxt = rulescxt;
676 
677  /*
678  * allocate an array to hold the rewrite rules (the array is extended if
679  * necessary)
680  */
681  maxlocks = 4;
682  rules = (RewriteRule **)
683  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
684  numlocks = 0;
685 
686  /*
687  * form a scan key
688  */
689  ScanKeyInit(&key,
691  BTEqualStrategyNumber, F_OIDEQ,
693 
694  /*
695  * open pg_rewrite and begin a scan
696  *
697  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
698  * be reading the rules in name order, except possibly during
699  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
700  * ensures that rules will be fired in name order.
701  */
703  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
704  rewrite_scan = systable_beginscan(rewrite_desc,
706  true, NULL,
707  1, &key);
708 
709  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
710  {
711  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
712  bool isnull;
713  Datum rule_datum;
714  char *rule_str;
715  RewriteRule *rule;
716 
 /* the rule struct itself lives in the private rules context */
717  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
718  sizeof(RewriteRule));
719 
720  rule->ruleId = HeapTupleGetOid(rewrite_tuple);
721 
 /* presumably ev_type is stored as a digit character -- confirm */
722  rule->event = rewrite_form->ev_type - '0';
723  rule->enabled = rewrite_form->ev_enabled;
724  rule->isInstead = rewrite_form->is_instead;
725 
726  /*
727  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
728  * rule strings are often large enough to be toasted. To avoid
729  * leaking memory in the caller's context, do the detoasting here so
730  * we can free the detoasted version.
 *
 * Only the stringToNode() calls run inside rulescxt, so the parsed trees
 * are allocated there while the temporary C strings are not; each string
 * is pfree'd right after it has been parsed.
731  */
732  rule_datum = heap_getattr(rewrite_tuple,
734  rewrite_tupdesc,
735  &isnull);
736  Assert(!isnull);
737  rule_str = TextDatumGetCString(rule_datum);
738  oldcxt = MemoryContextSwitchTo(rulescxt);
739  rule->actions = (List *) stringToNode(rule_str);
740  MemoryContextSwitchTo(oldcxt);
741  pfree(rule_str);
742 
743  rule_datum = heap_getattr(rewrite_tuple,
745  rewrite_tupdesc,
746  &isnull);
747  Assert(!isnull);
748  rule_str = TextDatumGetCString(rule_datum);
749  oldcxt = MemoryContextSwitchTo(rulescxt);
750  rule->qual = (Node *) stringToNode(rule_str);
751  MemoryContextSwitchTo(oldcxt);
752  pfree(rule_str);
753 
754  /*
755  * We want the rule's table references to be checked as though by the
756  * table owner, not the user referencing the rule. Therefore, scan
757  * through the rule's actions and set the checkAsUser field on all
758  * rtable entries. We have to look at the qual as well, in case it
759  * contains sublinks.
760  *
761  * The reason for doing this when the rule is loaded, rather than when
762  * it is stored, is that otherwise ALTER TABLE OWNER would have to
763  * grovel through stored rules to update checkAsUser fields. Scanning
764  * the rule tree during load is relatively cheap (compared to
765  * constructing it in the first place), so we do it here.
766  */
767  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
768  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
769 
 /* double the capacity of the rules array whenever it is full */
770  if (numlocks >= maxlocks)
771  {
772  maxlocks *= 2;
773  rules = (RewriteRule **)
774  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
775  }
776  rules[numlocks++] = rule;
777  }
778 
779  /*
780  * end the scan and close pg_rewrite
781  */
782  systable_endscan(rewrite_scan);
783  heap_close(rewrite_desc, AccessShareLock);
784 
785  /*
786  * there might not be any rules (if relhasrules is out-of-date)
 *
 * In that case discard the private context and leave no rule info behind.
787  */
788  if (numlocks == 0)
789  {
790  relation->rd_rules = NULL;
791  relation->rd_rulescxt = NULL;
792  MemoryContextDelete(rulescxt);
793  return;
794  }
795 
796  /*
797  * form a RuleLock and insert into relation
798  */
799  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
800  rulelock->numLocks = numlocks;
801  rulelock->rules = rules;
802 
803  relation->rd_rules = rulelock;
804 }
805 
806 /*
807  * RelationBuildPartitionKey
808  * Build and attach to relcache partition key data of relation
809  *
810  * Partitioning key data is stored in CacheMemoryContext to ensure it survives
811  * as long as the relcache. To avoid leaking memory in that context in case
812  * of an error partway through this function, we build the structure in the
813  * working context (which must be short-lived) and copy the completed
814  * structure into the cache memory.
815  *
816  * Also, since the structure being created here is sufficiently complex, we
817  * make a private child context of CacheMemoryContext for each relation that
818  * has associated partition key information. That means no complicated logic
819  * to free individual elements whenever the relcache entry is flushed - just
820  * delete the context.
 *
 * Returns without touching the relation if the PARTRELID syscache lookup
 * finds no tuple. Raises ERROR if an operator class cannot be found in the
 * syscache or lacks the required support function.
 *
 * NOTE(review): this text is a numbered listing with gaps. Listing lines
 * 823 (function name and parameters), 825 (presumably the declaration of
 * "form"), 840 (the key argument of SearchSysCache1), 866, 872 and 878 (the
 * attribute-number arguments of the three SysCacheGetAttr calls), 919 (the
 * two alternatives of the "procnum" conditional), 951 (the condition that
 * chooses between "hash" and "btree" in the error message), 976 (start of
 * the call that fills parttyplen/parttypbyval/parttypalign), and 987 and 989
 * (parts of the call that creates "partkeycxt") are missing; restore them
 * from the real source.
821  */
822 static void
824 {
826  HeapTuple tuple;
827  bool isnull;
828  int i;
829  PartitionKey key;
830  AttrNumber *attrs;
831  oidvector *opclass;
832  oidvector *collation;
833  ListCell *partexprs_item;
834  Datum datum;
835  MemoryContext partkeycxt,
836  oldcxt;
837  int16 procnum;
838 
839  tuple = SearchSysCache1(PARTRELID,
841 
842  /*
843  * The following happens when we have created our pg_class entry but not
844  * the pg_partitioned_table entry yet.
845  */
846  if (!HeapTupleIsValid(tuple))
847  return;
848 
 /* working copy, allocated in the current (caller's) memory context */
849  key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
850 
851  /* Fixed-length attributes */
852  form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
853  key->strategy = form->partstrat;
854  key->partnatts = form->partnatts;
855 
856  /*
857  * We can rely on the first variable-length attribute being mapped to the
858  * relevant field of the catalog's C struct, because all previous
859  * attributes are non-nullable and fixed-length.
860  */
861  attrs = form->partattrs.values;
862 
863  /* But use the hard way to retrieve further variable-length attributes */
864  /* Operator class */
865  datum = SysCacheGetAttr(PARTRELID, tuple,
867  Assert(!isnull);
868  opclass = (oidvector *) DatumGetPointer(datum);
869 
870  /* Collation */
871  datum = SysCacheGetAttr(PARTRELID, tuple,
873  Assert(!isnull);
874  collation = (oidvector *) DatumGetPointer(datum);
875 
876  /* Expressions */
877  datum = SysCacheGetAttr(PARTRELID, tuple,
879  if (!isnull)
880  {
881  char *exprString;
882  Node *expr;
883 
884  exprString = TextDatumGetCString(datum);
885  expr = stringToNode(exprString);
886  pfree(exprString);
887 
888  /*
889  * Run the expressions through const-simplification since the planner
890  * will be comparing them to similarly-processed qual clause operands,
891  * and may fail to detect valid matches without this step. We don't
892  * need to bother with canonicalize_qual() though, because partition
893  * expressions are not full-fledged qualification clauses.
894  */
895  expr = eval_const_expressions(NULL, (Node *) expr);
896 
897  /* May as well fix opfuncids too */
898  fix_opfuncids((Node *) expr);
899  key->partexprs = (List *) expr;
900  }
901 
 /* per-attribute arrays, each with partnatts zero-filled entries */
902  key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
903  key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
904  key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
905  key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
906 
907  key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
908 
909  /* Gather type and collation info as well */
910  key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
911  key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
912  key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
913  key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
914  key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
915  key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
916 
917  /* For the hash partitioning, an extended hash function will be used. */
918  procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
920 
921  /* Copy partattrs and fill other per-attribute info */
 /*
  * NOTE(review): the byte count below uses sizeof(int16) although the
  * destination was allocated in units of sizeof(AttrNumber) -- confirm the
  * two types have the same width.
  */
922  memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
923  partexprs_item = list_head(key->partexprs);
924  for (i = 0; i < key->partnatts; i++)
925  {
926  AttrNumber attno = key->partattrs[i];
927  HeapTuple opclasstup;
928  Form_pg_opclass opclassform;
929  Oid funcid;
930 
931  /* Collect opfamily information */
932  opclasstup = SearchSysCache1(CLAOID,
933  ObjectIdGetDatum(opclass->values[i]));
934  if (!HeapTupleIsValid(opclasstup))
935  elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
936 
937  opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
938  key->partopfamily[i] = opclassform->opcfamily;
939  key->partopcintype[i] = opclassform->opcintype;
940 
941  /* Get a support function for the specified opfamily and datatypes */
942  funcid = get_opfamily_proc(opclassform->opcfamily,
943  opclassform->opcintype,
944  opclassform->opcintype,
945  procnum);
946  if (!OidIsValid(funcid))
947  ereport(ERROR,
948  (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
949  errmsg("operator class \"%s\" of access method %s is missing support function %d for data type \"%s\"",
950  NameStr(opclassform->opcname),
952  "hash" : "btree",
953  procnum,
954  format_type_be(opclassform->opcintype))));
955 
956  fmgr_info(funcid, &key->partsupfunc[i]);
957 
958  /* Collation */
959  key->partcollation[i] = collation->values[i];
960 
961  /* Collect type information */
 /*
  * A nonzero attno names a column of the relation, so its type info is
  * read from rd_att; otherwise the info is derived from the expression
  * partexprs_item currently points at.
  */
962  if (attno != 0)
963  {
964  Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
965 
966  key->parttypid[i] = att->atttypid;
967  key->parttypmod[i] = att->atttypmod;
968  key->parttypcoll[i] = att->attcollation;
969  }
970  else
971  {
972  key->parttypid[i] = exprType(lfirst(partexprs_item));
973  key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
974  key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
975  }
977  &key->parttyplen[i],
978  &key->parttypbyval[i],
979  &key->parttypalign[i]);
980 
981  ReleaseSysCache(opclasstup);
982  }
983 
984  ReleaseSysCache(tuple);
985 
986  /* Success --- now copy to the cache memory */
988  RelationGetRelationName(relation),
990  relation->rd_partkeycxt = partkeycxt;
 /* copy_partition_key allocates in the current context, so switch first */
991  oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
992  relation->rd_partkey = copy_partition_key(key);
993  MemoryContextSwitchTo(oldcxt);
994 }
995 
996 /*
997  * copy_partition_key
998  *
999  * The copy is allocated in the current memory context.
1000  */
1001 static PartitionKey
1003 {
1004  PartitionKey newkey;
1005  int n;
1006 
1007  newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
1008 
1009  newkey->strategy = fromkey->strategy;
1010  newkey->partnatts = n = fromkey->partnatts;
1011 
1012  newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
1013  memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
1014 
1015  newkey->partexprs = copyObject(fromkey->partexprs);
1016 
1017  newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
1018  memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
1019 
1020  newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
1021  memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
1022 
1023  newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
1024  memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo));
1025 
1026  newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
1027  memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
1028 
1029  newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
1030  memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
1031 
1032  newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
1033  memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
1034 
1035  newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
1036  memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
1037 
1038  newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
1039  memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
1040 
1041  newkey->parttypalign = (char *) palloc(n * sizeof(bool));
1042  memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
1043 
1044  newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
1045  memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
1046 
1047  return newkey;
1048 }
1049 
1050 /*
1051  * equalRuleLocks
1052  *
1053  * Determine whether two RuleLocks are equivalent
1054  *
1055  * Probably this should be in the rules code someplace...
1056  */
1057 static bool
1059 {
1060  int i;
1061 
1062  /*
1063  * As of 7.3 we assume the rule ordering is repeatable, because
1064  * RelationBuildRuleLock should read 'em in a consistent order. So just
1065  * compare corresponding slots.
1066  */
1067  if (rlock1 != NULL)
1068  {
1069  if (rlock2 == NULL)
1070  return false;
1071  if (rlock1->numLocks != rlock2->numLocks)
1072  return false;
1073  for (i = 0; i < rlock1->numLocks; i++)
1074  {
1075  RewriteRule *rule1 = rlock1->rules[i];
1076  RewriteRule *rule2 = rlock2->rules[i];
1077 
1078  if (rule1->ruleId != rule2->ruleId)
1079  return false;
1080  if (rule1->event != rule2->event)
1081  return false;
1082  if (rule1->enabled != rule2->enabled)
1083  return false;
1084  if (rule1->isInstead != rule2->isInstead)
1085  return false;
1086  if (!equal(rule1->qual, rule2->qual))
1087  return false;
1088  if (!equal(rule1->actions, rule2->actions))
1089  return false;
1090  }
1091  }
1092  else if (rlock2 != NULL)
1093  return false;
1094  return true;
1095 }
1096 
1097 /*
1098  * equalPolicy
1099  *
1100  * Determine whether two policies are equivalent
1101  */
1102 static bool
1104 {
1105  int i;
1106  Oid *r1,
1107  *r2;
1108 
1109  if (policy1 != NULL)
1110  {
1111  if (policy2 == NULL)
1112  return false;
1113 
1114  if (policy1->polcmd != policy2->polcmd)
1115  return false;
1116  if (policy1->hassublinks != policy2->hassublinks)
1117  return false;
1118  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
1119  return false;
1120  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
1121  return false;
1122 
1123  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
1124  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
1125 
1126  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
1127  {
1128  if (r1[i] != r2[i])
1129  return false;
1130  }
1131 
1132  if (!equal(policy1->qual, policy2->qual))
1133  return false;
1134  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
1135  return false;
1136  }
1137  else if (policy2 != NULL)
1138  return false;
1139 
1140  return true;
1141 }
1142 
1143 /*
1144  * equalRSDesc
1145  *
1146  * Determine whether two RowSecurityDesc's are equivalent
 *
 * Two NULL descriptors are equal; a NULL and a non-NULL one are not.
 * Otherwise the policy lists must have the same length, and every pair of
 * entries found at the same list position must satisfy equalPolicy().
 *
 * NOTE(review): this listing jumps from line 1148 to 1150 and from 1166 to
 * 1169, so the function name/parameter line and the declarations of "l" and
 * "r" used inside the loop are not visible in this copy.
1147  */
1148 static bool
1150 {
1151  ListCell *lc,
1152  *rc;
1153 
      /* both absent: trivially equal */
1154  if (rsdesc1 == NULL && rsdesc2 == NULL)
1155  return true;
1156 
      /* exactly one absent: different */
1157  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1158  (rsdesc1 == NULL && rsdesc2 != NULL))
1159  return false;
1160 
1161  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1162  return false;
1163 
1164  /* RelationBuildRowSecurity should build policies in order */
1165  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1166  {
      /* NOTE(review): listing lines 1167-1168 (presumably where l and r are fetched from lc and rc) are absent */
1169 
1170  if (!equalPolicy(l, r))
1171  return false;
1172  }
1173 
1174  return true;
1175 }
1176 
1177 /*
1178  * equalPartitionDescs
1179  * Compare two partition descriptors for logical equality
 *
 * Two NULL descriptors are equal; a NULL and a non-NULL one are not.  Two
 * non-NULL descriptors must agree on nparts, on every entry of the oids
 * array, and on whether boundinfo is present.  When both have boundinfo,
 * the result depends on the bound comparison whose call begins on listing
 * line 1216.
 *
 * "key" is asserted to be non-NULL unless partdesc1 has no partitions.
 *
 * NOTE(review): listing lines 1182 (function name and leading parameters)
 * and 1216 (start of the bound-comparison condition) are absent from this
 * copy; only their continuation lines are visible.
1180  */
1181 static bool
1183  PartitionDesc partdesc2)
1184 {
1185  int i;
1186 
1187  if (partdesc1 != NULL)
1188  {
1189  if (partdesc2 == NULL)
1190  return false;
1191  if (partdesc1->nparts != partdesc2->nparts)
1192  return false;
1193 
1194  Assert(key != NULL || partdesc1->nparts == 0);
1195 
1196  /*
1197  * Same oids? If the partitioning structure did not change, that is,
1198  * no partitions were added or removed to the relation, the oids array
1199  * should still match element-by-element.
1200  */
1201  for (i = 0; i < partdesc1->nparts; i++)
1202  {
1203  if (partdesc1->oids[i] != partdesc2->oids[i])
1204  return false;
1205  }
1206 
1207  /*
1208  * Now compare partition bound collections. The logic to iterate over
1209  * the collections is private to partition.c.
1210  */
1211  if (partdesc1->boundinfo != NULL)
1212  {
1213  if (partdesc2->boundinfo == NULL)
1214  return false;
1215 
      /* NOTE(review): listing line 1216 (the call these arguments belong to) is absent */
1217  key->parttypbyval,
1218  partdesc1->boundinfo,
1219  partdesc2->boundinfo))
1220  return false;
1221  }
1222  else if (partdesc2->boundinfo != NULL)
1223  return false;
1224  }
1225  else if (partdesc2 != NULL)
1226  return false;
1227 
1228  return true;
1229 }
1230 
1231 /*
1232  * RelationBuildDesc
1233  *
1234  * Build a relation descriptor. The caller must hold at least
1235  * AccessShareLock on the target relid.
1236  *
1237  * The new descriptor is inserted into the hash table if insertIt is true.
1238  *
1239  * Returns NULL if no pg_class row could be found for the given relid
1240  * (suggesting we are trying to access a just-deleted relation).
1241  * Any other error is reported via elog.
 *
 * The returned entry has rd_refcnt = 0 and is not nailed.  rd_isvalid is
 * set only as the very last step, after the optional hash-table insertion.
 *
 * NOTE(review): listing lines 1287-1288 and 1291-1292 are absent from this
 * copy (the numbering jumps), so whatever followed the rd_isnailed
 * assignment and the first case label(s) of the relpersistence switch are
 * not visible here.
1242  */
1243 static Relation
1244 RelationBuildDesc(Oid targetRelId, bool insertIt)
1245 {
1246  Relation relation;
1247  Oid relid;
1248  HeapTuple pg_class_tuple;
1249  Form_pg_class relp;
1250 
1251  /*
1252  * find the tuple in pg_class corresponding to the given relation id
1253  */
1254  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1255 
1256  /*
1257  * if no such tuple exists, return NULL
1258  */
1259  if (!HeapTupleIsValid(pg_class_tuple))
1260  return NULL;
1261 
1262  /*
1263  * get information from the pg_class_tuple
1264  */
1265  relid = HeapTupleGetOid(pg_class_tuple);
1266  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1267  Assert(relid == targetRelId);
1268 
1269  /*
1270  * allocate storage for the relation descriptor, and copy pg_class_tuple
1271  * to relation->rd_rel.
1272  */
1273  relation = AllocateRelationDesc(relp);
1274 
1275  /*
1276  * initialize the relation's relation id (relation->rd_id)
1277  */
1278  RelationGetRelid(relation) = relid;
1279 
1280  /*
1281  * normal relations are not nailed into the cache; nor can a pre-existing
1282  * relation be new. It could be temp though. (Actually, it could be new
1283  * too, but it's okay to forget that fact if forced to flush the entry.)
1284  */
1285  relation->rd_refcnt = 0;
1286  relation->rd_isnailed = false;
      /* NOTE(review): listing lines 1287-1288 are absent here */
1289  switch (relation->rd_rel->relpersistence)
1290  {
      /* NOTE(review): listing lines 1291-1292 (the case label(s) for the branch below) are absent */
1293  relation->rd_backend = InvalidBackendId;
1294  relation->rd_islocaltemp = false;
1295  break;
1296  case RELPERSISTENCE_TEMP:
1297  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1298  {
1299  relation->rd_backend = BackendIdForTempRelations();
1300  relation->rd_islocaltemp = true;
1301  }
1302  else
1303  {
1304  /*
1305  * If it's a temp table, but not one of ours, we have to use
1306  * the slow, grotty method to figure out the owning backend.
1307  *
1308  * Note: it's possible that rd_backend gets set to MyBackendId
1309  * here, in case we are looking at a pg_class entry left over
1310  * from a crashed backend that coincidentally had the same
1311  * BackendId we're using. We should *not* consider such a
1312  * table to be "ours"; this is why we need the separate
1313  * rd_islocaltemp flag. The pg_class entry will get flushed
1314  * if/when we clean out the corresponding temp table namespace
1315  * in preparation for using it.
1316  */
1317  relation->rd_backend =
1318  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1319  Assert(relation->rd_backend != InvalidBackendId);
1320  relation->rd_islocaltemp = false;
1321  }
1322  break;
1323  default:
1324  elog(ERROR, "invalid relpersistence: %c",
1325  relation->rd_rel->relpersistence);
1326  break;
1327  }
1328 
1329  /*
1330  * initialize the tuple descriptor (relation->rd_att).
1331  */
1332  RelationBuildTupleDesc(relation);
1333 
1334  /*
1335  * Fetch rules and triggers that affect this relation
1336  */
1337  if (relation->rd_rel->relhasrules)
1338  RelationBuildRuleLock(relation);
1339  else
1340  {
1341  relation->rd_rules = NULL;
1342  relation->rd_rulescxt = NULL;
1343  }
1344 
1345  if (relation->rd_rel->relhastriggers)
1346  RelationBuildTriggers(relation);
1347  else
1348  relation->trigdesc = NULL;
1349 
      /* row-security policies are loaded only when relrowsecurity is set */
1350  if (relation->rd_rel->relrowsecurity)
1351  RelationBuildRowSecurity(relation);
1352  else
1353  relation->rd_rsdesc = NULL;
1354 
1355  /* foreign key data is not loaded till asked for */
1356  relation->rd_fkeylist = NIL;
1357  relation->rd_fkeyvalid = false;
1358 
1359  /* if a partitioned table, initialize key and partition descriptor info */
1360  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1361  {
1362  RelationBuildPartitionKey(relation);
1363  RelationBuildPartitionDesc(relation);
1364  }
1365  else
1366  {
1367  relation->rd_partkeycxt = NULL;
1368  relation->rd_partkey = NULL;
1369  relation->rd_partdesc = NULL;
1370  relation->rd_pdcxt = NULL;
1371  }
1372 
1373  /*
1374  * if it's an index, initialize index-related information
      * (the test actually applied is that pg_class.relam is a valid OID)
1375  */
1376  if (OidIsValid(relation->rd_rel->relam))
1377  RelationInitIndexAccessInfo(relation);
1378 
1379  /* extract reloptions if any */
1380  RelationParseRelOptions(relation, pg_class_tuple);
1381 
1382  /*
1383  * initialize the relation lock manager information
1384  */
1385  RelationInitLockInfo(relation); /* see lmgr.c */
1386 
1387  /*
1388  * initialize physical addressing information for the relation
1389  */
1390  RelationInitPhysicalAddr(relation);
1391 
1392  /* make sure relation is marked as having no open file yet */
1393  relation->rd_smgr = NULL;
1394 
1395  /*
1396  * now we can free the memory allocated for pg_class_tuple
1397  */
1398  heap_freetuple(pg_class_tuple);
1399 
1400  /*
1401  * Insert newly created relation into relcache hash table, if requested.
1402  *
1403  * There is one scenario in which we might find a hashtable entry already
1404  * present, even though our caller failed to find it: if the relation is a
1405  * system catalog or index that's used during relcache load, we might have
1406  * recursively created the same relcache entry during the preceding steps.
1407  * So allow RelationCacheInsert to delete any already-present relcache
1408  * entry for the same OID. The already-present entry should have refcount
1409  * zero (else somebody forgot to close it); in the event that it doesn't,
1410  * we'll elog a WARNING and leak the already-present entry.
1411  */
1412  if (insertIt)
1413  RelationCacheInsert(relation, true);
1414 
1415  /* It's fully valid */
1416  relation->rd_isvalid = true;
1417 
1418  return relation;
1419 }
1420 
1421 /*
1422  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1423  *
1424  * Note: at the physical level, relations in the pg_global tablespace must
1425  * be treated as shared, even if relisshared isn't set. Hence we do not
1426  * look at relisshared here.
 * (relisshared is still handed to the relation mapper below when the
 * relation has no relfilenode of its own.)
 *
 * Fills relation->rd_node as follows:
 *   spcNode - rd_rel->reltablespace if nonzero, else MyDatabaseTableSpace
 *   dbNode  - InvalidOid if spcNode is GLOBALTABLESPACE_OID, else MyDatabaseId
 *   relNode - rd_rel->relfilenode if nonzero, else the filenode reported by
 *             RelationMapOidToFilenode (an error is raised if that is invalid)
 *
 * In the relfilenode branch, rd_rel->reltablespace and rd_rel->relfilenode
 * may first be refreshed from a newly scanned pg_class tuple.
 *
 * NOTE(review): listing lines 1429 (function name/parameter line) and
 * 1452-1453 (start of the condition guarding the pg_class re-read) are
 * absent from this copy.
1427  */
1428 static void
1430 {
1431  if (relation->rd_rel->reltablespace)
1432  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1433  else
1434  relation->rd_node.spcNode = MyDatabaseTableSpace;
1435  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1436  relation->rd_node.dbNode = InvalidOid;
1437  else
1438  relation->rd_node.dbNode = MyDatabaseId;
1439 
1440  if (relation->rd_rel->relfilenode)
1441  {
1442  /*
1443  * Even if we are using a decoding snapshot that doesn't represent the
1444  * current state of the catalog we need to make sure the filenode
1445  * points to the current file since the older file will be gone (or
1446  * truncated). The new file will still contain older rows so lookups
1447  * in them will work correctly. This wouldn't work correctly if
1448  * rewrites were allowed to change the schema in an incompatible way,
1449  * but those are prevented both on catalog tables and on user tables
1450  * declared as additional catalog tables.
1451  */
      /* NOTE(review): listing lines 1452-1453 (the opening of this if-condition) are absent */
1454  && IsTransactionState())
1455  {
1456  HeapTuple phys_tuple;
1457  Form_pg_class physrel;
1458 
1459  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1460  RelationGetRelid(relation) != ClassOidIndexId,
1461  true);
1462  if (!HeapTupleIsValid(phys_tuple))
1463  elog(ERROR, "could not find pg_class entry for %u",
1464  RelationGetRelid(relation));
1465  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1466 
      /* overwrite the cached values with those from the fresh tuple */
1467  relation->rd_rel->reltablespace = physrel->reltablespace;
1468  relation->rd_rel->relfilenode = physrel->relfilenode;
1469  heap_freetuple(phys_tuple);
1470  }
1471 
1472  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1473  }
1474  else
1475  {
1476  /* Consult the relation mapper */
1477  relation->rd_node.relNode =
1478  RelationMapOidToFilenode(relation->rd_id,
1479  relation->rd_rel->relisshared);
1480  if (!OidIsValid(relation->rd_node.relNode))
1481  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1482  RelationGetRelationName(relation), relation->rd_id);
1483  }
1484 }
1485 
1486 /*
1487  * Fill in the IndexAmRoutine for an index relation.
1488  *
1489  * relation's rd_amhandler and rd_indexcxt must be valid already.
1490  */
1491 static void
1493 {
1494  IndexAmRoutine *cached,
1495  *tmp;
1496 
1497  /*
1498  * Call the amhandler in current, short-lived memory context, just in case
1499  * it leaks anything (it probably won't, but let's be paranoid).
1500  */
1501  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1502 
1503  /* OK, now transfer the data into relation's rd_indexcxt. */
1504  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1505  sizeof(IndexAmRoutine));
1506  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1507  relation->rd_amroutine = cached;
1508 
1509  pfree(tmp);
1510 }
1511 
1512 /*
1513  * Initialize index-access-method support data for an index relation
 *
 * Steps, in order: copy the index's pg_index tuple into rd_indextuple /
 * rd_index; look up the access method's handler OID (rd_amhandler); check
 * that relnatts agrees with indnatts; set up rd_indexcxt and fetch the AM
 * routine; allocate and fill the per-column arrays rd_opfamily,
 * rd_opcintype, rd_support, rd_supportinfo, rd_indcollation and
 * rd_indoption; finally reset the lazily filled fields (rd_indexprs,
 * rd_indpred, rd_excl*, rd_amcache).
 *
 * NOTE(review): listing lines 1516, 1542, 1569, 1571, 1615-1616, 1628-1629
 * and 1647-1648 are absent from this copy, so the function name/parameter
 * line, the memory-context switch before heap_copytuple, the head and tail
 * of the context-creation call, and the middle arguments of the three
 * fastgetattr calls are not visible here.
1514  */
1515 void
1517 {
1518  HeapTuple tuple;
1519  Form_pg_am aform;
1520  Datum indcollDatum;
1521  Datum indclassDatum;
1522  Datum indoptionDatum;
1523  bool isnull;
1524  oidvector *indcoll;
1525  oidvector *indclass;
1526  int2vector *indoption;
1527  MemoryContext indexcxt;
1528  MemoryContext oldcontext;
1529  int natts;
1530  uint16 amsupport;
1531 
1532  /*
1533  * Make a copy of the pg_index entry for the index. Since pg_index
1534  * contains variable-length and possibly-null fields, we have to do this
1535  * honestly rather than just treating it as a Form_pg_index struct.
1536  */
1537  tuple = SearchSysCache1(INDEXRELID,
1538  ObjectIdGetDatum(RelationGetRelid(relation)));
1539  if (!HeapTupleIsValid(tuple))
1540  elog(ERROR, "cache lookup failed for index %u",
1541  RelationGetRelid(relation));
      /* NOTE(review): listing line 1542 (presumably where oldcontext is set by a context switch) is absent */
1543  relation->rd_indextuple = heap_copytuple(tuple);
1544  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1545  MemoryContextSwitchTo(oldcontext);
1546  ReleaseSysCache(tuple);
1547 
1548  /*
1549  * Look up the index's access method, save the OID of its handler function
1550  */
1551  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1552  if (!HeapTupleIsValid(tuple))
1553  elog(ERROR, "cache lookup failed for access method %u",
1554  relation->rd_rel->relam);
1555  aform = (Form_pg_am) GETSTRUCT(tuple);
1556  relation->rd_amhandler = aform->amhandler;
1557  ReleaseSysCache(tuple);
1558 
      /* the two catalogs must agree on the number of index columns */
1559  natts = relation->rd_rel->relnatts;
1560  if (natts != relation->rd_index->indnatts)
1561  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1562  RelationGetRelid(relation));
1563 
1564  /*
1565  * Make the private context to hold index access info. The reason we need
1566  * a context, and not just a couple of pallocs, is so that we won't leak
1567  * any subsidiary info attached to fmgr lookup records.
1568  */
      /* NOTE(review): listing lines 1569 and 1571 (start and end of the call that creates indexcxt) are absent */
1570  RelationGetRelationName(relation),
1572  relation->rd_indexcxt = indexcxt;
1573 
1574  /*
1575  * Now we can fetch the index AM's API struct
1576  */
1577  InitIndexAmRoutine(relation);
1578 
1579  /*
1580  * Allocate arrays to hold data
1581  */
1582  relation->rd_opfamily = (Oid *)
1583  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1584  relation->rd_opcintype = (Oid *)
1585  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1586 
      /* support arrays hold amsupport entries per index column */
1587  amsupport = relation->rd_amroutine->amsupport;
1588  if (amsupport > 0)
1589  {
1590  int nsupport = natts * amsupport;
1591 
1592  relation->rd_support = (RegProcedure *)
1593  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1594  relation->rd_supportinfo = (FmgrInfo *)
1595  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1596  }
1597  else
1598  {
1599  relation->rd_support = NULL;
1600  relation->rd_supportinfo = NULL;
1601  }
1602 
1603  relation->rd_indcollation = (Oid *)
1604  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1605 
1606  relation->rd_indoption = (int16 *)
1607  MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1608 
1609  /*
1610  * indcollation cannot be referenced directly through the C struct,
1611  * because it comes after the variable-width indkey field. Must extract
1612  * the datum the hard way...
1613  */
1614  indcollDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1615-1616 (middle arguments of this call) are absent */
1617  &isnull);
1618  Assert(!isnull);
1619  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1620  memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1621 
1622  /*
1623  * indclass cannot be referenced directly through the C struct, because it
1624  * comes after the variable-width indkey field. Must extract the datum
1625  * the hard way...
1626  */
1627  indclassDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1628-1629 (middle arguments of this call) are absent */
1630  &isnull);
1631  Assert(!isnull);
1632  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1633 
1634  /*
1635  * Fill the support procedure OID array, as well as the info about
1636  * opfamilies and opclass input types. (aminfo and supportinfo are left
1637  * as zeroes, and are filled on-the-fly when used)
1638  */
1639  IndexSupportInitialize(indclass, relation->rd_support,
1640  relation->rd_opfamily, relation->rd_opcintype,
1641  amsupport, natts);
1642 
1643  /*
1644  * Similarly extract indoption and copy it to the cache entry
1645  */
1646  indoptionDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1647-1648 (middle arguments of this call) are absent */
1649  &isnull);
1650  Assert(!isnull);
1651  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1652  memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1653 
1654  /*
1655  * expressions, predicate, exclusion caches will be filled later
1656  */
1657  relation->rd_indexprs = NIL;
1658  relation->rd_indpred = NIL;
1659  relation->rd_exclops = NULL;
1660  relation->rd_exclprocs = NULL;
1661  relation->rd_exclstrats = NULL;
1662  relation->rd_amcache = NULL;
1663 }
1664 
1665 /*
1666  * IndexSupportInitialize
1667  * Initializes an index's cached opclass information,
1668  * given the index's pg_index.indclass entry.
1669  *
1670  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1671  * which are arrays allocated by the caller.
1672  *
1673  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1674  * indicate the size of the arrays it has allocated --- but in practice these
1675  * numbers must always match those obtainable from the system catalog entries
1676  * for the index and access method.
1677  */
1678 static void
1680  RegProcedure *indexSupport,
1681  Oid *opFamily,
1682  Oid *opcInType,
1683  StrategyNumber maxSupportNumber,
1684  AttrNumber maxAttributeNumber)
1685 {
1686  int attIndex;
1687 
1688  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1689  {
1690  OpClassCacheEnt *opcentry;
1691 
1692  if (!OidIsValid(indclass->values[attIndex]))
1693  elog(ERROR, "bogus pg_index tuple");
1694 
1695  /* look up the info for this opclass, using a cache */
1696  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1697  maxSupportNumber);
1698 
1699  /* copy cached data into relcache entry */
1700  opFamily[attIndex] = opcentry->opcfamily;
1701  opcInType[attIndex] = opcentry->opcintype;
1702  if (maxSupportNumber > 0)
1703  memcpy(&indexSupport[attIndex * maxSupportNumber],
1704  opcentry->supportProcs,
1705  maxSupportNumber * sizeof(RegProcedure));
1706  }
1707 }
1708 
1709 /*
1710  * LookupOpclassInfo
1711  *
1712  * This routine maintains a per-opclass cache of the information needed
1713  * by IndexSupportInitialize(). This is more efficient than relying on
1714  * the catalog cache, because we can load all the info about a particular
1715  * opclass in a single indexscan of pg_amproc.
1716  *
1717  * The information from pg_am about expected range of support function
1718  * numbers is passed in, rather than being looked up, mainly because the
1719  * caller will have it already.
1720  *
1721  * Note there is no provision for flushing the cache. This is OK at the
1722  * moment because there is no way to ALTER any interesting properties of an
1723  * existing opclass --- all you can do is drop it, which will result in
1724  * a useless but harmless dead entry in the cache. To support altering
1725  * opclass membership (not the same as opfamily membership!), we'd need to
1726  * be able to flush this cache as well as the contents of relcache entries
1727  * for indexes.
 *
 * The returned pointer refers to the entry stored in the OpClassCache hash
 * table.  An entry is created with valid = false and is marked valid only
 * after both catalog scans have completed, so an entry left behind by a
 * failed load is reloaded on the next call.
 *
 * NOTE(review): listing lines 1754, 1769, 1811, 1814, 1829, 1838, 1842,
 * 1846, 1849 and 1867 are absent from this copy.  Among other things the
 * statements that assign "rel" and the attribute-number arguments of the
 * ScanKeyInit calls are therefore not visible here.
1728  */
1729 static OpClassCacheEnt *
1730 LookupOpclassInfo(Oid operatorClassOid,
1731  StrategyNumber numSupport)
1732 {
1733  OpClassCacheEnt *opcentry;
1734  bool found;
1735  Relation rel;
1736  SysScanDesc scan;
1737  ScanKeyData skey[3];
1738  HeapTuple htup;
1739  bool indexOK;
1740 
1741  if (OpClassCache == NULL)
1742  {
1743  /* First time through: initialize the opclass cache */
1744  HASHCTL ctl;
1745 
1746  MemSet(&ctl, 0, sizeof(ctl));
1747  ctl.keysize = sizeof(Oid);
1748  ctl.entrysize = sizeof(OpClassCacheEnt);
1749  OpClassCache = hash_create("Operator class cache", 64,
1750  &ctl, HASH_ELEM | HASH_BLOBS);
1751 
1752  /* Also make sure CacheMemoryContext exists */
1753  if (!CacheMemoryContext)
      /* NOTE(review): listing line 1754 (the statement controlled by this if) is absent */
1755  }
1756 
      /* find the entry for this opclass, creating an empty one if needed */
1757  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1758  (void *) &operatorClassOid,
1759  HASH_ENTER, &found);
1760 
1761  if (!found)
1762  {
1763  /* Need to allocate memory for new entry */
1764  opcentry->valid = false; /* until known OK */
1765  opcentry->numSupport = numSupport;
1766 
1767  if (numSupport > 0)
1768  opcentry->supportProcs = (RegProcedure *)
      /* NOTE(review): listing line 1769 (the allocation call this size argument belongs to) is absent */
1770  numSupport * sizeof(RegProcedure));
1771  else
1772  opcentry->supportProcs = NULL;
1773  }
1774  else
1775  {
      /* an existing entry must have been built for the same support count */
1776  Assert(numSupport == opcentry->numSupport);
1777  }
1778 
1779  /*
1780  * When testing for cache-flush hazards, we intentionally disable the
1781  * operator class cache and force reloading of the info on each call. This
1782  * is helpful because we want to test the case where a cache flush occurs
1783  * while we are loading the info, and it's very hard to provoke that if
1784  * this happens only once per opclass per backend.
1785  */
1786 #if defined(CLOBBER_CACHE_ALWAYS)
1787  opcentry->valid = false;
1788 #endif
1789 
1790  if (opcentry->valid)
1791  return opcentry;
1792 
1793  /*
1794  * Need to fill in new entry.
1795  *
1796  * To avoid infinite recursion during startup, force heap scans if we're
1797  * looking up info for the opclasses used by the indexes we would like to
1798  * reference here.
1799  */
1800  indexOK = criticalRelcachesBuilt ||
1801  (operatorClassOid != OID_BTREE_OPS_OID &&
1802  operatorClassOid != INT2_BTREE_OPS_OID);
1803 
1804  /*
1805  * We have to fetch the pg_opclass row to determine its opfamily and
1806  * opcintype, which are needed to look up related operators and functions.
1807  * It'd be convenient to use the syscache here, but that probably doesn't
1808  * work while bootstrapping.
1809  */
1810  ScanKeyInit(&skey[0],
      /* NOTE(review): listing line 1811 (the key column argument) is absent */
1812  BTEqualStrategyNumber, F_OIDEQ,
1813  ObjectIdGetDatum(operatorClassOid));
      /* NOTE(review): listing line 1814 (presumably where rel is opened) is absent */
1815  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1816  NULL, 1, skey);
1817 
1818  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1819  {
1820  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1821 
1822  opcentry->opcfamily = opclassform->opcfamily;
1823  opcentry->opcintype = opclassform->opcintype;
1824  }
1825  else
1826  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1827 
1828  systable_endscan(scan);
      /* NOTE(review): listing line 1829 (presumably where rel is closed) is absent */
1830 
1831  /*
1832  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1833  * the default ones (those with lefttype = righttype = opcintype).
1834  */
1835  if (numSupport > 0)
1836  {
      /* NOTE(review): the key column arguments (listing lines 1838, 1842, 1846) are absent below */
1837  ScanKeyInit(&skey[0],
1839  BTEqualStrategyNumber, F_OIDEQ,
1840  ObjectIdGetDatum(opcentry->opcfamily));
1841  ScanKeyInit(&skey[1],
1843  BTEqualStrategyNumber, F_OIDEQ,
1844  ObjectIdGetDatum(opcentry->opcintype));
1845  ScanKeyInit(&skey[2],
1847  BTEqualStrategyNumber, F_OIDEQ,
1848  ObjectIdGetDatum(opcentry->opcintype));
      /* NOTE(review): listing line 1849 (presumably where rel is opened) is absent */
1850  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1851  NULL, 3, skey);
1852 
1853  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1854  {
1855  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1856 
      /* amprocnum is 1-based and must fit the supportProcs array */
1857  if (amprocform->amprocnum <= 0 ||
1858  (StrategyNumber) amprocform->amprocnum > numSupport)
1859  elog(ERROR, "invalid amproc number %d for opclass %u",
1860  amprocform->amprocnum, operatorClassOid);
1861 
1862  opcentry->supportProcs[amprocform->amprocnum - 1] =
1863  amprocform->amproc;
1864  }
1865 
1866  systable_endscan(scan);
      /* NOTE(review): listing line 1867 (presumably where rel is closed) is absent */
1868  }
1869 
1870  opcentry->valid = true;
1871  return opcentry;
1872 }
1873 
1874 
1875 /*
1876  * formrdesc
1877  *
1878  * This is a special cut-down version of RelationBuildDesc(),
1879  * used while initializing the relcache.
1880  * The relation descriptor is built just from the supplied parameters,
1881  * without actually looking at any system table entries. We cheat
1882  * quite a lot since we only need to work for a few basic system
1883  * catalogs.
1884  *
1885  * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1886  * pg_shseclabel, pg_class, pg_attribute, pg_proc, and pg_type
1887  * (see RelationCacheInitializePhase2/3).
1888  *
1889  * Note that these catalogs can't have constraints (except attnotnull),
1890  * default values, rules, or triggers, since we don't cope with any of that.
1891  * (Well, actually, this only matters for properties that need to be valid
1892  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1893  * these properties matter then...)
1894  *
1895  * NOTE: we assume we are already switched into CacheMemoryContext.
 *
 * Parameters:
 *   relationName    - name stored in rd_rel->relname
 *   relationReltype - stored as rd_rel->reltype and as the tupdesc's tdtypeid
 *   isshared        - stored as relisshared; if true, reltablespace is set
 *                     to GLOBALTABLESPACE_OID
 *   hasoids         - stored as relhasoids and passed to
 *                     CreateTemplateTupleDesc
 *   natts, attrs    - number of attributes and their pg_attribute data;
 *                     attrs[0].attrelid supplies the relation's OID, so
 *                     natts must be at least 1
 *
 * The finished entry is nailed (rd_refcnt = 1), entered into the relcache
 * hash table, and then marked valid.
 *
 * NOTE(review): listing lines 1924-1925, 1938, 1987, 2017-2018 and 2035 are
 * absent from this copy; inline notes mark where they would have been.
1896  */
1897 static void
1898 formrdesc(const char *relationName, Oid relationReltype,
1899  bool isshared, bool hasoids,
1900  int natts, const FormData_pg_attribute *attrs)
1901 {
1902  Relation relation;
1903  int i;
1904  bool has_not_null;
1905 
1906  /*
1907  * allocate new relation desc, clear all fields of reldesc
1908  */
1909  relation = (Relation) palloc0(sizeof(RelationData));
1910 
1911  /* make sure relation is marked as having no open file yet */
1912  relation->rd_smgr = NULL;
1913 
1914  /*
1915  * initialize reference count: 1 because it is nailed in cache
1916  */
1917  relation->rd_refcnt = 1;
1918 
1919  /*
1920  * all entries built with this routine are nailed-in-cache; none are for
1921  * new or temp relations.
1922  */
1923  relation->rd_isnailed = true;
      /* NOTE(review): listing lines 1924-1925 are absent here */
1926  relation->rd_backend = InvalidBackendId;
1927  relation->rd_islocaltemp = false;
1928 
1929  /*
1930  * initialize relation tuple form
1931  *
1932  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1933  * get us launched. RelationCacheInitializePhase3() will read the real
1934  * data from pg_class and replace what we've done here. Note in
1935  * particular that relowner is left as zero; this cues
1936  * RelationCacheInitializePhase3 that the real data isn't there yet.
1937  */
      /* NOTE(review): listing line 1938 is absent; presumably it allocates rd_rel, which is dereferenced just below */
1939 
1940  namestrcpy(&relation->rd_rel->relname, relationName);
1941  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1942  relation->rd_rel->reltype = relationReltype;
1943 
1944  /*
1945  * It's important to distinguish between shared and non-shared relations,
1946  * even at bootstrap time, to make sure we know where they are stored.
1947  */
1948  relation->rd_rel->relisshared = isshared;
1949  if (isshared)
1950  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1951 
1952  /* formrdesc is used only for permanent relations */
1953  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1954 
1955  /* ... and they're always populated, too */
1956  relation->rd_rel->relispopulated = true;
1957 
1958  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1959  relation->rd_rel->relpages = 0;
1960  relation->rd_rel->reltuples = 0;
1961  relation->rd_rel->relallvisible = 0;
1962  relation->rd_rel->relkind = RELKIND_RELATION;
1963  relation->rd_rel->relhasoids = hasoids;
1964  relation->rd_rel->relnatts = (int16) natts;
1965 
1966  /*
1967  * initialize attribute tuple form
1968  *
1969  * Unlike the case with the relation tuple, this data had better be right
1970  * because it will never be replaced. The data comes from
1971  * src/include/catalog/ headers via genbki.pl.
1972  */
1973  relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1974  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1975 
1976  relation->rd_att->tdtypeid = relationReltype;
1977  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1978 
1979  /*
1980  * initialize tuple desc info
1981  */
1982  has_not_null = false;
1983  for (i = 0; i < natts; i++)
1984  {
1985  memcpy(TupleDescAttr(relation->rd_att, i),
1986  &attrs[i],
      /* NOTE(review): listing line 1987 (the size argument of this memcpy) is absent */
1988  has_not_null |= attrs[i].attnotnull;
1989  /* make sure attcacheoff is valid */
1990  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1991  }
1992 
1993  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1994  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1995 
1996  /* mark not-null status */
1997  if (has_not_null)
1998  {
1999  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2000 
2001  constr->has_not_null = true;
2002  relation->rd_att->constr = constr;
2003  }
2004 
2005  /*
2006  * initialize relation id from info in att array (my, this is ugly)
2007  */
2008  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
2009 
2010  /*
2011  * All relations made with formrdesc are mapped. This is necessarily so
2012  * because there is no other way to know what filenode they currently
2013  * have. In bootstrap mode, add them to the initial relation mapper data,
2014  * specifying that the initial filenode is the same as the OID.
2015  */
2016  relation->rd_rel->relfilenode = InvalidOid;
      /* NOTE(review): listing lines 2017-2018 (the bootstrap-mode test and the start of the mapper call) are absent */
2019  RelationGetRelid(relation),
2020  isshared, true);
2021 
2022  /*
2023  * initialize the relation lock manager information
2024  */
2025  RelationInitLockInfo(relation); /* see lmgr.c */
2026 
2027  /*
2028  * initialize physical addressing information for the relation
2029  */
2030  RelationInitPhysicalAddr(relation);
2031 
2032  /*
2033  * initialize the rel-has-index flag, using hardwired knowledge
2034  */
      /* NOTE(review): listing line 2035 (the if-condition selecting the bootstrap branch) is absent */
2036  {
2037  /* In bootstrap mode, we have no indexes */
2038  relation->rd_rel->relhasindex = false;
2039  }
2040  else
2041  {
2042  /* Otherwise, all the rels formrdesc is used for have indexes */
2043  relation->rd_rel->relhasindex = true;
2044  }
2045 
2046  /*
2047  * add new reldesc to relcache
2048  */
2049  RelationCacheInsert(relation, false);
2050 
2051  /* It's fully valid */
2052  relation->rd_isvalid = true;
2053 }
2054 
2055 
2056 /* ----------------------------------------------------------------
2057  * Relation Descriptor Lookup Interface
2058  * ----------------------------------------------------------------
2059  */
2060 
2061 /*
2062  * RelationIdGetRelation
2063  *
2064  * Lookup a reldesc by OID; make one if not already in cache.
2065  *
2066  * Returns NULL if no pg_class row could be found for the given relid
2067  * (suggesting we are trying to access a just-deleted relation).
2068  * Any other error is reported via elog.
2069  *
2070  * NB: caller should already have at least AccessShareLock on the
2071  * relation ID, else there are nasty race conditions.
2072  *
2073  * NB: relation ref count is incremented, or set to 1 if new entry.
2074  * Caller should eventually decrement count. (Usually,
2075  * that happens by calling RelationClose().)
2076  */
2077 Relation
2079 {
2080  Relation rd;
2081 
2082  /* Make sure we're in an xact, even if this ends up being a cache hit */
2084 
2085  /*
2086  * first try to find reldesc in the cache
2087  */
2088  RelationIdCacheLookup(relationId, rd);
2089 
2090  if (RelationIsValid(rd))
2091  {
2093  /* revalidate cache entry if necessary */
2094  if (!rd->rd_isvalid)
2095  {
2096  /*
2097  * Indexes only have a limited number of possible schema changes,
2098  * and we don't want to use the full-blown procedure because it's
2099  * a headache for indexes that reload itself depends on.
2100  */
2101  if (rd->rd_rel->relkind == RELKIND_INDEX)
2103  else
2104  RelationClearRelation(rd, true);
2105  Assert(rd->rd_isvalid);
2106  }
2107  return rd;
2108  }
2109 
2110  /*
2111  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2112  * it.
2113  */
2114  rd = RelationBuildDesc(relationId, true);
2115  if (RelationIsValid(rd))
2117  return rd;
2118 }
2119 
2120 /* ----------------------------------------------------------------
2121  * cache invalidation support routines
2122  * ----------------------------------------------------------------
2123  */
2124 
2125 /*
2126  * RelationIncrementReferenceCount
2127  * Increments relation reference count.
2128  *
2129  * Note: bootstrap mode has its own weird ideas about relation refcount
2130  * behavior; we ought to fix it someday, but for now, just disable
2131  * reference count ownership tracking in bootstrap mode.
2132  */
2133 void
2135 {
2137  rel->rd_refcnt += 1;
2140 }
2141 
2142 /*
2143  * RelationDecrementReferenceCount
2144  * Decrements relation reference count.
2145  */
2146 void
2148 {
2149  Assert(rel->rd_refcnt > 0);
2150  rel->rd_refcnt -= 1;
2153 }
2154 
2155 /*
2156  * RelationClose - close an open relation
2157  *
2158  * Actually, we just decrement the refcount.
2159  *
2160  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2161  * will be freed as soon as their refcount goes to zero. In combination
2162  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2163  * to catch references to already-released relcache entries. It slows
2164  * things down quite a bit, however.
2165  */
2166 void
2168 {
2169  /* Note: no locking manipulations needed */
2171 
2172 #ifdef RELCACHE_FORCE_RELEASE
2173  if (RelationHasReferenceCountZero(relation) &&
2174  relation->rd_createSubid == InvalidSubTransactionId &&
2176  RelationClearRelation(relation, false);
2177 #endif
2178 }
2179 
2180 /*
2181  * RelationReloadIndexInfo - reload minimal information for an open index
2182  *
2183  * This function is used only for indexes. A relcache inval on an index
2184  * can mean that its pg_class or pg_index row changed. There are only
2185  * very limited changes that are allowed to an existing index's schema,
2186  * so we can update the relcache entry without a complete rebuild; which
2187  * is fortunate because we can't rebuild an index entry that is "nailed"
2188  * and/or in active use. We support full replacement of the pg_class row,
2189  * as well as updates of a few simple fields of the pg_index row.
2190  *
2191  * We can't necessarily reread the catalog rows right away; we might be
2192  * in a failed transaction when we receive the SI notification. If so,
2193  * RelationClearRelation just marks the entry as invalid by setting
2194  * rd_isvalid to false. This routine is called to fix the entry when it
2195  * is next needed.
2196  *
2197  * We assume that at the time we are called, we have at least AccessShareLock
2198  * on the target index. (Note: in the calls from RelationClearRelation,
2199  * this is legitimate because we know the rel has positive refcount.)
2200  *
2201  * If the target index is an index on pg_class or pg_index, we'd better have
2202  * previously gotten at least AccessShareLock on its underlying catalog,
2203  * else we are at risk of deadlock against someone trying to exclusive-lock
2204  * the heap and index in that order. This is ensured in current usage by
2205  * only applying this to indexes being opened or having positive refcount.
2206  */
2207 static void
2209 {
2210  bool indexOK;
2211  HeapTuple pg_class_tuple;
2212  Form_pg_class relp;
2213 
2214  /* Should be called only for invalidated indexes */
2215  Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
2216  !relation->rd_isvalid);
2217 
2218  /* Ensure it's closed at smgr level */
2219  RelationCloseSmgr(relation);
2220 
2221  /* Must free any AM cached data upon relcache flush */
2222  if (relation->rd_amcache)
2223  pfree(relation->rd_amcache);
2224  relation->rd_amcache = NULL;
2225 
2226  /*
2227  * If it's a shared index, we might be called before backend startup has
2228  * finished selecting a database, in which case we have no way to read
2229  * pg_class yet. However, a shared index can never have any significant
2230  * schema updates, so it's okay to ignore the invalidation signal. Just
2231  * mark it valid and return without doing anything more.
2232  */
2233  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2234  {
2235  relation->rd_isvalid = true;
2236  return;
2237  }
2238 
2239  /*
2240  * Read the pg_class row
2241  *
2242  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2243  * for pg_class_oid_index ...
2244  */
2245  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2246  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2247  if (!HeapTupleIsValid(pg_class_tuple))
2248  elog(ERROR, "could not find pg_class tuple for index %u",
2249  RelationGetRelid(relation));
2250  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2251  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2252  /* Reload reloptions in case they changed */
2253  if (relation->rd_options)
2254  pfree(relation->rd_options);
2255  RelationParseRelOptions(relation, pg_class_tuple);
2256  /* done with pg_class tuple */
2257  heap_freetuple(pg_class_tuple);
2258  /* We must recalculate physical address in case it changed */
2259  RelationInitPhysicalAddr(relation);
2260 
2261  /*
2262  * For a non-system index, there are fields of the pg_index row that are
2263  * allowed to change, so re-read that row and update the relcache entry.
2264  * Most of the info derived from pg_index (such as support function lookup
2265  * info) cannot change, and indeed the whole point of this routine is to
2266  * update the relcache entry without clobbering that data; so wholesale
2267  * replacement is not appropriate.
2268  */
2269  if (!IsSystemRelation(relation))
2270  {
2271  HeapTuple tuple;
2273 
2274  tuple = SearchSysCache1(INDEXRELID,
2275  ObjectIdGetDatum(RelationGetRelid(relation)));
2276  if (!HeapTupleIsValid(tuple))
2277  elog(ERROR, "cache lookup failed for index %u",
2278  RelationGetRelid(relation));
2279  index = (Form_pg_index) GETSTRUCT(tuple);
2280 
2281  /*
2282  * Basically, let's just copy all the bool fields. There are one or
2283  * two of these that can't actually change in the current code, but
2284  * it's not worth it to track exactly which ones they are. None of
2285  * the array fields are allowed to change, though.
2286  */
2287  relation->rd_index->indisunique = index->indisunique;
2288  relation->rd_index->indisprimary = index->indisprimary;
2289  relation->rd_index->indisexclusion = index->indisexclusion;
2290  relation->rd_index->indimmediate = index->indimmediate;
2291  relation->rd_index->indisclustered = index->indisclustered;
2292  relation->rd_index->indisvalid = index->indisvalid;
2293  relation->rd_index->indcheckxmin = index->indcheckxmin;
2294  relation->rd_index->indisready = index->indisready;
2295  relation->rd_index->indislive = index->indislive;
2296 
2297  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2299  HeapTupleHeaderGetXmin(tuple->t_data));
2300 
2301  ReleaseSysCache(tuple);
2302  }
2303 
2304  /* Okay, now it's valid again */
2305  relation->rd_isvalid = true;
2306 }
2307 
2308 /*
2309  * RelationDestroyRelation
2310  *
2311  * Physically delete a relation cache entry and all subsidiary data.
2312  * Caller must already have unhooked the entry from the hash table.
2313  */
2314 static void
2315 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2316 {
2318 
2319  /*
2320  * Make sure smgr and lower levels close the relation's files, if they
2321  * weren't closed already. (This was probably done by caller, but let's
2322  * just be real sure.)
2323  */
2324  RelationCloseSmgr(relation);
2325 
2326  /*
2327  * Free all the subsidiary data structures of the relcache entry, then the
2328  * entry itself.
2329  */
2330  if (relation->rd_rel)
2331  pfree(relation->rd_rel);
2332  /* can't use DecrTupleDescRefCount here */
2333  Assert(relation->rd_att->tdrefcount > 0);
2334  if (--relation->rd_att->tdrefcount == 0)
2335  {
2336  /*
2337  * If we Rebuilt a relcache entry during a transaction then its
2338  * possible we did that because the TupDesc changed as the result of
2339  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2340  * possible someone copied that TupDesc, in which case the copy would
2341  * point to free'd memory. So if we rebuild an entry we keep the
2342  * TupDesc around until end of transaction, to be safe.
2343  */
2344  if (remember_tupdesc)
2346  else
2347  FreeTupleDesc(relation->rd_att);
2348  }
2349  FreeTriggerDesc(relation->trigdesc);
2350  list_free_deep(relation->rd_fkeylist);
2351  list_free(relation->rd_indexlist);
2352  bms_free(relation->rd_indexattr);
2353  bms_free(relation->rd_keyattr);
2354  bms_free(relation->rd_pkattr);
2355  bms_free(relation->rd_idattr);
2356  if (relation->rd_pubactions)
2357  pfree(relation->rd_pubactions);
2358  if (relation->rd_options)
2359  pfree(relation->rd_options);
2360  if (relation->rd_indextuple)
2361  pfree(relation->rd_indextuple);
2362  if (relation->rd_indexcxt)
2363  MemoryContextDelete(relation->rd_indexcxt);
2364  if (relation->rd_rulescxt)
2365  MemoryContextDelete(relation->rd_rulescxt);
2366  if (relation->rd_rsdesc)
2367  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2368  if (relation->rd_partkeycxt)
2370  if (relation->rd_pdcxt)
2371  MemoryContextDelete(relation->rd_pdcxt);
2372  if (relation->rd_partcheck)
2373  pfree(relation->rd_partcheck);
2374  if (relation->rd_fdwroutine)
2375  pfree(relation->rd_fdwroutine);
2376  pfree(relation);
2377 }
2378 
2379 /*
2380  * RelationClearRelation
2381  *
2382  * Physically blow away a relation cache entry, or reset it and rebuild
2383  * it from scratch (that is, from catalog entries). The latter path is
2384  * used when we are notified of a change to an open relation (one with
2385  * refcount > 0).
2386  *
2387  * NB: when rebuilding, we'd better hold some lock on the relation,
2388  * else the catalog data we need to read could be changing under us.
2389  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2390  * an sinval reset could happen while we're accessing the catalogs, and
2391  * the rel would get blown away underneath us by RelationCacheInvalidate
2392  * if it has zero refcnt.
2393  *
2394  * The "rebuild" parameter is redundant in current usage because it has
2395  * to match the relation's refcnt status, but we keep it as a crosscheck
2396  * that we're doing what the caller expects.
2397  */
2398 static void
2399 RelationClearRelation(Relation relation, bool rebuild)
2400 {
2401  /*
2402  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2403  * course it would be an equally bad idea to blow away one with nonzero
2404  * refcnt, since that would leave someone somewhere with a dangling
2405  * pointer. All callers are expected to have verified that this holds.
2406  */
2407  Assert(rebuild ?
2408  !RelationHasReferenceCountZero(relation) :
2409  RelationHasReferenceCountZero(relation));
2410 
2411  /*
2412  * Make sure smgr and lower levels close the relation's files, if they
2413  * weren't closed already. If the relation is not getting deleted, the
2414  * next smgr access should reopen the files automatically. This ensures
2415  * that the low-level file access state is updated after, say, a vacuum
2416  * truncation.
2417  */
2418  RelationCloseSmgr(relation);
2419 
2420  /*
2421  * Never, never ever blow away a nailed-in system relation, because we'd
2422  * be unable to recover. However, we must redo RelationInitPhysicalAddr
2423  * in case it is a mapped relation whose mapping changed.
2424  *
2425  * If it's a nailed-but-not-mapped index, then we need to re-read the
2426  * pg_class row to see if its relfilenode changed. We do that immediately
2427  * if we're inside a valid transaction and the relation is open (not
2428  * counting the nailed refcount). Otherwise just mark the entry as
2429  * possibly invalid, and it'll be fixed when next opened.
2430  */
2431  if (relation->rd_isnailed)
2432  {
2433  RelationInitPhysicalAddr(relation);
2434 
2435  if (relation->rd_rel->relkind == RELKIND_INDEX)
2436  {
2437  relation->rd_isvalid = false; /* needs to be revalidated */
2438  if (relation->rd_refcnt > 1 && IsTransactionState())
2439  RelationReloadIndexInfo(relation);
2440  }
2441  return;
2442  }
2443 
2444  /*
2445  * Even non-system indexes should not be blown away if they are open and
2446  * have valid index support information. This avoids problems with active
2447  * use of the index support information. As with nailed indexes, we
2448  * re-read the pg_class row to handle possible physical relocation of the
2449  * index, and we check for pg_index updates too.
2450  */
2451  if (relation->rd_rel->relkind == RELKIND_INDEX &&
2452  relation->rd_refcnt > 0 &&
2453  relation->rd_indexcxt != NULL)
2454  {
2455  relation->rd_isvalid = false; /* needs to be revalidated */
2456  if (IsTransactionState())
2457  RelationReloadIndexInfo(relation);
2458  return;
2459  }
2460 
2461  /* Mark it invalid until we've finished rebuild */
2462  relation->rd_isvalid = false;
2463 
2464  /*
2465  * If we're really done with the relcache entry, blow it away. But if
2466  * someone is still using it, reconstruct the whole deal without moving
2467  * the physical RelationData record (so that the someone's pointer is
2468  * still valid).
2469  */
2470  if (!rebuild)
2471  {
2472  /* Remove it from the hash table */
2473  RelationCacheDelete(relation);
2474 
2475  /* And release storage */
2476  RelationDestroyRelation(relation, false);
2477  }
2478  else if (!IsTransactionState())
2479  {
2480  /*
2481  * If we're not inside a valid transaction, we can't do any catalog
2482  * access so it's not possible to rebuild yet. Just exit, leaving
2483  * rd_isvalid = false so that the rebuild will occur when the entry is
2484  * next opened.
2485  *
2486  * Note: it's possible that we come here during subtransaction abort,
2487  * and the reason for wanting to rebuild is that the rel is open in
2488  * the outer transaction. In that case it might seem unsafe to not
2489  * rebuild immediately, since whatever code has the rel already open
2490  * will keep on using the relcache entry as-is. However, in such a
2491  * case the outer transaction should be holding a lock that's
2492  * sufficient to prevent any significant change in the rel's schema,
2493  * so the existing entry contents should be good enough for its
2494  * purposes; at worst we might be behind on statistics updates or the
2495  * like. (See also CheckTableNotInUse() and its callers.) These same
2496  * remarks also apply to the cases above where we exit without having
2497  * done RelationReloadIndexInfo() yet.
2498  */
2499  return;
2500  }
2501  else
2502  {
2503  /*
2504  * Our strategy for rebuilding an open relcache entry is to build a
2505  * new entry from scratch, swap its contents with the old entry, and
2506  * finally delete the new entry (along with any infrastructure swapped
2507  * over from the old entry). This is to avoid trouble in case an
2508  * error causes us to lose control partway through. The old entry
2509  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2510  * on next access. Meanwhile it's not any less valid than it was
2511  * before, so any code that might expect to continue accessing it
2512  * isn't hurt by the rebuild failure. (Consider for example a
2513  * subtransaction that ALTERs a table and then gets canceled partway
2514  * through the cache entry rebuild. The outer transaction should
2515  * still see the not-modified cache entry as valid.) The worst
2516  * consequence of an error is leaking the necessarily-unreferenced new
2517  * entry, and this shouldn't happen often enough for that to be a big
2518  * problem.
2519  *
2520  * When rebuilding an open relcache entry, we must preserve ref count,
2521  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2522  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2523  * rewrite-rule, partition key, and partition descriptor substructures
2524  * in place, because various places assume that these structures won't
2525  * move while they are working with an open relcache entry. (Note:
2526  * the refcount mechanism for tupledescs might someday allow us to
2527  * remove this hack for the tupledesc.)
2528  *
2529  * Note that this process does not touch CurrentResourceOwner; which
2530  * is good because whatever ref counts the entry may have do not
2531  * necessarily belong to that resource owner.
2532  */
2533  Relation newrel;
2534  Oid save_relid = RelationGetRelid(relation);
2535  bool keep_tupdesc;
2536  bool keep_rules;
2537  bool keep_policies;
2538  bool keep_partkey;
2539  bool keep_partdesc;
2540 
2541  /* Build temporary entry, but don't link it into hashtable */
2542  newrel = RelationBuildDesc(save_relid, false);
2543  if (newrel == NULL)
2544  {
2545  /*
2546  * We can validly get here, if we're using a historic snapshot in
2547  * which a relation, accessed from outside logical decoding, is
2548  * still invisible. In that case it's fine to just mark the
2549  * relation as invalid and return - it'll fully get reloaded by
2550  * the cache reset at the end of logical decoding (or at the next
2551  * access). During normal processing we don't want to ignore this
2552  * case as it shouldn't happen there, as explained below.
2553  */
2554  if (HistoricSnapshotActive())
2555  return;
2556 
2557  /*
2558  * This shouldn't happen as dropping a relation is intended to be
2559  * impossible if still referenced (c.f. CheckTableNotInUse()). But
2560  * if we get here anyway, we can't just delete the relcache entry,
2561  * as it possibly could get accessed later (as e.g. the error
2562  * might get trapped and handled via a subtransaction rollback).
2563  */
2564  elog(ERROR, "relation %u deleted while still in use", save_relid);
2565  }
2566 
2567  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2568  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2569  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2570  keep_partkey = (relation->rd_partkey != NULL);
2571  keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2572  relation->rd_partdesc,
2573  newrel->rd_partdesc);
2574 
2575  /*
2576  * Perform swapping of the relcache entry contents. Within this
2577  * process the old entry is momentarily invalid, so there *must* be no
2578  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2579  * all-in-line code for safety.
2580  *
2581  * Since the vast majority of fields should be swapped, our method is
2582  * to swap the whole structures and then re-swap those few fields we
2583  * didn't want swapped.
2584  */
2585 #define SWAPFIELD(fldtype, fldname) \
2586  do { \
2587  fldtype _tmp = newrel->fldname; \
2588  newrel->fldname = relation->fldname; \
2589  relation->fldname = _tmp; \
2590  } while (0)
2591 
2592  /* swap all Relation struct fields */
2593  {
2594  RelationData tmpstruct;
2595 
2596  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2597  memcpy(newrel, relation, sizeof(RelationData));
2598  memcpy(relation, &tmpstruct, sizeof(RelationData));
2599  }
2600 
2601  /* rd_smgr must not be swapped, due to back-links from smgr level */
2602  SWAPFIELD(SMgrRelation, rd_smgr);
2603  /* rd_refcnt must be preserved */
2604  SWAPFIELD(int, rd_refcnt);
2605  /* isnailed shouldn't change */
2606  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2607  /* creation sub-XIDs must be preserved */
2608  SWAPFIELD(SubTransactionId, rd_createSubid);
2609  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2610  /* un-swap rd_rel pointers, swap contents instead */
2611  SWAPFIELD(Form_pg_class, rd_rel);
2612  /* ... but actually, we don't have to update newrel->rd_rel */
2613  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2614  /* preserve old tupledesc and rules if no logical change */
2615  if (keep_tupdesc)
2616  SWAPFIELD(TupleDesc, rd_att);
2617  if (keep_rules)
2618  {
2619  SWAPFIELD(RuleLock *, rd_rules);
2620  SWAPFIELD(MemoryContext, rd_rulescxt);
2621  }
2622  if (keep_policies)
2623  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2624  /* toast OID override must be preserved */
2625  SWAPFIELD(Oid, rd_toastoid);
2626  /* pgstat_info must be preserved */
2627  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2628  /* partition key must be preserved, if we have one */
2629  if (keep_partkey)
2630  {
2631  SWAPFIELD(PartitionKey, rd_partkey);
2632  SWAPFIELD(MemoryContext, rd_partkeycxt);
2633  }
2634  /* preserve old partdesc if no logical change */
2635  if (keep_partdesc)
2636  {
2637  SWAPFIELD(PartitionDesc, rd_partdesc);
2638  SWAPFIELD(MemoryContext, rd_pdcxt);
2639  }
2640 
2641 #undef SWAPFIELD
2642 
2643  /* And now we can throw away the temporary entry */
2644  RelationDestroyRelation(newrel, !keep_tupdesc);
2645  }
2646 }
2647 
2648 /*
2649  * RelationFlushRelation
2650  *
2651  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2652  * This is used when we receive a cache invalidation event for the rel.
2653  */
2654 static void
2656 {
2657  if (relation->rd_createSubid != InvalidSubTransactionId ||
2659  {
2660  /*
2661  * New relcache entries are always rebuilt, not flushed; else we'd
2662  * forget the "new" status of the relation, which is a useful
2663  * optimization to have. Ditto for the new-relfilenode status.
2664  *
2665  * The rel could have zero refcnt here, so temporarily increment the
2666  * refcnt to ensure it's safe to rebuild it. We can assume that the
2667  * current transaction has some lock on the rel already.
2668  */
2670  RelationClearRelation(relation, true);
2672  }
2673  else
2674  {
2675  /*
2676  * Pre-existing rels can be dropped from the relcache if not open.
2677  */
2678  bool rebuild = !RelationHasReferenceCountZero(relation);
2679 
2680  RelationClearRelation(relation, rebuild);
2681  }
2682 }
2683 
2684 /*
2685  * RelationForgetRelation - unconditionally remove a relcache entry
2686  *
2687  * External interface for destroying a relcache entry when we
2688  * drop the relation.
2689  */
2690 void
2692 {
2693  Relation relation;
2694 
2695  RelationIdCacheLookup(rid, relation);
2696 
2697  if (!PointerIsValid(relation))
2698  return; /* not in cache, nothing to do */
2699 
2700  if (!RelationHasReferenceCountZero(relation))
2701  elog(ERROR, "relation %u is still open", rid);
2702 
2703  /* Unconditionally destroy the relcache entry */
2704  RelationClearRelation(relation, false);
2705 }
2706 
2707 /*
2708  * RelationCacheInvalidateEntry
2709  *
2710  * This routine is invoked for SI cache flush messages.
2711  *
2712  * Any relcache entry matching the relid must be flushed. (Note: caller has
2713  * already determined that the relid belongs to our database or is a shared
2714  * relation.)
2715  *
2716  * We used to skip local relations, on the grounds that they could
2717  * not be targets of cross-backend SI update messages; but it seems
2718  * safer to process them, so that our *own* SI update messages will
2719  * have the same effects during CommandCounterIncrement for both
2720  * local and nonlocal relations.
2721  */
2722 void
2724 {
2725  Relation relation;
2726 
2727  RelationIdCacheLookup(relationId, relation);
2728 
2729  if (PointerIsValid(relation))
2730  {
2732  RelationFlushRelation(relation);
2733  }
2734 }
2735 
2736 /*
2737  * RelationCacheInvalidate
2738  * Blow away cached relation descriptors that have zero reference counts,
2739  * and rebuild those with positive reference counts. Also reset the smgr
2740  * relation cache and re-read relation mapping data.
2741  *
2742  * This is currently used only to recover from SI message buffer overflow,
2743  * so we do not touch new-in-transaction relations; they cannot be targets
2744  * of cross-backend SI updates (and our own updates now go through a
2745  * separate linked list that isn't limited by the SI message buffer size).
2746  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2747  * since any invalidation of those would be a local event.
2748  *
2749  * We do this in two phases: the first pass deletes deletable items, and
2750  * the second one rebuilds the rebuildable items. This is essential for
2751  * safety, because hash_seq_search only copes with concurrent deletion of
2752  * the element it is currently visiting. If a second SI overflow were to
2753  * occur while we are walking the table, resulting in recursive entry to
2754  * this routine, we could crash because the inner invocation blows away
2755  * the entry next to be visited by the outer scan. But this way is OK,
2756  * because (a) during the first pass we won't process any more SI messages,
2757  * so hash_seq_search will complete safely; (b) during the second pass we
2758  * only hold onto pointers to nondeletable entries.
2759  *
2760  * The two-phase approach also makes it easy to update relfilenodes for
2761  * mapped relations before we do anything else, and to ensure that the
2762  * second pass processes nailed-in-cache items before other nondeletable
2763  * items. This should ensure that system catalogs are up to date before
2764  * we attempt to use them to reload information about other open relations.
2765  */
2766 void
2768 {
2770  RelIdCacheEnt *idhentry;
2771  Relation relation;
2772  List *rebuildFirstList = NIL;
2773  List *rebuildList = NIL;
2774  ListCell *l;
2775 
2776  /*
2777  * Reload relation mapping data before starting to reconstruct cache.
2778  */
2780 
2781  /* Phase 1 */
2782  hash_seq_init(&status, RelationIdCache);
2783 
2784  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2785  {
2786  relation = idhentry->reldesc;
2787 
2788  /* Must close all smgr references to avoid leaving dangling ptrs */
2789  RelationCloseSmgr(relation);
2790 
2791  /*
2792  * Ignore new relations; no other backend will manipulate them before
2793  * we commit. Likewise, before replacing a relation's relfilenode, we
2794  * shall have acquired AccessExclusiveLock and drained any applicable
2795  * pending invalidations.
2796  */
2797  if (relation->rd_createSubid != InvalidSubTransactionId ||
2799  continue;
2800 
2802 
2803  if (RelationHasReferenceCountZero(relation))
2804  {
2805  /* Delete this entry immediately */
2806  Assert(!relation->rd_isnailed);
2807  RelationClearRelation(relation, false);
2808  }
2809  else
2810  {
2811  /*
2812  * If it's a mapped relation, immediately update its rd_node in
2813  * case its relfilenode changed. We must do this during phase 1
2814  * in case the relation is consulted during rebuild of other
2815  * relcache entries in phase 2. It's safe since consulting the
2816  * map doesn't involve any access to relcache entries.
2817  */
2818  if (RelationIsMapped(relation))
2819  RelationInitPhysicalAddr(relation);
2820 
2821  /*
2822  * Add this entry to list of stuff to rebuild in second pass.
2823  * pg_class goes to the front of rebuildFirstList while
2824  * pg_class_oid_index goes to the back of rebuildFirstList, so
2825  * they are done first and second respectively. Other nailed
2826  * relations go to the front of rebuildList, so they'll be done
2827  * next in no particular order; and everything else goes to the
2828  * back of rebuildList.
2829  */
2830  if (RelationGetRelid(relation) == RelationRelationId)
2831  rebuildFirstList = lcons(relation, rebuildFirstList);
2832  else if (RelationGetRelid(relation) == ClassOidIndexId)
2833  rebuildFirstList = lappend(rebuildFirstList, relation);
2834  else if (relation->rd_isnailed)
2835  rebuildList = lcons(relation, rebuildList);
2836  else
2837  rebuildList = lappend(rebuildList, relation);
2838  }
2839  }
2840 
2841  /*
2842  * Now zap any remaining smgr cache entries. This must happen before we
2843  * start to rebuild entries, since that may involve catalog fetches which
2844  * will re-open catalog files.
2845  */
2846  smgrcloseall();
2847 
2848  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2849  foreach(l, rebuildFirstList)
2850  {
2851  relation = (Relation) lfirst(l);
2852  RelationClearRelation(relation, true);
2853  }
2854  list_free(rebuildFirstList);
2855  foreach(l, rebuildList)
2856  {
2857  relation = (Relation) lfirst(l);
2858  RelationClearRelation(relation, true);
2859  }
2860  list_free(rebuildList);
2861 }
2862 
2863 /*
2864  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2865  *
2866  * Needed in some cases where we are changing a relation's physical mapping.
2867  * The link will be automatically reopened on next use.
 *
 * The entry is looked up with RelationIdCacheLookup(relationId, ...). If no
 * relcache entry exists for relationId, the function returns without doing
 * anything; otherwise it calls RelationCloseSmgr() on the entry.
 *
 * NOTE(review): listing line 2870 (the function name and parameter list) is
 * missing from this extract and must be restored from the upstream source.
2868  */
2869 void
2871 {
2872  Relation relation;
2873 
2874  RelationIdCacheLookup(relationId, relation);
2875 
2876  if (!PointerIsValid(relation))
2877  return; /* not in cache, nothing to do */
2878 
2879  RelationCloseSmgr(relation);
2880 }
2881 
/*
 * Append the tuple descriptor "td" to EOXactTupleDescArray.
 *
 * AtEOXact_RelationCache() later calls FreeTupleDesc() on every entry of
 * that array and then releases the array itself.
 *
 * When the array does not exist yet it is allocated with room for 16
 * entries. The second braced branch doubles the allocation with repalloc()
 * and records the new length in EOXactTupleDescArrayLen.
 *
 * NOTE(review): listing lines 2883 (function name and parameter list), 2889,
 * 2892-2893, 2896 (the condition guarding the grow branch) and 2900 are
 * missing from this extract. Presumably 2889 sets oldcxt and 2892-2893
 * initialize the length/next-slot counters -- confirm against upstream.
 */
2882 static void
2884 {
2885  if (EOXactTupleDescArray == NULL)
2886  {
2887  MemoryContext oldcxt;
2888 
2890 
2891  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2894  MemoryContextSwitchTo(oldcxt);
2895  }
2897  {
2898  int32 newlen = EOXactTupleDescArrayLen * 2;
2899 
2901 
2902  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2903  newlen * sizeof(TupleDesc));
2904  EOXactTupleDescArrayLen = newlen;
2905  }
2906 
 /* Store td in the next free slot and advance the slot counter */
2907  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2908 }
2909 
2910 /*
2911  * AtEOXact_RelationCache
2912  *
2913  * Clean up the relcache at main-transaction commit or abort.
2914  *
2915  * Note: this must be called *before* processing invalidation messages.
2916  * In the case of abort, we don't want to try to rebuild any invalidated
2917  * cache entries (since we can't safely do database accesses). Therefore
2918  * we must reset refcnts before handling pending invalidations.
2919  *
2920  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2921  * ResourceOwner mechanism. This routine just does a debugging
2922  * cross-check that no pins remain. However, we also need to do special
2923  * cleanup when the current transaction created any relations or made use
2924  * of forced index lists.
 *
 * The per-relation work is done by AtEOXact_cleanup(), which is passed
 * isCommit unchanged. Afterwards the tuple descriptors collected in
 * EOXactTupleDescArray are freed and the eoxact_list[] bookkeeping is reset.
 *
 * NOTE(review): listing lines 2927 (function name and parameter list), 2929
 * (presumably the declaration of "status"), 2944 (the condition selecting
 * the hash_seq_search branch) and 2977-2978 are missing from this extract;
 * restore them from the upstream source.
2925  */
2926 void
2928 {
2930  RelIdCacheEnt *idhentry;
2931  int i;
2932 
2933  /*
2934  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2935  * listed in it. Otherwise fall back on a hash_seq_search scan.
2936  *
2937  * For simplicity, eoxact_list[] entries are not deleted till end of
2938  * top-level transaction, even though we could remove them at
2939  * subtransaction end in some cases, or remove relations from the list if
2940  * they are cleared for other reasons. Therefore we should expect the
2941  * case that list entries are not found in the hashtable; if not, there's
2942  * nothing to do for them.
2943  */
2945  {
2946  hash_seq_init(&status, RelationIdCache);
2947  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2948  {
2949  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2950  }
2951  }
2952  else
2953  {
2954  for (i = 0; i < eoxact_list_len; i++)
2955  {
2956  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2957  (void *) &eoxact_list[i],
2958  HASH_FIND,
2959  NULL);
2960  if (idhentry != NULL)
2961  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2962  }
2963  }
2964 
 /* Free every tuple descriptor queued for end-of-transaction release */
2965  if (EOXactTupleDescArrayLen > 0)
2966  {
2967  Assert(EOXactTupleDescArray != NULL);
2968  for (i = 0; i < NextEOXactTupleDescNum; i++)
2969  FreeTupleDesc(EOXactTupleDescArray[i]);
2970  pfree(EOXactTupleDescArray);
2971  EOXactTupleDescArray = NULL;
2972  }
2973 
2974  /* Now we're out of the transaction and can clear the lists */
2975  eoxact_list_len = 0;
2976  eoxact_list_overflowed = false;
2979 }
2980 
2981 /*
2982  * AtEOXact_cleanup
2983  *
2984  * Clean up a single rel at main-transaction commit or abort
2985  *
2986  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2987  * bother to prevent duplicate entries in eoxact_list[].
 *
 * relation: the relcache entry to clean up.
 * isCommit: true at commit, false at abort. On abort, an entry created in
 * the current transaction is removed with RelationClearRelation() when its
 * reference count is zero, in which case the function returns immediately.
 *
 * NOTE(review): listing lines 3007 (the condition guarding the refcount
 * cross-check), 3029 (the statement executed on commit), 3045 and 3054 are
 * missing from this extract; restore them from the upstream source.
2988  */
2989 static void
2990 AtEOXact_cleanup(Relation relation, bool isCommit)
2991 {
2992  /*
2993  * The relcache entry's ref count should be back to its normal
2994  * not-in-a-transaction state: 0 unless it's nailed in cache.
2995  *
2996  * In bootstrap mode, this is NOT true, so don't check it --- the
2997  * bootstrap code expects relations to stay open across start/commit
2998  * transaction calls. (That seems bogus, but it's not worth fixing.)
2999  *
3000  * Note: ideally this check would be applied to every relcache entry, not
3001  * just those that have eoxact work to do. But it's not worth forcing a
3002  * scan of the whole relcache just for this. (Moreover, doing so would
3003  * mean that assert-enabled testing never tests the hash_search code path
3004  * above, which seems a bad idea.)
3005  */
3006 #ifdef USE_ASSERT_CHECKING
3008  {
3009  int expected_refcnt;
3010 
3011  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3012  Assert(relation->rd_refcnt == expected_refcnt);
3013  }
3014 #endif
3015 
3016  /*
3017  * Is it a relation created in the current transaction?
3018  *
3019  * During commit, reset the flag to zero, since we are now out of the
3020  * creating transaction. During abort, simply delete the relcache entry
3021  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3022  * new-ness of a new relation due to a forced cache flush, the entry will
3023  * get deleted anyway by shared-cache-inval processing of the aborted
3024  * pg_class insertion.)
3025  */
3026  if (relation->rd_createSubid != InvalidSubTransactionId)
3027  {
3028  if (isCommit)
3030  else if (RelationHasReferenceCountZero(relation))
3031  {
3032  RelationClearRelation(relation, false);
3033  return;
3034  }
3035  else
3036  {
3037  /*
3038  * Hmm, somewhere there's a (leaked?) reference to the relation.
3039  * We daren't remove the entry for fear of dereferencing a
3040  * dangling pointer later. Bleat, and mark it as not belonging to
3041  * the current transaction. Hopefully it'll get cleaned up
3042  * eventually. This must be just a WARNING to avoid
3043  * error-during-error-recovery loops.
3044  */
3046  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3047  RelationGetRelationName(relation));
3048  }
3049  }
3050 
3051  /*
3052  * Likewise, reset the hint about the relfilenode being new.
3053  */
3055 
3056  /*
3057  * Flush any temporary index list. An rd_indexvalid value of 2 is what
 * identifies such a list here; the list is freed and the cached index
 * OIDs are cleared so they are recomputed on next use.
3058  */
3059  if (relation->rd_indexvalid == 2)
3060  {
3061  list_free(relation->rd_indexlist);
3062  relation->rd_indexlist = NIL;
3063  relation->rd_oidindex = InvalidOid;
3064  relation->rd_pkindex = InvalidOid;
3065  relation->rd_replidindex = InvalidOid;
3066  relation->rd_indexvalid = 0;
3067  }
3068 }
3069 
3070 /*
3071  * AtEOSubXact_RelationCache
3072  *
3073  * Clean up the relcache at sub-transaction commit or abort.
3074  *
3075  * Note: this must be called *before* processing invalidation messages.
 *
 * The per-relation work is done by AtEOSubXact_cleanup(), which is passed
 * isCommit, mySubid and parentSubid unchanged. eoxact_list[] is left intact
 * because it is still needed at top-level transaction end.
 *
 * NOTE(review): listing lines 3078 (function name and leading parameters),
 * 3081 (presumably the declaration of "status") and 3090 (the condition
 * selecting the hash_seq_search branch) are missing from this extract;
 * restore them from the upstream source.
3076  */
3077 void
3079  SubTransactionId parentSubid)
3080 {
3082  RelIdCacheEnt *idhentry;
3083  int i;
3084 
3085  /*
3086  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3087  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3088  * logic as in AtEOXact_RelationCache.
3089  */
3091  {
3092  hash_seq_init(&status, RelationIdCache);
3093  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3094  {
3095  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3096  mySubid, parentSubid);
3097  }
3098  }
3099  else
3100  {
3101  for (i = 0; i < eoxact_list_len; i++)
3102  {
3103  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3104  (void *) &eoxact_list[i],
3105  HASH_FIND,
3106  NULL);
 /* Entries may have left the cache already; skip those */
3107  if (idhentry != NULL)
3108  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3109  mySubid, parentSubid);
3110  }
3111  }
3112 
3113  /* Don't reset the list; we still need more cleanup later */
3114 }
3115 
3116 /*
3117  * AtEOSubXact_cleanup
3118  *
3119  * Clean up a single rel at subtransaction commit or abort
3120  *
3121  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3122  * bother to prevent duplicate entries in eoxact_list[].
 *
 * relation: the relcache entry to clean up.
 * isCommit: true at subtransaction commit, false at subtransaction abort.
 * mySubid: ID of the subtransaction that is ending.
 * parentSubid: ID of its parent, which inherits rd_createSubid and
 * rd_newRelfilenodeSubid on commit.
 *
 * NOTE(review): listing line 3166 (the statement executed on abort for the
 * new-relfilenode hint) is missing from this extract; restore it from the
 * upstream source.
3123  */
3124 static void
3125 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3126  SubTransactionId mySubid, SubTransactionId parentSubid)
3127 {
3128  /*
3129  * Is it a relation created in the current subtransaction?
3130  *
3131  * During subcommit, mark it as belonging to the parent, instead. During
3132  * subabort, simply delete the relcache entry.
3133  */
3134  if (relation->rd_createSubid == mySubid)
3135  {
3136  if (isCommit)
3137  relation->rd_createSubid = parentSubid;
3138  else if (RelationHasReferenceCountZero(relation))
3139  {
3140  RelationClearRelation(relation, false);
3141  return;
3142  }
3143  else
3144  {
3145  /*
3146  * Hmm, somewhere there's a (leaked?) reference to the relation.
3147  * We daren't remove the entry for fear of dereferencing a
3148  * dangling pointer later. Bleat, and transfer it to the parent
3149  * subtransaction so we can try again later. This must be just a
3150  * WARNING to avoid error-during-error-recovery loops.
3151  */
3152  relation->rd_createSubid = parentSubid;
3153  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3154  RelationGetRelationName(relation));
3155  }
3156  }
3157 
3158  /*
3159  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3160  */
3161  if (relation->rd_newRelfilenodeSubid == mySubid)
3162  {
3163  if (isCommit)
3164  relation->rd_newRelfilenodeSubid = parentSubid;
3165  else
3167  }
3168 
3169  /*
3170  * Flush any temporary index list. An rd_indexvalid value of 2 is what
 * identifies such a list here; the list is freed and the cached index
 * OIDs are cleared.
3171  */
3172  if (relation->rd_indexvalid == 2)
3173  {
3174  list_free(relation->rd_indexlist);
3175  relation->rd_indexlist = NIL;
3176  relation->rd_oidindex = InvalidOid;
3177  relation->rd_pkindex = InvalidOid;
3178  relation->rd_replidindex = InvalidOid;
3179  relation->rd_indexvalid = 0;
3180  }
3181 }
3182 
3183 
3184 /*
3185  * RelationBuildLocalRelation
3186  * Build a relcache entry for an about-to-be-created relation,
3187  * and enter it into the relcache.
 *
 * relname, relnamespace, relkind, relpersistence and reltablespace are
 * copied into the new entry's rd_rel. tupDesc is copied (not adopted) to
 * form rd_att. relid becomes the entry's OID and is stamped into every
 * attribute's attrelid. relfilenode is stored in rd_rel for an unmapped
 * relation, or handed to RelationMapUpdateMap() when mapped_relation is
 * true. shared_relation must agree with IsSharedRelation(relid), else an
 * ERROR is raised.
 *
 * Returns the new relcache entry, which is also registered for
 * end-of-transaction cleanup via EOXactListAdd().
 *
 * NOTE(review): listing lines 3249, 3251, 3267-3268, 3301, 3317-3319, 3324,
 * 3372 and 3402 are missing from this extract (among them the statement
 * that sets oldcxt, the allocation of rd_rel, the first case labels of the
 * relpersistence switch, and the statement that pins the result); restore
 * them from the upstream source.
3188  */
3189 Relation
3190 RelationBuildLocalRelation(const char *relname,
3191  Oid relnamespace,
3192  TupleDesc tupDesc,
3193  Oid relid,
3194  Oid relfilenode,
3195  Oid reltablespace,
3196  bool shared_relation,
3197  bool mapped_relation,
3198  char relpersistence,
3199  char relkind)
3200 {
3201  Relation rel;
3202  MemoryContext oldcxt;
3203  int natts = tupDesc->natts;
3204  int i;
3205  bool has_not_null;
3206  bool nailit;
3207 
3208  AssertArg(natts >= 0);
3209 
3210  /*
3211  * check for creation of a rel that must be nailed in cache.
3212  *
3213  * XXX this list had better match the relations specially handled in
3214  * RelationCacheInitializePhase2/3.
 *
 * NOTE(review): RelationCacheInitializePhase2 also calls formrdesc() for
 * pg_shseclabel and pg_subscription, which have no case label here;
 * confirm whether that difference is intentional.
3215  */
3216  switch (relid)
3217  {
3218  case DatabaseRelationId:
3219  case AuthIdRelationId:
3220  case AuthMemRelationId:
3221  case RelationRelationId:
3222  case AttributeRelationId:
3223  case ProcedureRelationId:
3224  case TypeRelationId:
3225  nailit = true;
3226  break;
3227  default:
3228  nailit = false;
3229  break;
3230  }
3231 
3232  /*
3233  * check that hardwired list of shared rels matches what's in the
3234  * bootstrap .bki file. If you get a failure here during initdb, you
3235  * probably need to fix IsSharedRelation() to match whatever you've done
3236  * to the set of shared relations.
3237  */
3238  if (shared_relation != IsSharedRelation(relid))
3239  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3240  relname, relid);
3241 
3242  /* Shared relations had better be mapped, too */
3243  Assert(mapped_relation || !shared_relation);
3244 
3245  /*
3246  * switch to the cache context to create the relcache entry.
3247  */
3248  if (!CacheMemoryContext)
3250 
3252 
3253  /*
3254  * allocate a new relation descriptor and fill in basic state fields.
3255  */
3256  rel = (Relation) palloc0(sizeof(RelationData));
3257 
3258  /* make sure relation is marked as having no open file yet */
3259  rel->rd_smgr = NULL;
3260 
3261  /* mark it nailed if appropriate */
3262  rel->rd_isnailed = nailit;
3263 
 /* a nailed entry starts with a reference count of 1, others with 0 */
3264  rel->rd_refcnt = nailit ? 1 : 0;
3265 
3266  /* it's being created in this transaction */
3269 
3270  /*
3271  * create a new tuple descriptor from the one passed in. We do this
3272  * partly to copy it into the cache context, and partly because the new
3273  * relation can't have any defaults or constraints yet; they have to be
3274  * added in later steps, because they require additions to multiple system
3275  * catalogs. We can copy attnotnull constraints here, however.
3276  */
3277  rel->rd_att = CreateTupleDescCopy(tupDesc);
3278  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3279  has_not_null = false;
3280  for (i = 0; i < natts; i++)
3281  {
3282  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3283  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3284 
 /* carry over identity and NOT NULL markings from the source column */
3285  datt->attidentity = satt->attidentity;
3286  datt->attnotnull = satt->attnotnull;
3287  has_not_null |= satt->attnotnull;
3288  }
3289 
 /* a TupleConstr is attached only if some column is NOT NULL */
3290  if (has_not_null)
3291  {
3292  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3293 
3294  constr->has_not_null = true;
3295  rel->rd_att->constr = constr;
3296  }
3297 
3298  /*
3299  * initialize relation tuple form (caller may add/override data later)
3300  */
3302 
3303  namestrcpy(&rel->rd_rel->relname, relname);
3304  rel->rd_rel->relnamespace = relnamespace;
3305 
3306  rel->rd_rel->relkind = relkind;
3307  rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
3308  rel->rd_rel->relnatts = natts;
3309  rel->rd_rel->reltype = InvalidOid;
3310  /* needed when bootstrapping: */
3311  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3312 
3313  /* set up persistence and relcache fields dependent on it */
3314  rel->rd_rel->relpersistence = relpersistence;
3315  switch (relpersistence)
3316  {
3320  rel->rd_islocaltemp = false;
3321  break;
3322  case RELPERSISTENCE_TEMP:
3323  Assert(isTempOrTempToastNamespace(relnamespace));
3325  rel->rd_islocaltemp = true;
3326  break;
3327  default:
3328  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3329  break;
3330  }
3331 
3332  /* if it's a materialized view, it's not populated initially */
3333  if (relkind == RELKIND_MATVIEW)
3334  rel->rd_rel->relispopulated = false;
3335  else
3336  rel->rd_rel->relispopulated = true;
3337 
3338  /*
 * Choose the replica identity: system relations and non-table objects
 * don't have one, so they get REPLICA_IDENTITY_NOTHING; plain tables,
 * materialized views and partitioned tables outside the system
 * namespace get REPLICA_IDENTITY_DEFAULT.
 */
3339  if (!IsSystemNamespace(relnamespace) &&
3340  (relkind == RELKIND_RELATION ||
3341  relkind == RELKIND_MATVIEW ||
3342  relkind == RELKIND_PARTITIONED_TABLE))
3343  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3344  else
3345  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3346 
3347  /*
3348  * Insert relation physical and logical identifiers (OIDs) into the right
3349  * places. For a mapped relation, we set relfilenode to zero and rely on
3350  * RelationInitPhysicalAddr to consult the map.
3351  */
3352  rel->rd_rel->relisshared = shared_relation;
3353 
3354  RelationGetRelid(rel) = relid;
3355 
3356  for (i = 0; i < natts; i++)
3357  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3358 
3359  rel->rd_rel->reltablespace = reltablespace;
3360 
3361  if (mapped_relation)
3362  {
3363  rel->rd_rel->relfilenode = InvalidOid;
3364  /* Add it to the active mapping information */
3365  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3366  }
3367  else
3368  rel->rd_rel->relfilenode = relfilenode;
3369 
3370  RelationInitLockInfo(rel); /* see lmgr.c */
3371 
3373 
3374  /*
3375  * Okay to insert into the relcache hash table.
3376  *
3377  * Ordinarily, there should certainly not be an existing hash entry for
3378  * the same OID; but during bootstrap, when we create a "real" relcache
3379  * entry for one of the bootstrap relations, we'll be overwriting the
3380  * phony one created with formrdesc. So allow that to happen for nailed
3381  * rels.
3382  */
3383  RelationCacheInsert(rel, nailit);
3384 
3385  /*
3386  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3387  * can't do this before storing relid in it.
3388  */
3389  EOXactListAdd(rel);
3390 
3391  /*
3392  * done building relcache entry.
3393  */
3394  MemoryContextSwitchTo(oldcxt);
3395 
3396  /* It's fully valid */
3397  rel->rd_isvalid = true;
3398 
3399  /*
3400  * Caller expects us to pin the returned entry.
3401  */
3403 
3404  return rel;
3405 }
3406 
3407 
3408 /*
3409  * RelationSetNewRelfilenode
3410  *
3411  * Assign a new relfilenode (physical file name) to the relation.
3412  *
3413  * This allows a full rewrite of the relation to be done with transactional
3414  * safety (since the filenode assignment can be rolled back). Note however
3415  * that there is no simple way to access the relation's old data for the
3416  * remainder of the current transaction. This limits the usefulness to cases
3417  * such as TRUNCATE or rebuilding an index from scratch.
3418  *
3419  * Caller must already hold exclusive lock on the relation.
3420  *
3421  * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
3422  * must be passed for indexes and sequences). This should be a lower bound on
3423  * the XIDs that will be put into the new relation contents.
 * Likewise relminmxid is set to minmulti, which must be valid exactly when
 * freezeXid is a normal XID (both conditions are asserted below).
3424  *
3425  * The new filenode's persistence is set to the given value. This is useful
3426  * for the cases that are changing the relation's persistence; other callers
3427  * need to pass the original relpersistence value.
 *
 * NOTE(review): listing lines 3453 (presumably the statement that opens
 * pg_class, since it is closed with RowExclusiveLock below), 3485 (the
 * call made for a mapped relation), 3513 and 3520 are missing from this
 * extract; restore them from the upstream source.
3428  */
3429 void
3430 RelationSetNewRelfilenode(Relation relation, char persistence,
3431  TransactionId freezeXid, MultiXactId minmulti)
3432 {
3433  Oid newrelfilenode;
3434  RelFileNodeBackend newrnode;
3435  Relation pg_class;
3436  HeapTuple tuple;
3437  Form_pg_class classform;
3438 
3439  /* Indexes, sequences must have Invalid frozenxid; other rels must not */
3440  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
3441  relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
3442  freezeXid == InvalidTransactionId :
3443  TransactionIdIsNormal(freezeXid));
3444  Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
3445 
3446  /* Allocate a new relfilenode */
3447  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3448  persistence);
3449 
3450  /*
3451  * Get a writable copy of the pg_class tuple for the given relation.
3452  */
3454 
3455  tuple = SearchSysCacheCopy1(RELOID,
3456  ObjectIdGetDatum(RelationGetRelid(relation)));
3457  if (!HeapTupleIsValid(tuple))
3458  elog(ERROR, "could not find tuple for relation %u",
3459  RelationGetRelid(relation));
3460  classform = (Form_pg_class) GETSTRUCT(tuple);
3461 
3462  /*
3463  * Create storage for the main fork of the new relfilenode.
3464  *
3465  * NOTE: any conflict in relfilenode value will be caught here, if
3466  * GetNewRelFileNode messes up for any reason.
3467  */
 /* new node = the relation's current rd_node with only relNode replaced */
3468  newrnode.node = relation->rd_node;
3469  newrnode.node.relNode = newrelfilenode;
3470  newrnode.backend = relation->rd_backend;
3471  RelationCreateStorage(newrnode.node, persistence);
3472  smgrclosenode(newrnode);
3473 
3474  /*
3475  * Schedule unlinking of the old storage at transaction commit.
3476  */
3477  RelationDropStorage(relation);
3478 
3479  /*
3480  * Now update the pg_class row. However, if we're dealing with a mapped
3481  * relation, pg_class.relfilenode doesn't change; instead we have to send
3482  * the update to the relation mapper.
3483  */
3484  if (RelationIsMapped(relation))
3486  newrelfilenode,
3487  relation->rd_rel->relisshared,
3488  false);
3489  else
3490  classform->relfilenode = newrelfilenode;
3491 
3492  /* These changes are safe even for a mapped relation */
3493  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3494  {
3495  classform->relpages = 0; /* it's empty until further notice */
3496  classform->reltuples = 0;
3497  classform->relallvisible = 0;
3498  }
3499  classform->relfrozenxid = freezeXid;
3500  classform->relminmxid = minmulti;
3501  classform->relpersistence = persistence;
3502 
3503  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3504 
3505  heap_freetuple(tuple);
3506 
3507  heap_close(pg_class, RowExclusiveLock);
3508 
3509  /*
3510  * Make the pg_class row change visible, as well as the relation map
3511  * change if any. This will cause the relcache entry to get updated, too.
3512  */
3514 
3515  /*
3516  * Mark the rel as having been given a new relfilenode in the current
3517  * (sub) transaction. This is a hint that can be used to optimize later
3518  * operations on the rel in the same transaction.
3519  */
3521 
3522  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3523  EOXactListAdd(relation);
3524 }
3525 
3526 
3527 /*
3528  * RelationCacheInitialize
3529  *
3530  * This initializes the relation descriptor cache. At the time
3531  * that this is invoked, we can't do database access yet (mainly
3532  * because the transaction subsystem is not up); all we are doing
3533  * is making an empty cache hashtable. This must be done before
3534  * starting the initialization transaction, because otherwise
3535  * AtEOXact_RelationCache would crash if that transaction aborts
3536  * before we can get the relcache set up.
 *
 * NOTE(review): listing lines 3542 (function name and parameter list), 3550
 * (the statement executed when CacheMemoryContext does not exist yet) and
 * 3564 (the relation-mapper initialization call) are missing from this
 * extract; restore them from the upstream source.
3537  */
3538 
 /* Initial size passed to hash_create() for the relcache hash table */
3539 #define INITRELCACHESIZE 400
3540 
3541 void
3543 {
3544  HASHCTL ctl;
3545 
3546  /*
3547  * make sure cache memory context exists
3548  */
3549  if (!CacheMemoryContext)
3551 
3552  /*
3553  * create hashtable that indexes the relcache; it is keyed by relation
 * OID and each entry is a RelIdCacheEnt
3554  */
3555  MemSet(&ctl, 0, sizeof(ctl));
3556  ctl.keysize = sizeof(Oid);
3557  ctl.entrysize = sizeof(RelIdCacheEnt);
3558  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3559  &ctl, HASH_ELEM | HASH_BLOBS);
3560 
3561  /*
3562  * relation mapper needs to be initialized too
3563  */
3565 }
3566 
3567 /*
3568  * RelationCacheInitializePhase2
3569  *
3570  * This is called to prepare for access to shared catalogs during startup.
3571  * We must at least set up nailed reldescs for pg_database, pg_authid,
3572  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like
3573  * to have reldescs for their indexes, too. We attempt to load this
3574  * information from the shared relcache init file. If that's missing or
3575  * broken, just make phony entries for the catalogs themselves.
3576  * RelationCacheInitializePhase3 will clean up as needed.
 *
 * NOTE(review): listing lines 3579 (function name and parameter list), 3586
 * (the relation-mapper call), 3592 (the bootstrap-mode test), 3598 (the
 * statement that sets oldcxt) and the continuation line of each formrdesc()
 * call (3607, 3609, 3611, 3613, 3615) are missing from this extract;
 * restore them from the upstream source.
3577  */
3578 void
3580 {
3581  MemoryContext oldcxt;
3582 
3583  /*
3584  * relation mapper needs to be initialized too
3585  */
3587 
3588  /*
3589  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3590  * nothing.
3591  */
3593  return;
3594 
3595  /*
3596  * switch to cache memory context
3597  */
3599 
3600  /*
3601  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3602  * the cache with pre-made descriptors for the critical shared catalogs.
3603  */
3604  if (!load_relcache_init_file(true))
3605  {
3606  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3608  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3610  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3612  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3614  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3616 
3617 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3618  }
3619 
3620  MemoryContextSwitchTo(oldcxt);
3621 }
3622 
3623 /*
3624  * RelationCacheInitializePhase3
3625  *
3626  * This is called as soon as the catcache and transaction system
3627  * are functional and we have determined MyDatabaseId. At this point
3628  * we can actually read data from the database's system catalogs.
3629  * We first try to read pre-computed relcache entries from the local
3630  * relcache init file. If that's missing or broken, make phony entries
3631  * for the minimum set of nailed-in-cache relations. Then (unless
3632  * bootstrapping) make sure we have entries for the critical system
3633  * indexes. Once we've done all this, we have enough infrastructure to
3634  * open any system catalog or use any catcache. The last step is to
3635  * rewrite the cache files if needed.
 *
 * NOTE(review): many listing lines are missing from this extract, among
 * them 3638 (function name and parameter list), 3640 (presumably the
 * declaration of "status"), 3648, 3653, 3668, 3680, 3708-3714, 3716-3723,
 * 3742, 3744-3755, 3759, 3788, 3896, 3918 and 3921. In particular the
 * bodies of both critical-index blocks are almost entirely absent, so the
 * NUM_CRITICAL_LOCAL_INDEXES / NUM_CRITICAL_SHARED_INDEXES counts cannot
 * be checked here; restore the lines from the upstream source.
3636  */
3637 void
3639 {
3641  RelIdCacheEnt *idhentry;
3642  MemoryContext oldcxt;
 /* start out needing new init files if the shared entries were not loaded */
3643  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3644 
3645  /*
3646  * relation mapper needs to be initialized too
3647  */
3649 
3650  /*
3651  * switch to cache memory context
3652  */
3654 
3655  /*
3656  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3657  * the cache with pre-made descriptors for the critical "nailed-in" system
3658  * catalogs.
3659  */
3660  if (IsBootstrapProcessingMode() ||
3661  !load_relcache_init_file(false))
3662  {
3663  needNewCacheFile = true;
3664 
3665  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3666  true, Natts_pg_class, Desc_pg_class);
3667  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3669  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3670  true, Natts_pg_proc, Desc_pg_proc);
3671  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3672  true, Natts_pg_type, Desc_pg_type);
3673 
3674 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3675  }
3676 
3677  MemoryContextSwitchTo(oldcxt);
3678 
3679  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3681  return;
3682 
3683  /*
3684  * If we didn't get the critical system indexes loaded into relcache, do
3685  * so now. These are critical because the catcache and/or opclass cache
3686  * depend on them for fetches done during relcache load. Thus, we have an
3687  * infinite-recursion problem. We can break the recursion by doing
3688  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3689  * performance, we only want to do that until we have the critical indexes
3690  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3691  * decide whether to do heapscan or indexscan at the key spots, and we set
3692  * it true after we've loaded the critical indexes.
3693  *
3694  * The critical indexes are marked as "nailed in cache", partly to make it
3695  * easy for load_relcache_init_file to count them, but mainly because we
3696  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3697  * true. (NOTE: perhaps it would be possible to reload them by
3698  * temporarily setting criticalRelcachesBuilt to false again. For now,
3699  * though, we just nail 'em in.)
3700  *
3701  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3702  * in the same way as the others, because the critical catalogs don't
3703  * (currently) have any rules or triggers, and so these indexes can be
3704  * rebuilt without inducing recursion. However they are used during
3705  * relcache load when a rel does have rules or triggers, so we choose to
3706  * nail them for performance reasons.
3707  */
3709  {
3715  IndexRelationId);
3724 
3725 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3726 
3727  criticalRelcachesBuilt = true;
3728  }
3729 
3730  /*
3731  * Process critical shared indexes too.
3732  *
3733  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3734  * initial lookup of MyDatabaseId, without which we'll never find any
3735  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3736  * database OID, so it instead depends on DatabaseOidIndexId. We also
3737  * need to nail up some indexes on pg_authid and pg_auth_members for use
3738  * during client authentication. SharedSecLabelObjectIndexId isn't
3739  * critical for the core system, but authentication hooks might be
3740  * interested in it.
3741  */
3743  {
3756 
3757 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3758 
3760  }
3761 
3762  /*
3763  * Now, scan all the relcache entries and update anything that might be
3764  * wrong in the results from formrdesc or the relcache cache file. If we
3765  * faked up relcache entries using formrdesc, then read the real pg_class
3766  * rows and replace the fake entries with them. Also, if any of the
3767  * relcache entries have rules, triggers, or security policies, load that
3768  * info the hard way since it isn't recorded in the cache file.
3769  *
3770  * Whenever we access the catalogs to read data, there is a possibility of
3771  * a shared-inval cache flush causing relcache entries to be removed.
3772  * Since hash_seq_search only guarantees to still work after the *current*
3773  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3774  * We handle this by restarting the scan from scratch after each access.
3775  * This is theoretically O(N^2), but the number of entries that actually
3776  * need to be fixed is small enough that it doesn't matter.
3777  */
3778  hash_seq_init(&status, RelationIdCache);
3779 
3780  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3781  {
3782  Relation relation = idhentry->reldesc;
 /* set whenever a fix-up below touched the catalogs */
3783  bool restart = false;
3784 
3785  /*
3786  * Make sure *this* entry doesn't get flushed while we work with it.
3787  */
3789 
3790  /*
3791  * If it's a faked-up entry, read the real pg_class tuple. An invalid
 * relowner is what marks an entry as faked-up here.
3792  */
3793  if (relation->rd_rel->relowner == InvalidOid)
3794  {
3795  HeapTuple htup;
3796  Form_pg_class relp;
3797 
3798  htup = SearchSysCache1(RELOID,
3799  ObjectIdGetDatum(RelationGetRelid(relation)));
3800  if (!HeapTupleIsValid(htup))
3801  elog(FATAL, "cache lookup failed for relation %u",
3802  RelationGetRelid(relation));
3803  relp = (Form_pg_class) GETSTRUCT(htup);
3804 
3805  /*
3806  * Copy tuple to relation->rd_rel. (See notes in
3807  * AllocateRelationDesc())
3808  */
3809  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3810 
3811  /* Update rd_options while we have the tuple */
3812  if (relation->rd_options)
3813  pfree(relation->rd_options);
3814  RelationParseRelOptions(relation, htup);
3815 
3816  /*
3817  * Check the values in rd_att were set up correctly. (We cannot
3818  * just copy them over now: formrdesc must have set up the rd_att
3819  * data correctly to start with, because it may already have been
3820  * copied into one or more catcache entries.)
3821  */
3822  Assert(relation->rd_att->tdtypeid == relp->reltype);
3823  Assert(relation->rd_att->tdtypmod == -1);
3824  Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3825 
3826  ReleaseSysCache(htup);
3827 
3828  /* relowner had better be OK now, else we'll loop forever */
3829  if (relation->rd_rel->relowner == InvalidOid)
3830  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3831  RelationGetRelationName(relation));
3832 
3833  restart = true;
3834  }
3835 
3836  /*
3837  * Fix data that isn't saved in relcache cache file.
3838  *
3839  * relhasrules or relhastriggers could possibly be wrong or out of
3840  * date. If we don't actually find any rules or triggers, clear the
3841  * local copy of the flag so that we don't get into an infinite loop
3842  * here. We don't make any attempt to fix the pg_class entry, though.
3843  */
3844  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3845  {
3846  RelationBuildRuleLock(relation);
3847  if (relation->rd_rules == NULL)
3848  relation->rd_rel->relhasrules = false;
3849  restart = true;
3850  }
3851  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3852  {
3853  RelationBuildTriggers(relation);
3854  if (relation->trigdesc == NULL)
3855  relation->rd_rel->relhastriggers = false;
3856  restart = true;
3857  }
3858 
3859  /*
3860  * Re-load the row security policies if the relation has them, since
3861  * they are not preserved in the cache. Note that we can never NOT
3862  * have a policy while relrowsecurity is true,
3863  * RelationBuildRowSecurity will create a single default-deny policy
3864  * if there is no policy defined in pg_policy.
3865  */
3866  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3867  {
3868  RelationBuildRowSecurity(relation);
3869 
3870  Assert(relation->rd_rsdesc != NULL);
3871  restart = true;
3872  }
3873 
3874  /*
3875  * Reload the partition key and descriptor for a partitioned table.
3876  */
3877  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3878  relation->rd_partkey == NULL)
3879  {
3880  RelationBuildPartitionKey(relation);
3881  Assert(relation->rd_partkey != NULL);
3882 
3883  restart = true;
3884  }
3885 
3886  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3887  relation->rd_partdesc == NULL)
3888  {
3889  RelationBuildPartitionDesc(relation);
3890  Assert(relation->rd_partdesc != NULL);
3891 
3892  restart = true;
3893  }
3894 
3895  /* Release hold on the relation */
3897 
3898  /* Now, restart the hashtable scan if needed */
3899  if (restart)
3900  {
3901  hash_seq_term(&status);
3902  hash_seq_init(&status, RelationIdCache);
3903  }
3904  }
3905 
3906  /*
3907  * Lastly, write out new relcache cache files if needed. We don't bother
3908  * to distinguish cases where only one of the two needs an update.
3909  */
3910  if (needNewCacheFile)
3911  {
3912  /*
3913  * Force all the catcaches to finish initializing and thereby open the
3914  * catalogs and indexes they use. This will preload the relcache with
3915  * entries for all the most important system catalogs and indexes, so
3916  * that the init files will be most useful for future backends.
3917  */
3919 
3920  /* now write the files */
3922  write_relcache_init_file(false);
3923  }
3924 }
3925 
3926 /*
3927  * Load one critical system index into the relcache
3928  *
3929  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3930  * it belongs to.
3931  */
3932 static void
3933 load_critical_index(Oid indexoid, Oid heapoid)
3934 {
3935  Relation ird;
3936 
3937  /*
3938  * We must lock the underlying catalog before locking the index to avoid
3939  * deadlock, since RelationBuildDesc might well need to read the catalog,
3940  * and if anyone else is exclusive-locking this catalog and index they'll
3941  * be doing it in that order.
3942  */
3943  LockRelationOid(heapoid, AccessShareLock);
3944  LockRelationOid(indexoid, AccessShareLock);
3945  ird = RelationBuildDesc(indexoid, true);
3946  if (ird == NULL)
3947  elog(PANIC, "could not open critical system index %u", indexoid);
3948  ird->rd_isnailed = true;
3949  ird->rd_refcnt = 1;
3952 }
3953 
3954 /*
3955  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3956  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3957  *
3958  * We need this kluge because we have to be able to access non-fixed-width
3959  * fields of pg_class and pg_index before we have the standard catalog caches
3960  * available. We use predefined data that's set up in just the same way as
3961  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3962  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3963  * does it have a TupleConstr field. But it's good enough for the purpose of
3964  * extracting fields.
3965  */
3966 static TupleDesc
3968  bool hasoids)
3969 {
3970  TupleDesc result;
3971  MemoryContext oldcxt;
3972  int i;
3973 
3975 
3976  result = CreateTemplateTupleDesc(natts, hasoids);
3977  result->tdtypeid = RECORDOID; /* not right, but we don't care */
3978  result->tdtypmod = -1;
3979 
3980  for (i = 0; i < natts; i++)
3981  {
3982  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3983  /* make sure attcacheoff is valid */
3984  TupleDescAttr(result, i)->attcacheoff = -1;
3985  }
3986 
3987  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3988  TupleDescAttr(result, 0)->attcacheoff = 0;
3989 
3990  /* Note: we don't bother to set up a TupleConstr entry */
3991 
3992  MemoryContextSwitchTo(oldcxt);
3993 
3994  return result;
3995 }
3996 
3997 static TupleDesc
3999 {
4000  static TupleDesc pgclassdesc = NULL;
4001 
4002  /* Already done? */
4003  if (pgclassdesc == NULL)
4005  Desc_pg_class,
4006  true);
4007 
4008  return pgclassdesc;
4009 }
4010 
4011 static TupleDesc
4013 {
4014  static TupleDesc pgindexdesc = NULL;
4015 
4016  /* Already done? */
4017  if (pgindexdesc == NULL)
4019  Desc_pg_index,
4020  false);
4021 
4022  return pgindexdesc;
4023 }
4024 
4025 /*
4026  * Load any default attribute value definitions for the relation.
4027  */
4028 static void
4030 {
4031  AttrDefault *attrdef = relation->rd_att->constr->defval;
4032  int ndef = relation->rd_att->constr->num_defval;
4033  Relation adrel;
4034  SysScanDesc adscan;
4035  ScanKeyData skey;
4036  HeapTuple htup;
4037  Datum val;
4038  bool isnull;
4039  int found;
4040  int i;
4041 
4042  ScanKeyInit(&skey,
4044  BTEqualStrategyNumber, F_OIDEQ,
4045  ObjectIdGetDatum(RelationGetRelid(relation)));
4046 
4048  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4049  NULL, 1, &skey);
4050  found = 0;
4051 
4052  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4053  {
4054  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4055  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4056 
4057  for (i = 0; i < ndef; i++)
4058  {
4059  if (adform->adnum != attrdef[i].adnum)
4060  continue;
4061  if (attrdef[i].adbin != NULL)
4062  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4063  NameStr(attr->attname),
4064  RelationGetRelationName(relation));
4065  else
4066  found++;
4067 
4068  val = fastgetattr(htup,
4070  adrel->rd_att, &isnull);
4071  if (isnull)
4072  elog(WARNING, "null adbin for attr %s of rel %s",
4073  NameStr(attr->attname),
4074  RelationGetRelationName(relation));
4075  else
4076  {
4077  /* detoast and convert to cstring in caller's context */
4078  char *s = TextDatumGetCString(val);
4079 
4081  pfree(s);
4082  }
4083  break;
4084  }
4085 
4086  if (i >= ndef)
4087  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4088  adform->adnum, RelationGetRelationName(relation));
4089  }
4090 
4091  systable_endscan(adscan);
4092  heap_close(adrel, AccessShareLock);
4093 
4094  if (found != ndef)
4095  elog(WARNING, "%d attrdef record(s) missing for rel %s",
4096  ndef - found, RelationGetRelationName(relation));
4097 }
4098 
4099 /*
4100  * Load any check constraints for the relation.
4101  */
4102 static void
4104 {
4105  ConstrCheck *check = relation->rd_att->constr->check;
4106  int ncheck = relation->rd_att->constr->num_check;
4107  Relation conrel;
4108  SysScanDesc conscan;
4109  ScanKeyData skey[1];
4110  HeapTuple htup;
4111  int found = 0;
4112 
4113  ScanKeyInit(&skey[0],
4115  BTEqualStrategyNumber, F_OIDEQ,
4116  ObjectIdGetDatum(RelationGetRelid(relation)));
4117 
4119  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4120  NULL, 1, skey);
4121 
4122  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4123  {
4125  Datum val;
4126  bool isnull;
4127  char *s;
4128 
4129  /* We want check constraints only */
4130  if (conform->contype != CONSTRAINT_CHECK)
4131  continue;
4132 
4133  if (found >= ncheck)
4134  elog(ERROR, "unexpected constraint record found for rel %s",
4135  RelationGetRelationName(relation));
4136 
4137  check[found].ccvalid = conform->convalidated;
4138  check[found].ccnoinherit = conform->connoinherit;
4140  NameStr(conform->conname));
4141 
4142  /* Grab and test conbin is actually set */
4143  val = fastgetattr(htup,
4145  conrel->rd_att, &isnull);
4146  if (isnull)
4147  elog(ERROR, "null conbin for rel %s",
4148  RelationGetRelationName(relation));
4149 
4150  /* detoast and convert to cstring in caller's context */
4151  s = TextDatumGetCString(val);
4152  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4153  pfree(s);
4154 
4155  found++;
4156  }
4157 
4158  systable_endscan(conscan);
4159  heap_close(conrel, AccessShareLock);
4160 
4161  if (found != ncheck)
4162  elog(ERROR, "%d constraint record(s) missing for rel %s",
4163  ncheck - found, RelationGetRelationName(relation));
4164 
4165  /* Sort the records so that CHECKs are applied in a deterministic order */
4166  if (ncheck > 1)
4167  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4168 }
4169 
4170 /*
4171  * qsort comparator to sort ConstrCheck entries by name
4172  */
4173 static int
4174 CheckConstraintCmp(const void *a, const void *b)
4175 {
4176  const ConstrCheck *ca = (const ConstrCheck *) a;
4177  const ConstrCheck *cb = (const ConstrCheck *) b;
4178 
4179  return strcmp(ca->ccname, cb->ccname);
4180 }
4181 
4182 /*
4183  * RelationGetFKeyList -- get a list of foreign key info for the relation
4184  *
4185  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4186  * the given relation. This data is a direct copy of relevant fields from
4187  * pg_constraint. The list items are in no particular order.
4188  *
4189  * CAUTION: the returned list is part of the relcache's data, and could
4190  * vanish in a relcache entry reset. Callers must inspect or copy it
4191  * before doing anything that might trigger a cache flush, such as
4192  * system catalog accesses. copyObject() can be used if desired.
4193  * (We define it this way because current callers want to filter and
4194  * modify the list entries anyway, so copying would be a waste of time.)
4195  */
4196 List *
4198 {
4199  List *result;
4200  Relation conrel;
4201  SysScanDesc conscan;
4202  ScanKeyData skey;
4203  HeapTuple htup;
4204  List *oldlist;
4205  MemoryContext oldcxt;
4206 
4207  /* Quick exit if we already computed the list. */
4208  if (relation->rd_fkeyvalid)
4209  return relation->rd_fkeylist;
4210 
4211  /* Fast path: if it doesn't have any triggers, it can't have FKs */
4212  if (!relation->rd_rel->relhastriggers)
4213  return NIL;
4214 
4215  /*
4216  * We build the list we intend to return (in the caller's context) while
4217  * doing the scan. After successfully completing the scan, we copy that
4218  * list into the relcache entry. This avoids cache-context memory leakage
4219  * if we get some sort of error partway through.
4220  */
4221  result = NIL;
4222 
4223  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4224  ScanKeyInit(&skey,
4226  BTEqualStrategyNumber, F_OIDEQ,
4227  ObjectIdGetDatum(RelationGetRelid(relation)));
4228 
4230  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4231  NULL, 1, &skey);
4232 
4233  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4234  {
4235  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4236  ForeignKeyCacheInfo *info;
4237  Datum adatum;
4238  bool isnull;
4239  ArrayType *arr;
4240  int nelem;
4241 
4242  /* consider only foreign keys */
4243  if (constraint->contype != CONSTRAINT_FOREIGN)
4244  continue;
4245 
4246  info = makeNode(ForeignKeyCacheInfo);
4247  info->conrelid = constraint->conrelid;
4248  info->confrelid = constraint->confrelid;
4249 
4250  /* Extract data from conkey field */
4251  adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
4252  conrel->rd_att, &isnull);
4253  if (isnull)
4254  elog(ERROR, "null conkey for rel %s",
4255  RelationGetRelationName(relation));
4256 
4257  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4258  nelem = ARR_DIMS(arr)[0];
4259  if (ARR_NDIM(arr) != 1 ||
4260  nelem < 1 ||
4261  nelem > INDEX_MAX_KEYS ||
4262  ARR_HASNULL(arr) ||
4263  ARR_ELEMTYPE(arr) != INT2OID)
4264  elog(ERROR, "conkey is not a 1-D smallint array");
4265 
4266  info->nkeys = nelem;
4267  memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4268 
4269  /* Likewise for confkey */
4270  adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
4271  conrel->rd_att, &isnull);
4272  if (isnull)
4273  elog(ERROR, "null confkey for rel %s",
4274  RelationGetRelationName(relation));
4275 
4276  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4277  nelem = ARR_DIMS(arr)[0];
4278  if (ARR_NDIM(arr) != 1 ||
4279  nelem != info->nkeys ||
4280  ARR_HASNULL(arr) ||
4281  ARR_ELEMTYPE(arr) != INT2OID)
4282  elog(ERROR, "confkey is not a 1-D smallint array");
4283 
4284  memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4285 
4286  /* Likewise for conpfeqop */
4288  conrel->rd_att, &isnull);
4289  if (isnull)
4290  elog(ERROR, "null conpfeqop for rel %s",
4291  RelationGetRelationName(relation));
4292 
4293  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4294  nelem = ARR_DIMS(arr)[0];
4295  if (ARR_NDIM(arr) != 1 ||
4296  nelem != info->nkeys ||
4297  ARR_HASNULL(arr) ||
4298  ARR_ELEMTYPE(arr) != OIDOID)
4299  elog(ERROR, "conpfeqop is not a 1-D OID array");
4300 
4301  memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
4302 
4303  /* Add FK's node to the result list */
4304  result = lappend(result, info);
4305  }
4306 
4307  systable_endscan(conscan);
4308  heap_close(conrel, AccessShareLock);
4309 
4310  /* Now save a copy of the completed list in the relcache entry. */
4312  oldlist = relation->rd_fkeylist;
4313  relation->rd_fkeylist = copyObject(result);
4314  relation->rd_fkeyvalid = true;
4315  MemoryContextSwitchTo(oldcxt);
4316 
4317  /* Don't leak the old list, if there is one */
4318  list_free_deep(oldlist);
4319 
4320  return result;
4321 }
4322 
4323 /*
4324  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4325  *
4326  * The index list is created only if someone requests it. We scan pg_index
4327  * to find relevant indexes, and add the list to the relcache entry so that
4328  * we won't have to compute it again. Note that shared cache inval of a
4329  * relcache entry will delete the old list and set rd_indexvalid to 0,
4330  * so that we must recompute the index list on next request. This handles
4331  * creation or deletion of an index.
4332  *
4333  * Indexes that are marked not IndexIsLive are omitted from the returned list.
4334  * Such indexes are expected to be dropped momentarily, and should not be
4335  * touched at all by any caller of this function.
4336  *
4337  * The returned list is guaranteed to be sorted in order by OID. This is
4338  * needed by the executor, since for index types that we obtain exclusive
4339  * locks on when updating the index, all backends must lock the indexes in
4340  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4341  * consistent ordering would do, but ordering by OID is easy.
4342  *
4343  * Since shared cache inval causes the relcache's copy of the list to go away,
4344  * we return a copy of the list palloc'd in the caller's context. The caller
4345  * may list_free() the returned list after scanning it. This is necessary
4346  * since the caller will typically be doing syscache lookups on the relevant
4347  * indexes, and syscache lookup could cause SI messages to be processed!
4348  *
4349  * We also update rd_oidindex, which this module treats as effectively part
4350  * of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
4351  * it is the pg_class OID of a unique index on OID when the relation has one,
4352  * and InvalidOid if there is no such index.
4353  *
4354  * In exactly the same way, we update rd_pkindex, which is the OID of the
4355  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4356  * which is the pg_class OID of an index to be used as the relation's
4357  * replication identity index, or InvalidOid if there is no such index.
4358  */
4359 List *
4361 {
4362  Relation indrel;
4363  SysScanDesc indscan;
4364  ScanKeyData skey;
4365  HeapTuple htup;
4366  List *result;
4367  List *oldlist;
4368  char replident = relation->rd_rel->relreplident;
4369  Oid oidIndex = InvalidOid;
4370  Oid pkeyIndex = InvalidOid;
4371  Oid candidateIndex = InvalidOid;
4372  MemoryContext oldcxt;
4373 
4374  /* Quick exit if we already computed the list. */
4375  if (relation->rd_indexvalid != 0)
4376  return list_copy(relation->rd_indexlist);
4377 
4378  /*
4379  * We build the list we intend to return (in the caller's context) while
4380  * doing the scan. After successfully completing the scan, we copy that
4381  * list into the relcache entry. This avoids cache-context memory leakage
4382  * if we get some sort of error partway through.
4383  */
4384  result = NIL;
4385  oidIndex = InvalidOid;
4386 
4387  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4388  ScanKeyInit(&skey,
4390  BTEqualStrategyNumber, F_OIDEQ,
4391  ObjectIdGetDatum(RelationGetRelid(relation)));
4392 
4394  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4395  NULL, 1, &skey);
4396 
4397  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4398  {
4400  Datum indclassDatum;
4401  oidvector *indclass;
4402  bool isnull;
4403 
4404  /*
4405  * Ignore any indexes that are currently being dropped. This will
4406  * prevent them from being searched, inserted into, or considered in
4407  * HOT-safety decisions. It's unsafe to touch such an index at all
4408  * since its catalog entries could disappear at any instant.
4409  */
4410  if (!IndexIsLive(index))
4411  continue;
4412 
4413  /* Add index's OID to result list in the proper order */
4414  result = insert_ordered_oid(result, index->indexrelid);
4415 
4416  /*
4417  * indclass cannot be referenced directly through the C struct,
4418  * because it comes after the variable-width indkey field. Must
4419  * extract the datum the hard way...
4420  */
4421  indclassDatum = heap_getattr(htup,
4424  &isnull);
4425  Assert(!isnull);
4426  indclass = (oidvector *) DatumGetPointer(indclassDatum);
4427 
4428  /*
4429  * Invalid, non-unique, non-immediate or predicate indexes aren't
4430  * interesting for either oid indexes or replication identity indexes,
4431  * so don't check them.
4432  */
4433  if (!IndexIsValid(index) || !index->indisunique ||
4434  !index->indimmediate ||
4436  continue;
4437 
4438  /* Check to see if is a usable btree index on OID */
4439  if (index->indnatts == 1 &&
4440  index->indkey.values[0] == ObjectIdAttributeNumber &&
4441  indclass->values[0] == OID_BTREE_OPS_OID)
4442  oidIndex = index->indexrelid;
4443 
4444  /* remember primary key index if any */
4445  if (index->indisprimary)
4446  pkeyIndex = index->indexrelid;
4447 
4448  /* remember explicitly chosen replica index */
4449  if (index->indisreplident)
4450  candidateIndex = index->indexrelid;
4451  }
4452 
4453  systable_endscan(indscan);
4454 
4455  heap_close(indrel, AccessShareLock);
4456 
4457  /* Now save a copy of the completed list in the relcache entry. */
4459  oldlist = relation->rd_indexlist;
4460  relation->rd_indexlist = list_copy(result);
4461  relation->rd_oidindex = oidIndex;
4462  relation->rd_pkindex = pkeyIndex;
4463  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4464  relation->rd_replidindex = pkeyIndex;
4465  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4466  relation->rd_replidindex = candidateIndex;
4467  else
4468  relation->rd_replidindex = InvalidOid;
4469  relation->rd_indexvalid = 1;
4470  MemoryContextSwitchTo(oldcxt);
4471 
4472  /* Don't leak the old list, if there is one */
4473  list_free(oldlist);
4474 
4475  return result;
4476 }
4477 
4478 /*
4479  * RelationGetStatExtList
4480  * get a list of OIDs of statistics objects on this relation
4481  *
4482  * The statistics list is created only if someone requests it, in a way
4483  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4484  * relevant statistics, and add the list to the relcache entry so that we
4485  * won't have to compute it again. Note that shared cache inval of a
4486  * relcache entry will delete the old list and set rd_statvalid to 0,
4487  * so that we must recompute the statistics list on next request. This
4488  * handles creation or deletion of a statistics object.
4489  *
4490  * The returned list is guaranteed to be sorted in order by OID, although
4491  * this is not currently needed.
4492  *
4493  * Since shared cache inval causes the relcache's copy of the list to go away,
4494  * we return a copy of the list palloc'd in the caller's context. The caller
4495  * may list_free() the returned list after scanning it. This is necessary
4496  * since the caller will typically be doing syscache lookups on the relevant
4497  * statistics, and syscache lookup could cause SI messages to be processed!
4498  */
4499 List *
4501 {
4502  Relation indrel;
4503  SysScanDesc indscan;
4504  ScanKeyData skey;
4505  HeapTuple htup;
4506  List *result;
4507  List *oldlist;
4508  MemoryContext oldcxt;
4509 
4510  /* Quick exit if we already computed the list. */
4511  if (relation->rd_statvalid != 0)
4512  return list_copy(relation->rd_statlist);
4513 
4514  /*
4515  * We build the list we intend to return (in the caller's context) while
4516  * doing the scan. After successfully completing the scan, we copy that
4517  * list into the relcache entry. This avoids cache-context memory leakage
4518  * if we get some sort of error partway through.
4519  */
4520  result = NIL;
4521 
4522  /*
4523  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4524  * rel.
4525  */
4526  ScanKeyInit(&skey,
4528  BTEqualStrategyNumber, F_OIDEQ,
4529  ObjectIdGetDatum(RelationGetRelid(relation)));
4530 
4532  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4533  NULL, 1, &skey);
4534 
4535  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4536  result = insert_ordered_oid(result, HeapTupleGetOid(htup));
4537 
4538  systable_endscan(indscan);
4539 
4540  heap_close(indrel, AccessShareLock);
4541 
4542  /* Now save a copy of the completed list in the relcache entry. */
4544  oldlist = relation->rd_statlist;
4545  relation->rd_statlist = list_copy(result);
4546 
4547  relation->rd_statvalid = true;
4548  MemoryContextSwitchTo(oldcxt);
4549 
4550  /* Don't leak the old list, if there is one */
4551  list_free(oldlist);
4552 
4553  return result;
4554 }
4555 
4556 /*
4557  * insert_ordered_oid
4558  * Insert a new Oid into a sorted list of Oids, preserving ordering
4559  *
4560  * Building the ordered list this way is O(N^2), but with a pretty small
4561  * constant, so for the number of entries we expect it will probably be
4562  * faster than trying to apply qsort(). Most tables don't have very many
4563  * indexes...
4564  */
4565 static List *
4567 {
4568  ListCell *prev;
4569 
4570  /* Does the datum belong at the front? */
4571  if (list == NIL || datum < linitial_oid(list))
4572  return lcons_oid(datum, list);
4573  /* No, so find the entry it belongs after */
4574  prev = list_head(list);
4575  for (;;)
4576  {
4577  ListCell *curr = lnext(prev);
4578 
4579  if (curr == NULL || datum < lfirst_oid(curr))
4580  break; /* it belongs after 'prev', before 'curr' */
4581 
4582  prev = curr;
4583  }
4584  /* Insert datum into list after 'prev' */
4585  lappend_cell_oid(list, prev, datum);
4586  return list;
4587 }
4588 
4589 /*
4590  * RelationSetIndexList -- externally force the index list contents
4591  *
4592  * This is used to temporarily override what we think the set of valid
4593  * indexes is (including the presence or absence of an OID index).
4594  * The forcing will be valid only until transaction commit or abort.
4595  *
4596  * This should only be applied to nailed relations, because in a non-nailed
4597  * relation the hacked index list could be lost at any time due to SI
4598  * messages. In practice it is only used on pg_class (see REINDEX).
4599  *
4600  * It is up to the caller to make sure the given list is correctly ordered.
4601  *
4602  * We deliberately do not change rd_indexattr here: even when operating
4603  * with a temporary partial index list, HOT-update decisions must be made
4604  * correctly with respect to the full index set. It is up to the caller
4605  * to ensure that a correct rd_indexattr set has been cached before first
4606  * calling RelationSetIndexList; else a subsequent inquiry might cause a
4607  * wrong rd_indexattr set to get computed and cached. Likewise, we do not
4608  * touch rd_keyattr, rd_pkattr or rd_idattr.
4609  */
4610 void
4611 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
4612 {
4613  MemoryContext oldcxt;
4614 
4615  Assert(relation->rd_isnailed);
4616  /* Copy the list into the cache context (could fail for lack of mem) */
4618  indexIds = list_copy(indexIds);
4619  MemoryContextSwitchTo(oldcxt);
4620  /* Okay to replace old list */
4621  list_free(relation->rd_indexlist);
4622  relation->rd_indexlist = indexIds;
4623  relation->rd_oidindex = oidIndex;
4624 
4625  /*
4626  * For the moment, assume the target rel hasn't got a pk or replica index.
4627  * We'll load them on demand in the API that wraps access to them.
4628  */
4629  relation->rd_pkindex = InvalidOid;
4630  relation->rd_replidindex = InvalidOid;
4631  relation->rd_indexvalid = 2; /* mark list as forced */
4632  /* Flag relation as needing eoxact cleanup (to reset the list) */
4633  EOXactListAdd(relation);
4634 }
4635 
4636 /*
4637  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
4638  *
4639  * Returns InvalidOid if there is no such index.
4640  */
4641 Oid
4643 {
4644  List *ilist;
4645 
4646  /*
4647  * If relation doesn't have OIDs at all, caller is probably confused. (We
4648  * could just silently return InvalidOid, but it seems better to throw an
4649  * assertion.)
4650  */
4651  Assert(relation->rd_rel->relhasoids);
4652 
4653  if (relation->rd_indexvalid == 0)
4654  {
4655  /* RelationGetIndexList does the heavy lifting. */
4656  ilist = RelationGetIndexList(relation);
4657  list_free(ilist);
4658  Assert(relation->rd_indexvalid != 0);
4659  }
4660 
4661  return relation->rd_oidindex;
4662 }
4663 
4664 /*
4665  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4666  *
4667  * Returns InvalidOid if there is no such index.
4668  */
4669 Oid
4671 {
4672  List *ilist;
4673 
4674  if (relation->rd_indexvalid == 0)
4675  {
4676  /* RelationGetIndexList does the heavy lifting. */
4677  ilist = RelationGetIndexList(relation);
4678  list_free(ilist);
4679  Assert(relation->rd_indexvalid != 0);
4680  }
4681 
4682  return relation->rd_pkindex;
4683 }
4684 
4685 /*
4686  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4687  *
4688  * Returns InvalidOid if there is no such index.
4689  */
4690 Oid
4692 {
4693  List *ilist;
4694 
4695  if (relation->rd_indexvalid == 0)
4696  {
4697  /* RelationGetIndexList does the heavy lifting. */
4698  ilist = RelationGetIndexList(relation);
4699  list_free(ilist);
4700  Assert(relation->rd_indexvalid != 0);
4701  }
4702 
4703  return relation->rd_replidindex;
4704 }
4705 
4706 /*
4707  * RelationGetIndexExpressions -- get the index expressions for an index
4708  *
4709  * We cache the result of transforming pg_index.indexprs into a node tree.
4710  * If the rel is not an index or has no expressional columns, we return NIL.
4711  * Otherwise, the returned tree is copied into the caller's memory context.
4712  * (We don't want to return a pointer to the relcache copy, since it could
4713  * disappear due to relcache invalidation.)
4714  */
4715 List *
4717 {
4718  List *result;
4719  Datum exprsDatum;
4720  bool isnull;
4721  char *exprsString;
4722  MemoryContext oldcxt;
4723 
4724  /* Quick exit if we already computed the result. */
4725  if (relation->rd_indexprs)
4726  return copyObject(relation->rd_indexprs);
4727 
4728  /* Quick exit if there is nothing to do. */
4729  if (relation->rd_indextuple == NULL ||
4731  return NIL;
4732 
4733  /*
4734  * We build the tree we intend to return in the caller's context. After
4735  * successfully completing the work, we copy it into the relcache entry.
4736  * This avoids problems if we get some sort of error partway through.
4737  */
4738  exprsDatum = heap_getattr(relation->rd_indextuple,
4741  &isnull);
4742  Assert(!isnull);
4743  exprsString = TextDatumGetCString(exprsDatum);
4744  result = (List *) stringToNode(exprsString);
4745  pfree(exprsString);
4746 
4747  /*
4748  * Run the expressions through eval_const_expressions. This is not just an
4749  * optimization, but is necessary, because the planner will be comparing
4750  * them to similarly-processed qual clauses, and may fail to detect valid
4751  * matches without this. We don't bother with canonicalize_qual, however.
4752  */
4753  result = (List *) eval_const_expressions(NULL, (Node *) result);
4754 
4755  result = (List *) canonicalize_qual((Expr *) result);
4756 
4757  /* May as well fix opfuncids too */
4758  fix_opfuncids((Node *) result);
4759 
4760  /* Now save a copy of the completed tree in the relcache entry. */
4761  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4762  relation->rd_indexprs = copyObject(result);
4763  MemoryContextSwitchTo(oldcxt);
4764 
4765  return result;
4766 }
4767 
4768 /*
4769  * RelationGetIndexPredicate -- get the index predicate for an index
4770  *
4771  * We cache the result of transforming pg_index.indpred into an implicit-AND
4772  * node tree (suitable for use in planning).
4773  * If the rel is not an index or has no predicate, we return NIL.
4774  * Otherwise, the returned tree is copied into the caller's memory context.
4775  * (We don't want to return a pointer to the relcache copy, since it could
4776  * disappear due to relcache invalidation.)
4777  */
4778 List *
4780 {
4781  List *result;
4782  Datum predDatum;
4783  bool isnull;
4784  char *predString;
4785  MemoryContext oldcxt;
4786 
4787  /* Quick exit if we already computed the result. */
4788  if (relation->rd_indpred)
4789  return copyObject(relation->rd_indpred);
4790 
4791  /* Quick exit if there is nothing to do. */
4792  if (relation->rd_indextuple == NULL ||
4794  return NIL;
4795 
4796  /*
4797  * We build the tree we intend to return in the caller's context. After
4798  * successfully completing the work, we copy it into the relcache entry.
4799  * This avoids problems if we get some sort of error partway through.
4800  */
4801  predDatum = heap_getattr(relation->rd_indextuple,
4804  &isnull);
4805  Assert(!isnull);
4806  predString = TextDatumGetCString(predDatum);
4807  result = (List *) stringToNode(predString);
4808  pfree(predString);
4809 
4810  /*
4811  * Run the expression through const-simplification and canonicalization.
4812  * This is not just an optimization, but is necessary, because the planner
4813  * will be comparing it to similarly-processed qual clauses, and may fail
4814  * to detect valid matches without this. This must match the processing
4815  * done to qual clauses in preprocess_expression()! (We can skip the
4816  * stuff involving subqueries, however, since we don't allow any in index
4817  * predicates.)
4818  */
4819  result = (List *) eval_const_expressions(NULL, (Node *) result);
4820 
4821  result = (List *) canonicalize_qual((Expr *) result);
4822 
4823  /* Also convert to implicit-AND format */
4824  result = make_ands_implicit((Expr *) result);
4825 
4826  /* May as well fix opfuncids too */
4827  fix_opfuncids((Node *) result);
4828 
4829  /* Now save a copy of the completed tree in the relcache entry. */
4830  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4831  relation->rd_indpred = copyObject(result);
4832  MemoryContextSwitchTo(oldcxt);
4833 
4834  return result;
4835 }
4836 
4837 /*
4838  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4839  *
4840  * The result has a bit set for each attribute used anywhere in the index
4841  * definitions of all the indexes on this relation. (This includes not only
4842  * simple index keys, but attributes used in expressions and partial-index
4843  * predicates.)
4844  *
4845  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4846  * for all potential foreign key columns, or for all columns in the configured
4847  * replica identity index is returned.
4848  *
4849  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4850  * we can include system attributes (e.g., OID) in the bitmap representation.
4851  *
4852  * Caller had better hold at least RowExclusiveLock on the target relation
4853  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4854  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4855  * that lock level doesn't guarantee a stable set of indexes, so we have to
4856  * be prepared to retry here in case of a change in the set of indexes.
4857  *
4858  * The returned result is palloc'd in the caller's memory context and should
4859  * be bms_free'd when not needed anymore.
4860  */
4861 Bitmapset *
4863 {
4864  Bitmapset *indexattrs; /* indexed columns */
4865  Bitmapset *uindexattrs; /* columns in unique indexes */
4866  Bitmapset *pkindexattrs; /* columns in the primary index */
4867  Bitmapset *idindexattrs; /* columns in the replica identity */
4868  List *indexoidlist;
4869  List *newindexoidlist;
4870  Oid relpkindex;
4871  Oid relreplindex;
4872  ListCell *l;
4873  MemoryContext oldcxt;
4874 
4875  /* Quick exit if we already computed the result. */
4876  if (relation->rd_indexattr != NULL)
4877  {
4878  switch (attrKind)
4879  {
4880  case INDEX_ATTR_BITMAP_ALL:
4881  return bms_copy(relation->rd_indexattr);
4882  case INDEX_ATTR_BITMAP_KEY:
4883  return bms_copy(relation->rd_keyattr);
4885  return bms_copy(relation->rd_pkattr);
4887  return bms_copy(relation->rd_idattr);
4888  default:
4889  elog(ERROR, "unknown attrKind %u", attrKind);
4890  }
4891  }
4892 
4893  /* Fast path if definitely no indexes */
4894  if (!RelationGetForm(relation)->relhasindex)
4895  return NULL;
4896 
4897  /*
4898  * Get cached list of index OIDs. If we have to start over, we do so here.
4899  */
4900 restart:
4901  indexoidlist = RelationGetIndexList(relation);
4902 
4903  /* Fall out if no indexes (but relhasindex was set) */
4904  if (indexoidlist == NIL)
4905  return NULL;
4906 
4907  /*
4908  * Copy the rd_pkindex and rd_replidindex values computed by
4909  * RelationGetIndexList before proceeding. This is needed because a
4910  * relcache flush could occur inside index_open below, resetting the
4911  * fields managed by RelationGetIndexList. We need to do the work with
4912  * stable values of these fields.
4913  */
4914  relpkindex = relation->rd_pkindex;
4915  relreplindex = relation->rd_replidindex;
4916 
4917  /*
4918  * For each index, add referenced attributes to indexattrs.
4919  *
4920  * Note: we consider all indexes returned by RelationGetIndexList, even if
4921  * they are not indisready or indisvalid. This is important because an
4922  * index for which CREATE INDEX CONCURRENTLY has just started must be
4923  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4924  * CONCURRENTLY is far enough along that we should ignore the index, it
4925  * won't be returned at all by RelationGetIndexList.
4926  */
4927  indexattrs = NULL;
4928  uindexattrs = NULL;
4929  pkindexattrs = NULL;
4930  idindexattrs = NULL;
4931  foreach(l, indexoidlist)
4932  {
4933  Oid indexOid = lfirst_oid(l);
4934  Relation indexDesc;
4935  IndexInfo *indexInfo;
4936  int i;
4937  bool isKey; /* candidate key */
4938  bool isPK; /* primary key */
4939  bool isIDKey; /* replica identity index */
4940 
4941  indexDesc = index_open(indexOid, AccessShareLock);
4942 
4943  /* Extract index key information from the index's pg_index row */
4944  indexInfo = BuildIndexInfo(indexDesc);
4945 
4946  /* Can this index be referenced by a foreign key? */
4947  isKey = indexInfo->ii_Unique &&
4948  indexInfo->ii_Expressions == NIL &&
4949  indexInfo->ii_Predicate == NIL;
4950 
4951  /* Is this a primary key? */
4952  isPK = (indexOid == relpkindex);
4953 
4954  /* Is this index the configured (or default) replica identity? */
4955  isIDKey = (indexOid == relreplindex);
4956 
4957  /* Collect simple attribute references */
4958  for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
4959  {
4960  int attrnum = indexInfo->ii_KeyAttrNumbers[i];
4961 
4962  if (attrnum != 0)
4963  {
4964  indexattrs = bms_add_member(indexattrs,
4966 
4967  if (isKey)
4968  uindexattrs = bms_add_member(uindexattrs,
4970 
4971  if (isPK)
4972  pkindexattrs = bms_add_member(pkindexattrs,
4974 
4975  if (isIDKey)
4976  idindexattrs = bms_add_member(idindexattrs,
4978  }
4979  }
4980 
4981  /* Collect all attributes used in expressions, too */
4982  pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
4983 
4984  /* Collect all attributes in the index predicate, too */
4985  pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
4986 
4987  index_close(indexDesc, AccessShareLock);
4988  }
4989 
4990  /*
4991  * During one of the index_opens in the above loop, we might have received
4992  * a relcache flush event on this relcache entry, which might have been
4993  * signaling a change in the rel's index list. If so, we'd better start
4994  * over to ensure we deliver up-to-date attribute bitmaps.
4995  */
4996  newindexoidlist = RelationGetIndexList(relation);
4997  if (equal(indexoidlist, newindexoidlist) &&
4998  relpkindex == relation->rd_pkindex &&
4999  relreplindex == relation->rd_replidindex)
5000  {
5001  /* Still the same index set, so proceed */
5002  list_free(newindexoidlist);
5003  list_free(indexoidlist);
5004  }
5005  else
5006  {
5007  /* Gotta do it over ... might as well not leak memory */
5008  list_free(newindexoidlist);
5009  list_free(indexoidlist);
5010  bms_free(uindexattrs);
5011  bms_free(pkindexattrs);
5012  bms_free(idindexattrs);
5013  bms_free(indexattrs);
5014 
5015  goto restart;
5016  }
5017 
5018  /* Don't leak the old values of these bitmaps, if any */
5019  bms_free(relation->rd_indexattr);
5020  relation->rd_indexattr = NULL;
5021  bms_free(relation->rd_keyattr);
5022  relation->rd_keyattr = NULL;
5023  bms_free(relation->rd_pkattr);
5024  relation->rd_pkattr = NULL;
5025  bms_free(relation->rd_idattr);
5026  relation->rd_idattr = NULL;
5027 
5028  /*
5029  * Now save copies of the bitmaps in the relcache entry. We intentionally
5030  * set rd_indexattr last, because that's the one that signals validity of
5031  * the values; if we run out of memory before making that copy, we won't
5032  * leave the relcache entry looking like the other ones are valid but
5033  * empty.
5034  */
5036  relation->rd_keyattr = bms_copy(uindexattrs);
5037  relation->rd_pkattr = bms_copy(pkindexattrs);
5038  relation->rd_idattr = bms_copy(idindexattrs);
5039  relation->rd_indexattr = bms_copy(indexattrs);
5040  MemoryContextSwitchTo(oldcxt);
5041 
5042  /* We return our original working copy for caller to play with */
5043  switch (attrKind)
5044  {
5045  case INDEX_ATTR_BITMAP_ALL:
5046  return indexattrs;
5047  case INDEX_ATTR_BITMAP_KEY:
5048  return uindexattrs;
5050  return bms_copy(relation->rd_pkattr);
5052  return idindexattrs;
5053  default:
5054  elog(ERROR, "unknown attrKind %u", attrKind);
5055  return NULL;
5056  }
5057 }
5058 
5059 /*
5060  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5061  *
5062  * This should be called only for an index that is known to have an
5063  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5064  * context) of the exclusion operator OIDs, their underlying functions'
5065  * OIDs, and their strategy numbers in the index's opclasses. We cache
5066  * all this information since it requires a fair amount of work to get.
5067  */
5068 void
5070  Oid **operators,
5071  Oid **procs,
5072  uint16 **strategies)
5073 {
5074  int ncols = indexRelation->rd_rel->relnatts;
5075  Oid *ops;
5076  Oid *funcs;
5077  uint16 *strats;
5078  Relation conrel;
5079  SysScanDesc conscan;
5080  ScanKeyData skey[1];
5081  HeapTuple htup;
5082  bool found;
5083  MemoryContext oldcxt;
5084  int i;
5085 
5086  /* Allocate result space in caller context */
5087  *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
5088  *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
5089  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
5090 
5091  /* Quick exit if we have the data cached already */
5092  if (indexRelation->rd_exclstrats != NULL)
5093  {
5094  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
5095  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
5096  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
5097  return;
5098  }
5099 
5100  /*
5101  * Search pg_constraint for the constraint associated with the index. To
5102  * make this not too painfully slow, we use the index on conrelid; that
5103  * will hold the parent relation's OID not the index's own OID.
5104  */
5105  ScanKeyInit(&skey[0],
5107  BTEqualStrategyNumber, F_OIDEQ,
5108  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5109 
5111  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
5112  NULL, 1, skey);
5113  found = false;
5114 
5115  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5116  {
5118  Datum val;
5119  bool isnull;
5120  ArrayType *arr;
5121  int nelem;
5122 
5123  /* We want the exclusion constraint owning the index */
5124  if (conform->contype != CONSTRAINT_EXCLUSION ||
5125  conform->conindid != RelationGetRelid(indexRelation))
5126  continue;
5127 
5128  /* There should be only one */
5129  if (found)
5130  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5131  RelationGetRelationName(indexRelation));
5132  found = true;
5133 
5134  /* Extract the operator OIDS from conexclop */
5135  val = fastgetattr(htup,
5137  conrel->rd_att, &isnull);
5138  if (isnull)
5139  elog(ERROR, "null conexclop for rel %s",
5140  RelationGetRelationName(indexRelation));
5141 
5142  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5143  nelem = ARR_DIMS(arr)[0];
5144  if (ARR_NDIM(arr) != 1 ||
5145  nelem != ncols ||
5146  ARR_HASNULL(arr) ||
5147  ARR_ELEMTYPE(arr) != OIDOID)
5148  elog(ERROR, "conexclop is not a 1-D Oid array");
5149 
5150  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
5151  }
5152 
5153  systable_endscan(conscan);
5154  heap_close(conrel, AccessShareLock);
5155 
5156  if (!found)
5157  elog(ERROR, "exclusion constraint record missing for rel %s",
5158  RelationGetRelationName(indexRelation));
5159 
5160  /* We need the func OIDs and strategy numbers too */
5161  for (i = 0; i < ncols; i++)
5162  {
5163  funcs[i] = get_opcode(ops[i]);
5164  strats[i] = get_op_opfamily_strategy(ops[i],
5165  indexRelation->rd_opfamily[i]);
5166  /* shouldn't fail, since it was checked at index creation */
5167  if (strats[i] == InvalidStrategy)
5168  elog(ERROR, "could not find strategy for operator %u in family %u",
5169  ops[i], indexRelation->rd_opfamily[i]);
5170  }
5171 
5172  /* Save a copy of the results in the relcache entry. */
5173  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5174  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
5175  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
5176  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
5177  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
5178  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
5179  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
5180  MemoryContextSwitchTo(oldcxt);
5181 }
5182 
5183 /*
5184  * Get publication actions for the given relation.
5185  */
5186 struct PublicationActions *
5188 {
5189  List *puboids;
5190  ListCell *lc;
5191  MemoryContext oldcxt;
5192  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5193 
5194  if (relation->rd_pubactions)
5195  return memcpy(pubactions, relation->rd_pubactions,
5196  sizeof(PublicationActions));
5197 
5198  /* Fetch the publication membership info. */
5199  puboids = GetRelationPublications(RelationGetRelid(relation));
5200  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5201 
5202  foreach(lc, puboids)
5203  {
5204  Oid pubid = lfirst_oid(lc);
5205  HeapTuple tup;
5206  Form_pg_publication pubform;
5207 
5209 
5210  if (!HeapTupleIsValid(tup))
5211  elog(ERROR, "cache lookup failed for publication %u", pubid);
5212 
5213  pubform = (Form_pg_publication) GETSTRUCT(tup);
5214 
5215  pubactions->pubinsert |= pubform->pubinsert;
5216  pubactions->pubupdate |= pubform->pubupdate;
5217  pubactions->pubdelete |= pubform->pubdelete;
5218 
5219  ReleaseSysCache(tup);
5220 
5221  /*
5222  * If we know everything is replicated, there is no point to check for
5223  * other publications.
5224  */
5225  if (pubactions->pubinsert && pubactions->pubupdate &&
5226  pubactions->pubdelete)
5227  break;
5228  }
5229 
5230  if (relation->rd_pubactions)
5231  {
5232  pfree(relation->rd_pubactions);
5233  relation->rd_pubactions = NULL;
5234  }
5235 
5236  /* Now save copy of the actions in the relcache entry. */
5238  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5239  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5240  MemoryContextSwitchTo(oldcxt);
5241 
5242  return pubactions;
5243 }
5244 
5245 /*
5246  * Routines to support ereport() reports of relation-related errors
5247  *
5248  * These could have been put into elog.c, but it seems like a module layering
5249  * violation to have elog.c calling relcache or syscache stuff --- and we
5250  * definitely don't want elog.h including rel.h. So we put them here.
5251  */
5252 
5253 /*
5254  * errtable --- stores schema_name and table_name of a table
5255  * within the current errordata.
5256  */
5257 int
5259 {
5263 
5264  return 0; /* return value does not matter */
5265 }
5266 
5267 /*
5268  * errtablecol --- stores schema_name, table_name and column_name
5269  * of a table column within the current errordata.
5270  *
5271  * The column is specified by attribute number --- for most callers, this is
5272  * easier and less error-prone than getting the column name for themselves.
5273  */
5274 int
5275 errtablecol(Relation rel, int attnum)
5276 {
5278  const char *colname;
5279 
5280  /* Use reldesc if it's a user attribute, else consult the catalogs */
5281  if (attnum > 0 && attnum <= reldesc->natts)
5282  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5283  else
5284  colname = get_relid_attribute_name(RelationGetRelid(rel), attnum);
5285 
5286  return errtablecolname(rel, colname);
5287 }
5288 
5289 /*
5290  * errtablecolname --- stores schema_name, table_name and column_name
5291  * of a table column within the current errordata, where the column name is
5292  * given directly rather than extracted from the relation's catalog data.
5293  *
5294  * Don't use this directly unless errtablecol() is inconvenient for some
5295  * reason. This might possibly be needed during intermediate states in ALTER
5296  * TABLE, for instance.
5297  */
5298 int
5299 errtablecolname(Relation rel, const char *colname)
5300 {
5301  errtable(rel);
5303 
5304  return 0; /* return value does not matter */
5305 }
5306 
5307 /*
5308  * errtableconstraint --- stores schema_name, table_name and constraint_name
5309  * of a table-related constraint within the current errordata.
5310  */
5311 int
5312 errtableconstraint(Relation rel, const char *conname)
5313 {
5314  errtable(rel);
5316 
5317  return 0; /* return value does not matter */
5318 }
5319 
5320 
5321 /*
5322  * load_relcache_init_file, write_relcache_init_file
5323  *
5324  * In late 1992, we started regularly having databases with more than
5325  * a thousand classes in them. With this number of classes, it became
5326  * critical to do indexed lookups on the system catalogs.
5327  *
5328  * Bootstrapping these lookups is very hard. We want to be able to
5329  * use an index on pg_attribute, for example, but in order to do so,
5330  * we must have read pg_attribute for the attributes in the index,
5331  * which implies that we need to use the index.
5332  *
5333  * In order to get around the problem, we do the following:
5334  *
5335  * + When the database system is initialized (at initdb time), we
5336  * don't use indexes. We do sequential scans.
5337  *
5338  * + When the backend is started up in normal mode, we load an image
5339  * of the appropriate relation descriptors, in internal format,
5340  * from an initialization file in the data/base/... directory.
5341  *
5342  * + If the initialization file isn't there, then we create the
5343  * relation descriptors using sequential scans and write 'em to
5344  * the initialization file for use by subsequent backends.
5345  *
5346  * As of Postgres 9.0, there is one local initialization file in each
5347  * database, plus one shared initialization file for shared catalogs.
5348  *
5349  * We could dispense with the initialization files and just build the
5350  * critical reldescs the hard way on every backend startup, but that
5351  * slows down backend startup noticeably.
5352  *
5353  * We can in fact go further, and save more relcache entries than
5354  * just the ones that are absolutely critical; this allows us to speed
5355  * up backend startup by not having to build such entries the hard way.
5356  * Presently, all the catalog and index entries that are referred to
5357  * by catcaches are stored in the initialization files.
5358  *
5359  * The same mechanism that detects when catcache and relcache entries
5360  * need to be invalidated (due to catalog updates) also arranges to
5361  * unlink the initialization files when the contents may be out of date.
5362  * The files will then be rebuilt during the next backend startup.
5363  */
5364 
5365 /*
5366  * load_relcache_init_file -- attempt to load cache from the shared
5367  * or local cache init file
5368  *
5369  * If successful, return true and set criticalRelcachesBuilt or
5370  * criticalSharedRelcachesBuilt to true.
5371  * If not successful, return false.
5372  *
5373  * NOTE: we assume we are already switched into CacheMemoryContext.
5374  */
/*
 * Reads a relcache init file and, only if the whole file validates, inserts
 * every descriptor it contains into the relcache.  `shared` selects the file
 * under "global/" rather than the per-database one.  Returns false if the
 * file cannot be opened, or (via read_failed) on any short read or
 * consistency-check failure; returns true otherwise.
 *
 * NOTE(review): the embedded listing numbers jump from 5375 to 5377 below, so
 * the line carrying the function name and parameter list appears to have been
 * lost in extraction.  `shared` is presumably the sole parameter -- confirm
 * against the real source.
 */
5375 static bool
5377 {
5378  FILE *fp;
5379  char initfilename[MAXPGPATH];
5380  Relation *rels;
5381  int relno,
5382  num_rels,
5383  max_rels,
5384  nailed_rels,
5385  nailed_indexes,
5386  magic;
5387  int i;
5388 
  /*
   * Build the file path.
   * NOTE(review): listing lines 5391 and 5394, which should supply the
   * remaining snprintf arguments for each branch, are missing from this
   * extract.
   */
5389  if (shared)
5390  snprintf(initfilename, sizeof(initfilename), "global/%s",
5392  else
5393  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5395 
  /* A missing/unopenable file is not an error: caller just gets false */
5396  fp = AllocateFile(initfilename, PG_BINARY_R);
5397  if (fp == NULL)
5398  return false;
5399 
5400  /*
5401  * Read the index relcache entries from the file. Note we will not enter
5402  * any of them into the cache if the read fails partway through; this
5403  * helps to guard against broken init files.
5404  */
  /* rels[] collects the descriptors; it is doubled on demand in the loop */
5405  max_rels = 100;
5406  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5407  num_rels = 0;
5408  nailed_rels = nailed_indexes = 0;
5409 
5410  /* check for correct magic number (compatible version) */
5411  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5412  goto read_failed;
5413  if (magic != RELCACHE_INIT_FILEMAGIC)
5414  goto read_failed;
5415 
  /*
   * One iteration per stored relation.  Every item in a record is written
   * as a length word followed by that many bytes, so `len` is reused as the
   * scratch length for each item in turn.
   */
5416  for (relno = 0;; relno++)
5417  {
5418  Size len;
5419  size_t nread;
5420  Relation rel;
5421  Form_pg_class relform;
5422  bool has_not_null;
5423 
5424  /* first read the relation descriptor length */
5425  nread = fread(&len, 1, sizeof(len), fp);
5426  if (nread != sizeof(len))
5427  {
  /* zero bytes at a record boundary is a clean EOF; a partial word is not */
5428  if (nread == 0)
5429  break; /* end of file */
5430  goto read_failed;
5431  }
5432 
5433  /* safety check for incompatible relcache layout */
5434  if (len != sizeof(RelationData))
5435  goto read_failed;
5436 
5437  /* allocate another relcache header */
5438  if (num_rels >= max_rels)
5439  {
5440  max_rels *= 2;
5441  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5442  }
5443 
  /* the new header is recorded in rels[] before its contents are read */
5444  rel = rels[num_rels++] = (Relation) palloc(len);
5445 
5446  /* then, read the Relation structure */
5447  if (fread(rel, 1, len, fp) != len)
5448  goto read_failed;
5449 
5450  /* next read the relation tuple form */
5451  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5452  goto read_failed;
5453 
5454  relform = (Form_pg_class) palloc(len);
5455  if (fread(relform, 1, len, fp) != len)
5456  goto read_failed;
5457 
  /* pointer fields read from the file are stale; replace them one by one */
5458  rel->rd_rel = relform;
5459 
5460  /* initialize attribute tuple forms */
5461  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
5462  relform->relhasoids);
5463  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5464 
5465  rel->rd_att->tdtypeid = relform->reltype;
5466  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5467 
5468  /* next read all the attribute tuple form data entries */
5469  has_not_null = false;
5470  for (i = 0; i < relform->relnatts; i++)
5471  {
5472  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5473 
5474  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5475  goto read_failed;
  /* each stored attribute must be exactly the fixed-size part */
5476  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5477  goto read_failed;
5478  if (fread(attr, 1, len, fp) != len)
5479  goto read_failed;
5480 
5481  has_not_null |= attr->attnotnull;
5482  }
5483 
5484  /* next read the access method specific field */
  /* a zero length word means no rd_options were stored for this relation */
5485  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5486  goto read_failed;
5487  if (len > 0)
5488  {
5489  rel->rd_options = palloc(len);
5490  if (fread(rel->rd_options, 1, len, fp) != len)
5491  goto read_failed;
5492  if (len != VARSIZE(rel->rd_options))
5493  goto read_failed; /* sanity check */
5494  }
5495  else
5496  {
5497  rel->rd_options = NULL;
5498  }
5499 
5500  /* mark not-null status */
5501  if (has_not_null)
5502  {
5503  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5504 
5505  constr->has_not_null = true;
5506  rel->rd_att->constr = constr;
5507  }
5508 
5509  /* If it's an index, there's more to do */
5510  if (rel->rd_rel->relkind == RELKIND_INDEX)
5511  {
5512  MemoryContext indexcxt;
5513  Oid *opfamily;
5514  Oid *opcintype;
5515  RegProcedure *support;
5516  int nsupport;
5517  int16 *indoption;
5518  Oid *indcollation;
5519 
5520  /* Count nailed indexes to ensure we have 'em all */
5521  if (rel->rd_isnailed)
5522  nailed_indexes++;
5523 
5524  /* next, read the pg_index tuple */
5525  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5526  goto read_failed;
5527 
5528  rel->rd_indextuple = (HeapTuple) palloc(len);
5529  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5530  goto read_failed;
5531 
5532  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5533  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
  /*
   * NOTE(review): listing line 5534 is missing here; presumably it points
   * rel->rd_index at the tuple just fixed up -- confirm.
   */
5535 
5536  /*
5537  * prepare index info context --- parameters should match
5538  * RelationInitIndexAccessInfo
5539  */
  /*
   * NOTE(review): listing lines 5540-5542 are missing; they presumably
   * create indexcxt, which is otherwise never assigned in the visible code
   * before being used below -- confirm.
   */
5543  rel->rd_indexcxt = indexcxt;
5544 
5545  /*
5546  * Now we can fetch the index AM's API struct. (We can't store
5547  * that in the init file, since it contains function pointers that
5548  * might vary across server executions. Fortunately, it should be
5549  * safe to call the amhandler even while bootstrapping indexes.)
5550  */
5551  InitIndexAmRoutine(rel);
5552 
  /* the per-index vectors below are all allocated in indexcxt */
5553  /* next, read the vector of opfamily OIDs */
5554  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5555  goto read_failed;
5556 
5557  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5558  if (fread(opfamily, 1, len, fp) != len)
5559  goto read_failed;
5560 
5561  rel->rd_opfamily = opfamily;
5562 
5563  /* next, read the vector of opcintype OIDs */
5564  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5565  goto read_failed;
5566 
5567  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5568  if (fread(opcintype, 1, len, fp) != len)
5569  goto read_failed;
5570 
5571  rel->rd_opcintype = opcintype;
5572 
5573  /* next, read the vector of support procedure OIDs */
5574  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5575  goto read_failed;
5576  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5577  if (fread(support, 1, len, fp) != len)
5578  goto read_failed;
5579 
5580  rel->rd_support = support;
5581 
5582  /* next, read the vector of collation OIDs */
5583  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5584  goto read_failed;
5585 
5586  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5587  if (fread(indcollation, 1, len, fp) != len)
5588  goto read_failed;
5589 
5590  rel->rd_indcollation = indcollation;
5591 
5592  /* finally, read the vector of indoption values */
5593  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5594  goto read_failed;
5595 
5596  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5597  if (fread(indoption, 1, len, fp) != len)
5598  goto read_failed;
5599 
5600  rel->rd_indoption = indoption;
5601 
5602  /* set up zeroed fmgr-info vector */
  /* sized as (number of index columns) x (AM's amsupport count) */
5603  nsupport = relform->relnatts * rel->rd_amroutine->amsupport;
5604  rel->rd_supportinfo = (FmgrInfo *)
5605  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5606  }
5607  else
5608  {
5609  /* Count nailed rels to ensure we have 'em all */
5610  if (rel->rd_isnailed)
5611  nailed_rels++;
5612 
  /* non-index entries are expected to carry no index-only fields */
5613  Assert(rel->rd_index == NULL);
5614  Assert(rel->rd_indextuple == NULL);
5615  Assert(rel->rd_indexcxt == NULL);
5616  Assert(rel->rd_amroutine == NULL);
5617  Assert(rel->rd_opfamily == NULL);
5618  Assert(rel->rd_opcintype == NULL);
5619  Assert(rel->rd_support == NULL);
5620  Assert(rel->rd_supportinfo == NULL);
5621  Assert(rel->rd_indoption == NULL);
5622  Assert(rel->rd_indcollation == NULL);
5623  }
5624 
5625  /*
5626  * Rules and triggers are not saved (mainly because the internal
5627  * format is complex and subject to change). They must be rebuilt if
5628  * needed by RelationCacheInitializePhase3. This is not expected to
5629  * be a big performance hit since few system catalogs have such. Ditto
5630  * for RLS policy data, index expressions, predicates, exclusion info,
5631  * and FDW info.
5632  */
5633  rel->rd_rules = NULL;
5634  rel->rd_rulescxt = NULL;
5635  rel->trigdesc = NULL;
5636  rel->rd_rsdesc = NULL;
5637  rel->rd_partkeycxt = NULL;
5638  rel->rd_partkey = NULL;
5639  rel->rd_pdcxt = NULL;
5640  rel->rd_partdesc = NULL;
5641  rel->rd_partcheck = NIL;
5642  rel->rd_indexprs = NIL;
5643  rel->rd_indpred = NIL;
5644  rel->rd_exclops = NULL;
5645  rel->rd_exclprocs = NULL;
5646  rel->rd_exclstrats = NULL;
5647  rel->rd_fdwroutine = NULL;
5648 
5649  /*
5650  * Reset transient-state fields in the relcache entry
5651  */
5652  rel->rd_smgr = NULL;
  /* nailed entries start with a reference count of 1, all others with 0 */
5653  if (rel->rd_isnailed)
5654  rel->rd_refcnt = 1;
5655  else
5656  rel->rd_refcnt = 0;
5657  rel->rd_indexvalid = 0;
5658  rel->rd_fkeylist = NIL;
5659  rel->rd_fkeyvalid = false;
5660  rel->rd_indexlist = NIL;
5661  rel->rd_oidindex = InvalidOid;
5662  rel->rd_pkindex = InvalidOid;
5663  rel->rd_replidindex = InvalidOid;
5664  rel->rd_indexattr = NULL;
5665  rel->rd_keyattr = NULL;
5666  rel->rd_pkattr = NULL;
5667  rel->rd_idattr = NULL;
5668  rel->rd_pubactions = NULL;
5669  rel->rd_statvalid = false;
5670  rel->rd_statlist = NIL;
  /*
   * NOTE(review): listing lines 5671-5672 are missing here; two further
   * field resets were presumably dropped by the extraction -- confirm.
   */
5673  rel->rd_amcache = NULL;
5674  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5675 
5676  /*
5677  * Recompute lock and physical addressing info. This is needed in
5678  * case the pg_internal.init file was copied from some other database
5679  * by CREATE DATABASE.
5680  */
5681  RelationInitLockInfo(rel);
  /*
   * NOTE(review): listing line 5682 is missing; given the comment above it
   * presumably recomputes the physical addressing info -- confirm.
   */
5683  }
5684 
5685  /*
5686  * We reached the end of the init file without apparent problem. Did we
5687  * get the right number of nailed items? This is a useful crosscheck in
5688  * case the set of critical rels or indexes changes. However, that should
5689  * not happen in a normally-running system, so let's bleat if it does.
5690  *
5691  * For the shared init file, we're called before client authentication is
5692  * done, which means that elog(WARNING) will go only to the postmaster
5693  * log, where it's easily missed. To ensure that developers notice bad
5694  * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5695  * an Assert(false) there.
5696  */
5697  if (shared)
5698  {
5699  if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5700  nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5701  {
  /*
   * NOTE(review): listing line 5704, which should carry the last two
   * arguments for the "expected %d and %d" placeholders, is missing.
   */
5702  elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5703  nailed_rels, nailed_indexes,
5705  /* Make sure we get developers' attention about this */
5706  Assert(false);
5707  /* In production builds, recover by bootstrapping the relcache */
5708  goto read_failed;
5709  }
5710  }
5711  else
5712  {
5713  if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5714  nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5715  {
  /*
   * NOTE(review): listing line 5718, which should carry the last two
   * arguments for the "expected %d and %d" placeholders, is missing.
   */
5716  elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5717  nailed_rels, nailed_indexes,
5719  /* We don't need an Assert() in this case */
5720  goto read_failed;
5721  }
5722  }
5723 
5724  /*
5725  * OK, all appears well.
5726  *
5727  * Now insert all the new relcache entries into the cache.
5728  */
5729  for (relno = 0; relno < num_rels; relno++)
5730  {
5731  RelationCacheInsert(rels[relno], false);
5732  }
5733 
  /* only the pointer array is freed; the entries now belong to the cache */
5734  pfree(rels);
5735  FreeFile(fp);
5736 
5737  if (shared)
  /*
   * NOTE(review): listing line 5738 (the statement controlled by this `if`)
   * is missing; by symmetry with the else branch it presumably sets
   * criticalSharedRelcachesBuilt to true -- confirm.
   */
5739  else
5740  criticalRelcachesBuilt = true;
5741  return true;
5742 
5743  /*
5744  * init file is broken, so do it the hard way. We don't bother trying to
5745  * free the clutter we just allocated; it's not in the relcache so it
5746  * won't hurt.
5747  */
5748 read_failed:
5749  pfree(rels);
5750  FreeFile(fp);
5751 
5752  return false;
5753 }
5754 
5755 /*
5756  * Write out a new initialization file with the current contents
5757  * of the relcache (either shared rels or local rels, as indicated).
5758  */
5759 static void
5761 {
5762  FILE *fp;
5763  char tempfilename[MAXPGPATH];
5764  char finalfilename[MAXPGPATH];
5765  int magic;
5767  RelIdCacheEnt *idhentry;
5768  int i;
5769 
5770  /*
5771  * If we have already received any relcache inval events, there's no
5772  * chance of succeeding so we may as well skip the whole thing.
5773  */
5774  if (relcacheInvalsReceived != 0L)
5775  return;
5776 
5777  /*
5778  * We must write a temporary file and rename it into place. Otherwise,
5779  * another backend starting at about the same time might crash trying to
5780  * read the partially-complete file.
5781  */
5782  if (shared)
5783  {
5784  snprintf(tempfilename, sizeof(tempfilename), &qu