PostgreSQL Source Code  git master
cluster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * cluster.c
4  * CLUSTER a table on an index. This is now also used for VACUUM FULL.
5  *
6  * There is hardly anything left of Paul Brown's original implementation...
7  *
8  *
9  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994-5, Regents of the University of California
11  *
12  *
13  * IDENTIFICATION
14  * src/backend/commands/cluster.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/amapi.h"
21 #include "access/heapam.h"
22 #include "access/multixact.h"
23 #include "access/relscan.h"
24 #include "access/tableam.h"
25 #include "access/toast_internals.h"
26 #include "access/transam.h"
27 #include "access/xact.h"
28 #include "access/xlog.h"
29 #include "catalog/catalog.h"
30 #include "catalog/dependency.h"
31 #include "catalog/heap.h"
32 #include "catalog/index.h"
33 #include "catalog/namespace.h"
34 #include "catalog/objectaccess.h"
35 #include "catalog/pg_am.h"
36 #include "catalog/toasting.h"
37 #include "commands/cluster.h"
38 #include "commands/defrem.h"
39 #include "commands/progress.h"
40 #include "commands/tablecmds.h"
41 #include "commands/vacuum.h"
42 #include "miscadmin.h"
43 #include "optimizer/optimizer.h"
44 #include "pgstat.h"
45 #include "storage/bufmgr.h"
46 #include "storage/lmgr.h"
47 #include "storage/predicate.h"
48 #include "utils/acl.h"
49 #include "utils/fmgroids.h"
50 #include "utils/inval.h"
51 #include "utils/lsyscache.h"
52 #include "utils/memutils.h"
53 #include "utils/pg_rusage.h"
54 #include "utils/relmapper.h"
55 #include "utils/snapmgr.h"
56 #include "utils/syscache.h"
57 #include "utils/tuplesort.h"
58 
59 /*
60  * This struct is used to pass around the information on tables to be
61  * clustered. We need this so we can make a list of them when invoked without
62  * a specific table/index pair.
63  */
/*
 * NOTE(review): the embedded listing numbers jump from 65 to 68, so the
 * member declarations of this struct are not visible here.  Code further down
 * reads rvtc->tableOid and rvtc->indexOid, so the struct presumably carries at
 * least those two members -- confirm against the upstream source.
 */
64 typedef struct
65 {
68 } RelToCluster;
69 
70 
/* Prototypes for helpers that are private (static) to this file. */
71 static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
72 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
73  bool verbose, bool *pSwapToastByContent,
74  TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
75 static List *get_tables_to_cluster(MemoryContext cluster_context);
76 
77 
78 /*---------------------------------------------------------------------------
79  * This cluster code allows for clustering multiple tables at once. Because
80  * of this, we cannot just run everything on a single transaction, or we
81  * would be forced to acquire exclusive locks on all the tables being
82  * clustered, simultaneously --- very likely leading to deadlock.
83  *
84  * To solve this we follow a similar strategy to VACUUM code,
85  * clustering each relation in a separate transaction. For this to work,
86  * we need to:
87  * - provide a separate memory context so that we can pass information in
88  * a way that survives across transactions
89  * - start a new transaction every time a new relation is clustered
90  * - check for validity of the information on to-be-clustered relations,
91  * as someone might have deleted a relation behind our back, or
92  * clustered one on a different index
93  * - end the transaction
94  *
95  * The single-relation case does not have any such overhead.
96  *
97  * We also allow a relation to be specified without index. In that case,
98  * the indisclustered bit will be looked up, and an ERROR will be thrown
99  * if there is no index with the bit set.
100  *---------------------------------------------------------------------------
101  */
/*
 * cluster: entry point for the CLUSTER statement.
 *
 * pstate is used only to attach an error position to an unrecognized option;
 * stmt supplies the option list, the optional target relation and the
 * optional index name; isTopLevel is forwarded to PreventInTransactionBlock
 * in the multi-relation case.
 *
 * NOTE(review): this listing omits several source lines inside the function
 * (embedded numbers 135, 137, 221, 230-231, 240, 242, 247-248 and 252 are
 * absent).  The gaps are flagged inline below; the code as shown will not
 * compile until they are restored from the upstream source.
 */
102 void
103 cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
104 {
105  ListCell *lc;
106  ClusterParams params = {0};
107  bool verbose = false;
108
109  /* Parse option list */
110  foreach(lc, stmt->params)
111  {
112  DefElem *opt = (DefElem *) lfirst(lc);
113
114  if (strcmp(opt->defname, "verbose") == 0)
115  verbose = defGetBoolean(opt);
116  else
117  ereport(ERROR,
118  (errcode(ERRCODE_SYNTAX_ERROR),
119  errmsg("unrecognized CLUSTER option \"%s\"",
120  opt->defname),
121  parser_errposition(pstate, opt->location)));
122  }
123
  /* VERBOSE is the only option recognized above; translate it to a flag bit */
124  params.options = (verbose ? CLUOPT_VERBOSE : 0);
125
126  if (stmt->relation != NULL)
127  {
128  /* This is the single-relation case. */
129  Oid tableOid,
130  indexOid = InvalidOid;
131  Relation rel;
132
133  /* Find, lock, and check permissions on the table */
134  tableOid = RangeVarGetRelidExtended(stmt->relation,
  /* NOTE(review): listing line 135 (an argument of this call) is absent */
136  0,
  /* NOTE(review): listing line 137 (the remaining arguments and the closing parenthesis) is absent */
138  rel = table_open(tableOid, NoLock);
139
140  /*
141  * Reject clustering a remote temp table ... their local buffer
142  * manager is not going to cope.
143  */
144  if (RELATION_IS_OTHER_TEMP(rel))
145  ereport(ERROR,
146  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
147  errmsg("cannot cluster temporary tables of other sessions")));
148
149  /*
150  * Reject clustering a partitioned table.
151  */
152  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
153  ereport(ERROR,
154  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
155  errmsg("cannot cluster a partitioned table")));
156
157  if (stmt->indexname == NULL)
158  {
159  ListCell *index;
160
161  /* We need to find the index that has indisclustered set. */
162  foreach(index, RelationGetIndexList(rel))
163  {
164  indexOid = lfirst_oid(index);
165  if (get_index_isclustered(indexOid))
166  break;
  /* not the clustered index: reset so a fall-through leaves InvalidOid */
167  indexOid = InvalidOid;
168  }
169
170  if (!OidIsValid(indexOid))
171  ereport(ERROR,
172  (errcode(ERRCODE_UNDEFINED_OBJECT),
173  errmsg("there is no previously clustered index for table \"%s\"",
174  stmt->relation->relname)));
175  }
176  else
177  {
178  /*
179  * The index is expected to be in the same namespace as the
180  * relation.
181  */
182  indexOid = get_relname_relid(stmt->indexname,
183  rel->rd_rel->relnamespace);
184  if (!OidIsValid(indexOid))
185  ereport(ERROR,
186  (errcode(ERRCODE_UNDEFINED_OBJECT),
187  errmsg("index \"%s\" for table \"%s\" does not exist",
188  stmt->indexname, stmt->relation->relname)));
189  }
190
191  /* close relation, keep lock till commit */
192  table_close(rel, NoLock);
193
194  /* Do the job. */
195  cluster_rel(tableOid, indexOid, &params);
196  }
197  else
198  {
199  /*
200  * This is the "multi relation" case. We need to cluster all tables
201  * that have some index with indisclustered set.
202  */
203  MemoryContext cluster_context;
204  List *rvs;
205  ListCell *rv;
206
207  /*
208  * We cannot run this form of CLUSTER inside a user transaction block;
209  * we'd be holding locks way too long.
210  */
211  PreventInTransactionBlock(isTopLevel, "CLUSTER");
212
213  /*
214  * Create special memory context for cross-transaction storage.
215  *
216  * Since it is a child of PortalContext, it will go away even in case
217  * of error.
218  */
219  cluster_context = AllocSetContextCreate(PortalContext,
220  "Cluster",
  /* NOTE(review): listing line 221 (final argument(s) and end of this call) is absent */
222
223  /*
224  * Build the list of relations to cluster. Note that this lives in
225  * cluster_context.
226  */
227  rvs = get_tables_to_cluster(cluster_context);
228
229  /* Commit to get out of starting transaction */
  /* NOTE(review): listing lines 230-231, the statements this comment describes, are absent */
232
233  /* Ok, now that we've got them all, cluster them one by one */
234  foreach(rv, rvs)
235  {
236  RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
  /* per-relation copy, so the RECHECK bit set below never leaks into params */
237  ClusterParams cluster_params = params;
238
239  /* Start a new transaction for each relation. */
  /* NOTE(review): listing line 240 (the statement for the comment above) is absent */
241  /* functions in indexes may want a snapshot set */
  /* NOTE(review): listing line 242 (the statement for the comment above) is absent */
243  /* Do the job. */
244  cluster_params.options |= CLUOPT_RECHECK;
245  cluster_rel(rvtc->tableOid, rvtc->indexOid,
246  &cluster_params);
  /* NOTE(review): listing lines 247-248 are absent; presumably the per-relation transaction is ended here -- confirm upstream */
249  }
250
251  /* Start a new transaction for the cleanup work. */
  /* NOTE(review): listing line 252 (the statement for the comment above) is absent */
253
254  /* Clean up working storage */
255  MemoryContextDelete(cluster_context);
256  }
257 }
258 
259 /*
260  * cluster_rel
261  *
262  * This clusters the table by creating a new, clustered table and
263  * swapping the relfilenodes of the new table and the old table, so
264  * the OID of the original table is preserved. Thus we do not lose
265  * GRANT, inheritance nor references to this table (this was a bug
266  * in releases through 7.3).
267  *
268  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
269  * the new table, it's better to create the indexes afterwards than to fill
270  * them incrementally while we load the table.
271  *
272  * If indexOid is InvalidOid, the table will be rewritten in physical order
273  * instead of index order. This is the new implementation of VACUUM FULL,
274  * and error messages should refer to the operation as VACUUM not CLUSTER.
275  */
/*
 * Parameters, as used by the visible code:
 *   tableOid - relation to rewrite; opened with AccessExclusiveLock.
 *   indexOid - index to order by; InvalidOid selects the VACUUM-flavoured
 *              path (different error wording, no index checks).
 *   params   - option bits; CLUOPT_VERBOSE and CLUOPT_RECHECK are read.
 *
 * The function returns without doing any work when the table no longer
 * exists, when one of the "recheck" tests fails, or when the target is an
 * unpopulated materialized view.
 *
 * NOTE(review): this listing omits many source lines inside the function
 * (embedded numbers 284, 286, 288-289, 291-292, 305, 321-323, 336-338, 347,
 * 349-350, 359-360, 413-414, 424 and 430-431 are absent).  Several visible
 * "if"/"else" lines therefore have no body and some braces open without a
 * visible condition; restore the gaps from upstream before compiling.
 */
276 void
277 cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
278 {
279  Relation OldHeap;
280  bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
281  bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
282
283  /* Check for user-requested abort. */
  /* NOTE(review): listing line 284 (the statement for the comment above) is absent */
285
  /* NOTE(review): listing lines 286, 288-289 and 291-292 are absent, leaving this if/else without bodies */
287  if (OidIsValid(indexOid))
290  else
293
294  /*
295  * We grab exclusive access to the target rel and index for the duration
296  * of the transaction. (This is redundant for the single-transaction
297  * case, since cluster() already did it.) The index lock is taken inside
298  * check_index_is_clusterable.
299  */
300  OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
301
302  /* If the table has gone away, we can skip processing it */
303  if (!OldHeap)
304  {
  /* NOTE(review): listing line 305 is absent */
306  return;
307  }
308
309  /*
310  * Since we may open a new transaction for each relation, we have to check
311  * that the relation still is what we think it is.
312  *
313  * If this is a single-transaction CLUSTER, we can skip these tests. We
314  * *must* skip the one on indisclustered since it would reject an attempt
315  * to cluster a not-previously-clustered index.
316  */
317  if (recheck)
318  {
319  /* Check that the user still owns the relation */
320  if (!pg_class_ownercheck(tableOid, GetUserId()))
321  {
  /* NOTE(review): listing lines 322-323 are absent */
324  return;
325  }
326
327  /*
328  * Silently skip a temp table for a remote session. Only doing this
329  * check in the "recheck" case is appropriate (which currently means
330  * somebody is executing a database-wide CLUSTER), because there is
331  * another check in cluster() which will stop any attempt to cluster
332  * remote temp tables by name. There is another check in cluster_rel
333  * which is redundant, but we leave it for extra safety.
334  */
335  if (RELATION_IS_OTHER_TEMP(OldHeap))
336  {
  /* NOTE(review): listing lines 337-338 are absent */
339  return;
340  }
341
342  if (OidIsValid(indexOid))
343  {
344  /*
345  * Check that the index still exists
346  */
  /* NOTE(review): listing line 347 (the condition guarding the block below) is absent */
348  {
  /* NOTE(review): listing lines 349-350 are absent */
351  return;
352  }
353
354  /*
355  * Check that the index is still the one with indisclustered set.
356  */
357  if (!get_index_isclustered(indexOid))
358  {
  /* NOTE(review): listing lines 359-360 are absent */
361  return;
362  }
363  }
364  }
365
366  /*
367  * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
368  * would work in most respects, but the index would only get marked as
369  * indisclustered in the current database, leading to unexpected behavior
370  * if CLUSTER were later invoked in another database.
371  */
372  if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
373  ereport(ERROR,
374  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
375  errmsg("cannot cluster a shared catalog")));
376
377  /*
378  * Don't process temp tables of other backends ... their local buffer
379  * manager is not going to cope.
380  */
381  if (RELATION_IS_OTHER_TEMP(OldHeap))
382  {
  /* same condition, two wordings: a valid indexOid means the caller is CLUSTER */
383  if (OidIsValid(indexOid))
384  ereport(ERROR,
385  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
386  errmsg("cannot cluster temporary tables of other sessions")));
387  else
388  ereport(ERROR,
389  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
390  errmsg("cannot vacuum temporary tables of other sessions")));
391  }
392
393  /*
394  * Also check for active uses of the relation in the current transaction,
395  * including open scans and pending AFTER trigger events.
396  */
397  CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
398
399  /* Check heap and index are valid to cluster on */
400  if (OidIsValid(indexOid))
401  check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
402
403  /*
404  * Quietly ignore the request if this is a materialized view which has not
405  * been populated from its query. No harm is done because there is no data
406  * to deal with, and we don't want to throw an error if this is part of a
407  * multi-relation request -- for example, CLUSTER was run on the entire
408  * database.
409  */
410  if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
411  !RelationIsPopulated(OldHeap))
412  {
  /* NOTE(review): listing lines 413-414 are absent */
415  return;
416  }
417
418  /*
419  * All predicate locks on the tuples or pages are about to be made
420  * invalid, because we move tuples around. Promote them to relation
421  * locks. Predicate locks on indexes will be promoted when they are
422  * reindexed.
423  */
  /* NOTE(review): listing line 424 (the statement for the comment above) is absent */
425
426  /* rebuild_relation does all the dirty work */
427  rebuild_relation(OldHeap, indexOid, verbose);
428
429  /* NB: rebuild_relation does table_close() on OldHeap */
430
  /* NOTE(review): listing line 431 is absent */
432 }
433 
434 /*
435  * Verify that the specified heap and index are valid to cluster on
436  *
437  * Side effect: obtains lock on the index. The caller may
438  * in some cases already have AccessExclusiveLock on the table, but
439  * not in all cases so we can't rely on the table-level lock for
440  * protection here.
441  */
442 void
443 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
444 {
445  Relation OldIndex;
446 
447  OldIndex = index_open(indexOid, lockmode);
448 
449  /*
450  * Check that index is in fact an index on the given relation
451  */
452  if (OldIndex->rd_index == NULL ||
453  OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
454  ereport(ERROR,
455  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
456  errmsg("\"%s\" is not an index for table \"%s\"",
457  RelationGetRelationName(OldIndex),
458  RelationGetRelationName(OldHeap))));
459 
460  /* Index AM must allow clustering */
461  if (!OldIndex->rd_indam->amclusterable)
462  ereport(ERROR,
463  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
464  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
465  RelationGetRelationName(OldIndex))));
466 
467  /*
468  * Disallow clustering on incomplete indexes (those that might not index
469  * every row of the relation). We could relax this by making a separate
470  * seqscan pass over the table to copy the missing rows, but that seems
471  * expensive and tedious.
472  */
473  if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
474  ereport(ERROR,
475  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
476  errmsg("cannot cluster on partial index \"%s\"",
477  RelationGetRelationName(OldIndex))));
478 
479  /*
480  * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
481  * it might well not contain entries for every heap row, or might not even
482  * be internally consistent. (But note that we don't check indcheckxmin;
483  * the worst consequence of following broken HOT chains would be that we
484  * might put recently-dead tuples out-of-order in the new table, and there
485  * is little harm in that.)
486  */
487  if (!OldIndex->rd_index->indisvalid)
488  ereport(ERROR,
489  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
490  errmsg("cannot cluster on invalid index \"%s\"",
491  RelationGetRelationName(OldIndex))));
492 
493  /* Drop relcache refcnt on OldIndex, but keep lock */
494  index_close(OldIndex, NoLock);
495 }
496 
497 /*
498  * mark_index_clustered: mark the specified index as the one clustered on
499  *
500  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
501  */
502 void
503 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
504 {
505  HeapTuple indexTuple;
506  Form_pg_index indexForm;
507  Relation pg_index;
508  ListCell *index;
509 
510  /* Disallow applying to a partitioned table */
511  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
512  ereport(ERROR,
513  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
514  errmsg("cannot mark index clustered in partitioned table")));
515 
516  /*
517  * If the index is already marked clustered, no need to do anything.
518  */
519  if (OidIsValid(indexOid))
520  {
521  if (get_index_isclustered(indexOid))
522  return;
523  }
524 
525  /*
526  * Check each index of the relation and set/clear the bit as needed.
527  */
528  pg_index = table_open(IndexRelationId, RowExclusiveLock);
529 
530  foreach(index, RelationGetIndexList(rel))
531  {
532  Oid thisIndexOid = lfirst_oid(index);
533 
534  indexTuple = SearchSysCacheCopy1(INDEXRELID,
535  ObjectIdGetDatum(thisIndexOid));
536  if (!HeapTupleIsValid(indexTuple))
537  elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
538  indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
539 
540  /*
541  * Unset the bit if set. We know it's wrong because we checked this
542  * earlier.
543  */
544  if (indexForm->indisclustered)
545  {
546  indexForm->indisclustered = false;
547  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
548  }
549  else if (thisIndexOid == indexOid)
550  {
551  /* this was checked earlier, but let's be real sure */
552  if (!indexForm->indisvalid)
553  elog(ERROR, "cannot cluster on invalid index %u", indexOid);
554  indexForm->indisclustered = true;
555  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
556  }
557 
558  InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
559  InvalidOid, is_internal);
560 
561  heap_freetuple(indexTuple);
562  }
563 
564  table_close(pg_index, RowExclusiveLock);
565 }
566 
567 /*
568  * rebuild_relation: rebuild an existing relation in index or physical order
569  *
570  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
571  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
572  *
573  * NB: this routine closes OldHeap at the right time; caller should not.
574  */
/*
 * Steps performed by the visible code: mark the index clustered (when one is
 * given), remember persistence and system-catalog status, close OldHeap
 * while keeping its lock, create the transient heap, copy the data into it,
 * and hand off to finish_heap_swap.
 *
 * verbose is only passed through to copy_table_data.
 *
 * NOTE(review): listing line 601 (the last argument of the make_new_heap
 * call and its closing parenthesis) is absent from this listing.
 */
575 static void
576 rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
577 {
578  Oid tableOid = RelationGetRelid(OldHeap);
579  Oid tableSpace = OldHeap->rd_rel->reltablespace;
580  Oid OIDNewHeap;
581  char relpersistence;
582  bool is_system_catalog;
583  bool swap_toast_by_content;
584  TransactionId frozenXid;
585  MultiXactId cutoffMulti;
586
587  /* Mark the correct index as clustered */
588  if (OidIsValid(indexOid))
589  mark_index_clustered(OldHeap, indexOid, true);
590
591  /* Remember info about rel before closing OldHeap */
592  relpersistence = OldHeap->rd_rel->relpersistence;
593  is_system_catalog = IsSystemRelation(OldHeap);
594
595  /* Close relcache entry, but keep lock until transaction commit */
596  table_close(OldHeap, NoLock);
597
598  /* Create the transient table that will receive the re-ordered data */
599  OIDNewHeap = make_new_heap(tableOid, tableSpace,
600  relpersistence,
  /* NOTE(review): listing line 601 (the lockmode argument and end of call) is absent */
602
603  /* Copy the heap data into the new table in the desired order */
  /* the three output arguments are filled in by copy_table_data and consumed below */
604  copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
605  &swap_toast_by_content, &frozenXid, &cutoffMulti);
606
607  /*
608  * Swap the physical files of the target and transient tables, then
609  * rebuild the target's indexes and throw away the transient table.
610  */
611  finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
612  swap_toast_by_content, false, true,
613  frozenXid, cutoffMulti,
614  relpersistence);
615 }
616 
617 
618 /*
619  * Create the transient table that will be filled with new data during
620  * CLUSTER, ALTER TABLE, and similar operations. The transient table
621  * duplicates the logical structure of the OldHeap, but is placed in
622  * NewTableSpace which might be different from OldHeap's. Also, it's built
623  * with the specified persistence, which might differ from the original's.
624  *
625  * After this, the caller should load the new heap with transferred/modified
626  * data, then call finish_heap_swap to complete the operation.
627  */
/*
 * Parameters, as used by the visible code:
 *   OIDOldHeap     - relation whose structure is duplicated; opened with
 *                    lockmode and closed again without releasing the lock.
 *   NewTableSpace  - tablespace passed to heap_create_with_catalog.
 *   relpersistence - persistence of the new heap; RELPERSISTENCE_TEMP makes
 *                    the new heap go into the "pg_temp" creation namespace,
 *                    anything else uses the old heap's namespace.
 *   lockmode       - lock taken on the old heap and passed on to
 *                    NewHeapCreateToastTable.
 *
 * Returns the OID of the newly created heap.
 *
 * NOTE(review): listing lines 696 (one argument of heap_create_with_catalog)
 * and 711 (the statement following the "Advance command counter" comment) are
 * absent from this listing.
 */
628 Oid
629 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
630  LOCKMODE lockmode)
631 {
632  TupleDesc OldHeapDesc;
633  char NewHeapName[NAMEDATALEN];
634  Oid OIDNewHeap;
635  Oid toastid;
636  Relation OldHeap;
637  HeapTuple tuple;
638  Datum reloptions;
639  bool isNull;
640  Oid namespaceid;
641
642  OldHeap = table_open(OIDOldHeap, lockmode);
643  OldHeapDesc = RelationGetDescr(OldHeap);
644
645  /*
646  * Note that the NewHeap will not receive any of the defaults or
647  * constraints associated with the OldHeap; we don't need 'em, and there's
648  * no reason to spend cycles inserting them into the catalogs only to
649  * delete them.
650  */
651
652  /*
653  * But we do want to use reloptions of the old heap for new heap.
654  */
655  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
656  if (!HeapTupleIsValid(tuple))
657  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
658  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
659  &isNull);
660  if (isNull)
661  reloptions = (Datum) 0;
662
663  if (relpersistence == RELPERSISTENCE_TEMP)
664  namespaceid = LookupCreationNamespace("pg_temp");
665  else
666  namespaceid = RelationGetNamespace(OldHeap);
667
668  /*
669  * Create the new heap, using a temporary name in the same namespace as
670  * the existing table. NOTE: there is some risk of collision with user
671  * relnames. Working around this seems more trouble than it's worth; in
672  * particular, we can't create the new heap in a different namespace from
673  * the old, or we will have problems with the TEMP status of temp tables.
674  *
675  * Note: the new heap is not a shared relation, even if we are rebuilding
676  * a shared rel. However, we do make the new heap mapped if the source is
677  * mapped. This simplifies swap_relation_files, and is absolutely
678  * necessary for rebuilding pg_class, for reasons explained there.
679  */
680  snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
681
682  OIDNewHeap = heap_create_with_catalog(NewHeapName,
683  namespaceid,
684  NewTableSpace,
685  InvalidOid,
686  InvalidOid,
687  InvalidOid,
688  OldHeap->rd_rel->relowner,
689  OldHeap->rd_rel->relam,
690  OldHeapDesc,
691  NIL,
692  RELKIND_RELATION,
693  relpersistence,
694  false,
695  RelationIsMapped(OldHeap),
  /* NOTE(review): listing line 696 (one argument of this call) is absent */
697  reloptions,
698  false,
699  true,
700  true,
701  OIDOldHeap,
702  NULL);
703  Assert(OIDNewHeap != InvalidOid);
704
  /* the reloptions Datum was still needed by the call above, so release only now */
705  ReleaseSysCache(tuple);
706
707  /*
708  * Advance command counter so that the newly-created relation's catalog
709  * tuples will be visible to table_open.
710  */
  /* NOTE(review): listing line 711 (the statement for the comment above) is absent */
712
713  /*
714  * If necessary, create a TOAST table for the new relation.
715  *
716  * If the relation doesn't have a TOAST table already, we can't need one
717  * for the new relation. The other way around is possible though: if some
718  * wide columns have been dropped, NewHeapCreateToastTable can decide that
719  * no TOAST table is needed for the new table.
720  *
721  * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
722  * that the TOAST table will be visible for insertion.
723  */
724  toastid = OldHeap->rd_rel->reltoastrelid;
725  if (OidIsValid(toastid))
726  {
727  /* keep the existing toast table's reloptions, if any */
728  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
729  if (!HeapTupleIsValid(tuple))
730  elog(ERROR, "cache lookup failed for relation %u", toastid);
731  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
732  &isNull);
733  if (isNull)
734  reloptions = (Datum) 0;
735
736  NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode);
737
738  ReleaseSysCache(tuple);
739  }
740
  /* drop only the relcache reference; the lock taken at the top stays held */
741  table_close(OldHeap, NoLock);
742
743  return OIDNewHeap;
744 }
745 
746 /*
747  * Do the physical copying of table data.
748  *
749  * There are three output parameters:
750  * *pSwapToastByContent is set true if toast tables must be swapped by content.
751  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
752  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
753  */
/*
 * Input parameters, as used by the visible code:
 *   OIDNewHeap  - transient heap that receives the data; its pg_class
 *                 relpages/reltuples are updated at the end.
 *   OIDOldHeap  - heap being copied.
 *   OIDOldIndex - index that dictates the order, or InvalidOid for none.
 *   verbose     - selects INFO instead of DEBUG2 for the progress messages.
 *
 * NOTE(review): this listing omits several source lines inside the function
 * (embedded numbers 765-766, 888, 894, 899, 953 and 960 are absent).
 * oldTupDesc and newTupDesc are used below without a visible declaration, so
 * they were presumably declared on lines 765-766; each of the three
 * "%s.%s" messages shows one argument fewer than its format needs; the final
 * "else" has no visible body.  Restore the gaps from upstream before
 * compiling.
 */
754 static void
755 copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
756  bool *pSwapToastByContent, TransactionId *pFreezeXid,
757  MultiXactId *pCutoffMulti)
758 {
759  Relation NewHeap,
760  OldHeap,
761  OldIndex;
762  Relation relRelation;
763  HeapTuple reltup;
764  Form_pg_class relform;
  /* NOTE(review): listing lines 765-766 are absent */
767  TransactionId OldestXmin;
768  TransactionId FreezeXid;
769  MultiXactId MultiXactCutoff;
770  bool use_sort;
771  double num_tuples = 0,
772  tups_vacuumed = 0,
773  tups_recently_dead = 0;
774  BlockNumber num_pages;
775  int elevel = verbose ? INFO : DEBUG2;
776  PGRUsage ru0;
777
  /* start the resource-usage clock that is reported in the final log message */
778  pg_rusage_init(&ru0);
779
780  /*
781  * Open the relations we need.
782  */
783  NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
784  OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
785  if (OidIsValid(OIDOldIndex))
786  OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
787  else
788  OldIndex = NULL;
789
790  /*
791  * Their tuple descriptors should be exactly alike, but here we only need
792  * assume that they have the same number of columns.
793  */
794  oldTupDesc = RelationGetDescr(OldHeap);
795  newTupDesc = RelationGetDescr(NewHeap);
796  Assert(newTupDesc->natts == oldTupDesc->natts);
797
798  /*
799  * If the OldHeap has a toast table, get lock on the toast table to keep
800  * it from being vacuumed. This is needed because autovacuum processes
801  * toast tables independently of their main tables, with no lock on the
802  * latter. If an autovacuum were to start on the toast table after we
803  * compute our OldestXmin below, it would use a later OldestXmin, and then
804  * possibly remove as DEAD toast tuples belonging to main tuples we think
805  * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
806  * tuples.
807  *
808  * We don't need to open the toast relation here, just lock it. The lock
809  * will be held till end of transaction.
810  */
811  if (OldHeap->rd_rel->reltoastrelid)
812  LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
813
814  /*
815  * If both tables have TOAST tables, perform toast swap by content. It is
816  * possible that the old table has a toast table but the new one doesn't,
817  * if toastable columns have been dropped. In that case we have to do
818  * swap by links. This is okay because swap by content is only essential
819  * for system catalogs, and we don't support schema changes for them.
820  */
821  if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
822  {
823  *pSwapToastByContent = true;
824
825  /*
826  * When doing swap by content, any toast pointers written into NewHeap
827  * must use the old toast table's OID, because that's where the toast
828  * data will eventually be found. Set this up by setting rd_toastoid.
829  * This also tells toast_save_datum() to preserve the toast value
830  * OIDs, which we want so as not to invalidate toast pointers in
831  * system catalog caches, and to avoid making multiple copies of a
832  * single toast value.
833  *
834  * Note that we must hold NewHeap open until we are done writing data,
835  * since the relcache will not guarantee to remember this setting once
836  * the relation is closed. Also, this technique depends on the fact
837  * that no one will try to read from the NewHeap until after we've
838  * finished writing it and swapping the rels --- otherwise they could
839  * follow the toast pointers to the wrong place. (It would actually
840  * work for values copied over from the old toast table, but not for
841  * any values that we toast which were previously not toasted.)
842  */
843  NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
844  }
845  else
846  *pSwapToastByContent = false;
847
848  /*
849  * Compute xids used to freeze and weed out dead tuples and multixacts.
850  * Since we're going to rewrite the whole table anyway, there's no reason
851  * not to be aggressive about this.
852  */
853  vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
854  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
855  NULL);
856
857  /*
858  * FreezeXid will become the table's new relfrozenxid, and that mustn't go
859  * backwards, so take the max.
860  */
861  if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
862  TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
863  FreezeXid = OldHeap->rd_rel->relfrozenxid;
864
865  /*
866  * MultiXactCutoff, similarly, shouldn't go backwards either.
867  */
868  if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
869  MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
870  MultiXactCutoff = OldHeap->rd_rel->relminmxid;
871
872  /*
873  * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
874  * the OldHeap. We know how to use a sort to duplicate the ordering of a
875  * btree index, and will use seqscan-and-sort for that case if the planner
876  * tells us it's cheaper. Otherwise, always indexscan if an index is
877  * provided, else plain seqscan.
878  */
879  if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
880  use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
881  else
882  use_sort = false;
883
884  /* Log what we're doing */
885  if (OldIndex != NULL && !use_sort)
886  ereport(elevel,
887  (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
  /* NOTE(review): listing line 888 (first argument for the format above) is absent */
889  RelationGetRelationName(OldHeap),
890  RelationGetRelationName(OldIndex))));
891  else if (use_sort)
892  ereport(elevel,
893  (errmsg("clustering \"%s.%s\" using sequential scan and sort",
  /* NOTE(review): listing line 894 (first argument for the format above) is absent */
895  RelationGetRelationName(OldHeap))));
896  else
897  ereport(elevel,
898  (errmsg("vacuuming \"%s.%s\"",
  /* NOTE(review): listing line 899 (first argument for the format above) is absent */
900  RelationGetRelationName(OldHeap))));
901
902  /*
903  * Hand off the actual copying to AM specific function, the generic code
904  * cannot know how to deal with visibility across AMs. Note that this
905  * routine is allowed to set FreezeXid / MultiXactCutoff to different
906  * values (e.g. because the AM doesn't use freezing).
907  */
908  table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
909  OldestXmin, &FreezeXid, &MultiXactCutoff,
910  &num_tuples, &tups_vacuumed,
911  &tups_recently_dead);
912
913  /* return selected values to caller, get set as relfrozenxid/minmxid */
914  *pFreezeXid = FreezeXid;
915  *pCutoffMulti = MultiXactCutoff;
916
917  /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
918  NewHeap->rd_toastoid = InvalidOid;
919
  /* size of the rewritten heap; stored into pg_class.relpages further down */
920  num_pages = RelationGetNumberOfBlocks(NewHeap);
921
922  /* Log what we did */
923  ereport(elevel,
924  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
925  RelationGetRelationName(OldHeap),
926  tups_vacuumed, num_tuples,
927  RelationGetNumberOfBlocks(OldHeap)),
928  errdetail("%.0f dead row versions cannot be removed yet.\n"
929  "%s.",
930  tups_recently_dead,
931  pg_rusage_show(&ru0))));
932
  /* release the relcache references only; all locks are kept (NoLock) */
933  if (OldIndex != NULL)
934  index_close(OldIndex, NoLock);
935  table_close(OldHeap, NoLock);
936  table_close(NewHeap, NoLock);
937
938  /* Update pg_class to reflect the correct values of pages and tuples. */
939  relRelation = table_open(RelationRelationId, RowExclusiveLock);
940
941  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
942  if (!HeapTupleIsValid(reltup))
943  elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
944  relform = (Form_pg_class) GETSTRUCT(reltup);
945
946  relform->relpages = num_pages;
947  relform->reltuples = num_tuples;
948
949  /* Don't update the stats for pg_class. See swap_relation_files. */
950  if (OIDOldHeap != RelationRelationId)
951  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
952  else
  /* NOTE(review): listing line 953 (the body of this else branch) is absent */
954
955  /* Clean up. */
956  heap_freetuple(reltup);
957  table_close(relRelation, RowExclusiveLock);
958
959  /* Make the update visible */
  /* NOTE(review): listing line 960 (the statement for the comment above) is absent */
961 }
962 
963 /*
964  * Swap the physical files of two given relations.
965  *
966  * We swap the physical identity (reltablespace, relfilenode) while keeping the
967  * same logical identities of the two relations. relpersistence is also
968  * swapped, which is critical since it determines where buffers live for each
969  * relation.
970  *
971  * We can swap associated TOAST data in either of two ways: recursively swap
972  * the physical content of the toast tables (and their indexes), or swap the
973  * TOAST links in the given relations' pg_class entries. The former is needed
974  * to manage rewrites of shared catalogs (where we cannot change the pg_class
975  * links) while the latter is the only way to handle cases in which a toast
976  * table is added or removed altogether.
977  *
978  * Additionally, the first relation is marked with relfrozenxid set to
979  * frozenXid. It seems a bit ugly to have this here, but the caller would
980  * have to do it anyway, so having it here saves a heap_update. Note: in
981  * the swap-toast-links case, we assume we don't need to change the toast
982  * table's relfrozenxid: the new version of the toast table should already
983  * have relfrozenxid set to RecentXmin, which is good enough.
984  *
985  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
986  * their OIDs are emitted into mapped_tables[]. This is hacky but beats
987  * having to look the information up again later in finish_heap_swap.
988  */
989 static void
990 swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
991  bool swap_toast_by_content,
992  bool is_internal,
993  TransactionId frozenXid,
994  MultiXactId cutoffMulti,
995  Oid *mapped_tables)
996 {
997  Relation relRelation;
998  HeapTuple reltup1,
999  reltup2;
1000  Form_pg_class relform1,
1001  relform2;
1002  Oid relfilenode1,
1003  relfilenode2;
1004  Oid swaptemp;
1005  char swptmpchr;
1006 
1007  /* We need writable copies of both pg_class tuples. */
1008  relRelation = table_open(RelationRelationId, RowExclusiveLock);
1009 
1011  if (!HeapTupleIsValid(reltup1))
1012  elog(ERROR, "cache lookup failed for relation %u", r1);
1013  relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1014 
1016  if (!HeapTupleIsValid(reltup2))
1017  elog(ERROR, "cache lookup failed for relation %u", r2);
1018  relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1019 
1020  relfilenode1 = relform1->relfilenode;
1021  relfilenode2 = relform2->relfilenode;
1022 
1023  if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
1024  {
1025  /*
1026  * Normal non-mapped relations: swap relfilenodes, reltablespaces,
1027  * relpersistence
1028  */
1029  Assert(!target_is_pg_class);
1030 
1031  swaptemp = relform1->relfilenode;
1032  relform1->relfilenode = relform2->relfilenode;
1033  relform2->relfilenode = swaptemp;
1034 
1035  swaptemp = relform1->reltablespace;
1036  relform1->reltablespace = relform2->reltablespace;
1037  relform2->reltablespace = swaptemp;
1038 
1039  swptmpchr = relform1->relpersistence;
1040  relform1->relpersistence = relform2->relpersistence;
1041  relform2->relpersistence = swptmpchr;
1042 
1043  /* Also swap toast links, if we're swapping by links */
1044  if (!swap_toast_by_content)
1045  {
1046  swaptemp = relform1->reltoastrelid;
1047  relform1->reltoastrelid = relform2->reltoastrelid;
1048  relform2->reltoastrelid = swaptemp;
1049  }
1050  }
1051  else
1052  {
1053  /*
1054  * Mapped-relation case. Here we have to swap the relation mappings
1055  * instead of modifying the pg_class columns. Both must be mapped.
1056  */
1057  if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
1058  elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1059  NameStr(relform1->relname));
1060 
1061  /*
1062  * We can't change the tablespace nor persistence of a mapped rel, and
1063  * we can't handle toast link swapping for one either, because we must
1064  * not apply any critical changes to its pg_class row. These cases
1065  * should be prevented by upstream permissions tests, so these checks
1066  * are non-user-facing emergency backstop.
1067  */
1068  if (relform1->reltablespace != relform2->reltablespace)
1069  elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1070  NameStr(relform1->relname));
1071  if (relform1->relpersistence != relform2->relpersistence)
1072  elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1073  NameStr(relform1->relname));
1074  if (!swap_toast_by_content &&
1075  (relform1->reltoastrelid || relform2->reltoastrelid))
1076  elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1077  NameStr(relform1->relname));
1078 
1079  /*
1080  * Fetch the mappings --- shouldn't fail, but be paranoid
1081  */
1082  relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
1083  if (!OidIsValid(relfilenode1))
1084  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1085  NameStr(relform1->relname), r1);
1086  relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
1087  if (!OidIsValid(relfilenode2))
1088  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1089  NameStr(relform2->relname), r2);
1090 
1091  /*
1092  * Send replacement mappings to relmapper. Note these won't actually
1093  * take effect until CommandCounterIncrement.
1094  */
1095  RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
1096  RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
1097 
1098  /* Pass OIDs of mapped r2 tables back to caller */
1099  *mapped_tables++ = r2;
1100  }
1101 
1102  /*
1103  * Recognize that rel1's relfilenode (swapped from rel2) is new in this
1104  * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1105  * new.
1106  */
1107  {
1108  Relation rel1,
1109  rel2;
1110 
1111  rel1 = relation_open(r1, NoLock);
1112  rel2 = relation_open(r2, NoLock);
1113  rel2->rd_createSubid = rel1->rd_createSubid;
1117  relation_close(rel1, NoLock);
1118  relation_close(rel2, NoLock);
1119  }
1120 
1121  /*
1122  * In the case of a shared catalog, these next few steps will only affect
1123  * our own database's pg_class row; but that's okay, because they are all
1124  * noncritical updates. That's also an important fact for the case of a
1125  * mapped catalog, because it's possible that we'll commit the map change
1126  * and then fail to commit the pg_class update.
1127  */
1128 
1129  /* set rel1's frozen Xid and minimum MultiXid */
1130  if (relform1->relkind != RELKIND_INDEX)
1131  {
1132  Assert(!TransactionIdIsValid(frozenXid) ||
1133  TransactionIdIsNormal(frozenXid));
1134  relform1->relfrozenxid = frozenXid;
1135  relform1->relminmxid = cutoffMulti;
1136  }
1137 
1138  /* swap size statistics too, since new rel has freshly-updated stats */
1139  {
1140  int32 swap_pages;
1141  float4 swap_tuples;
1142  int32 swap_allvisible;
1143 
1144  swap_pages = relform1->relpages;
1145  relform1->relpages = relform2->relpages;
1146  relform2->relpages = swap_pages;
1147 
1148  swap_tuples = relform1->reltuples;
1149  relform1->reltuples = relform2->reltuples;
1150  relform2->reltuples = swap_tuples;
1151 
1152  swap_allvisible = relform1->relallvisible;
1153  relform1->relallvisible = relform2->relallvisible;
1154  relform2->relallvisible = swap_allvisible;
1155  }
1156 
1157  /*
1158  * Update the tuples in pg_class --- unless the target relation of the
1159  * swap is pg_class itself. In that case, there is zero point in making
1160  * changes because we'd be updating the old data that we're about to throw
1161  * away. Because the real work being done here for a mapped relation is
1162  * just to change the relation map settings, it's all right to not update
1163  * the pg_class rows in this case. The most important changes will instead
1164  * performed later, in finish_heap_swap() itself.
1165  */
1166  if (!target_is_pg_class)
1167  {
1168  CatalogIndexState indstate;
1169 
1170  indstate = CatalogOpenIndexes(relRelation);
1171  CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1172  indstate);
1173  CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1174  indstate);
1175  CatalogCloseIndexes(indstate);
1176  }
1177  else
1178  {
1179  /* no update ... but we do still need relcache inval */
1182  }
1183 
1184  /*
1185  * Post alter hook for modified relations. The change to r2 is always
1186  * internal, but r1 depends on the invocation context.
1187  */
1188  InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1189  InvalidOid, is_internal);
1190  InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1191  InvalidOid, true);
1192 
1193  /*
1194  * If we have toast tables associated with the relations being swapped,
1195  * deal with them too.
1196  */
1197  if (relform1->reltoastrelid || relform2->reltoastrelid)
1198  {
1199  if (swap_toast_by_content)
1200  {
1201  if (relform1->reltoastrelid && relform2->reltoastrelid)
1202  {
1203  /* Recursively swap the contents of the toast tables */
1204  swap_relation_files(relform1->reltoastrelid,
1205  relform2->reltoastrelid,
1206  target_is_pg_class,
1207  swap_toast_by_content,
1208  is_internal,
1209  frozenXid,
1210  cutoffMulti,
1211  mapped_tables);
1212  }
1213  else
1214  {
1215  /* caller messed up */
1216  elog(ERROR, "cannot swap toast files by content when there's only one");
1217  }
1218  }
1219  else
1220  {
1221  /*
1222  * We swapped the ownership links, so we need to change dependency
1223  * data to match.
1224  *
1225  * NOTE: it is possible that only one table has a toast table.
1226  *
1227  * NOTE: at present, a TOAST table's only dependency is the one on
1228  * its owning table. If more are ever created, we'd need to use
1229  * something more selective than deleteDependencyRecordsFor() to
1230  * get rid of just the link we want.
1231  */
1232  ObjectAddress baseobject,
1233  toastobject;
1234  long count;
1235 
1236  /*
1237  * We disallow this case for system catalogs, to avoid the
1238  * possibility that the catalog we're rebuilding is one of the
1239  * ones the dependency changes would change. It's too late to be
1240  * making any data changes to the target catalog.
1241  */
1242  if (IsSystemClass(r1, relform1))
1243  elog(ERROR, "cannot swap toast files by links for system catalogs");
1244 
1245  /* Delete old dependencies */
1246  if (relform1->reltoastrelid)
1247  {
1248  count = deleteDependencyRecordsFor(RelationRelationId,
1249  relform1->reltoastrelid,
1250  false);
1251  if (count != 1)
1252  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1253  count);
1254  }
1255  if (relform2->reltoastrelid)
1256  {
1257  count = deleteDependencyRecordsFor(RelationRelationId,
1258  relform2->reltoastrelid,
1259  false);
1260  if (count != 1)
1261  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1262  count);
1263  }
1264 
1265  /* Register new dependencies */
1266  baseobject.classId = RelationRelationId;
1267  baseobject.objectSubId = 0;
1268  toastobject.classId = RelationRelationId;
1269  toastobject.objectSubId = 0;
1270 
1271  if (relform1->reltoastrelid)
1272  {
1273  baseobject.objectId = r1;
1274  toastobject.objectId = relform1->reltoastrelid;
1275  recordDependencyOn(&toastobject, &baseobject,
1277  }
1278 
1279  if (relform2->reltoastrelid)
1280  {
1281  baseobject.objectId = r2;
1282  toastobject.objectId = relform2->reltoastrelid;
1283  recordDependencyOn(&toastobject, &baseobject,
1285  }
1286  }
1287  }
1288 
1289  /*
1290  * If we're swapping two toast tables by content, do the same for their
1291  * valid index. The swap can actually be safely done only if the relations
1292  * have indexes.
1293  */
1294  if (swap_toast_by_content &&
1295  relform1->relkind == RELKIND_TOASTVALUE &&
1296  relform2->relkind == RELKIND_TOASTVALUE)
1297  {
1298  Oid toastIndex1,
1299  toastIndex2;
1300 
1301  /* Get valid index for each relation */
1302  toastIndex1 = toast_get_valid_index(r1,
1304  toastIndex2 = toast_get_valid_index(r2,
1306 
1307  swap_relation_files(toastIndex1,
1308  toastIndex2,
1309  target_is_pg_class,
1310  swap_toast_by_content,
1311  is_internal,
1314  mapped_tables);
1315  }
1316 
1317  /* Clean up. */
1318  heap_freetuple(reltup1);
1319  heap_freetuple(reltup2);
1320 
1321  table_close(relRelation, RowExclusiveLock);
1322 
1323  /*
1324  * Close both relcache entries' smgr links. We need this kluge because
1325  * both links will be invalidated during upcoming CommandCounterIncrement.
1326  * Whichever of the rels is the second to be cleared will have a dangling
1327  * reference to the other's smgr entry. Rather than trying to avoid this
1328  * by ordering operations just so, it's easiest to close the links first.
1329  * (Fortunately, since one of the entries is local in our transaction,
1330  * it's sufficient to clear out our own relcache this way; the problem
1331  * cannot arise for other backends when they see our update on the
1332  * non-transient relation.)
1333  *
1334  * Caution: the placement of this step interacts with the decision to
1335  * handle toast rels by recursion. When we are trying to rebuild pg_class
1336  * itself, the smgr close on pg_class must happen after all accesses in
1337  * this function.
1338  */
1341 }
1342 
1343 /*
1344  * Remove the transient table that was built by make_new_heap, and finish
1345  * cleaning up (including rebuilding all indexes on the old heap).
1346  */
1347 void
1348 finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1349  bool is_system_catalog,
1350  bool swap_toast_by_content,
1351  bool check_constraints,
1352  bool is_internal,
1353  TransactionId frozenXid,
1354  MultiXactId cutoffMulti,
1355  char newrelpersistence)
1356 {
1357  ObjectAddress object;
1358  Oid mapped_tables[4];
1359  int reindex_flags;
1360  ReindexParams reindex_params = {0};
1361  int i;
1362 
1363  /* Report that we are now swapping relation files */
1366 
1367  /* Zero out possible results from swapped_relation_files */
1368  memset(mapped_tables, 0, sizeof(mapped_tables));
1369 
1370  /*
1371  * Swap the contents of the heap relations (including any toast tables).
1372  * Also set old heap's relfrozenxid to frozenXid.
1373  */
1374  swap_relation_files(OIDOldHeap, OIDNewHeap,
1375  (OIDOldHeap == RelationRelationId),
1376  swap_toast_by_content, is_internal,
1377  frozenXid, cutoffMulti, mapped_tables);
1378 
1379  /*
1380  * If it's a system catalog, queue a sinval message to flush all catcaches
1381  * on the catalog when we reach CommandCounterIncrement.
1382  */
1383  if (is_system_catalog)
1384  CacheInvalidateCatalog(OIDOldHeap);
1385 
1386  /*
1387  * Rebuild each index on the relation (but not the toast table, which is
1388  * all-new at this point). It is important to do this before the DROP
1389  * step because if we are processing a system catalog that will be used
1390  * during DROP, we want to have its indexes available. There is no
1391  * advantage to the other order anyway because this is all transactional,
1392  * so no chance to reclaim disk space before commit. We do not need a
1393  * final CommandCounterIncrement() because reindex_relation does it.
1394  *
1395  * Note: because index_build is called via reindex_relation, it will never
1396  * set indcheckxmin true for the indexes. This is OK even though in some
1397  * sense we are building new indexes rather than rebuilding existing ones,
1398  * because the new heap won't contain any HOT chains at all, let alone
1399  * broken ones, so it can't be necessary to set indcheckxmin.
1400  */
1401  reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1402  if (check_constraints)
1403  reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1404 
1405  /*
1406  * Ensure that the indexes have the same persistence as the parent
1407  * relation.
1408  */
1409  if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1410  reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1411  else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1412  reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1413 
1414  /* Report that we are now reindexing relations */
1417 
1418  reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);
1419 
1420  /* Report that we are now doing clean up */
1423 
1424  /*
1425  * If the relation being rebuilt is pg_class, swap_relation_files()
1426  * couldn't update pg_class's own pg_class entry (check comments in
1427  * swap_relation_files()), thus relfrozenxid was not updated. That's
1428  * annoying because a potential reason for doing a VACUUM FULL is a
1429  * imminent or actual anti-wraparound shutdown. So, now that we can
1430  * access the new relation using its indices, update relfrozenxid.
1431  * pg_class doesn't have a toast relation, so we don't need to update the
1432  * corresponding toast relation. Not that there's little point moving all
1433  * relfrozenxid updates here since swap_relation_files() needs to write to
1434  * pg_class for non-mapped relations anyway.
1435  */
1436  if (OIDOldHeap == RelationRelationId)
1437  {
1438  Relation relRelation;
1439  HeapTuple reltup;
1440  Form_pg_class relform;
1441 
1442  relRelation = table_open(RelationRelationId, RowExclusiveLock);
1443 
1444  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1445  if (!HeapTupleIsValid(reltup))
1446  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1447  relform = (Form_pg_class) GETSTRUCT(reltup);
1448 
1449  relform->relfrozenxid = frozenXid;
1450  relform->relminmxid = cutoffMulti;
1451 
1452  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1453 
1454  table_close(relRelation, RowExclusiveLock);
1455  }
1456 
1457  /* Destroy new heap with old filenode */
1458  object.classId = RelationRelationId;
1459  object.objectId = OIDNewHeap;
1460  object.objectSubId = 0;
1461 
1462  /*
1463  * The new relation is local to our transaction and we know nothing
1464  * depends on it, so DROP_RESTRICT should be OK.
1465  */
1467 
1468  /* performDeletion does CommandCounterIncrement at end */
1469 
1470  /*
1471  * Now we must remove any relation mapping entries that we set up for the
1472  * transient table, as well as its toast table and toast index if any. If
1473  * we fail to do this before commit, the relmapper will complain about new
1474  * permanent map entries being added post-bootstrap.
1475  */
1476  for (i = 0; OidIsValid(mapped_tables[i]); i++)
1477  RelationMapRemoveMapping(mapped_tables[i]);
1478 
1479  /*
1480  * At this point, everything is kosher except that, if we did toast swap
1481  * by links, the toast table's name corresponds to the transient table.
1482  * The name is irrelevant to the backend because it's referenced by OID,
1483  * but users looking at the catalogs could be confused. Rename it to
1484  * prevent this problem.
1485  *
1486  * Note no lock required on the relation, because we already hold an
1487  * exclusive lock on it.
1488  */
1489  if (!swap_toast_by_content)
1490  {
1491  Relation newrel;
1492 
1493  newrel = table_open(OIDOldHeap, NoLock);
1494  if (OidIsValid(newrel->rd_rel->reltoastrelid))
1495  {
1496  Oid toastidx;
1497  char NewToastName[NAMEDATALEN];
1498 
1499  /* Get the associated valid index to be renamed */
1500  toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1501  NoLock);
1502 
1503  /* rename the toast table ... */
1504  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1505  OIDOldHeap);
1506  RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1507  NewToastName, true, false);
1508 
1509  /* ... and its valid index too. */
1510  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1511  OIDOldHeap);
1512 
1513  RenameRelationInternal(toastidx,
1514  NewToastName, true, true);
1515  }
1516  relation_close(newrel, NoLock);
1517  }
1518 
1519  /* if it's not a catalog table, clear any missing attribute settings */
1520  if (!is_system_catalog)
1521  {
1522  Relation newrel;
1523 
1524  newrel = table_open(OIDOldHeap, NoLock);
1525  RelationClearMissing(newrel);
1526  relation_close(newrel, NoLock);
1527  }
1528 }
1529 
1530 
1531 /*
1532  * Get a list of tables that the current user owns and
1533  * have indisclustered set. Return the list in a List * of RelToCluster
1534  * (stored in the specified memory context), each one giving the tableOid
1535  * and the indexOid on which the table is already clustered.
1536  */
1537 static List *
1539 {
1540  Relation indRelation;
1541  TableScanDesc scan;
1542  ScanKeyData entry;
1543  HeapTuple indexTuple;
1545  MemoryContext old_context;
1546  RelToCluster *rvtc;
1547  List *rvs = NIL;
1548 
1549  /*
1550  * Get all indexes that have indisclustered set and are owned by
1551  * appropriate user.
1552  */
1553  indRelation = table_open(IndexRelationId, AccessShareLock);
1554  ScanKeyInit(&entry,
1555  Anum_pg_index_indisclustered,
1556  BTEqualStrategyNumber, F_BOOLEQ,
1557  BoolGetDatum(true));
1558  scan = table_beginscan_catalog(indRelation, 1, &entry);
1559  while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1560  {
1561  index = (Form_pg_index) GETSTRUCT(indexTuple);
1562 
1563  if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1564  continue;
1565 
1566  /*
1567  * We have to build the list in a different memory context so it will
1568  * survive the cross-transaction processing
1569  */
1570  old_context = MemoryContextSwitchTo(cluster_context);
1571 
1572  rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1573  rvtc->tableOid = index->indrelid;
1574  rvtc->indexOid = index->indexrelid;
1575  rvs = lappend(rvs, rvtc);
1576 
1577  MemoryContextSwitchTo(old_context);
1578  }
1579  table_endscan(scan);
1580 
1581  relation_close(indRelation, AccessShareLock);
1582 
1583  return rvs;
1584 }
List * params
Definition: parsenodes.h:3312
#define RelationIsPopulated(relation)
Definition: rel.h:631
#define NIL
Definition: pg_list.h:65
struct IndexAmRoutine * rd_indam
Definition: rel.h:201
void RangeVarCallbackOwnsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Definition: tablecmds.c:16347
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
#define AllocSetContextCreate
Definition: memutils.h:173
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
Definition: planner.c:5733
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
bool IsSystemRelation(Relation relation)
Definition: catalog.c:73
void cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
Definition: cluster.c:103
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
Definition: cluster.c:1348
uint32 TransactionId
Definition: c.h:587
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
#define RelationGetDescr(relation)
Definition: rel.h:495
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:478
Oid LookupCreationNamespace(const char *nspname)
Definition: namespace.c:2935
void CommitTransactionCommand(void)
Definition: xact.c:2939
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
Definition: pg_depend.c:232
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
int errcode(int sqlerrcode)
Definition: elog.c:698
#define PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES
Definition: progress.h:69
#define INFO
Definition: elog.h:33
SubTransactionId rd_newRelfilenodeSubid
Definition: rel.h:103
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
Definition: heaptuple.c:359
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:943
#define CLUOPT_VERBOSE
Definition: cluster.h:24
uint32 BlockNumber
Definition: block.h:31
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
Definition: pg_depend.c:43
#define REINDEX_REL_SUPPRESS_INDEX_USE
Definition: index.h:156
Form_pg_class rd_rel
Definition: rel.h:109
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:710
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
Definition: objectaccess.h:178
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
void pgstat_progress_end_command(void)
Oid tableOid
Definition: cluster.c:66
signed int int32
Definition: c.h:429
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
Definition: catalog.c:85
struct HeapTupleData * rd_indextuple
Definition: rel.h:189
MemoryContext PortalContext
Definition: mcxt.c:57
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
Definition: type.h:89
#define NAMEDATALEN
char * relname
Definition: primnodes.h:68
bool defGetBoolean(DefElem *def)
Definition: define.c:111
Form_pg_index rd_index
Definition: rel.h:187
#define SearchSysCacheExists1(cacheId, key1)
Definition: syscache.h:184
char * indexname
Definition: parsenodes.h:3311
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
Definition: cluster.c:576
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:48
ItemPointerData t_self
Definition: htup.h:65
#define PROGRESS_CLUSTER_COMMAND_VACUUM_FULL
Definition: progress.h:75
Oid get_relname_relid(const char *relname, Oid relnamespace)
Definition: lsyscache.c:1856
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
#define DEBUG2
Definition: elog.h:24
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3316
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
#define NoLock
Definition: lockdefs.h:34
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1340
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
Oid rd_toastoid
Definition: rel.h:246
int location
Definition: parsenodes.h:749
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:1042
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3379
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:312
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:503
void cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
Definition: cluster.c:277
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:236
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define PROGRESS_CLUSTER_COMMAND_CLUSTER
Definition: progress.h:74
void RelationClearMissing(Relation rel)
Definition: heap.c:2082
void CheckTableNotInUse(Relation rel, const char *stmt)
Definition: tablecmds.c:3929
static List * get_tables_to_cluster(MemoryContext cluster_context)
Definition: cluster.c:1538
#define PROGRESS_CLUSTER_COMMAND
Definition: progress.h:55
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition: predicate.c:3146
List * lappend(List *list, void *datum)
Definition: list.c:336
static int verbose
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:324
#define RelationIsMapped(relation)
Definition: rel.h:518
FormData_pg_index * Form_pg_index
Definition: pg_index.h:69
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Definition: cluster.c:503
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1127
float float4
Definition: c.h:564
SubTransactionId rd_createSubid
Definition: rel.h:102
static int elevel
Definition: vacuumlazy.c:400
void RelationAssumeNewRelfilenode(Relation relation)
Definition: relcache.c:3746
SubTransactionId rd_firstRelfilenodeSubid
Definition: rel.h:105
Oid RelationMapOidToFilenode(Oid relationId, bool shared)
Definition: relmapper.c:159
uintptr_t Datum
Definition: postgres.h:411
void CommandCounterIncrement(void)
Definition: xact.c:1021
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1388
#define InvalidMultiXactId
Definition: multixact.h:24
bool amclusterable
Definition: amapi.h:238
bits32 options
Definition: cluster.h:29
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:213
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
Definition: cluster.c:990
#define BoolGetDatum(X)
Definition: postgres.h:446
#define InvalidOid
Definition: postgres_ext.h:36
#define ereport(elevel,...)
Definition: elog.h:157
void RelationCloseSmgrByOid(Oid relationId)
Definition: relcache.c:2940
TransactionId MultiXactId
Definition: c.h:597
void CacheInvalidateCatalog(Oid catalogId)
Definition: inval.c:1254
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
Definition: index.h:158
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
void RelationMapRemoveMapping(Oid relationId)
Definition: relmapper.c:373
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
Definition: heap.c:1144
void pgstat_progress_update_param(int index, int64 val)
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:612
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4823
void StartTransactionCommand(void)
Definition: xact.c:2838
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition: indexing.c:43
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
Definition: indexing.c:301
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:111
#define REINDEX_REL_CHECK_CONSTRAINTS
Definition: index.h:157
#define PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP
Definition: progress.h:71
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
Definition: cluster.c:755
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
bool get_index_isclustered(Oid index_oid)
Definition: lsyscache.c:3555
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4570
Oid indexOid
Definition: cluster.c:67
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
#define CLUOPT_RECHECK
Definition: cluster.h:23
bool reindex_relation(Oid relid, int flags, ReindexParams *params)
Definition: index.c:3795
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:991
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:175
#define AccessExclusiveLock
Definition: lockdefs.h:45
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
Definition: cluster.c:443
#define elog(elevel,...)
Definition: elog.h:232
int i
#define NameStr(name)
Definition: c.h:681
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
Definition: index.h:159
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition: indexing.c:61
char * defname
Definition: parsenodes.h:746
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:102
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Definition: inval.c:1314
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:109
#define PROGRESS_CLUSTER_PHASE_REBUILD_INDEX
Definition: progress.h:70
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
RangeVar * relation
Definition: parsenodes.h:3310
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
Definition: tablecmds.c:3827
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:56
Definition: pg_list.h:50
#define snprintf
Definition: port.h:216
#define RelationGetRelid(relation)
Definition: rel.h:469
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:1646
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, LOCKMODE lockmode)
Definition: cluster.c:629
#define lfirst_oid(lc)
Definition: pg_list.h:171
void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immediate)
Definition: relmapper.c:261
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
#define PERFORM_DELETION_INTERNAL
Definition: dependency.h:141
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
#define RelationGetNamespace(relation)
Definition: rel.h:510
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
Definition: toasting.c:64