PostgreSQL Source Code  git master
cluster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * cluster.c
4  * CLUSTER a table on an index. This is now also used for VACUUM FULL.
5  *
6  * There is hardly anything left of Paul Brown's original implementation...
7  *
8  *
9  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994-5, Regents of the University of California
11  *
12  *
13  * IDENTIFICATION
14  * src/backend/commands/cluster.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/amapi.h"
21 #include "access/heapam.h"
22 #include "access/multixact.h"
23 #include "access/relscan.h"
24 #include "access/tableam.h"
25 #include "access/toast_internals.h"
26 #include "access/transam.h"
27 #include "access/xact.h"
28 #include "access/xlog.h"
29 #include "catalog/catalog.h"
30 #include "catalog/dependency.h"
31 #include "catalog/heap.h"
32 #include "catalog/index.h"
33 #include "catalog/namespace.h"
34 #include "catalog/objectaccess.h"
35 #include "catalog/pg_am.h"
36 #include "catalog/toasting.h"
37 #include "commands/cluster.h"
38 #include "commands/progress.h"
39 #include "commands/tablecmds.h"
40 #include "commands/vacuum.h"
41 #include "miscadmin.h"
42 #include "optimizer/optimizer.h"
43 #include "pgstat.h"
44 #include "storage/bufmgr.h"
45 #include "storage/lmgr.h"
46 #include "storage/predicate.h"
47 #include "utils/acl.h"
48 #include "utils/fmgroids.h"
49 #include "utils/inval.h"
50 #include "utils/lsyscache.h"
51 #include "utils/memutils.h"
52 #include "utils/pg_rusage.h"
53 #include "utils/relmapper.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
56 #include "utils/tuplesort.h"
57 
58 /*
59  * This struct is used to pass around the information on tables to be
60  * clustered. We need this so we can make a list of them when invoked without
61  * a specific table/index pair.
 *
 * cluster() reads two members from each list entry, tableOid and indexOid,
 * and passes them to cluster_rel() as the table and the index to process.
 *
 * NOTE(review): the member declarations (listing lines 65-66) are absent
 * from this copy of the file, so the struct body appears empty here.
 * Restore them before attempting to build from this text.
62  */
63 typedef struct
64 {
67 } RelToCluster;
68 
69 
/*
 * Prototypes for file-local helpers. rebuild_relation() and
 * copy_table_data() are defined further down; get_tables_to_cluster() is
 * called by cluster() to build the list of relations for the
 * multi-relation case.
 */
70 static void rebuild_relation(Relation OldHeap, Oid indexOid,
71  bool isTopLevel, bool verbose);
72 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
73  bool isTopLevel, bool verbose,
74  bool *pSwapToastByContent,
75  TransactionId *pFreezeXid,
76  MultiXactId *pCutoffMulti);
77 static List *get_tables_to_cluster(MemoryContext cluster_context);
78 
79 
80 /*---------------------------------------------------------------------------
81  * This cluster code allows for clustering multiple tables at once. Because
82  * of this, we cannot just run everything on a single transaction, or we
83  * would be forced to acquire exclusive locks on all the tables being
84  * clustered, simultaneously --- very likely leading to deadlock.
85  *
86  * To solve this we follow a similar strategy to VACUUM code,
87  * clustering each relation in a separate transaction. For this to work,
88  * we need to:
89  * - provide a separate memory context so that we can pass information in
90  * a way that survives across transactions
91  * - start a new transaction every time a new relation is clustered
92  * - check for validity of the information on to-be-clustered relations,
93  * as someone might have deleted a relation behind our back, or
94  * clustered one on a different index
95  * - end the transaction
96  *
97  * The single-relation case does not have any such overhead.
98  *
99  * We also allow a relation to be specified without index. In that case,
100  * the indisclustered bit will be looked up, and an ERROR will be thrown
101  * if there is no index with the bit set.
 *
 * stmt: the CLUSTER statement being executed. This function reads
 *	stmt->relation (NULL selects the multi-relation case), stmt->indexname
 *	(NULL means "use the index already marked indisclustered") and
 *	stmt->options (passed on to cluster_rel).
 * isTopLevel: given to PreventInTransactionBlock() in the multi-relation
 *	case and passed through to cluster_rel() in both cases.
 *
 * In the multi-relation case CLUOPT_RECHECK is OR'ed into the options, so
 * cluster_rel() re-validates each relation before processing it.
 *
 * NOTE(review): this copy of the file is a numbered listing from which
 * several lines were dropped. Within this function, listing lines 116,
 * 118, 202, 211-212, 220, 222, 227-228 and 232 are absent, which leaves two
 * calls unterminated and several transaction-control comments without the
 * statements they describe. Restore those lines before building.
102  *---------------------------------------------------------------------------
103  */
104 void
105 cluster(ClusterStmt *stmt, bool isTopLevel)
106 {
107  if (stmt->relation != NULL)
108  {
109  /* This is the single-relation case. */
110  Oid tableOid,
111  indexOid = InvalidOid;
112  Relation rel;
113 
114  /* Find, lock, and check permissions on the table */
115  tableOid = RangeVarGetRelidExtended(stmt->relation,
 /* NOTE(review): listing line 116 (an argument of this call) is missing */
117  0,
 /* NOTE(review): listing line 118 (the rest of this call) is missing */
 /* The table is opened with NoLock; per the comment above, the lookup locked it. */
119  rel = table_open(tableOid, NoLock);
120 
121  /*
122  * Reject clustering a remote temp table ... their local buffer
123  * manager is not going to cope.
124  */
125  if (RELATION_IS_OTHER_TEMP(rel))
126  ereport(ERROR,
127  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
128  errmsg("cannot cluster temporary tables of other sessions")));
129 
130  /*
131  * Reject clustering a partitioned table.
132  */
133  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
134  ereport(ERROR,
135  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
136  errmsg("cannot cluster a partitioned table")));
137 
138  if (stmt->indexname == NULL)
139  {
140  ListCell *index;
141 
142  /* We need to find the index that has indisclustered set. */
143  foreach(index, RelationGetIndexList(rel))
144  {
145  indexOid = lfirst_oid(index);
146  if (get_index_isclustered(indexOid))
147  break;
 /* Not this one: reset so that a fruitless search leaves InvalidOid. */
148  indexOid = InvalidOid;
149  }
150 
151  if (!OidIsValid(indexOid))
152  ereport(ERROR,
153  (errcode(ERRCODE_UNDEFINED_OBJECT),
154  errmsg("there is no previously clustered index for table \"%s\"",
155  stmt->relation->relname)));
156  }
157  else
158  {
159  /*
160  * The index is expected to be in the same namespace as the
161  * relation.
162  */
163  indexOid = get_relname_relid(stmt->indexname,
164  rel->rd_rel->relnamespace);
165  if (!OidIsValid(indexOid))
166  ereport(ERROR,
167  (errcode(ERRCODE_UNDEFINED_OBJECT),
168  errmsg("index \"%s\" for table \"%s\" does not exist",
169  stmt->indexname, stmt->relation->relname)));
170  }
171 
172  /* close relation, keep lock till commit */
173  table_close(rel, NoLock);
174 
175  /* Do the job. */
176  cluster_rel(tableOid, indexOid, stmt->options, isTopLevel);
177  }
178  else
179  {
180  /*
181  * This is the "multi relation" case. We need to cluster all tables
182  * that have some index with indisclustered set.
183  */
184  MemoryContext cluster_context;
185  List *rvs;
186  ListCell *rv;
187 
188  /*
189  * We cannot run this form of CLUSTER inside a user transaction block;
190  * we'd be holding locks way too long.
191  */
192  PreventInTransactionBlock(isTopLevel, "CLUSTER");
193 
194  /*
195  * Create special memory context for cross-transaction storage.
196  *
197  * Since it is a child of PortalContext, it will go away even in case
198  * of error.
199  */
200  cluster_context = AllocSetContextCreate(PortalContext,
201  "Cluster",
 /* NOTE(review): listing line 202 (the final argument of this call) is missing */
203 
204  /*
205  * Build the list of relations to cluster. Note that this lives in
206  * cluster_context.
207  */
208  rvs = get_tables_to_cluster(cluster_context);
209 
210  /* Commit to get out of starting transaction */
 /* NOTE(review): listing lines 211-212 (the statements doing this) are missing */
213 
214  /* Ok, now that we've got them all, cluster them one by one */
215  foreach(rv, rvs)
216  {
217  RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
218 
219  /* Start a new transaction for each relation. */
 /* NOTE(review): listing line 220 is missing */
221  /* functions in indexes may want a snapshot set */
 /* NOTE(review): listing line 222 is missing */
223  /* Do the job. */
224  cluster_rel(rvtc->tableOid, rvtc->indexOid,
225  stmt->options | CLUOPT_RECHECK,
226  isTopLevel);
 /* NOTE(review): listing lines 227-228 (end of the per-relation work) are missing */
229  }
230 
231  /* Start a new transaction for the cleanup work. */
 /* NOTE(review): listing line 232 is missing */
233 
234  /* Clean up working storage */
235  MemoryContextDelete(cluster_context);
236  }
237 }
238 
239 /*
240  * cluster_rel
241  *
242  * This clusters the table by creating a new, clustered table and
243  * swapping the relfilenodes of the new table and the old table, so
244  * the OID of the original table is preserved. Thus we do not lose
245  * GRANT, inheritance nor references to this table (this was a bug
246  * in releases through 7.3).
247  *
248  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
249  * the new table, it's better to create the indexes afterwards than to fill
250  * them incrementally while we load the table.
251  *
252  * If indexOid is InvalidOid, the table will be rewritten in physical order
253  * instead of index order. This is the new implementation of VACUUM FULL,
254  * and error messages should refer to the operation as VACUUM not CLUSTER.
 *
 * tableOid: the table to rewrite; opened here with AccessExclusiveLock.
 * indexOid: index to order by, or InvalidOid (see above).
 * options: bit mask. CLUOPT_VERBOSE is forwarded to rebuild_relation() as
 *	"verbose"; CLUOPT_RECHECK enables the re-validation block below and is
 *	forwarded to check_index_is_clusterable().
 * isTopLevel: passed through to rebuild_relation().
 *
 * The function returns without doing anything if the table can no longer
 * be opened, if one of the recheck tests fails, or if the relation is an
 * unpopulated materialized view.
 *
 * NOTE(review): this copy of the file is a numbered listing from which
 * lines were dropped. Within this function, listing lines 264, 266,
 * 268-269, 271-272, 285, 302-303, 317-318, 327, 329-330, 339-340, 393-394,
 * 404 and 411 are absent. Among other things, the if/else after the
 * interrupt-check comment has no bodies, one "if" condition is gone, and
 * whatever ran before each early "return" is not shown. Restore those
 * lines before building.
255  */
256 void
257 cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
258 {
259  Relation OldHeap;
260  bool verbose = ((options & CLUOPT_VERBOSE) != 0);
261  bool recheck = ((options & CLUOPT_RECHECK) != 0);
262 
263  /* Check for user-requested abort. */
 /* NOTE(review): listing line 264 (the check itself) is missing */
265 
 /* NOTE(review): listing line 266 and both branch bodies (268-269, 271-272) are missing */
267  if (OidIsValid(indexOid))
270  else
273 
274  /*
275  * We grab exclusive access to the target rel and index for the duration
276  * of the transaction. (This is redundant for the single-transaction
277  * case, since cluster() already did it.) The index lock is taken inside
278  * check_index_is_clusterable.
279  */
280  OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
281 
282  /* If the table has gone away, we can skip processing it */
283  if (!OldHeap)
284  {
 /* NOTE(review): listing line 285 is missing */
286  return;
287  }
288 
289  /*
290  * Since we may open a new transaction for each relation, we have to check
291  * that the relation still is what we think it is.
292  *
293  * If this is a single-transaction CLUSTER, we can skip these tests. We
294  * *must* skip the one on indisclustered since it would reject an attempt
295  * to cluster a not-previously-clustered index.
296  */
297  if (recheck)
298  {
299  /* Check that the user still owns the relation */
300  if (!pg_class_ownercheck(tableOid, GetUserId()))
301  {
 /* NOTE(review): listing lines 302-303 are missing */
304  return;
305  }
306 
307  /*
308  * Silently skip a temp table for a remote session. Only doing this
309  * check in the "recheck" case is appropriate (which currently means
310  * somebody is executing a database-wide CLUSTER), because there is
311  * another check in cluster() which will stop any attempt to cluster
312  * remote temp tables by name. There is another check in cluster_rel
313  * which is redundant, but we leave it for extra safety.
314  */
315  if (RELATION_IS_OTHER_TEMP(OldHeap))
316  {
 /* NOTE(review): listing lines 317-318 are missing */
319  return;
320  }
321 
322  if (OidIsValid(indexOid))
323  {
324  /*
325  * Check that the index still exists
326  */
 /* NOTE(review): listing line 327 (the "if" condition) and lines 329-330 are missing */
328  {
331  return;
332  }
333 
334  /*
335  * Check that the index is still the one with indisclustered set.
336  */
337  if (!get_index_isclustered(indexOid))
338  {
 /* NOTE(review): listing lines 339-340 are missing */
341  return;
342  }
343  }
344  }
345 
346  /*
347  * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
348  * would work in most respects, but the index would only get marked as
349  * indisclustered in the current database, leading to unexpected behavior
350  * if CLUSTER were later invoked in another database.
351  */
352  if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
353  ereport(ERROR,
354  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
355  errmsg("cannot cluster a shared catalog")));
356 
357  /*
358  * Don't process temp tables of other backends ... their local buffer
359  * manager is not going to cope.
360  */
361  if (RELATION_IS_OTHER_TEMP(OldHeap))
362  {
 /* The wording depends on whether this is CLUSTER (index given) or VACUUM FULL. */
363  if (OidIsValid(indexOid))
364  ereport(ERROR,
365  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
366  errmsg("cannot cluster temporary tables of other sessions")));
367  else
368  ereport(ERROR,
369  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
370  errmsg("cannot vacuum temporary tables of other sessions")));
371  }
372 
373  /*
374  * Also check for active uses of the relation in the current transaction,
375  * including open scans and pending AFTER trigger events.
376  */
377  CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
378 
379  /* Check heap and index are valid to cluster on */
380  if (OidIsValid(indexOid))
381  check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
382 
383  /*
384  * Quietly ignore the request if this is a materialized view which has not
385  * been populated from its query. No harm is done because there is no data
386  * to deal with, and we don't want to throw an error if this is part of a
387  * multi-relation request -- for example, CLUSTER was run on the entire
388  * database.
389  */
390  if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
391  !RelationIsPopulated(OldHeap))
392  {
 /* NOTE(review): listing lines 393-394 are missing */
395  return;
396  }
397 
398  /*
399  * All predicate locks on the tuples or pages are about to be made
400  * invalid, because we move tuples around. Promote them to relation
401  * locks. Predicate locks on indexes will be promoted when they are
402  * reindexed.
403  */
 /* NOTE(review): listing line 404 (the statement doing the promotion) is missing */
405 
406  /* rebuild_relation does all the dirty work */
407  rebuild_relation(OldHeap, indexOid, isTopLevel, verbose);
408 
409  /* NB: rebuild_relation does table_close() on OldHeap */
410 
 /* NOTE(review): listing line 411 is missing */
412 }
413 
414 /*
415  * Verify that the specified heap and index are valid to cluster on
416  *
417  * Side effect: obtains lock on the index. The caller may
418  * in some cases already have AccessExclusiveLock on the table, but
419  * not in all cases so we can't rely on the table-level lock for
420  * protection here.
421  */
422 void
423 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
424 {
425  Relation OldIndex;
426 
427  OldIndex = index_open(indexOid, lockmode);
428 
429  /*
430  * Check that index is in fact an index on the given relation
431  */
432  if (OldIndex->rd_index == NULL ||
433  OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
434  ereport(ERROR,
435  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
436  errmsg("\"%s\" is not an index for table \"%s\"",
437  RelationGetRelationName(OldIndex),
438  RelationGetRelationName(OldHeap))));
439 
440  /* Index AM must allow clustering */
441  if (!OldIndex->rd_indam->amclusterable)
442  ereport(ERROR,
443  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
444  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
445  RelationGetRelationName(OldIndex))));
446 
447  /*
448  * Disallow clustering on incomplete indexes (those that might not index
449  * every row of the relation). We could relax this by making a separate
450  * seqscan pass over the table to copy the missing rows, but that seems
451  * expensive and tedious.
452  */
453  if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
454  ereport(ERROR,
455  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
456  errmsg("cannot cluster on partial index \"%s\"",
457  RelationGetRelationName(OldIndex))));
458 
459  /*
460  * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
461  * it might well not contain entries for every heap row, or might not even
462  * be internally consistent. (But note that we don't check indcheckxmin;
463  * the worst consequence of following broken HOT chains would be that we
464  * might put recently-dead tuples out-of-order in the new table, and there
465  * is little harm in that.)
466  */
467  if (!OldIndex->rd_index->indisvalid)
468  ereport(ERROR,
469  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
470  errmsg("cannot cluster on invalid index \"%s\"",
471  RelationGetRelationName(OldIndex))));
472 
473  /* Drop relcache refcnt on OldIndex, but keep lock */
474  index_close(OldIndex, NoLock);
475 }
476 
477 /*
478  * mark_index_clustered: mark the specified index as the one clustered on
479  *
480  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
481  */
482 void
483 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
484 {
485  HeapTuple indexTuple;
486  Form_pg_index indexForm;
487  Relation pg_index;
488  ListCell *index;
489 
490  /* Disallow applying to a partitioned table */
491  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
492  ereport(ERROR,
493  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
494  errmsg("cannot mark index clustered in partitioned table")));
495 
496  /*
497  * If the index is already marked clustered, no need to do anything.
498  */
499  if (OidIsValid(indexOid))
500  {
501  if (get_index_isclustered(indexOid))
502  return;
503  }
504 
505  /*
506  * Check each index of the relation and set/clear the bit as needed.
507  */
508  pg_index = table_open(IndexRelationId, RowExclusiveLock);
509 
510  foreach(index, RelationGetIndexList(rel))
511  {
512  Oid thisIndexOid = lfirst_oid(index);
513 
514  indexTuple = SearchSysCacheCopy1(INDEXRELID,
515  ObjectIdGetDatum(thisIndexOid));
516  if (!HeapTupleIsValid(indexTuple))
517  elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
518  indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
519 
520  /*
521  * Unset the bit if set. We know it's wrong because we checked this
522  * earlier.
523  */
524  if (indexForm->indisclustered)
525  {
526  indexForm->indisclustered = false;
527  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
528  }
529  else if (thisIndexOid == indexOid)
530  {
531  /* this was checked earlier, but let's be real sure */
532  if (!indexForm->indisvalid)
533  elog(ERROR, "cannot cluster on invalid index %u", indexOid);
534  indexForm->indisclustered = true;
535  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
536  }
537 
538  InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
539  InvalidOid, is_internal);
540 
541  heap_freetuple(indexTuple);
542  }
543 
544  table_close(pg_index, RowExclusiveLock);
545 }
546 
547 /*
548  * rebuild_relation: rebuild an existing relation in index or physical order
549  *
550  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
551  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
552  * isTopLevel: should be passed down from ProcessUtility.
 * verbose: passed through to copy_table_data().
553  *
554  * NB: this routine closes OldHeap at the right time; caller should not.
 *
 * The work is done in three steps: make_new_heap() creates a transient
 * table, copy_table_data() fills it, and finish_heap_swap() is then called
 * with the freeze cutoffs that copy_table_data() reported.
 *
 * NOTE(review): listing line 582, the last argument of the make_new_heap()
 * call, is absent from this copy of the file. make_new_heap() takes a
 * LOCKMODE as its fourth parameter, so that is presumably what is missing;
 * confirm against the upstream source.
555  */
556 static void
557 rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
558 {
559  Oid tableOid = RelationGetRelid(OldHeap);
560  Oid tableSpace = OldHeap->rd_rel->reltablespace;
561  Oid OIDNewHeap;
562  char relpersistence;
563  bool is_system_catalog;
564  bool swap_toast_by_content;
565  TransactionId frozenXid;
566  MultiXactId cutoffMulti;
567 
568  /* Mark the correct index as clustered */
569  if (OidIsValid(indexOid))
570  mark_index_clustered(OldHeap, indexOid, true);
571 
572  /* Remember info about rel before closing OldHeap */
573  relpersistence = OldHeap->rd_rel->relpersistence;
574  is_system_catalog = IsSystemRelation(OldHeap);
575 
576  /* Close relcache entry, but keep lock until transaction commit */
577  table_close(OldHeap, NoLock);
578 
579  /* Create the transient table that will receive the re-ordered data */
580  OIDNewHeap = make_new_heap(tableOid, tableSpace,
581  relpersistence,
 /* NOTE(review): listing line 582 (final argument and end of this call) is missing */
583 
584  /* Copy the heap data into the new table in the desired order */
585  copy_table_data(OIDNewHeap, tableOid, indexOid, isTopLevel, verbose,
586  &swap_toast_by_content, &frozenXid, &cutoffMulti);
587 
588  /*
589  * Swap the physical files of the target and transient tables, then
590  * rebuild the target's indexes and throw away the transient table.
591  */
592  finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
593  swap_toast_by_content, false, true,
594  frozenXid, cutoffMulti,
595  relpersistence);
596 }
597 
598 
599 /*
600  * Create the transient table that will be filled with new data during
601  * CLUSTER, ALTER TABLE, and similar operations. The transient table
602  * duplicates the logical structure of the OldHeap, but is placed in
603  * NewTableSpace which might be different from OldHeap's. Also, it's built
604  * with the specified persistence, which might differ from the original's.
605  *
606  * After this, the caller should load the new heap with transferred/modified
607  * data, then call finish_heap_swap to complete the operation.
 *
 * OIDOldHeap: OID of the table whose structure is copied.
 * NewTableSpace: tablespace for the transient table.
 * relpersistence: persistence of the transient table; RELPERSISTENCE_TEMP
 *	places it in the "pg_temp" creation namespace, anything else in the
 *	old table's namespace.
 * lockmode: lock level used to open the old table; also handed to
 *	NewHeapCreateToastTable().
 *
 * Returns the OID of the transient table. The old table is closed with
 * NoLock before returning, so the lock taken here is not released.
 *
 * NOTE(review): listing lines 677 and 691-692 are absent from this copy of
 * the file. Line 677 was one argument of the heap_create_with_catalog()
 * call. Line 691 was evidently the end of the "Advance command counter"
 * comment, so in this text that comment runs on into the TOAST comment
 * that follows, and the statement on line 692 is not shown.
608  */
609 Oid
610 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
611  LOCKMODE lockmode)
612 {
613  TupleDesc OldHeapDesc;
614  char NewHeapName[NAMEDATALEN];
615  Oid OIDNewHeap;
616  Oid toastid;
617  Relation OldHeap;
618  HeapTuple tuple;
619  Datum reloptions;
620  bool isNull;
621  Oid namespaceid;
622 
623  OldHeap = table_open(OIDOldHeap, lockmode);
624  OldHeapDesc = RelationGetDescr(OldHeap);
625 
626  /*
627  * Note that the NewHeap will not receive any of the defaults or
628  * constraints associated with the OldHeap; we don't need 'em, and there's
629  * no reason to spend cycles inserting them into the catalogs only to
630  * delete them.
631  */
632 
633  /*
634  * But we do want to use reloptions of the old heap for new heap.
635  */
636  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
637  if (!HeapTupleIsValid(tuple))
638  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
639  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
640  &isNull);
641  if (isNull)
642  reloptions = (Datum) 0;
643 
644  if (relpersistence == RELPERSISTENCE_TEMP)
645  namespaceid = LookupCreationNamespace("pg_temp");
646  else
647  namespaceid = RelationGetNamespace(OldHeap);
648 
649  /*
650  * Create the new heap, using a temporary name in the same namespace as
651  * the existing table. NOTE: there is some risk of collision with user
652  * relnames. Working around this seems more trouble than it's worth; in
653  * particular, we can't create the new heap in a different namespace from
654  * the old, or we will have problems with the TEMP status of temp tables.
655  *
656  * Note: the new heap is not a shared relation, even if we are rebuilding
657  * a shared rel. However, we do make the new heap mapped if the source is
658  * mapped. This simplifies swap_relation_files, and is absolutely
659  * necessary for rebuilding pg_class, for reasons explained there.
660  */
 /* The transient name embeds the old table's OID. */
661  snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
662 
663  OIDNewHeap = heap_create_with_catalog(NewHeapName,
664  namespaceid,
665  NewTableSpace,
666  InvalidOid,
667  InvalidOid,
668  InvalidOid,
669  OldHeap->rd_rel->relowner,
670  OldHeap->rd_rel->relam,
671  OldHeapDesc,
672  NIL,
673  RELKIND_RELATION,
674  relpersistence,
675  false,
676  RelationIsMapped(OldHeap),
 /* NOTE(review): listing line 677 (one argument of this call) is missing */
678  reloptions,
679  false,
680  true,
681  true,
682  OIDOldHeap,
683  NULL);
684  Assert(OIDNewHeap != InvalidOid);
685 
 /* Done with the pg_class tuple the reloptions were read from. */
686  ReleaseSysCache(tuple);
687 
688  /*
689  * Advance command counter so that the newly-created relation's catalog
690  * tuples will be visible to table_open.
 *
 * NOTE(review): listing lines 691-692 are missing here, see the function
 * header. This comment is therefore not terminated until the end of the
 * TOAST comment below.
693 
694  /*
695  * If necessary, create a TOAST table for the new relation.
696  *
697  * If the relation doesn't have a TOAST table already, we can't need one
698  * for the new relation. The other way around is possible though: if some
699  * wide columns have been dropped, NewHeapCreateToastTable can decide that
700  * no TOAST table is needed for the new table.
701  *
702  * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
703  * that the TOAST table will be visible for insertion.
704  */
705  toastid = OldHeap->rd_rel->reltoastrelid;
706  if (OidIsValid(toastid))
707  {
708  /* keep the existing toast table's reloptions, if any */
709  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
710  if (!HeapTupleIsValid(tuple))
711  elog(ERROR, "cache lookup failed for relation %u", toastid);
712  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
713  &isNull);
714  if (isNull)
715  reloptions = (Datum) 0;
716 
717  NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode);
718 
719  ReleaseSysCache(tuple);
720  }
721 
 /* Drop the relcache reference only; NoLock leaves the lock in place. */
722  table_close(OldHeap, NoLock);
723 
724  return OIDNewHeap;
725 }
726 
727 /*
728  * Do the physical copying of table data.
729  *
 * OIDNewHeap: the transient table that receives the data.
 * OIDOldHeap: the table being rewritten.
 * OIDOldIndex: index to order by, or InvalidOid, in which case no index is
 *	opened and OldIndex is NULL.
 * isTopLevel: passed to vacuum_set_xid_limits().
 * verbose: selects INFO rather than DEBUG2 as the level of the log messages.
 *
730  * There are three output parameters:
731  * *pSwapToastByContent is set true if toast tables must be swapped by content.
732  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
733  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
 *
 * After the copy, the new heap's pg_class row gets relpages and reltuples
 * set from the page count of the new heap and the copied tuple count.
 *
 * NOTE(review): listing lines 747-749, 751, 870, 876, 881, 935 and 942 are
 * absent from this copy of the file. As a result oldTupDesc, newTupDesc,
 * OldestXmin and MultiXactCutoff are used below without a visible
 * declaration, the three "%s.%s" log messages each show only one string
 * argument, the final "else" has no body, and the statement announced by
 * "Make the update visible" is not shown. Restore those lines before
 * building.
734  */
735 static void
736 copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
737  bool isTopLevel, bool verbose,
738  bool *pSwapToastByContent, TransactionId *pFreezeXid,
739  MultiXactId *pCutoffMulti)
740 {
741  Relation NewHeap,
742  OldHeap,
743  OldIndex;
744  Relation relRelation;
745  HeapTuple reltup;
746  Form_pg_class relform;
 /* NOTE(review): listing lines 747-749 (further declarations) are missing */
750  TransactionId FreezeXid;
 /* NOTE(review): listing line 751 (a further declaration) is missing */
752  bool use_sort;
753  double num_tuples = 0,
754  tups_vacuumed = 0,
755  tups_recently_dead = 0;
756  BlockNumber num_pages;
757  int elevel = verbose ? INFO : DEBUG2;
758  PGRUsage ru0;
759 
 /* Start the resource-usage measurement reported in the final log message. */
760  pg_rusage_init(&ru0);
761 
762  /*
763  * Open the relations we need.
764  */
765  NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
766  OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
767  if (OidIsValid(OIDOldIndex))
768  OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
769  else
770  OldIndex = NULL;
771 
772  /*
773  * Their tuple descriptors should be exactly alike, but here we only need
774  * assume that they have the same number of columns.
775  */
776  oldTupDesc = RelationGetDescr(OldHeap);
777  newTupDesc = RelationGetDescr(NewHeap);
778  Assert(newTupDesc->natts == oldTupDesc->natts);
779 
780  /*
781  * If the OldHeap has a toast table, get lock on the toast table to keep
782  * it from being vacuumed. This is needed because autovacuum processes
783  * toast tables independently of their main tables, with no lock on the
784  * latter. If an autovacuum were to start on the toast table after we
785  * compute our OldestXmin below, it would use a later OldestXmin, and then
786  * possibly remove as DEAD toast tuples belonging to main tuples we think
787  * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
788  * tuples.
789  *
790  * We don't need to open the toast relation here, just lock it. The lock
791  * will be held till end of transaction.
792  */
793  if (OldHeap->rd_rel->reltoastrelid)
794  LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
795 
796  /*
797  * If both tables have TOAST tables, perform toast swap by content. It is
798  * possible that the old table has a toast table but the new one doesn't,
799  * if toastable columns have been dropped. In that case we have to do
800  * swap by links. This is okay because swap by content is only essential
801  * for system catalogs, and we don't support schema changes for them.
802  */
803  if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
804  {
805  *pSwapToastByContent = true;
806 
807  /*
808  * When doing swap by content, any toast pointers written into NewHeap
809  * must use the old toast table's OID, because that's where the toast
810  * data will eventually be found. Set this up by setting rd_toastoid.
811  * This also tells toast_save_datum() to preserve the toast value
812  * OIDs, which we want so as not to invalidate toast pointers in
813  * system catalog caches, and to avoid making multiple copies of a
814  * single toast value.
815  *
816  * Note that we must hold NewHeap open until we are done writing data,
817  * since the relcache will not guarantee to remember this setting once
818  * the relation is closed. Also, this technique depends on the fact
819  * that no one will try to read from the NewHeap until after we've
820  * finished writing it and swapping the rels --- otherwise they could
821  * follow the toast pointers to the wrong place. (It would actually
822  * work for values copied over from the old toast table, but not for
823  * any values that we toast which were previously not toasted.)
824  */
825  NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
826  }
827  else
828  *pSwapToastByContent = false;
829 
830  /*
831  * Compute xids used to freeze and weed out dead tuples and multixacts.
832  * Since we're going to rewrite the whole table anyway, there's no reason
833  * not to be aggressive about this.
834  */
835  vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, isTopLevel,
836  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
837  NULL);
838 
839  /*
840  * FreezeXid will become the table's new relfrozenxid, and that mustn't go
841  * backwards, so take the max.
842  */
843  if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
844  TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
845  FreezeXid = OldHeap->rd_rel->relfrozenxid;
846 
847  /*
848  * MultiXactCutoff, similarly, shouldn't go backwards either.
849  */
850  if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
851  MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
852  MultiXactCutoff = OldHeap->rd_rel->relminmxid;
853 
854  /*
855  * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
856  * the OldHeap. We know how to use a sort to duplicate the ordering of a
857  * btree index, and will use seqscan-and-sort for that case if the planner
858  * tells us it's cheaper. Otherwise, always indexscan if an index is
859  * provided, else plain seqscan.
860  */
861  if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
862  use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
863  else
864  use_sort = false;
865 
866  /* Log what we're doing */
867  if (OldIndex != NULL && !use_sort)
868  ereport(elevel,
869  (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
 /* NOTE(review): listing line 870 (the first string argument) is missing */
871  RelationGetRelationName(OldHeap),
872  RelationGetRelationName(OldIndex))));
873  else if (use_sort)
874  ereport(elevel,
875  (errmsg("clustering \"%s.%s\" using sequential scan and sort",
 /* NOTE(review): listing line 876 (the first string argument) is missing */
877  RelationGetRelationName(OldHeap))));
878  else
879  ereport(elevel,
880  (errmsg("vacuuming \"%s.%s\"",
 /* NOTE(review): listing line 881 (the first string argument) is missing */
882  RelationGetRelationName(OldHeap))));
883 
884  /*
885  * Hand off the actual copying to AM specific function, the generic code
886  * cannot know how to deal with visibility across AMs. Note that this
887  * routine is allowed to set FreezeXid / MultiXactCutoff to different
888  * values (e.g. because the AM doesn't use freezing).
889  */
890  table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
891  OldestXmin, &FreezeXid, &MultiXactCutoff,
892  &num_tuples, &tups_vacuumed,
893  &tups_recently_dead);
894 
895  /* return selected values to caller, get set as relfrozenxid/minmxid */
896  *pFreezeXid = FreezeXid;
897  *pCutoffMulti = MultiXactCutoff;
898 
899  /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
900  NewHeap->rd_toastoid = InvalidOid;
901 
 /* Size of the new heap; stored as relpages further down. */
902  num_pages = RelationGetNumberOfBlocks(NewHeap);
903 
904  /* Log what we did */
 /* The page count reported in this message is that of the old heap. */
905  ereport(elevel,
906  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
907  RelationGetRelationName(OldHeap),
908  tups_vacuumed, num_tuples,
909  RelationGetNumberOfBlocks(OldHeap)),
910  errdetail("%.0f dead row versions cannot be removed yet.\n"
911  "%s.",
912  tups_recently_dead,
913  pg_rusage_show(&ru0))));
914 
 /* Release the relcache references; NoLock leaves all locks in place. */
915  if (OldIndex != NULL)
916  index_close(OldIndex, NoLock);
917  table_close(OldHeap, NoLock);
918  table_close(NewHeap, NoLock);
919 
920  /* Update pg_class to reflect the correct values of pages and tuples. */
921  relRelation = table_open(RelationRelationId, RowExclusiveLock);
922 
923  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
924  if (!HeapTupleIsValid(reltup))
925  elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
926  relform = (Form_pg_class) GETSTRUCT(reltup);
927 
928  relform->relpages = num_pages;
929  relform->reltuples = num_tuples;
930 
931  /* Don't update the stats for pg_class. See swap_relation_files. */
932  if (OIDOldHeap != RelationRelationId)
933  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
934  else
 /* NOTE(review): listing line 935 (the body of this else) is missing */
936 
937  /* Clean up. */
938  heap_freetuple(reltup);
939  table_close(relRelation, RowExclusiveLock);
940 
941  /* Make the update visible */
 /* NOTE(review): listing line 942 (the statement doing this) is missing */
943 }
944 
945 /*
946  * Swap the physical files of two given relations.
947  *
948  * We swap the physical identity (reltablespace, relfilenode) while keeping the
949  * same logical identities of the two relations. relpersistence is also
950  * swapped, which is critical since it determines where buffers live for each
951  * relation.
952  *
953  * We can swap associated TOAST data in either of two ways: recursively swap
954  * the physical content of the toast tables (and their indexes), or swap the
955  * TOAST links in the given relations' pg_class entries. The former is needed
956  * to manage rewrites of shared catalogs (where we cannot change the pg_class
957  * links) while the latter is the only way to handle cases in which a toast
958  * table is added or removed altogether.
959  *
960  * Additionally, the first relation is marked with relfrozenxid set to
961  * frozenXid. It seems a bit ugly to have this here, but the caller would
962  * have to do it anyway, so having it here saves a heap_update. Note: in
963  * the swap-toast-links case, we assume we don't need to change the toast
964  * table's relfrozenxid: the new version of the toast table should already
965  * have relfrozenxid set to RecentXmin, which is good enough.
966  *
967  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
968  * their OIDs are emitted into mapped_tables[]. This is hacky but beats
969  * having to look the information up again later in finish_heap_swap.
970  */
971 static void
972 swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
973  bool swap_toast_by_content,
974  bool is_internal,
975  TransactionId frozenXid,
976  MultiXactId cutoffMulti,
977  Oid *mapped_tables)
978 {
979  Relation relRelation;
980  HeapTuple reltup1,
981  reltup2;
982  Form_pg_class relform1,
983  relform2;
984  Oid relfilenode1,
985  relfilenode2;
986  Oid swaptemp;
987  char swptmpchr;
988 
989  /* We need writable copies of both pg_class tuples. */
990  relRelation = table_open(RelationRelationId, RowExclusiveLock);
991 
993  if (!HeapTupleIsValid(reltup1))
994  elog(ERROR, "cache lookup failed for relation %u", r1);
995  relform1 = (Form_pg_class) GETSTRUCT(reltup1);
996 
998  if (!HeapTupleIsValid(reltup2))
999  elog(ERROR, "cache lookup failed for relation %u", r2);
1000  relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1001 
1002  relfilenode1 = relform1->relfilenode;
1003  relfilenode2 = relform2->relfilenode;
1004 
1005  if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
1006  {
1007  /*
1008  * Normal non-mapped relations: swap relfilenodes, reltablespaces,
1009  * relpersistence
1010  */
1011  Assert(!target_is_pg_class);
1012 
1013  swaptemp = relform1->relfilenode;
1014  relform1->relfilenode = relform2->relfilenode;
1015  relform2->relfilenode = swaptemp;
1016 
1017  swaptemp = relform1->reltablespace;
1018  relform1->reltablespace = relform2->reltablespace;
1019  relform2->reltablespace = swaptemp;
1020 
1021  swptmpchr = relform1->relpersistence;
1022  relform1->relpersistence = relform2->relpersistence;
1023  relform2->relpersistence = swptmpchr;
1024 
1025  /* Also swap toast links, if we're swapping by links */
1026  if (!swap_toast_by_content)
1027  {
1028  swaptemp = relform1->reltoastrelid;
1029  relform1->reltoastrelid = relform2->reltoastrelid;
1030  relform2->reltoastrelid = swaptemp;
1031  }
1032  }
1033  else
1034  {
1035  /*
1036  * Mapped-relation case. Here we have to swap the relation mappings
1037  * instead of modifying the pg_class columns. Both must be mapped.
1038  */
1039  if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
1040  elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1041  NameStr(relform1->relname));
1042 
1043  /*
1044  * We can't change the tablespace nor persistence of a mapped rel, and
1045  * we can't handle toast link swapping for one either, because we must
1046  * not apply any critical changes to its pg_class row. These cases
1047  * should be prevented by upstream permissions tests, so these checks
1048  * are non-user-facing emergency backstop.
1049  */
1050  if (relform1->reltablespace != relform2->reltablespace)
1051  elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1052  NameStr(relform1->relname));
1053  if (relform1->relpersistence != relform2->relpersistence)
1054  elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1055  NameStr(relform1->relname));
1056  if (!swap_toast_by_content &&
1057  (relform1->reltoastrelid || relform2->reltoastrelid))
1058  elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1059  NameStr(relform1->relname));
1060 
1061  /*
1062  * Fetch the mappings --- shouldn't fail, but be paranoid
1063  */
1064  relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
1065  if (!OidIsValid(relfilenode1))
1066  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1067  NameStr(relform1->relname), r1);
1068  relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
1069  if (!OidIsValid(relfilenode2))
1070  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1071  NameStr(relform2->relname), r2);
1072 
1073  /*
1074  * Send replacement mappings to relmapper. Note these won't actually
1075  * take effect until CommandCounterIncrement.
1076  */
1077  RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
1078  RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
1079 
1080  /* Pass OIDs of mapped r2 tables back to caller */
1081  *mapped_tables++ = r2;
1082  }
1083 
1084  /*
1085  * Recognize that rel1's relfilenode (swapped from rel2) is new in this
1086  * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1087  * new.
1088  */
1089  {
1090  Relation rel1,
1091  rel2;
1092 
1093  rel1 = relation_open(r1, NoLock);
1094  rel2 = relation_open(r2, NoLock);
1095  rel2->rd_createSubid = rel1->rd_createSubid;
1099  relation_close(rel1, NoLock);
1100  relation_close(rel2, NoLock);
1101  }
1102 
1103  /*
1104  * In the case of a shared catalog, these next few steps will only affect
1105  * our own database's pg_class row; but that's okay, because they are all
1106  * noncritical updates. That's also an important fact for the case of a
1107  * mapped catalog, because it's possible that we'll commit the map change
1108  * and then fail to commit the pg_class update.
1109  */
1110 
1111  /* set rel1's frozen Xid and minimum MultiXid */
1112  if (relform1->relkind != RELKIND_INDEX)
1113  {
1114  Assert(!TransactionIdIsValid(frozenXid) ||
1115  TransactionIdIsNormal(frozenXid));
1116  relform1->relfrozenxid = frozenXid;
1117  relform1->relminmxid = cutoffMulti;
1118  }
1119 
1120  /* swap size statistics too, since new rel has freshly-updated stats */
1121  {
1122  int32 swap_pages;
1123  float4 swap_tuples;
1124  int32 swap_allvisible;
1125 
1126  swap_pages = relform1->relpages;
1127  relform1->relpages = relform2->relpages;
1128  relform2->relpages = swap_pages;
1129 
1130  swap_tuples = relform1->reltuples;
1131  relform1->reltuples = relform2->reltuples;
1132  relform2->reltuples = swap_tuples;
1133 
1134  swap_allvisible = relform1->relallvisible;
1135  relform1->relallvisible = relform2->relallvisible;
1136  relform2->relallvisible = swap_allvisible;
1137  }
1138 
1139  /*
1140  * Update the tuples in pg_class --- unless the target relation of the
1141  * swap is pg_class itself. In that case, there is zero point in making
1142  * changes because we'd be updating the old data that we're about to throw
1143  * away. Because the real work being done here for a mapped relation is
1144  * just to change the relation map settings, it's all right to not update
1145  * the pg_class rows in this case. The most important changes will instead
1146  * performed later, in finish_heap_swap() itself.
1147  */
1148  if (!target_is_pg_class)
1149  {
1150  CatalogIndexState indstate;
1151 
1152  indstate = CatalogOpenIndexes(relRelation);
1153  CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1154  indstate);
1155  CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1156  indstate);
1157  CatalogCloseIndexes(indstate);
1158  }
1159  else
1160  {
1161  /* no update ... but we do still need relcache inval */
1164  }
1165 
1166  /*
1167  * Post alter hook for modified relations. The change to r2 is always
1168  * internal, but r1 depends on the invocation context.
1169  */
1170  InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1171  InvalidOid, is_internal);
1172  InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1173  InvalidOid, true);
1174 
1175  /*
1176  * If we have toast tables associated with the relations being swapped,
1177  * deal with them too.
1178  */
1179  if (relform1->reltoastrelid || relform2->reltoastrelid)
1180  {
1181  if (swap_toast_by_content)
1182  {
1183  if (relform1->reltoastrelid && relform2->reltoastrelid)
1184  {
1185  /* Recursively swap the contents of the toast tables */
1186  swap_relation_files(relform1->reltoastrelid,
1187  relform2->reltoastrelid,
1188  target_is_pg_class,
1189  swap_toast_by_content,
1190  is_internal,
1191  frozenXid,
1192  cutoffMulti,
1193  mapped_tables);
1194  }
1195  else
1196  {
1197  /* caller messed up */
1198  elog(ERROR, "cannot swap toast files by content when there's only one");
1199  }
1200  }
1201  else
1202  {
1203  /*
1204  * We swapped the ownership links, so we need to change dependency
1205  * data to match.
1206  *
1207  * NOTE: it is possible that only one table has a toast table.
1208  *
1209  * NOTE: at present, a TOAST table's only dependency is the one on
1210  * its owning table. If more are ever created, we'd need to use
1211  * something more selective than deleteDependencyRecordsFor() to
1212  * get rid of just the link we want.
1213  */
1214  ObjectAddress baseobject,
1215  toastobject;
1216  long count;
1217 
1218  /*
1219  * We disallow this case for system catalogs, to avoid the
1220  * possibility that the catalog we're rebuilding is one of the
1221  * ones the dependency changes would change. It's too late to be
1222  * making any data changes to the target catalog.
1223  */
1224  if (IsSystemClass(r1, relform1))
1225  elog(ERROR, "cannot swap toast files by links for system catalogs");
1226 
1227  /* Delete old dependencies */
1228  if (relform1->reltoastrelid)
1229  {
1230  count = deleteDependencyRecordsFor(RelationRelationId,
1231  relform1->reltoastrelid,
1232  false);
1233  if (count != 1)
1234  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1235  count);
1236  }
1237  if (relform2->reltoastrelid)
1238  {
1239  count = deleteDependencyRecordsFor(RelationRelationId,
1240  relform2->reltoastrelid,
1241  false);
1242  if (count != 1)
1243  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1244  count);
1245  }
1246 
1247  /* Register new dependencies */
1248  baseobject.classId = RelationRelationId;
1249  baseobject.objectSubId = 0;
1250  toastobject.classId = RelationRelationId;
1251  toastobject.objectSubId = 0;
1252 
1253  if (relform1->reltoastrelid)
1254  {
1255  baseobject.objectId = r1;
1256  toastobject.objectId = relform1->reltoastrelid;
1257  recordDependencyOn(&toastobject, &baseobject,
1259  }
1260 
1261  if (relform2->reltoastrelid)
1262  {
1263  baseobject.objectId = r2;
1264  toastobject.objectId = relform2->reltoastrelid;
1265  recordDependencyOn(&toastobject, &baseobject,
1267  }
1268  }
1269  }
1270 
1271  /*
1272  * If we're swapping two toast tables by content, do the same for their
1273  * valid index. The swap can actually be safely done only if the relations
1274  * have indexes.
1275  */
1276  if (swap_toast_by_content &&
1277  relform1->relkind == RELKIND_TOASTVALUE &&
1278  relform2->relkind == RELKIND_TOASTVALUE)
1279  {
1280  Oid toastIndex1,
1281  toastIndex2;
1282 
1283  /* Get valid index for each relation */
1284  toastIndex1 = toast_get_valid_index(r1,
1286  toastIndex2 = toast_get_valid_index(r2,
1288 
1289  swap_relation_files(toastIndex1,
1290  toastIndex2,
1291  target_is_pg_class,
1292  swap_toast_by_content,
1293  is_internal,
1296  mapped_tables);
1297  }
1298 
1299  /* Clean up. */
1300  heap_freetuple(reltup1);
1301  heap_freetuple(reltup2);
1302 
1303  table_close(relRelation, RowExclusiveLock);
1304 
1305  /*
1306  * Close both relcache entries' smgr links. We need this kluge because
1307  * both links will be invalidated during upcoming CommandCounterIncrement.
1308  * Whichever of the rels is the second to be cleared will have a dangling
1309  * reference to the other's smgr entry. Rather than trying to avoid this
1310  * by ordering operations just so, it's easiest to close the links first.
1311  * (Fortunately, since one of the entries is local in our transaction,
1312  * it's sufficient to clear out our own relcache this way; the problem
1313  * cannot arise for other backends when they see our update on the
1314  * non-transient relation.)
1315  *
1316  * Caution: the placement of this step interacts with the decision to
1317  * handle toast rels by recursion. When we are trying to rebuild pg_class
1318  * itself, the smgr close on pg_class must happen after all accesses in
1319  * this function.
1320  */
1323 }
1324 
1325 /*
1326  * Remove the transient table that was built by make_new_heap, and finish
1327  * cleaning up (including rebuilding all indexes on the old heap).
1328  */
1329 void
1330 finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1331  bool is_system_catalog,
1332  bool swap_toast_by_content,
1333  bool check_constraints,
1334  bool is_internal,
1335  TransactionId frozenXid,
1336  MultiXactId cutoffMulti,
1337  char newrelpersistence)
1338 {
1339  ObjectAddress object;
1340  Oid mapped_tables[4];
1341  int reindex_flags;
1342  int i;
1343 
1344  /* Report that we are now swapping relation files */
1347 
1348  /* Zero out possible results from swapped_relation_files */
1349  memset(mapped_tables, 0, sizeof(mapped_tables));
1350 
1351  /*
1352  * Swap the contents of the heap relations (including any toast tables).
1353  * Also set old heap's relfrozenxid to frozenXid.
1354  */
1355  swap_relation_files(OIDOldHeap, OIDNewHeap,
1356  (OIDOldHeap == RelationRelationId),
1357  swap_toast_by_content, is_internal,
1358  frozenXid, cutoffMulti, mapped_tables);
1359 
1360  /*
1361  * If it's a system catalog, queue a sinval message to flush all catcaches
1362  * on the catalog when we reach CommandCounterIncrement.
1363  */
1364  if (is_system_catalog)
1365  CacheInvalidateCatalog(OIDOldHeap);
1366 
1367  /*
1368  * Rebuild each index on the relation (but not the toast table, which is
1369  * all-new at this point). It is important to do this before the DROP
1370  * step because if we are processing a system catalog that will be used
1371  * during DROP, we want to have its indexes available. There is no
1372  * advantage to the other order anyway because this is all transactional,
1373  * so no chance to reclaim disk space before commit. We do not need a
1374  * final CommandCounterIncrement() because reindex_relation does it.
1375  *
1376  * Note: because index_build is called via reindex_relation, it will never
1377  * set indcheckxmin true for the indexes. This is OK even though in some
1378  * sense we are building new indexes rather than rebuilding existing ones,
1379  * because the new heap won't contain any HOT chains at all, let alone
1380  * broken ones, so it can't be necessary to set indcheckxmin.
1381  */
1382  reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1383  if (check_constraints)
1384  reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1385 
1386  /*
1387  * Ensure that the indexes have the same persistence as the parent
1388  * relation.
1389  */
1390  if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1391  reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1392  else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1393  reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1394 
1395  /* Report that we are now reindexing relations */
1398 
1399  reindex_relation(OIDOldHeap, reindex_flags, 0);
1400 
1401  /* Report that we are now doing clean up */
1404 
1405  /*
1406  * If the relation being rebuild is pg_class, swap_relation_files()
1407  * couldn't update pg_class's own pg_class entry (check comments in
1408  * swap_relation_files()), thus relfrozenxid was not updated. That's
1409  * annoying because a potential reason for doing a VACUUM FULL is a
1410  * imminent or actual anti-wraparound shutdown. So, now that we can
1411  * access the new relation using its indices, update relfrozenxid.
1412  * pg_class doesn't have a toast relation, so we don't need to update the
1413  * corresponding toast relation. Not that there's little point moving all
1414  * relfrozenxid updates here since swap_relation_files() needs to write to
1415  * pg_class for non-mapped relations anyway.
1416  */
1417  if (OIDOldHeap == RelationRelationId)
1418  {
1419  Relation relRelation;
1420  HeapTuple reltup;
1421  Form_pg_class relform;
1422 
1423  relRelation = table_open(RelationRelationId, RowExclusiveLock);
1424 
1425  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1426  if (!HeapTupleIsValid(reltup))
1427  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1428  relform = (Form_pg_class) GETSTRUCT(reltup);
1429 
1430  relform->relfrozenxid = frozenXid;
1431  relform->relminmxid = cutoffMulti;
1432 
1433  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1434 
1435  table_close(relRelation, RowExclusiveLock);
1436  }
1437 
1438  /* Destroy new heap with old filenode */
1439  object.classId = RelationRelationId;
1440  object.objectId = OIDNewHeap;
1441  object.objectSubId = 0;
1442 
1443  /*
1444  * The new relation is local to our transaction and we know nothing
1445  * depends on it, so DROP_RESTRICT should be OK.
1446  */
1448 
1449  /* performDeletion does CommandCounterIncrement at end */
1450 
1451  /*
1452  * Now we must remove any relation mapping entries that we set up for the
1453  * transient table, as well as its toast table and toast index if any. If
1454  * we fail to do this before commit, the relmapper will complain about new
1455  * permanent map entries being added post-bootstrap.
1456  */
1457  for (i = 0; OidIsValid(mapped_tables[i]); i++)
1458  RelationMapRemoveMapping(mapped_tables[i]);
1459 
1460  /*
1461  * At this point, everything is kosher except that, if we did toast swap
1462  * by links, the toast table's name corresponds to the transient table.
1463  * The name is irrelevant to the backend because it's referenced by OID,
1464  * but users looking at the catalogs could be confused. Rename it to
1465  * prevent this problem.
1466  *
1467  * Note no lock required on the relation, because we already hold an
1468  * exclusive lock on it.
1469  */
1470  if (!swap_toast_by_content)
1471  {
1472  Relation newrel;
1473 
1474  newrel = table_open(OIDOldHeap, NoLock);
1475  if (OidIsValid(newrel->rd_rel->reltoastrelid))
1476  {
1477  Oid toastidx;
1478  char NewToastName[NAMEDATALEN];
1479 
1480  /* Get the associated valid index to be renamed */
1481  toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1482  NoLock);
1483 
1484  /* rename the toast table ... */
1485  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1486  OIDOldHeap);
1487  RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1488  NewToastName, true, false);
1489 
1490  /* ... and its valid index too. */
1491  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1492  OIDOldHeap);
1493 
1494  RenameRelationInternal(toastidx,
1495  NewToastName, true, true);
1496  }
1497  relation_close(newrel, NoLock);
1498  }
1499 
1500  /* if it's not a catalog table, clear any missing attribute settings */
1501  if (!is_system_catalog)
1502  {
1503  Relation newrel;
1504 
1505  newrel = table_open(OIDOldHeap, NoLock);
1506  RelationClearMissing(newrel);
1507  relation_close(newrel, NoLock);
1508  }
1509 }
1510 
1511 
1512 /*
1513  * Get a list of tables that the current user owns and
1514  * have indisclustered set. Return the list in a List * of RelToCluster
1515  * (stored in the specified memory context), each one giving the tableOid
1516  * and the indexOid on which the table is already clustered.
1517  */
1518 static List *
1520 {
1521  Relation indRelation;
1522  TableScanDesc scan;
1523  ScanKeyData entry;
1524  HeapTuple indexTuple;
1526  MemoryContext old_context;
1527  RelToCluster *rvtc;
1528  List *rvs = NIL;
1529 
1530  /*
1531  * Get all indexes that have indisclustered set and are owned by
1532  * appropriate user.
1533  */
1534  indRelation = table_open(IndexRelationId, AccessShareLock);
1535  ScanKeyInit(&entry,
1536  Anum_pg_index_indisclustered,
1537  BTEqualStrategyNumber, F_BOOLEQ,
1538  BoolGetDatum(true));
1539  scan = table_beginscan_catalog(indRelation, 1, &entry);
1540  while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1541  {
1542  index = (Form_pg_index) GETSTRUCT(indexTuple);
1543 
1544  if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1545  continue;
1546 
1547  /*
1548  * We have to build the list in a different memory context so it will
1549  * survive the cross-transaction processing
1550  */
1551  old_context = MemoryContextSwitchTo(cluster_context);
1552 
1553  rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1554  rvtc->tableOid = index->indrelid;
1555  rvtc->indexOid = index->indexrelid;
1556  rvs = lappend(rvs, rvtc);
1557 
1558  MemoryContextSwitchTo(old_context);
1559  }
1560  table_endscan(scan);
1561 
1562  relation_close(indRelation, AccessShareLock);
1563 
1564  return rvs;
1565 }
#define RelationIsPopulated(relation)
Definition: rel.h:612
#define NIL
Definition: pg_list.h:65
struct IndexAmRoutine * rd_indam
Definition: rel.h:188
void RangeVarCallbackOwnsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Definition: tablecmds.c:15559
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
Definition: planner.c:6258
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
bool IsSystemRelation(Relation relation)
Definition: catalog.c:68
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, bool isTopLevel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:932
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
Definition: cluster.c:1330
uint32 TransactionId
Definition: c.h:520
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
#define RelationGetDescr(relation)
Definition: rel.h:482
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:476
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3210
Oid LookupCreationNamespace(const char *nspname)
Definition: namespace.c:2935
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3231
void CommitTransactionCommand(void)
Definition: xact.c:2947
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
Definition: pg_depend.c:232
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
int errcode(int sqlerrcode)
Definition: elog.c:610
#define PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES
Definition: progress.h:69
#define INFO
Definition: elog.h:33
SubTransactionId rd_newRelfilenodeSubid
Definition: rel.h:103
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
Definition: heaptuple.c:359
uint32 BlockNumber
Definition: block.h:31
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
Definition: pg_depend.c:43
#define REINDEX_REL_SUPPRESS_INDEX_USE
Definition: index.h:141
Form_pg_class rd_rel
Definition: rel.h:109
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:651
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
Definition: objectaccess.h:178
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
Oid tableOid
Definition: cluster.c:65
signed int int32
Definition: c.h:362
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
Definition: catalog.c:80
struct HeapTupleData * rd_indextuple
Definition: rel.h:176
MemoryContext PortalContext
Definition: mcxt.c:53
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
Definition: type.h:89
#define NAMEDATALEN
char * relname
Definition: primnodes.h:68
Form_pg_index rd_index
Definition: rel.h:174
#define SearchSysCacheExists1(cacheId, key1)
Definition: syscache.h:183
char * indexname
Definition: parsenodes.h:3213
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
Definition: cluster.c:557
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:48
ItemPointerData t_self
Definition: htup.h:65
#define PROGRESS_CLUSTER_COMMAND_VACUUM_FULL
Definition: progress.h:75
Oid get_relname_relid(const char *relname, Oid relnamespace)
Definition: lsyscache.c:1797
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define DEBUG2
Definition: elog.h:24
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3191
#define NoLock
Definition: lockdefs.h:34
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1286
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
Oid rd_toastoid
Definition: rel.h:233
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:957
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:337
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3380
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:312
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
static TransactionId OldestXmin
Definition: vacuumlazy.c:335
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:236
void cluster(ClusterStmt *stmt, bool isTopLevel)
Definition: cluster.c:105
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define PROGRESS_CLUSTER_COMMAND_CLUSTER
Definition: progress.h:74
void RelationClearMissing(Relation rel)
Definition: heap.c:2061
void CheckTableNotInUse(Relation rel, const char *stmt)
Definition: tablecmds.c:3589
static List * get_tables_to_cluster(MemoryContext cluster_context)
Definition: cluster.c:1519
#define PROGRESS_CLUSTER_COMMAND
Definition: progress.h:55
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition: predicate.c:3075
List * lappend(List *list, void *datum)
Definition: list.c:321
static int verbose
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:324
#define RelationIsMapped(relation)
Definition: rel.h:505
FormData_pg_index * Form_pg_index
Definition: pg_index.h:68
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Definition: cluster.c:483
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
float float4
Definition: c.h:497
SubTransactionId rd_createSubid
Definition: rel.h:102
static int elevel
Definition: vacuumlazy.c:333
void RelationAssumeNewRelfilenode(Relation relation)
Definition: relcache.c:3738
SubTransactionId rd_firstRelfilenodeSubid
Definition: rel.h:105
void pgstat_progress_end_command(void)
Definition: pgstat.c:3282
Oid RelationMapOidToFilenode(Oid relationId, bool shared)
Definition: relmapper.c:159
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:1021
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
#define InvalidMultiXactId
Definition: multixact.h:24
bool amclusterable
Definition: amapi.h:237
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
Definition: cluster.c:972
#define BoolGetDatum(X)
Definition: postgres.h:402
#define InvalidOid
Definition: postgres_ext.h:36
#define ereport(elevel,...)
Definition: elog.h:144
void RelationCloseSmgrByOid(Oid relationId)
Definition: relcache.c:2934
TransactionId MultiXactId
Definition: c.h:530
void CacheInvalidateCatalog(Oid catalogId)
Definition: inval.c:1254
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
Definition: index.h:143
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:745
#define lfirst(lc)
Definition: pg_list.h:190
void RelationMapRemoveMapping(Oid relationId)
Definition: relmapper.c:373
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
Definition: heap.c:1131
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:593
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4687
void StartTransactionCommand(void)
Definition: xact.c:2846
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition: indexing.c:43
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
Definition: indexing.c:301
#define REINDEX_REL_CHECK_CONSTRAINTS
Definition: index.h:142
#define PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP
Definition: progress.h:71
void cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
Definition: cluster.c:257
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3127
bool get_index_isclustered(Oid index_oid)
Definition: lsyscache.c:3380
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4514
Oid indexOid
Definition: cluster.c:66
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool isTopLevel, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
Definition: cluster.c:736
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:863
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:174
#define AccessExclusiveLock
Definition: lockdefs.h:45
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
Definition: cluster.c:423
#define elog(elevel,...)
Definition: elog.h:214
int i
#define NameStr(name)
Definition: c.h:622
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
Definition: index.h:144
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition: indexing.c:61
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Definition: inval.c:1314
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:108
#define PROGRESS_CLUSTER_PHASE_REBUILD_INDEX
Definition: progress.h:70
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
RangeVar * relation
Definition: parsenodes.h:3212
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
Definition: tablecmds.c:3487
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:56
bool reindex_relation(Oid relid, int flags, int options)
Definition: index.c:3676
Definition: pg_list.h:50
#define snprintf
Definition: port.h:193
#define RelationGetRelid(relation)
Definition: rel.h:456
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:1453
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, LOCKMODE lockmode)
Definition: cluster.c:610
#define lfirst_oid(lc)
Definition: pg_list.h:192
void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immediate)
Definition: relmapper.c:261
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
#define PERFORM_DELETION_INTERNAL
Definition: dependency.h:134
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121
#define RelationGetNamespace(relation)
Definition: rel.h:497
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
Definition: toasting.c:63