PostgreSQL Source Code  git master
vacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  * The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  * src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/xact.h"
34 #include "catalog/namespace.h"
35 #include "catalog/pg_database.h"
36 #include "catalog/pg_inherits.h"
37 #include "catalog/pg_namespace.h"
38 #include "commands/cluster.h"
39 #include "commands/defrem.h"
40 #include "commands/vacuum.h"
41 #include "miscadmin.h"
42 #include "nodes/makefuncs.h"
43 #include "pgstat.h"
44 #include "postmaster/autovacuum.h"
46 #include "storage/bufmgr.h"
47 #include "storage/lmgr.h"
48 #include "storage/proc.h"
49 #include "storage/procarray.h"
50 #include "utils/acl.h"
51 #include "utils/fmgroids.h"
52 #include "utils/guc.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
56 
57 
58 /*
59  * GUC parameters
60  */
67 
68 
69 /* A few variables that don't seem worth passing around as parameters */
70 static MemoryContext vac_context = NULL;
72 
73 
74 /*
75  * Variables for cost-based parallel vacuum. See comments atop
76  * compute_parallel_delay to understand how it works.
77  */
81 
82 /* non-export function prototypes */
83 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
84 static List *get_all_vacuum_rels(int options);
85 static void vac_truncate_clog(TransactionId frozenXID,
86  MultiXactId minMulti,
87  TransactionId lastSaneFrozenXid,
88  MultiXactId lastSaneMinMulti);
89 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
90 static double compute_parallel_delay(void);
92 
93 /*
94  * Primary entry point for manual VACUUM and ANALYZE commands
95  *
96  * This is mainly a preparation wrapper for the real operations that will
97  * happen in vacuum().
   *
   * pstate is used only to attach a cursor position to option-parsing errors.
   * vacstmt supplies the option list (vacstmt->options), the target list
   * (vacstmt->rels) and is_vacuumcmd, which distinguishes VACUUM from ANALYZE.
   * isTopLevel is handed through unchanged to vacuum().
   *
   * The option list is folded into a local VacuumParams struct: boolean
   * options become bits in params.options, PARALLEL becomes params.nworkers,
   * and the freeze ages are set to 0 (FREEZE given) or -1 (use defaults).
   * Unknown options and invalid combinations are reported with ereport(ERROR).
   *
   * NOTE(review): this listing skips some source lines (the embedded line
   * numbers jump: 112 -> 115, 144 -> 146, 156 -> 158, 217 -> 220), so a few
   * statements are not visible here.  Check the original file before editing.
98  */
99 void
100 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
101 {
102  VacuumParams params;
103  bool verbose = false;
104  bool skip_locked = false;
105  bool analyze = false;
106  bool freeze = false;
107  bool full = false;
108  bool disable_page_skipping = false;
109  bool process_toast = true;
110  ListCell *lc;
111 
112  /* Set default value */
   /*
    * NOTE(review): listing lines 113-114 are missing; the default
    * assignment(s) announced by the comment above are not visible here.
    */
115 
116  /* By default parallel vacuum is enabled */
117  params.nworkers = 0;
118 
119  /* Parse options list */
120  foreach(lc, vacstmt->options)
121  {
122  DefElem *opt = (DefElem *) lfirst(lc);
123 
124  /* Parse common options for VACUUM and ANALYZE */
125  if (strcmp(opt->defname, "verbose") == 0)
126  verbose = defGetBoolean(opt);
127  else if (strcmp(opt->defname, "skip_locked") == 0)
128  skip_locked = defGetBoolean(opt);
129  else if (!vacstmt->is_vacuumcmd)
130  ereport(ERROR,
131  (errcode(ERRCODE_SYNTAX_ERROR),
132  errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
133  parser_errposition(pstate, opt->location)));
134 
135  /* Parse options available on VACUUM */
136  else if (strcmp(opt->defname, "analyze") == 0)
137  analyze = defGetBoolean(opt);
138  else if (strcmp(opt->defname, "freeze") == 0)
139  freeze = defGetBoolean(opt);
140  else if (strcmp(opt->defname, "full") == 0)
141  full = defGetBoolean(opt);
142  else if (strcmp(opt->defname, "disable_page_skipping") == 0)
143  disable_page_skipping = defGetBoolean(opt);
144  else if (strcmp(opt->defname, "index_cleanup") == 0)
   /*
    * NOTE(review): listing line 145 (the statement executed for
    * "index_cleanup") is missing; presumably it mirrors the "truncate"
    * branch below -- confirm against the original file.
    */
146  else if (strcmp(opt->defname, "process_toast") == 0)
147  process_toast = defGetBoolean(opt);
148  else if (strcmp(opt->defname, "truncate") == 0)
149  params.truncate = get_vacopt_ternary_value(opt);
150  else if (strcmp(opt->defname, "parallel") == 0)
151  {
   /* PARALLEL must come with an explicit worker count. */
152  if (opt->arg == NULL)
153  {
154  ereport(ERROR,
155  (errcode(ERRCODE_SYNTAX_ERROR),
156  errmsg("parallel option requires a value between 0 and %d",
   /*
    * NOTE(review): listing line 157 (the argument for the %d above
    * and the close of errmsg) is missing from this listing.
    */
158  parser_errposition(pstate, opt->location)));
159  }
160  else
161  {
162  int nworkers;
163 
164  nworkers = defGetInt32(opt);
165  if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
166  ereport(ERROR,
167  (errcode(ERRCODE_SYNTAX_ERROR),
168  errmsg("parallel vacuum degree must be between 0 and %d",
169  MAX_PARALLEL_WORKER_LIMIT),
170  parser_errposition(pstate, opt->location)));
171 
172  /*
173  * Disable parallel vacuum, if user has specified parallel
174  * degree as zero.
   *
   * Note the encoding: a user-supplied 0 is stored as -1, whereas
   * params.nworkers == 0 is the default set before the loop.
175  */
176  if (nworkers == 0)
177  params.nworkers = -1;
178  else
179  params.nworkers = nworkers;
180  }
181  }
182  else
183  ereport(ERROR,
184  (errcode(ERRCODE_SYNTAX_ERROR),
185  errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
186  parser_errposition(pstate, opt->location)));
187  }
188 
189  /* Set vacuum options */
   /*
    * The first term selects the base command; the "analyze" flag adds
    * VACOPT_ANALYZE on top of VACOPT_VACUUM for VACUUM (ANALYZE).
    */
190  params.options =
191  (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
192  (verbose ? VACOPT_VERBOSE : 0) |
193  (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
194  (analyze ? VACOPT_ANALYZE : 0) |
195  (freeze ? VACOPT_FREEZE : 0) |
196  (full ? VACOPT_FULL : 0) |
197  (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
198  (process_toast ? VACOPT_PROCESS_TOAST : 0);
199 
200  /* sanity checks on options */
201  Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
202  Assert((params.options & VACOPT_VACUUM) ||
203  !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
204 
   /* Only an explicit positive worker count conflicts with FULL. */
205  if ((params.options & VACOPT_FULL) && params.nworkers > 0)
206  ereport(ERROR,
207  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
208  errmsg("VACUUM FULL cannot be performed in parallel")));
209 
210  /*
211  * Make sure VACOPT_ANALYZE is specified if any column lists are present.
212  */
213  if (!(params.options & VACOPT_ANALYZE))
214  {
   /* NOTE(review): this declaration shadows the function-scope lc. */
215  ListCell *lc;
216 
217  foreach(lc, vacstmt->rels)
218  {
   /*
    * NOTE(review): listing line 219 is missing; it must declare the
    * "vrel" used below, presumably taken from the current list cell.
    */
220 
221  if (vrel->va_cols != NIL)
222  ereport(ERROR,
223  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
224  errmsg("ANALYZE option must be specified when a column list is provided")));
225  }
226  }
227 
228  /*
229  * All freeze ages are zero if the FREEZE option is given; otherwise pass
230  * them as -1 which means to use the default values.
231  */
232  if (params.options & VACOPT_FREEZE)
233  {
234  params.freeze_min_age = 0;
235  params.freeze_table_age = 0;
236  params.multixact_freeze_min_age = 0;
237  params.multixact_freeze_table_age = 0;
238  }
239  else
240  {
241  params.freeze_min_age = -1;
242  params.freeze_table_age = -1;
243  params.multixact_freeze_min_age = -1;
244  params.multixact_freeze_table_age = -1;
245  }
246 
247  /* user-invoked vacuum is never "for wraparound" */
248  params.is_wraparound = false;
249 
250  /* user-invoked vacuum never uses this parameter */
251  params.log_min_duration = -1;
252 
253  /* Now go through the common routine */
   /* NULL buffer strategy: vacuum() then creates its own. */
254  vacuum(vacstmt->rels, &params, NULL, isTopLevel);
255 }
256 
257 /*
258  * Internal entry point for VACUUM and ANALYZE commands.
259  *
260  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
261  * we process all relevant tables in the database. For each VacuumRelation,
262  * if a valid OID is supplied, the table with that OID is what to process;
263  * otherwise, the VacuumRelation's RangeVar indicates what to process.
264  *
265  * params contains a set of parameters that can be used to customize the
266  * behavior.
267  *
268  * bstrategy is normally given as NULL, but in autovacuum it can be passed
269  * in to use the same buffer strategy object across multiple vacuum() calls.
270  *
271  * isTopLevel should be passed down from ProcessUtility.
272  *
273  * It is the caller's responsibility that all parameters are allocated in a
274  * memory context that will not disappear at transaction commit.
   *
   * Outline of the visible code: validate the option combination, create the
   * "Vacuum" memory context, expand the relation list, decide whether to run
   * each relation in its own transaction, loop over the relations calling
   * vacuum_rel() and/or analyze_rel(), then delete the memory context.
   *
   * A static flag (in_vacuum) rejects nested invocations with an ERROR; it is
   * set inside PG_TRY and cleared in PG_FINALLY.
   *
   * NOTE(review): this listing skips many source lines (the embedded numbers
   * jump at 337->339, 347->349, 373->375, 406->408, 430->432, 433->435,
   * 442->444, 447->449, 456->458, 472->474, 474->476, 482->485, 492->494,
   * 515->517 and 524->526).  The dropped statements include, at least, the
   * declarations of "vrel" and one "if" condition; consult the original file
   * before changing any logic here.
275  */
276 void
277 vacuum(List *relations, VacuumParams *params,
278  BufferAccessStrategy bstrategy, bool isTopLevel)
279 {
   /* Re-entrancy guard; see the check against it further down. */
280  static bool in_vacuum = false;
281 
282  const char *stmttype;
283  volatile bool in_outer_xact,
284  use_own_xacts;
285 
286  Assert(params != NULL);
287 
   /* Command name used in the error messages below. */
288  stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
289 
290  /*
291  * We cannot run VACUUM inside a user transaction block; if we were inside
292  * a transaction, then our commit- and start-transaction-command calls
293  * would not have the intended effect! There are numerous other subtle
294  * dependencies on this, too.
295  *
296  * ANALYZE (without VACUUM) can run either way.
297  */
298  if (params->options & VACOPT_VACUUM)
299  {
300  PreventInTransactionBlock(isTopLevel, stmttype);
301  in_outer_xact = false;
302  }
303  else
304  in_outer_xact = IsInTransactionBlock(isTopLevel);
305 
306  /*
307  * Due to static variables vac_context, anl_context and vac_strategy,
308  * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
309  * calls a hostile index expression that itself calls ANALYZE.
310  */
311  if (in_vacuum)
312  ereport(ERROR,
313  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
314  errmsg("%s cannot be executed from VACUUM or ANALYZE",
315  stmttype)));
316 
317  /*
318  * Sanity check DISABLE_PAGE_SKIPPING option.
319  */
320  if ((params->options & VACOPT_FULL) != 0 &&
321  (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
322  ereport(ERROR,
323  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
324  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
325 
326  /* sanity check for PROCESS_TOAST */
327  if ((params->options & VACOPT_FULL) != 0 &&
328  (params->options & VACOPT_PROCESS_TOAST) == 0)
329  ereport(ERROR,
330  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
331  errmsg("PROCESS_TOAST required with VACUUM FULL")));
332 
333  /*
334  * Send info about dead objects to the statistics collector, unless we are
335  * in autovacuum --- autovacuum.c does this for itself.
336  */
337  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
   /*
    * NOTE(review): listing line 338 (the statement controlled by the "if"
    * above) is missing from this listing.
    */
339 
340  /*
341  * Create special memory context for cross-transaction storage.
342  *
343  * Since it is a child of PortalContext, it will go away eventually even
344  * if we suffer an error; there's no need for special abort cleanup logic.
345  */
346  vac_context = AllocSetContextCreate(PortalContext,
347  "Vacuum",
   /*
    * NOTE(review): listing line 348 (remaining arguments and the end of the
    * call) is missing from this listing.
    */
349 
350  /*
351  * If caller didn't give us a buffer strategy object, make one in the
352  * cross-transaction memory context.
353  */
354  if (bstrategy == NULL)
355  {
356  MemoryContext old_context = MemoryContextSwitchTo(vac_context);
357 
358  bstrategy = GetAccessStrategy(BAS_VACUUM);
359  MemoryContextSwitchTo(old_context);
360  }
361  vac_strategy = bstrategy;
362 
363  /*
364  * Build list of relation(s) to process, putting any new data in
365  * vac_context for safekeeping.
366  */
367  if (relations != NIL)
368  {
369  List *newrels = NIL;
370  ListCell *lc;
371 
372  foreach(lc, relations)
373  {
   /*
    * NOTE(review): listing line 374 is missing; it must declare the
    * "vrel" passed to expand_vacuum_rel() below.
    */
375  List *sublist;
376  MemoryContext old_context;
377 
378  sublist = expand_vacuum_rel(vrel, params->options);
   /* Concatenate while in vac_context so the result list lives there. */
379  old_context = MemoryContextSwitchTo(vac_context);
380  newrels = list_concat(newrels, sublist);
381  MemoryContextSwitchTo(old_context);
382  }
383  relations = newrels;
384  }
385  else
386  relations = get_all_vacuum_rels(params->options);
387 
388  /*
389  * Decide whether we need to start/commit our own transactions.
390  *
391  * For VACUUM (with or without ANALYZE): always do so, so that we can
392  * release locks as soon as possible. (We could possibly use the outer
393  * transaction for a one-table VACUUM, but handling TOAST tables would be
394  * problematic.)
395  *
396  * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
397  * start/commit our own transactions. Also, there's no need to do so if
398  * only processing one relation. For multiple relations when not within a
399  * transaction block, and also in an autovacuum worker, use own
400  * transactions so we can release locks sooner.
401  */
402  if (params->options & VACOPT_VACUUM)
403  use_own_xacts = true;
404  else
405  {
406  Assert(params->options & VACOPT_ANALYZE);
   /*
    * NOTE(review): listing line 407 (the "if" condition guarding the next
    * assignment) is missing; going by the comment above it presumably
    * tests for an autovacuum worker -- confirm against the original file.
    */
408  use_own_xacts = true;
409  else if (in_outer_xact)
410  use_own_xacts = false;
411  else if (list_length(relations) > 1)
412  use_own_xacts = true;
413  else
414  use_own_xacts = false;
415  }
416 
417  /*
418  * vacuum_rel expects to be entered with no transaction active; it will
419  * start and commit its own transaction. But we are called by an SQL
420  * command, and so we are executing inside a transaction already. We
421  * commit the transaction started in PostgresMain() here, and start
422  * another one before exiting to match the commit waiting for us back in
423  * PostgresMain().
424  */
425  if (use_own_xacts)
426  {
427  Assert(!in_outer_xact);
428 
429  /* ActiveSnapshot is not set by autovacuum */
430  if (ActiveSnapshotSet())
   /* NOTE(review): listing line 431 (body of the "if" above) is missing. */
432 
433  /* matches the StartTransaction in PostgresMain() */
   /* NOTE(review): listing line 434 (the matching call) is missing. */
435  }
436 
437  /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
438  PG_TRY();
439  {
440  ListCell *cur;
441 
442  in_vacuum = true;
   /* NOTE(review): listing lines 443 and 448 are missing in this group. */
444  VacuumCostBalance = 0;
445  VacuumPageHit = 0;
446  VacuumPageMiss = 0;
447  VacuumPageDirty = 0;
449  VacuumSharedCostBalance = NULL;
450  VacuumActiveNWorkers = NULL;
451 
452  /*
453  * Loop to process each selected relation.
454  */
455  foreach(cur, relations)
456  {
   /*
    * NOTE(review): listing line 457 is missing; it must declare the
    * "vrel" used throughout the loop body.
    */
458 
459  if (params->options & VACOPT_VACUUM)
460  {
   /* A false result from vacuum_rel() also skips the ANALYZE step. */
461  if (!vacuum_rel(vrel->oid, vrel->relation, params))
462  continue;
463  }
464 
465  if (params->options & VACOPT_ANALYZE)
466  {
467  /*
468  * If using separate xacts, start one for analyze. Otherwise,
469  * we can use the outer transaction.
470  */
471  if (use_own_xacts)
472  {
   /* NOTE(review): listing lines 473 and 475 are missing here. */
474  /* functions in indexes may want a snapshot set */
476  }
477 
478  analyze_rel(vrel->oid, vrel->relation, params,
479  vrel->va_cols, in_outer_xact, vac_strategy);
480 
481  if (use_own_xacts)
482  {
   /* NOTE(review): listing lines 483-484 are missing here. */
485  }
486  else
487  {
488  /*
489  * If we're not using separate xacts, better separate the
490  * ANALYZE actions with CCIs. This avoids trouble if user
491  * says "ANALYZE t, t".
492  */
   /* NOTE(review): listing line 493 (the call itself) is missing. */
494  }
495  }
496  }
497  }
498  PG_FINALLY();
499  {
   /* Runs on both normal exit and error, so the guard is always reset. */
500  in_vacuum = false;
501  VacuumCostActive = false;
502  }
503  PG_END_TRY();
504 
505  /*
506  * Finish up processing.
507  */
508  if (use_own_xacts)
509  {
510  /* here, we are not in a transaction */
511 
512  /*
513  * This matches the CommitTransaction waiting for us in
514  * PostgresMain().
515  */
   /* NOTE(review): listing line 516 (the matching call) is missing. */
517  }
518 
519  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
520  {
521  /*
522  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
523  * (autovacuum.c does this for itself.)
524  */
   /* NOTE(review): listing line 525 (the call itself) is missing. */
526  }
527 
528  /*
529  * Clean up working storage --- note we must do this after
530  * StartTransactionCommand, else we might be trying to delete the active
531  * context!
532  */
533  MemoryContextDelete(vac_context);
534  vac_context = NULL;
535 }
536 
537 /*
538  * Check if a given relation can be safely vacuumed or analyzed. If the
539  * user is not the relation owner, issue a WARNING log message and return
540  * false to let the caller decide what to do with this relation. This
541  * routine is used to decide if a relation can be processed for VACUUM or
542  * ANALYZE.
   *
   * The body reads three inputs: relid (the relation's OID), reltuple (its
   * pg_class row; relisshared, relname and relnamespace are consulted) and
   * options (at least one of VACOPT_VACUUM / VACOPT_ANALYZE must be set).
   *
   * Returns true when the permission test passes; otherwise reports a
   * "skipping ..." message and returns false.
   *
   * NOTE(review): this listing skips source line 545 (function name and
   * parameter list) and the opening line of every ereport call (571, 575,
   * 579, 594, 598, 602), so the message level is not visible here;
   * the header comment above says WARNING.
543  */
544 bool
   /*
    * NOTE(review): listing line 545 is missing.  Callers elsewhere in this
    * file invoke vacuum_is_relation_owner(relid, classForm, options).
    */
546 {
547  char *relname;
548 
549  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
550 
551  /*
552  * Check permissions.
553  *
554  * We allow the user to vacuum or analyze a table if he is superuser, the
555  * table owner, or the database owner (but in the latter case, only if
556  * it's not a shared relation). pg_class_ownercheck includes the
557  * superuser case.
558  *
559  * Note we choose to treat permissions failure as a WARNING and keep
560  * trying to vacuum or analyze the rest of the DB --- is this appropriate?
561  */
562  if (pg_class_ownercheck(relid, GetUserId()) ||
563  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
564  return true;
565 
   /* Permission denied from here on; only the message wording varies. */
566  relname = NameStr(reltuple->relname);
567 
568  if ((options & VACOPT_VACUUM) != 0)
569  {
   /*
    * Wording depends on who could have processed the relation: shared
    * relations, pg_catalog relations, and everything else.
    */
570  if (reltuple->relisshared)
572  (errmsg("skipping \"%s\" --- only superuser can vacuum it",
573  relname)));
574  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
576  (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
577  relname)));
578  else
580  (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
581  relname)));
582 
583  /*
584  * For VACUUM ANALYZE, both logs could show up, but just generate
585  * information for VACUUM as that would be the first one to be
586  * processed.
587  */
588  return false;
589  }
590 
   /* Reached only when VACOPT_VACUUM is not set (ANALYZE alone). */
591  if ((options & VACOPT_ANALYZE) != 0)
592  {
593  if (reltuple->relisshared)
595  (errmsg("skipping \"%s\" --- only superuser can analyze it",
596  relname)));
597  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
599  (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
600  relname)));
601  else
603  (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
604  relname)));
605  }
606 
607  return false;
608 }
609 
610 
611 /*
612  * vacuum_open_relation
613  *
614  * This routine is used for attempting to open and lock a relation which
615  * is going to be vacuumed or analyzed. If the relation cannot be opened
616  * or locked, a log is emitted if possible.
   *
   * Inputs read by the body: relid (OID to open), relation (RangeVar used
   * only for the relation name in messages; may be NULL to suppress them),
   * options (VACOPT_* bits), verbose, and lmode (lock mode to acquire).
   *
   * Returns the opened Relation, or NULL if it could not be opened or the
   * lock was not available under VACOPT_SKIP_LOCKED.
   *
   * NOTE(review): this listing skips source line 619 (function name and the
   * first parameters), line 670 (the condition selecting WARNING) and lines
   * 686 / 707 (the first argument line of two ereport calls).
617  */
618 Relation
   /* NOTE(review): listing line 619 (start of the signature) is missing. */
620  bool verbose, LOCKMODE lmode)
621 {
622  Relation rel;
   /* Stays true unless the conditional lock attempt below fails. */
623  bool rel_lock = true;
624  int elevel;
625 
626  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
627 
628  /*
629  * Open the relation and get the appropriate lock on it.
630  *
631  * There's a race condition here: the relation may have gone away since
632  * the last time we saw it. If so, we don't need to vacuum or analyze it.
633  *
634  * If we've been asked not to wait for the relation lock, acquire it first
635  * in non-blocking mode, before calling try_relation_open().
636  */
637  if (!(options & VACOPT_SKIP_LOCKED))
638  rel = try_relation_open(relid, lmode);
639  else if (ConditionalLockRelationOid(relid, lmode))
   /* Lock already held from the conditional acquire, hence NoLock. */
640  rel = try_relation_open(relid, NoLock);
641  else
642  {
643  rel = NULL;
644  rel_lock = false;
645  }
646 
647  /* if relation is opened, leave */
648  if (rel)
649  return rel;
650 
651  /*
652  * Relation could not be opened, hence generate if possible a log
653  * informing on the situation.
654  *
655  * If the RangeVar is not defined, we do not have enough information to
656  * provide a meaningful log statement. Chances are that the caller has
657  * intentionally not provided this information so that this logging is
658  * skipped, anyway.
659  */
660  if (relation == NULL)
661  return NULL;
662 
663  /*
664  * Determine the log level.
665  *
666  * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
667  * statements in the permission checks; otherwise, only log if the caller
668  * so requested.
669  */
   /*
    * NOTE(review): listing line 670 (the "if" condition for the WARNING
    * case) is missing; per the comment above it presumably distinguishes a
    * manual command from autovacuum -- confirm against the original file.
    */
671  elevel = WARNING;
672  else if (verbose)
673  elevel = LOG;
674  else
675  return NULL;
676 
677  if ((options & VACOPT_VACUUM) != 0)
678  {
   /* rel_lock tells "lock not available" apart from "relation gone". */
679  if (!rel_lock)
680  ereport(elevel,
681  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
682  errmsg("skipping vacuum of \"%s\" --- lock not available",
683  relation->relname)));
684  else
685  ereport(elevel,
   /* NOTE(review): listing line 686 (the errcode line) is missing. */
687  errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
688  relation->relname)));
689 
690  /*
691  * For VACUUM ANALYZE, both logs could show up, but just generate
692  * information for VACUUM as that would be the first one to be
693  * processed.
694  */
695  return NULL;
696  }
697 
698  if ((options & VACOPT_ANALYZE) != 0)
699  {
700  if (!rel_lock)
701  ereport(elevel,
702  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
703  errmsg("skipping analyze of \"%s\" --- lock not available",
704  relation->relname)));
705  else
706  ereport(elevel,
   /* NOTE(review): listing line 707 (the errcode line) is missing. */
708  errmsg("skipping analyze of \"%s\" --- relation no longer exists",
709  relation->relname)));
710  }
711 
712  return NULL;
713 }
714 
715 
716 /*
717  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
718  * and optionally add VacuumRelations for partitions of the table.
719  *
720  * If a VacuumRelation does not have an OID supplied and is a partitioned
721  * table, an extra entry will be added to the output for each partition.
722  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
723  * it does not want us to expand partitioned tables.
724  *
725  * We take care not to modify the input data structure, but instead build
726  * new VacuumRelation(s) to return. (But note that they will reference
727  * unmodified parts of the input, eg column lists.) New data structures
728  * are made in vac_context.
   *
   * The result is NIL when the relation lookup yields no valid OID, or when
   * the ownership check fails on a relation that is not a partitioned table.
   * Partitions are appended even if the ownership check on the parent fails,
   * since that check does not gate the partition loop.
   *
   * NOTE(review): this listing skips source line 731 (function name and
   * parameters; the prototype near the top of the file reads
   * "static List *expand_vacuum_rel(VacuumRelation *vrel, int options);")
   * as well as lines 756, 765, 776, 781 and 859.
729  */
730 static List *
   /* NOTE(review): listing line 731 (the signature) is missing. */
732 {
733  List *vacrels = NIL;
734  MemoryContext oldcontext;
735 
736  /* If caller supplied OID, there's nothing we need do here. */
737  if (OidIsValid(vrel->oid))
738  {
   /* The input node itself is reused; only the list cell is new. */
739  oldcontext = MemoryContextSwitchTo(vac_context);
740  vacrels = lappend(vacrels, vrel);
741  MemoryContextSwitchTo(oldcontext);
742  }
743  else
744  {
745  /* Process a specific relation, and possibly partitions thereof */
746  Oid relid;
747  HeapTuple tuple;
748  Form_pg_class classForm;
749  bool include_parts;
750  int rvr_opts;
751 
752  /*
753  * Since autovacuum workers supply OIDs when calling vacuum(), no
754  * autovacuum worker should reach this code.
755  */
   /*
    * NOTE(review): listing line 756 is missing; presumably an assertion
    * backing the comment above -- confirm against the original file.
    */
757 
758  /*
759  * We transiently take AccessShareLock to protect the syscache lookup
760  * below, as well as find_all_inheritors's expectation that the caller
761  * holds some lock on the starting relation.
762  */
763  rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
764  relid = RangeVarGetRelidExtended(vrel->relation,
   /*
    * NOTE(review): listing line 765 (the lock-mode argument) is missing;
    * the comment above says AccessShareLock.
    */
766  rvr_opts,
767  NULL, NULL);
768 
769  /*
770  * If the lock is unavailable, emit the same log statement that
771  * vacuum_rel() and analyze_rel() would.
772  */
773  if (!OidIsValid(relid))
774  {
   /*
    * NOTE(review): listing lines 776 and 781 (the opening of each ereport
    * call, including its level) are missing from this listing.
    */
775  if (options & VACOPT_VACUUM)
777  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
778  errmsg("skipping vacuum of \"%s\" --- lock not available",
779  vrel->relation->relname)));
780  else
782  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
783  errmsg("skipping analyze of \"%s\" --- lock not available",
784  vrel->relation->relname)));
785  return vacrels;
786  }
787 
788  /*
789  * To check whether the relation is a partitioned table and its
790  * ownership, fetch its syscache entry.
791  */
792  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
793  if (!HeapTupleIsValid(tuple))
794  elog(ERROR, "cache lookup failed for relation %u", relid);
795  classForm = (Form_pg_class) GETSTRUCT(tuple);
796 
797  /*
798  * Make a returnable VacuumRelation for this rel if user is a proper
799  * owner.
800  */
801  if (vacuum_is_relation_owner(relid, classForm, options))
802  {
803  oldcontext = MemoryContextSwitchTo(vac_context);
804  vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
805  relid,
806  vrel->va_cols));
807  MemoryContextSwitchTo(oldcontext);
808  }
809 
810 
   /* Read relkind before releasing the cache entry classForm points into. */
811  include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
812  ReleaseSysCache(tuple);
813 
814  /*
815  * If it is, make relation list entries for its partitions. Note that
816  * the list returned by find_all_inheritors() includes the passed-in
817  * OID, so we have to skip that. There's no point in taking locks on
818  * the individual partitions yet, and doing so would just add
819  * unnecessary deadlock risk. For this last reason we do not check
820  * yet the ownership of the partitions, which get added to the list to
821  * process. Ownership will be checked later on anyway.
822  */
823  if (include_parts)
824  {
825  List *part_oids = find_all_inheritors(relid, NoLock, NULL);
826  ListCell *part_lc;
827 
828  foreach(part_lc, part_oids)
829  {
830  Oid part_oid = lfirst_oid(part_lc);
831 
832  if (part_oid == relid)
833  continue; /* ignore original table */
834 
835  /*
836  * We omit a RangeVar since it wouldn't be appropriate to
837  * complain about failure to open one of these relations
838  * later.
839  */
   /* Each partition entry shares the parent's column list. */
840  oldcontext = MemoryContextSwitchTo(vac_context);
841  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
842  part_oid,
843  vrel->va_cols));
844  MemoryContextSwitchTo(oldcontext);
845  }
846  }
847 
848  /*
849  * Release lock again. This means that by the time we actually try to
850  * process the table, it might be gone or renamed. In the former case
851  * we'll silently ignore it; in the latter case we'll process it
852  * anyway, but we must beware that the RangeVar doesn't necessarily
853  * identify it anymore. This isn't ideal, perhaps, but there's little
854  * practical alternative, since we're typically going to commit this
855  * transaction and begin a new one between now and then. Moreover,
856  * holding locks on multiple relations would create significant risk
857  * of deadlock.
858  */
   /* NOTE(review): listing line 859 (the unlock call itself) is missing. */
860  }
861 
862  return vacrels;
863 }
864 
865 /*
866  * Construct a list of VacuumRelations for all vacuumable rels in
867  * the current database. The list is built in vac_context.
   *
   * The function scans pg_class under AccessShareLock and emits one
   * VacuumRelation (OID only, no RangeVar, no column list) for every row that
   * passes vacuum_is_relation_owner() and whose relkind is a plain table,
   * materialized view or partitioned table.
   *
   * options is forwarded to vacuum_is_relation_owner(), which uses it to pick
   * the wording of its "skipping" messages.
   *
   * NOTE(review): this listing skips source line 870 (function name and
   * parameters); the prototype near the top of the file reads
   * "static List *get_all_vacuum_rels(int options);".
868  */
869 static List *
   /* NOTE(review): listing line 870 (the signature) is missing. */
871 {
872  List *vacrels = NIL;
873  Relation pgclass;
874  TableScanDesc scan;
875  HeapTuple tuple;
876 
877  pgclass = table_open(RelationRelationId, AccessShareLock);
878 
   /* No scan keys: every pg_class row is visited. */
879  scan = table_beginscan_catalog(pgclass, 0, NULL);
880 
881  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
882  {
883  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
884  MemoryContext oldcontext;
885  Oid relid = classForm->oid;
886 
887  /* check permissions of relation */
   /*
    * NOTE(review): this check runs before the relkind filter below, so it
    * is also applied to rows that the relkind test would discard anyway.
    */
888  if (!vacuum_is_relation_owner(relid, classForm, options))
889  continue;
890 
891  /*
892  * We include partitioned tables here; depending on which operation is
893  * to be performed, caller will decide whether to process or ignore
894  * them.
895  */
896  if (classForm->relkind != RELKIND_RELATION &&
897  classForm->relkind != RELKIND_MATVIEW &&
898  classForm->relkind != RELKIND_PARTITIONED_TABLE)
899  continue;
900 
901  /*
902  * Build VacuumRelation(s) specifying the table OIDs to be processed.
903  * We omit a RangeVar since it wouldn't be appropriate to complain
904  * about failure to open one of these relations later.
905  */
906  oldcontext = MemoryContextSwitchTo(vac_context);
907  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
908  relid,
909  NIL));
910  MemoryContextSwitchTo(oldcontext);
911  }
912 
913  table_endscan(scan);
914  table_close(pgclass, AccessShareLock);
915 
916  return vacrels;
917 }
918 
919 /*
920  * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
921  *
922  * Input parameters are the target relation, applicable freeze age settings.
923  *
924  * The output parameters are:
925  * - oldestXmin is the cutoff value used to distinguish whether tuples are
926  * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
927  * - freezeLimit is the Xid below which all Xids are replaced by
928  * FrozenTransactionId during vacuum.
929  * - xidFullScanLimit (computed from freeze_table_age parameter)
930  * represents a minimum Xid value; a table whose relfrozenxid is older than
931  * this will have a full-table vacuum applied to it, to freeze tuples across
932  * the whole table. Vacuuming a table younger than this value can use a
933  * partial scan.
934  * - multiXactCutoff is the value below which all MultiXactIds are removed from
935  * Xmax.
936  * - mxactFullScanLimit is a value against which a table's relminmxid value is
937  * compared to produce a full-table vacuum, as with xidFullScanLimit.
938  *
939  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
940  * not interested.
941  */
942 void
944  int freeze_min_age,
945  int freeze_table_age,
946  int multixact_freeze_min_age,
947  int multixact_freeze_table_age,
948  TransactionId *oldestXmin,
949  TransactionId *freezeLimit,
950  TransactionId *xidFullScanLimit,
951  MultiXactId *multiXactCutoff,
952  MultiXactId *mxactFullScanLimit)
953 {
954  int freezemin;
955  int mxid_freezemin;
956  int effective_multixact_freeze_max_age;
957  TransactionId limit;
958  TransactionId safeLimit;
959  MultiXactId oldestMxact;
960  MultiXactId mxactLimit;
961  MultiXactId safeMxactLimit;
962 
963  /*
964  * We can always ignore processes running lazy vacuum. This is because we
965  * use these values only for deciding which tuples we must keep in the
966  * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
967  * XID assigned), it's safe to ignore it. In theory it could be
968  * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
969  * that only one vacuum process can be working on a particular table at
970  * any time, and that each vacuum is always an independent transaction.
971  */
972  *oldestXmin = GetOldestNonRemovableTransactionId(rel);
973 
975  {
976  TransactionId limit_xmin;
977  TimestampTz limit_ts;
978 
979  if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
980  &limit_xmin, &limit_ts))
981  {
982  /*
983  * TODO: We should only set the threshold if we are pruning on the
984  * basis of the increased limits. Not as crucial here as it is
985  * for opportunistic pruning (which often happens at a much higher
986  * frequency), but would still be a significant improvement.
987  */
988  SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
989  *oldestXmin = limit_xmin;
990  }
991  }
992 
993  Assert(TransactionIdIsNormal(*oldestXmin));
994 
995  /*
996  * Determine the minimum freeze age to use: as specified by the caller, or
997  * vacuum_freeze_min_age, but in any case not more than half
998  * autovacuum_freeze_max_age, so that autovacuums to prevent XID
999  * wraparound won't occur too frequently.
1000  */
1001  freezemin = freeze_min_age;
1002  if (freezemin < 0)
1003  freezemin = vacuum_freeze_min_age;
1004  freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
1005  Assert(freezemin >= 0);
1006 
1007  /*
1008  * Compute the cutoff XID, being careful not to generate a "permanent" XID
1009  */
1010  limit = *oldestXmin - freezemin;
1011  if (!TransactionIdIsNormal(limit))
1012  limit = FirstNormalTransactionId;
1013 
1014  /*
1015  * If oldestXmin is very far back (in practice, more than
1016  * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
1017  * freeze age of zero.
1018  */
1020  if (!TransactionIdIsNormal(safeLimit))
1021  safeLimit = FirstNormalTransactionId;
1022 
1023  if (TransactionIdPrecedes(limit, safeLimit))
1024  {
1025  ereport(WARNING,
1026  (errmsg("oldest xmin is far in the past"),
1027  errhint("Close open transactions soon to avoid wraparound problems.\n"
1028  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1029  limit = *oldestXmin;
1030  }
1031 
1032  *freezeLimit = limit;
1033 
1034  /*
1035  * Compute the multixact age for which freezing is urgent. This is
1036  * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1037  * short of multixact member space.
1038  */
1039  effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1040 
1041  /*
1042  * Determine the minimum multixact freeze age to use: as specified by
1043  * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1044  * than half effective_multixact_freeze_max_age, so that autovacuums to
1045  * prevent MultiXact wraparound won't occur too frequently.
1046  */
1047  mxid_freezemin = multixact_freeze_min_age;
1048  if (mxid_freezemin < 0)
1049  mxid_freezemin = vacuum_multixact_freeze_min_age;
1050  mxid_freezemin = Min(mxid_freezemin,
1051  effective_multixact_freeze_max_age / 2);
1052  Assert(mxid_freezemin >= 0);
1053 
1054  /* compute the cutoff multi, being careful to generate a valid value */
1055  oldestMxact = GetOldestMultiXactId();
1056  mxactLimit = oldestMxact - mxid_freezemin;
1057  if (mxactLimit < FirstMultiXactId)
1058  mxactLimit = FirstMultiXactId;
1059 
1060  safeMxactLimit =
1061  ReadNextMultiXactId() - effective_multixact_freeze_max_age;
1062  if (safeMxactLimit < FirstMultiXactId)
1063  safeMxactLimit = FirstMultiXactId;
1064 
1065  if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
1066  {
1067  ereport(WARNING,
1068  (errmsg("oldest multixact is far in the past"),
1069  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
1070  /* Use the safe limit, unless an older mxact is still running */
1071  if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
1072  mxactLimit = oldestMxact;
1073  else
1074  mxactLimit = safeMxactLimit;
1075  }
1076 
1077  *multiXactCutoff = mxactLimit;
1078 
1079  if (xidFullScanLimit != NULL)
1080  {
1081  int freezetable;
1082 
1083  Assert(mxactFullScanLimit != NULL);
1084 
1085  /*
1086  * Determine the table freeze age to use: as specified by the caller,
1087  * or vacuum_freeze_table_age, but in any case not more than
1088  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1089  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1090  * before anti-wraparound autovacuum is launched.
1091  */
1092  freezetable = freeze_table_age;
1093  if (freezetable < 0)
1094  freezetable = vacuum_freeze_table_age;
1095  freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1096  Assert(freezetable >= 0);
1097 
1098  /*
1099  * Compute XID limit causing a full-table vacuum, being careful not to
1100  * generate a "permanent" XID.
1101  */
1102  limit = ReadNextTransactionId() - freezetable;
1103  if (!TransactionIdIsNormal(limit))
1104  limit = FirstNormalTransactionId;
1105 
1106  *xidFullScanLimit = limit;
1107 
1108  /*
1109  * Similar to the above, determine the table freeze age to use for
1110  * multixacts: as specified by the caller, or
1111  * vacuum_multixact_freeze_table_age, but in any case not more than
1112  * effective_multixact_freeze_max_age * 0.95, so that if you have
1113  * e.g. a nightly VACUUM schedule, the nightly VACUUM gets a chance to
1114  * freeze multixacts before anti-wraparound autovacuum is launched.
1115  */
1116  freezetable = multixact_freeze_table_age;
1117  if (freezetable < 0)
1118  freezetable = vacuum_multixact_freeze_table_age;
1119  freezetable = Min(freezetable,
1120  effective_multixact_freeze_max_age * 0.95);
1121  Assert(freezetable >= 0);
1122 
1123  /*
1124  * Compute MultiXact limit causing a full-table vacuum, being careful
1125  * to generate a valid MultiXact value.
1126  */
1127  mxactLimit = ReadNextMultiXactId() - freezetable;
1128  if (mxactLimit < FirstMultiXactId)
1129  mxactLimit = FirstMultiXactId;
1130 
1131  *mxactFullScanLimit = mxactLimit;
1132  }
1133  else
1134  {
1135  Assert(mxactFullScanLimit == NULL);
1136  }
1137 }
1138 
1139 /*
1140  * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1141  * mechanism to determine if its table's relfrozenxid and relminmxid are now
1142  * dangerously far in the past.
1143  *
1144  * Input parameters are the target relation's relfrozenxid and relminmxid.
1145  *
1146  * When we return true, VACUUM caller triggers the failsafe.
1147  */
1148 bool
1150 {
1151  TransactionId xid_skip_limit;
1152  MultiXactId multi_skip_limit;
1153  int skip_index_vacuum;
1154 
1155  Assert(TransactionIdIsNormal(relfrozenxid));
1156  Assert(MultiXactIdIsValid(relminmxid));
1157 
1158  /*
1159  * Determine the index skipping age to use. In any case no less than
1160  * autovacuum_freeze_max_age * 1.05.
1161  */
1162  skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1163 
1164  xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1165  if (!TransactionIdIsNormal(xid_skip_limit))
1166  xid_skip_limit = FirstNormalTransactionId;
1167 
1168  if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1169  {
1170  /* The table's relfrozenxid is too old */
1171  return true;
1172  }
1173 
1174  /*
1175  * Similar to above, determine the index skipping age to use for
1176  * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1177  * 1.05.
1178  */
1179  skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1181 
1182  multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1183  if (multi_skip_limit < FirstMultiXactId)
1184  multi_skip_limit = FirstMultiXactId;
1185 
1186  if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1187  {
1188  /* The table's relminmxid is too old */
1189  return true;
1190  }
1191 
1192  return false;
1193 }
1194 
1195 /*
1196  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1197  *
1198  * If we scanned the whole relation then we should just use the count of
1199  * live tuples seen; but if we did not, we should not blindly extrapolate
1200  * from that number, since VACUUM may have scanned a quite nonrandom
1201  * subset of the table. When we have only partial information, we take
1202  * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1203  * of the tuple density in the unscanned pages.
1204  *
1205  * Note: scanned_tuples should count only *live* tuples, since
1206  * pg_class.reltuples is defined that way.
1207  */
1208 double
1210  BlockNumber total_pages,
1211  BlockNumber scanned_pages,
1212  double scanned_tuples)
1213 {
1214  BlockNumber old_rel_pages = relation->rd_rel->relpages;
1215  double old_rel_tuples = relation->rd_rel->reltuples;
1216  double old_density;
1217  double unscanned_pages;
1218  double total_tuples;
1219 
1220  /* If we did scan the whole table, just use the count as-is */
1221  if (scanned_pages >= total_pages)
1222  return scanned_tuples;
1223 
1224  /*
1225  * If scanned_pages is zero but total_pages isn't, keep the existing value
1226  * of reltuples. (Note: we might be returning -1 in this case.)
1227  */
1228  if (scanned_pages == 0)
1229  return old_rel_tuples;
1230 
1231  /*
1232  * If old density is unknown, we can't do much except scale up
1233  * scanned_tuples to match total_pages.
1234  */
1235  if (old_rel_tuples < 0 || old_rel_pages == 0)
1236  return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1237 
1238  /*
1239  * Okay, we've covered the corner cases. The normal calculation is to
1240  * convert the old measurement to a density (tuples per page), then
1241  * estimate the number of tuples in the unscanned pages using that figure,
1242  * and finally add on the number of tuples in the scanned pages.
1243  */
1244  old_density = old_rel_tuples / old_rel_pages;
1245  unscanned_pages = (double) total_pages - (double) scanned_pages;
1246  total_tuples = old_density * unscanned_pages + scanned_tuples;
1247  return floor(total_tuples + 0.5);
1248 }
1249 
1250 
1251 /*
1252  * vac_update_relstats() -- update statistics for one relation
1253  *
1254  * Update the whole-relation statistics that are kept in its pg_class
1255  * row. There are additional stats that will be updated if we are
1256  * doing ANALYZE, but we always update these stats. This routine works
1257  * for both index and heap relation entries in pg_class.
1258  *
1259  * We violate transaction semantics here by overwriting the rel's
1260  * existing pg_class tuple with the new values. This is reasonably
1261  * safe as long as we're sure that the new values are correct whether or
1262  * not this transaction commits. The reason for doing this is that if
1263  * we updated these tuples in the usual way, vacuuming pg_class itself
1264  * wouldn't work very well --- by the time we got done with a vacuum
1265  * cycle, most of the tuples in pg_class would've been obsoleted. Of
1266  * course, this only works for fixed-size not-null columns, but these are.
1267  *
1268  * Another reason for doing it this way is that when we are in a lazy
1269  * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1270  * Somebody vacuuming pg_class might think they could delete a tuple
1271  * marked with xmin = our xid.
1272  *
1273  * In addition to fundamentally nontransactional statistics such as
1274  * relpages and relallvisible, we try to maintain certain lazily-updated
1275  * DDL flags such as relhasindex, by clearing them if no longer correct.
1276  * It's safe to do this in VACUUM, which can't run in parallel with
1277  * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1278  * However, it's *not* safe to do it in an ANALYZE that's within an
1279  * outer transaction, because for example the current transaction might
1280  * have dropped the last index; then we'd think relhasindex should be
1281  * cleared, but if the transaction later rolls back this would be wrong.
1282  * So we refrain from updating the DDL flags if we're inside an outer
1283  * transaction. This is OK since postponing the flag maintenance is
1284  * always allowable.
1285  *
1286  * Note: num_tuples should count only *live* tuples, since
1287  * pg_class.reltuples is defined that way.
1288  *
1289  * This routine is shared by VACUUM and ANALYZE.
1290  */
1291 void
1293  BlockNumber num_pages, double num_tuples,
1294  BlockNumber num_all_visible_pages,
1295  bool hasindex, TransactionId frozenxid,
1296  MultiXactId minmulti,
1297  bool in_outer_xact)
1298 {
1299  Oid relid = RelationGetRelid(relation);
1300  Relation rd;
1301  HeapTuple ctup;
1302  Form_pg_class pgcform;
1303  bool dirty;
1304 
1305  rd = table_open(RelationRelationId, RowExclusiveLock);
1306 
1307  /* Fetch a copy of the tuple to scribble on */
1309  if (!HeapTupleIsValid(ctup))
1310  elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1311  relid);
1312  pgcform = (Form_pg_class) GETSTRUCT(ctup);
1313 
1314  /* Apply statistical updates, if any, to copied tuple */
1315 
1316  dirty = false;
1317  if (pgcform->relpages != (int32) num_pages)
1318  {
1319  pgcform->relpages = (int32) num_pages;
1320  dirty = true;
1321  }
1322  if (pgcform->reltuples != (float4) num_tuples)
1323  {
1324  pgcform->reltuples = (float4) num_tuples;
1325  dirty = true;
1326  }
1327  if (pgcform->relallvisible != (int32) num_all_visible_pages)
1328  {
1329  pgcform->relallvisible = (int32) num_all_visible_pages;
1330  dirty = true;
1331  }
1332 
1333  /* Apply DDL updates, but not inside an outer transaction (see above) */
1334 
1335  if (!in_outer_xact)
1336  {
1337  /*
1338  * If we didn't find any indexes, reset relhasindex.
1339  */
1340  if (pgcform->relhasindex && !hasindex)
1341  {
1342  pgcform->relhasindex = false;
1343  dirty = true;
1344  }
1345 
1346  /* We also clear relhasrules and relhastriggers if needed */
1347  if (pgcform->relhasrules && relation->rd_rules == NULL)
1348  {
1349  pgcform->relhasrules = false;
1350  dirty = true;
1351  }
1352  if (pgcform->relhastriggers && relation->trigdesc == NULL)
1353  {
1354  pgcform->relhastriggers = false;
1355  dirty = true;
1356  }
1357  }
1358 
1359  /*
1360  * Update relfrozenxid, unless caller passed InvalidTransactionId
1361  * indicating it has no new data.
1362  *
1363  * Ordinarily, we don't let relfrozenxid go backwards: if things are
1364  * working correctly, the only way the new frozenxid could be older would
1365  * be if a previous VACUUM was done with a tighter freeze_min_age, in
1366  * which case we don't want to forget the work it already did. However,
1367  * if the stored relfrozenxid is "in the future", then it must be corrupt
1368  * and it seems best to overwrite it with the cutoff we used this time.
1369  * This should match vac_update_datfrozenxid() concerning what we consider
1370  * to be "in the future".
1371  */
1372  if (TransactionIdIsNormal(frozenxid) &&
1373  pgcform->relfrozenxid != frozenxid &&
1374  (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1376  pgcform->relfrozenxid)))
1377  {
1378  pgcform->relfrozenxid = frozenxid;
1379  dirty = true;
1380  }
1381 
1382  /* Similarly for relminmxid */
1383  if (MultiXactIdIsValid(minmulti) &&
1384  pgcform->relminmxid != minmulti &&
1385  (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1386  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1387  {
1388  pgcform->relminmxid = minmulti;
1389  dirty = true;
1390  }
1391 
1392  /* If anything changed, write out the tuple. */
1393  if (dirty)
1394  heap_inplace_update(rd, ctup);
1395 
1397 }
1398 
1399 
1400 /*
1401  * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1402  *
1403  * Update pg_database's datfrozenxid entry for our database to be the
1404  * minimum of the pg_class.relfrozenxid values.
1405  *
1406  * Similarly, update our datminmxid to be the minimum of the
1407  * pg_class.relminmxid values.
1408  *
1409  * If we are able to advance either pg_database value, also try to
1410  * truncate pg_xact and pg_multixact.
1411  *
1412  * We violate transaction semantics here by overwriting the database's
1413  * existing pg_database tuple with the new values. This is reasonably
1414  * safe since the new values are correct whether or not this transaction
1415  * commits. As with vac_update_relstats, this avoids leaving dead tuples
1416  * behind after a VACUUM.
1417  */
1418 void
1420 {
1421  HeapTuple tuple;
1422  Form_pg_database dbform;
1423  Relation relation;
1424  SysScanDesc scan;
1425  HeapTuple classTup;
1426  TransactionId newFrozenXid;
1427  MultiXactId newMinMulti;
1428  TransactionId lastSaneFrozenXid;
1429  MultiXactId lastSaneMinMulti;
1430  bool bogus = false;
1431  bool dirty = false;
1432  ScanKeyData key[1];
1433 
1434  /*
1435  * Restrict this task to one backend per database. This avoids race
1436  * conditions that would move datfrozenxid or datminmxid backward. It
1437  * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1438  * datfrozenxid passed to an earlier vac_truncate_clog() call.
1439  */
1441 
1442  /*
1443  * Initialize the "min" calculation with
1444  * GetOldestNonRemovableTransactionId(), which is a reasonable
1445  * approximation to the minimum relfrozenxid for not-yet-committed
1446  * pg_class entries for new tables; see AddNewRelationTuple(). So we
1447  * cannot produce a wrong minimum by starting with this.
1448  */
1449  newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1450 
1451  /*
1452  * Similarly, initialize the MultiXact "min" with the value that would be
1453  * used on pg_class for new tables. See AddNewRelationTuple().
1454  */
1455  newMinMulti = GetOldestMultiXactId();
1456 
1457  /*
1458  * Identify the latest relfrozenxid and relminmxid values that we could
1459  * validly see during the scan. These are conservative values, but it's
1460  * not really worth trying to be more exact.
1461  */
1462  lastSaneFrozenXid = ReadNextTransactionId();
1463  lastSaneMinMulti = ReadNextMultiXactId();
1464 
1465  /*
1466  * We must seqscan pg_class to find the minimum Xid, because there is no
1467  * index that can help us here.
1468  */
1469  relation = table_open(RelationRelationId, AccessShareLock);
1470 
1471  scan = systable_beginscan(relation, InvalidOid, false,
1472  NULL, 0, NULL);
1473 
1474  while ((classTup = systable_getnext(scan)) != NULL)
1475  {
1476  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1477 
1478  /*
1479  * Only consider relations able to hold unfrozen XIDs (anything else
1480  * should have InvalidTransactionId in relfrozenxid anyway).
1481  */
1482  if (classForm->relkind != RELKIND_RELATION &&
1483  classForm->relkind != RELKIND_MATVIEW &&
1484  classForm->relkind != RELKIND_TOASTVALUE)
1485  {
1486  Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1487  Assert(!MultiXactIdIsValid(classForm->relminmxid));
1488  continue;
1489  }
1490 
1491  /*
1492  * Some table AMs might not need per-relation xid / multixid horizons.
1493  * It therefore seems reasonable to allow relfrozenxid and relminmxid
1494  * to not be set (i.e. set to their respective Invalid*Id)
1495  * independently. Thus validate and compute horizon for each only if
1496  * set.
1497  *
1498  * If things are working properly, no relation should have a
1499  * relfrozenxid or relminmxid that is "in the future". However, such
1500  * cases have been known to arise due to bugs in pg_upgrade. If we
1501  * see any entries that are "in the future", chicken out and don't do
1502  * anything. This ensures we won't truncate clog & multixact SLRUs
1503  * before those relations have been scanned and cleaned up.
1504  */
1505 
1506  if (TransactionIdIsValid(classForm->relfrozenxid))
1507  {
1508  Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1509 
1510  /* check for values in the future */
1511  if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1512  {
1513  bogus = true;
1514  break;
1515  }
1516 
1517  /* determine new horizon */
1518  if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1519  newFrozenXid = classForm->relfrozenxid;
1520  }
1521 
1522  if (MultiXactIdIsValid(classForm->relminmxid))
1523  {
1524  /* check for values in the future */
1525  if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1526  {
1527  bogus = true;
1528  break;
1529  }
1530 
1531  /* determine new horizon */
1532  if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1533  newMinMulti = classForm->relminmxid;
1534  }
1535  }
1536 
1537  /* we're done with pg_class */
1538  systable_endscan(scan);
1539  table_close(relation, AccessShareLock);
1540 
1541  /* chicken out if bogus data found */
1542  if (bogus)
1543  return;
1544 
1545  Assert(TransactionIdIsNormal(newFrozenXid));
1546  Assert(MultiXactIdIsValid(newMinMulti));
1547 
1548  /* Now fetch the pg_database tuple we need to update. */
1549  relation = table_open(DatabaseRelationId, RowExclusiveLock);
1550 
1551  /*
1552  * Get the pg_database tuple to scribble on. Note that this does not
1553  * directly rely on the syscache to avoid issues with flattened toast
1554  * values for the in-place update.
1555  */
1556  ScanKeyInit(&key[0],
1557  Anum_pg_database_oid,
1558  BTEqualStrategyNumber, F_OIDEQ,
1560 
1561  scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1562  NULL, 1, key);
1563  tuple = systable_getnext(scan);
1564  tuple = heap_copytuple(tuple);
1565  systable_endscan(scan);
1566 
1567  if (!HeapTupleIsValid(tuple))
1568  elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1569 
1570  dbform = (Form_pg_database) GETSTRUCT(tuple);
1571 
1572  /*
1573  * As in vac_update_relstats(), we ordinarily don't want to let
1574  * datfrozenxid go backward; but if it's "in the future" then it must be
1575  * corrupt and it seems best to overwrite it.
1576  */
1577  if (dbform->datfrozenxid != newFrozenXid &&
1578  (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1579  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1580  {
1581  dbform->datfrozenxid = newFrozenXid;
1582  dirty = true;
1583  }
1584  else
1585  newFrozenXid = dbform->datfrozenxid;
1586 
1587  /* Ditto for datminmxid */
1588  if (dbform->datminmxid != newMinMulti &&
1589  (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1590  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1591  {
1592  dbform->datminmxid = newMinMulti;
1593  dirty = true;
1594  }
1595  else
1596  newMinMulti = dbform->datminmxid;
1597 
1598  if (dirty)
1599  heap_inplace_update(relation, tuple);
1600 
1601  heap_freetuple(tuple);
1602  table_close(relation, RowExclusiveLock);
1603 
1604  /*
1605  * If we were able to advance datfrozenxid or datminmxid, see if we can
1606  * truncate pg_xact and/or pg_multixact. Also do it if the shared
1607  * XID-wrap-limit info is stale, since this action will update that too.
1608  */
1609  if (dirty || ForceTransactionIdLimitUpdate())
1610  vac_truncate_clog(newFrozenXid, newMinMulti,
1611  lastSaneFrozenXid, lastSaneMinMulti);
1612 }
1613 
1614 
1615 /*
1616  * vac_truncate_clog() -- attempt to truncate the commit log
1617  *
1618  * Scan pg_database to determine the system-wide oldest datfrozenxid,
1619  * and use it to truncate the transaction commit log (pg_xact).
1620  * Also update the XID wrap limit info maintained by varsup.c.
1621  * Likewise for datminmxid.
1622  *
1623  * The passed frozenXID and minMulti are the updated values for my own
1624  * pg_database entry. They're used to initialize the "min" calculations.
1625  * The caller also passes the "last sane" XID and MXID, since it has
1626  * those at hand already.
1627  *
1628  * This routine is only invoked when we've managed to change our
1629  * DB's datfrozenxid/datminmxid values, or we found that the shared
1630  * XID-wrap-limit info is stale.
1631  */
1632 static void
1634  MultiXactId minMulti,
1635  TransactionId lastSaneFrozenXid,
1636  MultiXactId lastSaneMinMulti)
1637 {
1639  Relation relation;
1640  TableScanDesc scan;
1641  HeapTuple tuple;
1642  Oid oldestxid_datoid;
1643  Oid minmulti_datoid;
1644  bool bogus = false;
1645  bool frozenAlreadyWrapped = false;
1646 
1647  /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1648  LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1649 
1650  /* init oldest datoids to sync with my frozenXID/minMulti values */
1651  oldestxid_datoid = MyDatabaseId;
1652  minmulti_datoid = MyDatabaseId;
1653 
1654  /*
1655  * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1656  *
1657  * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1658  * the values could change while we look at them. Fetch each one just
1659  * once to ensure sane behavior of the comparison logic. (Here, as in
1660  * many other places, we assume that fetching or updating an XID in shared
1661  * storage is atomic.)
1662  *
1663  * Note: we need not worry about a race condition with new entries being
1664  * inserted by CREATE DATABASE. Any such entry will have a copy of some
1665  * existing DB's datfrozenxid, and that source DB cannot be ours because
1666  * of the interlock against copying a DB containing an active backend.
1667  * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1668  * concurrently modify the datfrozenxid's of different databases, the
1669  * worst possible outcome is that pg_xact is not truncated as aggressively
1670  * as it could be.
1671  */
1672  relation = table_open(DatabaseRelationId, AccessShareLock);
1673 
1674  scan = table_beginscan_catalog(relation, 0, NULL);
1675 
1676  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1677  {
1678  volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1679  TransactionId datfrozenxid = dbform->datfrozenxid;
1680  TransactionId datminmxid = dbform->datminmxid;
1681 
1682  Assert(TransactionIdIsNormal(datfrozenxid));
1683  Assert(MultiXactIdIsValid(datminmxid));
1684 
1685  /*
1686  * If things are working properly, no database should have a
1687  * datfrozenxid or datminmxid that is "in the future". However, such
1688  * cases have been known to arise due to bugs in pg_upgrade. If we
1689  * see any entries that are "in the future", chicken out and don't do
1690  * anything. This ensures we won't truncate clog before those
1691  * databases have been scanned and cleaned up. (We will issue the
1692  * "already wrapped" warning if appropriate, though.)
1693  */
1694  if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1695  MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1696  bogus = true;
1697 
1698  if (TransactionIdPrecedes(nextXID, datfrozenxid))
1699  frozenAlreadyWrapped = true;
1700  else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1701  {
1702  frozenXID = datfrozenxid;
1703  oldestxid_datoid = dbform->oid;
1704  }
1705 
1706  if (MultiXactIdPrecedes(datminmxid, minMulti))
1707  {
1708  minMulti = datminmxid;
1709  minmulti_datoid = dbform->oid;
1710  }
1711  }
1712 
1713  table_endscan(scan);
1714 
1715  table_close(relation, AccessShareLock);
1716 
1717  /*
1718  * Do not truncate CLOG if we seem to have suffered wraparound already;
1719  * the computed minimum XID might be bogus. This case should now be
1720  * impossible due to the defenses in GetNewTransactionId, but we keep the
1721  * test anyway.
1722  */
1723  if (frozenAlreadyWrapped)
1724  {
1725  ereport(WARNING,
1726  (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1727  errdetail("You might have already suffered transaction-wraparound data loss.")));
1728  return;
1729  }
1730 
1731  /* chicken out if data is bogus in any other way */
1732  if (bogus)
1733  return;
1734 
1735  /*
1736  * Advance the oldest value for commit timestamps before truncating, so
1737  * that if a user requests a timestamp for a transaction we're truncating
1738  * away right after this point, they get NULL instead of an ugly "file not
1739  * found" error from slru.c. This doesn't matter for xact/multixact
1740  * because they are not subject to arbitrary lookups from users.
1741  */
1742  AdvanceOldestCommitTsXid(frozenXID);
1743 
1744  /*
1745  * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1746  */
1747  TruncateCLOG(frozenXID, oldestxid_datoid);
1748  TruncateCommitTs(frozenXID);
1749  TruncateMultiXact(minMulti, minmulti_datoid);
1750 
1751  /*
1752  * Update the wrap limit for GetNewTransactionId and creation of new
1753  * MultiXactIds. Note: these functions will also signal the postmaster
1754  * for an(other) autovac cycle if needed. XXX should we avoid possibly
1755  * signaling twice?
1756  */
1757  SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1758  SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1759 
1760  LWLockRelease(WrapLimitsVacuumLock);
1761 }
1762 
1763 
1764 /*
1765  * vacuum_rel() -- vacuum one heap relation
1766  *
1767  * relid identifies the relation to vacuum. If relation is supplied,
1768  * use the name therein for reporting any failure to open/lock the rel;
1769  * do not use it once we've successfully opened the rel, since it might
1770  * be stale.
1771  *
1772  * Returns true if it's okay to proceed with a requested ANALYZE
1773  * operation on this table.
1774  *
1775  * Doing one heap at a time incurs extra overhead, since we need to
1776  * check that the heap exists again just before we vacuum it. The
1777  * reason that we do this is so that vacuuming can be spread across
1778  * many small transactions. Otherwise, two-phase locking would require
1779  * us to lock the entire database during one pass of the vacuum cleaner.
1780  *
1781  * At entry and exit, we are not inside a transaction.
1782  */
1783 static bool
1784 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1785 {
1786  LOCKMODE lmode;
1787  Relation rel;
1788  LockRelId lockrelid;
1789  Oid toast_relid;
1790  Oid save_userid;
1791  int save_sec_context;
1792  int save_nestlevel;
1793 
1794  Assert(params != NULL);
1795 
1796  /* Begin a transaction for vacuuming this relation */
1798 
1799  if (!(params->options & VACOPT_FULL))
1800  {
1801  /*
1802  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1803  * other concurrent VACUUMs know that they can ignore this one while
1804  * determining their OldestXmin. (The reason we don't set it during a
1805  * full VACUUM is exactly that we may have to run user-defined
1806  * functions for functional indexes, and we want to make sure that if
1807  * they use the snapshot set above, any tuples it requires can't get
1808  * removed from other tables. An index function that depends on the
1809  * contents of other tables is arguably broken, but we won't break it
1810  * here by violating transaction semantics.)
1811  *
1812  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1813  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1814  * in an emergency.
1815  *
1816  * Note: these flags remain set until CommitTransaction or
1817  * AbortTransaction. We don't want to clear them until we reset
1818  * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
1819  * might appear to go backwards, which is probably Not Good. (We also
1820  * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
1821  * xmin doesn't become visible ahead of setting the flag.)
1822  */
1823  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1825  if (params->is_wraparound)
1828  LWLockRelease(ProcArrayLock);
1829  }
1830 
1831  /*
1832  * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
1833  * cutoff xids in local memory wrapping around, and to have updated xmin
1834  * horizons.
1835  */
1837 
1838  /*
1839  * Check for user-requested abort. Note we want this to be inside a
1840  * transaction, so xact.c doesn't issue useless WARNING.
1841  */
1843 
1844  /*
1845  * Determine the type of lock we want --- hard exclusive lock for a FULL
1846  * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1847  * way, we can be sure that no other backend is vacuuming the same table.
1848  */
1849  lmode = (params->options & VACOPT_FULL) ?
1851 
1852  /* open the relation and get the appropriate lock on it */
1853  rel = vacuum_open_relation(relid, relation, params->options,
1854  params->log_min_duration >= 0, lmode);
1855 
1856  /* leave if relation could not be opened or locked */
1857  if (!rel)
1858  {
1861  return false;
1862  }
1863 
1864  /*
1865  * Check if relation needs to be skipped based on ownership. This check
1866  * happens also when building the relation list to vacuum for a manual
1867  * operation, and needs to be done additionally here as VACUUM could
1868  * happen across multiple transactions where relation ownership could have
1869  * changed in-between. Make sure to only generate logs for VACUUM in this
1870  * case.
1871  */
1873  rel->rd_rel,
1874  params->options & VACOPT_VACUUM))
1875  {
1876  relation_close(rel, lmode);
1879  return false;
1880  }
1881 
1882  /*
1883  * Check that it's of a vacuumable relkind.
1884  */
1885  if (rel->rd_rel->relkind != RELKIND_RELATION &&
1886  rel->rd_rel->relkind != RELKIND_MATVIEW &&
1887  rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1888  rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1889  {
1890  ereport(WARNING,
1891  (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1892  RelationGetRelationName(rel))));
1893  relation_close(rel, lmode);
1896  return false;
1897  }
1898 
1899  /*
1900  * Silently ignore tables that are temp tables of other backends ---
1901  * trying to vacuum these will lead to great unhappiness, since their
1902  * contents are probably not up-to-date on disk. (We don't throw a
1903  * warning here; it would just lead to chatter during a database-wide
1904  * VACUUM.)
1905  */
1906  if (RELATION_IS_OTHER_TEMP(rel))
1907  {
1908  relation_close(rel, lmode);
1911  return false;
1912  }
1913 
1914  /*
1915  * Silently ignore partitioned tables as there is no work to be done. The
1916  * useful work is on their child partitions, which have been queued up for
1917  * us separately.
1918  */
1919  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1920  {
1921  relation_close(rel, lmode);
1924  /* It's OK to proceed with ANALYZE on this table */
1925  return true;
1926  }
1927 
1928  /*
1929  * Get a session-level lock too. This will protect our access to the
1930  * relation across multiple transactions, so that we can vacuum the
1931  * relation's TOAST table (if any) secure in the knowledge that no one is
1932  * deleting the parent relation.
1933  *
1934  * NOTE: this cannot block, even if someone else is waiting for access,
1935  * because the lock manager knows that both lock requests are from the
1936  * same process.
1937  */
1938  lockrelid = rel->rd_lockInfo.lockRelId;
1939  LockRelationIdForSession(&lockrelid, lmode);
1940 
1941  /* Set index cleanup option based on reloptions if not yet */
1942  if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1943  {
1944  if (rel->rd_options == NULL ||
1945  ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup)
1947  else
1949  }
1950 
1951  /* Set truncate option based on reloptions if not yet */
1952  if (params->truncate == VACOPT_TERNARY_DEFAULT)
1953  {
1954  if (rel->rd_options == NULL ||
1955  ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
1956  params->truncate = VACOPT_TERNARY_ENABLED;
1957  else
1959  }
1960 
1961  /*
1962  * Remember the relation's TOAST relation for later, if the caller asked
1963  * us to process it. In VACUUM FULL, though, the toast table is
1964  * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1965  */
1966  if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
1967  (params->options & VACOPT_FULL) == 0)
1968  toast_relid = rel->rd_rel->reltoastrelid;
1969  else
1970  toast_relid = InvalidOid;
1971 
1972  /*
1973  * Switch to the table owner's userid, so that any index functions are run
1974  * as that user. Also lock down security-restricted operations and
1975  * arrange to make GUC variable changes local to this command. (This is
1976  * unnecessary, but harmless, for lazy VACUUM.)
1977  */
1978  GetUserIdAndSecContext(&save_userid, &save_sec_context);
1979  SetUserIdAndSecContext(rel->rd_rel->relowner,
1980  save_sec_context | SECURITY_RESTRICTED_OPERATION);
1981  save_nestlevel = NewGUCNestLevel();
1982 
1983  /*
1984  * Do the actual work --- either FULL or "lazy" vacuum
1985  */
1986  if (params->options & VACOPT_FULL)
1987  {
1988  ClusterParams cluster_params = {0};
1989 
1990  /* close relation before vacuuming, but hold lock until commit */
1991  relation_close(rel, NoLock);
1992  rel = NULL;
1993 
1994  if ((params->options & VACOPT_VERBOSE) != 0)
1995  cluster_params.options |= CLUOPT_VERBOSE;
1996 
1997  /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1998  cluster_rel(relid, InvalidOid, &cluster_params);
1999  }
2000  else
2001  table_relation_vacuum(rel, params, vac_strategy);
2002 
2003  /* Roll back any GUC changes executed by index functions */
2004  AtEOXact_GUC(false, save_nestlevel);
2005 
2006  /* Restore userid and security context */
2007  SetUserIdAndSecContext(save_userid, save_sec_context);
2008 
2009  /* all done with this class, but hold lock until commit */
2010  if (rel)
2011  relation_close(rel, NoLock);
2012 
2013  /*
2014  * Complete the transaction and free all temporary memory used.
2015  */
2018 
2019  /*
2020  * If the relation has a secondary toast rel, vacuum that too while we
2021  * still hold the session lock on the main table. Note however that
2022  * "analyze" will not get done on the toast table. This is good, because
2023  * the toaster always uses hardcoded index access and statistics are
2024  * totally unimportant for toast relations.
2025  */
2026  if (toast_relid != InvalidOid)
2027  vacuum_rel(toast_relid, NULL, params);
2028 
2029  /*
2030  * Now release the session-level lock on the main table.
2031  */
2032  UnlockRelationIdForSession(&lockrelid, lmode);
2033 
2034  /* Report that we really did it. */
2035  return true;
2036 }
2037 
2038 
2039 /*
2040  * Open all the vacuumable indexes of the given relation, obtaining the
2041  * specified kind of lock on each. Return an array of Relation pointers for
2042  * the indexes into *Irel, and the number of indexes into *nindexes.
2043  *
2044  * We consider an index vacuumable if it is marked insertable (indisready).
2045  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2046  * execution, and what we have is too corrupt to be processable. We will
2047  * vacuum even if the index isn't indisvalid; this is important because in a
2048  * unique index, uniqueness checks will be performed anyway and had better not
2049  * hit dangling index pointers.
2050  */
2051 void
2053  int *nindexes, Relation **Irel)
2054 {
2055  List *indexoidlist;
2056  ListCell *indexoidscan;
2057  int i;
2058 
2059  Assert(lockmode != NoLock);
2060 
2061  indexoidlist = RelationGetIndexList(relation);
2062 
2063  /* allocate enough memory for all indexes */
2064  i = list_length(indexoidlist);
2065 
2066  if (i > 0)
2067  *Irel = (Relation *) palloc(i * sizeof(Relation));
2068  else
2069  *Irel = NULL;
2070 
2071  /* collect just the ready indexes */
2072  i = 0;
2073  foreach(indexoidscan, indexoidlist)
2074  {
2075  Oid indexoid = lfirst_oid(indexoidscan);
2076  Relation indrel;
2077 
2078  indrel = index_open(indexoid, lockmode);
2079  if (indrel->rd_index->indisready)
2080  (*Irel)[i++] = indrel;
2081  else
2082  index_close(indrel, lockmode);
2083  }
2084 
2085  *nindexes = i;
2086 
2087  list_free(indexoidlist);
2088 }
2089 
2090 /*
2091  * Release the resources acquired by vac_open_indexes. Optionally release
2092  * the locks (say NoLock to keep 'em).
2093  */
2094 void
2095 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2096 {
2097  if (Irel == NULL)
2098  return;
2099 
2100  while (nindexes--)
2101  {
2102  Relation ind = Irel[nindexes];
2103 
2104  index_close(ind, lockmode);
2105  }
2106  pfree(Irel);
2107 }
2108 
2109 /*
2110  * vacuum_delay_point --- check for interrupts and cost-based delay.
2111  *
2112  * This should be called in each major loop of VACUUM processing,
2113  * typically once per page processed.
2114  */
2115 void
2117 {
2118  double msec = 0;
2119 
2120  /* Always check for interrupts */
2122 
2124  return;
2125 
2126  /*
2127  * For parallel vacuum, the delay is computed based on the shared cost
2128  * balance. See compute_parallel_delay.
2129  */
2130  if (VacuumSharedCostBalance != NULL)
2131  msec = compute_parallel_delay();
2132  else if (VacuumCostBalance >= VacuumCostLimit)
2134 
2135  /* Nap if appropriate */
2136  if (msec > 0)
2137  {
2138  if (msec > VacuumCostDelay * 4)
2139  msec = VacuumCostDelay * 4;
2140 
2141  (void) WaitLatch(MyLatch,
2143  msec,
2146 
2147  VacuumCostBalance = 0;
2148 
2149  /* update balance values for workers */
2151 
2152  /* Might have gotten an interrupt while sleeping */
2154  }
2155 }
2156 
2157 /*
2158  * Computes the vacuum delay for parallel workers.
2159  *
2160  * The basic idea of a cost-based delay for parallel vacuum is to allow each
2161  * worker to sleep in proportion to the share of work it's done. We achieve this
2162  * by allowing all parallel vacuum workers including the leader process to
2163  * have a shared view of cost related parameters (mainly VacuumCostBalance).
2164  * We allow each worker to update it as and when it has incurred any cost and
2165  * then based on that decide whether it needs to sleep. We compute the time
2166  * to sleep for a worker based on the cost it has incurred
2167  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2168  * that amount. This avoids putting to sleep those workers which have done less
2169  * I/O than other workers and therefore ensure that workers
2170  * which are doing more I/O got throttled more.
2171  *
2172  * We allow a worker to sleep only if it has performed I/O above a certain
2173  * threshold, which is calculated based on the number of active workers
2174  * (VacuumActiveNWorkers), and the overall cost balance is more than
2175  * VacuumCostLimit set by the system. Testing reveals that we achieve
2176  * the required throttling if we force a worker that has done more than 50%
2177  * of its share of work to sleep.
2178  */
2179 static double
2181 {
2182  double msec = 0;
2183  uint32 shared_balance;
2184  int nworkers;
2185 
2186  /* Parallel vacuum must be active */
2187  Assert(VacuumSharedCostBalance);
2188 
2189  nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2190 
2191  /* At least count itself */
2192  Assert(nworkers >= 1);
2193 
2194  /* Update the shared cost balance value atomically */
2195  shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2196 
2197  /* Compute the total local balance for the current worker */
2199 
2200  if ((shared_balance >= VacuumCostLimit) &&
2201  (VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers)))
2202  {
2203  /* Compute sleep time based on the local cost balance */
2205  pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2207  }
2208 
2209  /*
2210  * Reset the local balance as we accumulated it into the shared value.
2211  */
2212  VacuumCostBalance = 0;
2213 
2214  return msec;
2215 }
2216 
2217 /*
2218  * A wrapper function of defGetBoolean().
2219  *
2220  * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
2221  * instead of true and false.
2222  */
2223 static VacOptTernaryValue
2225 {
2227 }
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:542
HeapTuple heap_copytuple(HeapTuple tuple)
Definition: heaptuple.c:680
#define NIL
Definition: pg_list.h:65
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:152
void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy)
Definition: analyze.c:120
static double compute_parallel_delay(void)
Definition: vacuum.c:2180
int multixact_freeze_table_age
Definition: vacuum.h:213
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2095
LockRelId lockRelId
Definition: rel.h:44
void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel)
Definition: vacuum.c:277
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
#define AllocSetContextCreate
Definition: memutils.h:173
int64 VacuumPageMiss
Definition: globals.c:148
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
int errhint(const char *fmt,...)
Definition: elog.c:1156
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:595
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:76
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:79
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1209
int VacuumCostBalance
Definition: globals.c:151
int vacuum_multixact_freeze_table_age
Definition: vacuum.c:64
void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
Definition: clog.c:874
#define WL_TIMEOUT
Definition: latch.h:128
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:185
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
Definition: vacuum.c:1784
RangeVar * relation
Definition: parsenodes.h:3340
uint32 TransactionId
Definition: c.h:587
#define SECURITY_RESTRICTED_OPERATION
Definition: miscadmin.h:312
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
void vac_update_datfrozenxid(void)
Definition: vacuum.c:1419
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:590
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:478
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:200
FormData_pg_database * Form_pg_database
Definition: pg_database.h:81
PGPROC * MyProc
Definition: proc.c:68
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
VacuumRelation * makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols)
Definition: makefuncs.c:809
#define VACOPT_ANALYZE
Definition: vacuum.h:179
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:101
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
void LockDatabaseFrozenIds(LOCKMODE lockmode)
Definition: lmgr.c:470
void CommitTransactionCommand(void)
Definition: xact.c:2939
int64 VacuumPageHit
Definition: globals.c:147
#define Min(x, y)
Definition: c.h:986
bool is_vacuumcmd
Definition: parsenodes.h:3327
#define PROC_VACUUM_FOR_WRAPAROUND
Definition: proc.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static void table_relation_vacuum(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:1675
#define AccessShareLock
Definition: lockdefs.h:36
static BufferAccessStrategy vac_strategy
Definition: vacuum.c:71
int32 defGetInt32(DefElem *def)
Definition: define.c:166
struct cursor * cur
Definition: ecpg.c:28
int autovacuum_multixact_freeze_max_age
Definition: autovacuum.c:126
List * list_concat(List *list1, const List *list2)
Definition: list.c:530
int errcode(int sqlerrcode)
Definition: elog.c:698
PROC_HDR * ProcGlobal
Definition: proc.c:80
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:943
int64 VacuumPageDirty
Definition: globals.c:149
#define CLUOPT_VERBOSE
Definition: cluster.h:24
uint8 statusFlags
Definition: proc.h:189
uint32 BlockNumber
Definition: block.h:31
VacOptTernaryValue
Definition: vacuum.h:193
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:308
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
NameData relname
Definition: pg_class.h:38
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:710
void AdvanceOldestCommitTsXid(TransactionId oldestXact)
Definition: commit_ts.c:903
int freeze_table_age
Definition: vacuum.h:210
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:383
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
void ResetLatch(Latch *latch)
Definition: latch.c:660
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1672
signed int int32
Definition: c.h:429
static List * expand_vacuum_rel(VacuumRelation *vrel, int options)
Definition: vacuum.c:731
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:452
#define VACOPT_PROCESS_TOAST
Definition: vacuum.h:184
MemoryContext PortalContext
Definition: mcxt.c:57
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1816
bool ForceTransactionIdLimitUpdate(void)
Definition: varsup.c:490
char * relname
Definition: primnodes.h:68
bool defGetBoolean(DefElem *def)
Definition: define.c:111
Form_pg_index rd_index
Definition: rel.h:187
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:502
bits32 options
Definition: vacuum.h:208
void pfree(void *pointer)
Definition: mcxt.c:1169
#define PROC_IN_VACUUM
Definition: proc.h:55
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1944
#define FirstNormalTransactionId
Definition: transam.h:34
void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:383
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
Definition: rel.h:36
int VacuumCostLimit
Definition: globals.c:144
void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:370
int autovacuum_freeze_max_age
Definition: autovacuum.c:125
int freeze_min_age
Definition: vacuum.h:209
int vacuum_multixact_freeze_min_age
Definition: vacuum.c:63
TriggerDesc * trigdesc
Definition: rel.h:115
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
bool is_wraparound
Definition: vacuum.h:215
#define lfirst_node(type, lc)
Definition: pg_list.h:172
#define NoLock
Definition: lockdefs.h:34
LockInfoData rd_lockInfo
Definition: rel.h:112
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1340
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:583
int location
Definition: parsenodes.h:749
#define RowExclusiveLock
Definition: lockdefs.h:38
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition: guc.c:6200
int errdetail(const char *fmt,...)
Definition: elog.c:1042
static MemoryContext vac_context
Definition: vacuum.c:70
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3379
#define RelationGetRelationName(relation)
Definition: rel.h:503
int vacuum_failsafe_age
Definition: vacuum.c:65
void cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
Definition: cluster.c:277
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:236
unsigned int uint32
Definition: c.h:441
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:798
bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid)
Definition: vacuum.c:1149
#define FirstMultiXactId
Definition: multixact.h:25
#define VACOPT_FREEZE
Definition: vacuum.h:181
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3448
void pgstat_vacuum_stat(void)
Definition: pgstat.c:1064
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2825
bool pg_database_ownercheck(Oid db_oid, Oid roleid)
Definition: aclchk.c:5241
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TruncateCommitTs(TransactionId oldestXact)
Definition: commit_ts.c:850
VacOptTernaryValue index_cleanup
Definition: vacuum.h:219
int vacuum_multixact_failsafe_age
Definition: vacuum.c:66
Node * arg
Definition: parsenodes.h:747
List * lappend(List *list, void *datum)
Definition: list.c:336
#define DatabaseOidIndexId
Definition: pg_database.h:90
bool IsInTransactionBlock(bool isTopLevel)
Definition: xact.c:3492
static int verbose
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2052
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1127
float float4
Definition: c.h:564
int VacuumCostBalanceLocal
Definition: vacuum.c:80
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:78
static int elevel
Definition: vacuumlazy.c:400
#define PG_FINALLY()
Definition: elog.h:330
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2503
void CommandCounterIncrement(void)
Definition: xact.c:1021
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
Oid MyDatabaseId
Definition: globals.c:88
bits32 options
Definition: cluster.h:29
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2210
#define InvalidOid
Definition: postgres_ext.h:36
VacOptTernaryValue truncate
Definition: vacuum.h:221
TransactionId datfrozenxid
Definition: pg_database.h:62
#define ereport(elevel,...)
Definition: elog.h:157
TransactionId MultiXactId
Definition: c.h:597
#define Max(x, y)
Definition: c.h:980
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
uint32 bits32
Definition: c.h:450
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
Definition: vacuum.c:100
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:804
#define VACOPT_VACUUM
Definition: vacuum.h:178
#define lfirst(lc)
Definition: pg_list.h:169
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:612
#define VACOPT_FULL
Definition: vacuum.h:182
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4823
void StartTransactionCommand(void)
Definition: xact.c:2838
RuleLock * rd_rules
Definition: rel.h:113
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:345
int nworkers
Definition: vacuum.h:229
static int list_length(const List *l)
Definition: pg_list.h:149
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:111
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1203
int vacuum_freeze_min_age
Definition: vacuum.c:61
TransactionId datminmxid
Definition: pg_database.h:65
int log_min_duration
Definition: vacuum.h:216
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
#define VACOPT_VERBOSE
Definition: vacuum.h:180
static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti)
Definition: vacuum.c:1633
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4570
int vacuum_freeze_table_age
Definition: vacuum.c:62
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:991
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:175
#define AccessExclusiveLock
Definition: lockdefs.h:45
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
Definition: pg_inherits.c:256
int NewGUCNestLevel(void)
Definition: guc.c:6186
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
void heap_inplace_update(Relation relation, HeapTuple tuple)
Definition: heapam.c:6059
double VacuumCostDelay
Definition: globals.c:145
#define VACOPT_SKIP_LOCKED
Definition: vacuum.h:183
List * options
Definition: parsenodes.h:3325
void list_free(List *list)
Definition: list.c:1391
#define elog(elevel,...)
Definition: elog.h:232
int i
int pgxactoff
Definition: proc.h:148
#define NameStr(name)
Definition: c.h:681
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
void AutoVacuumUpdateDelay(void)
Definition: autovacuum.c:1780
FormData_pg_database
Definition: pg_database.h:74
struct Latch * MyLatch
Definition: globals.c:57
char * defname
Definition: parsenodes.h:746
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
Relation vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options, bool verbose, LOCKMODE lmode)
Definition: vacuum.c:619
bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options)
Definition: vacuum.c:545
uint8 * statusFlags
Definition: proc.h:333
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define MAX_PARALLEL_WORKER_LIMIT
void vacuum_delay_point(void)
Definition: vacuum.c:2116
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define PG_TRY()
Definition: elog.h:313
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition: multixact.c:2941
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1292
Definition: pg_list.h:50
#define WL_LATCH_SET
Definition: latch.h:125
#define RelationGetRelid(relation)
Definition: rel.h:469
int multixact_freeze_min_age
Definition: vacuum.h:211
static long analyze(struct nfa *nfa)
Definition: regc_nfa.c:2987
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
#define PG_END_TRY()
Definition: elog.h:338
bytea * rd_options
Definition: rel.h:170
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define lfirst_oid(lc)
Definition: pg_list.h:171
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
bool VacuumCostActive
Definition: globals.c:152
static List * get_all_vacuum_rels(int options)
Definition: vacuum.c:870
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:723
List * rels
Definition: parsenodes.h:3326
static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def)
Definition: vacuum.c:2224