PostgreSQL Source Code  git master
vacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  * The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  * src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/xact.h"
34 #include "catalog/namespace.h"
35 #include "catalog/pg_database.h"
36 #include "catalog/pg_inherits.h"
37 #include "catalog/pg_namespace.h"
38 #include "commands/cluster.h"
39 #include "commands/defrem.h"
40 #include "commands/vacuum.h"
41 #include "miscadmin.h"
42 #include "nodes/makefuncs.h"
43 #include "pgstat.h"
44 #include "postmaster/autovacuum.h"
46 #include "storage/bufmgr.h"
47 #include "storage/lmgr.h"
48 #include "storage/proc.h"
49 #include "storage/procarray.h"
50 #include "utils/acl.h"
51 #include "utils/fmgroids.h"
52 #include "utils/guc.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
56 
57 
58 /*
59  * GUC parameters
60  */
65 
66 
67 /* A few variables that don't seem worth passing around as parameters */
68 static MemoryContext vac_context = NULL;
70 
71 
72 /*
73  * Variables for cost-based parallel vacuum. See comments atop
74  * compute_parallel_delay to understand how it works.
75  */
79 
80 /* non-export function prototypes */
81 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
82 static List *get_all_vacuum_rels(int options);
83 static void vac_truncate_clog(TransactionId frozenXID,
84  MultiXactId minMulti,
85  TransactionId lastSaneFrozenXid,
86  MultiXactId lastSaneMinMulti);
87 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
88 static double compute_parallel_delay(void);
90 
91 /*
92  * Primary entry point for manual VACUUM and ANALYZE commands
93  *
94  * This is mainly a preparation wrapper for the real operations that will
95  * happen in vacuum().
96  */
97 void
98 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
99 {
100  VacuumParams params;
101  bool verbose = false;
102  bool skip_locked = false;
103  bool analyze = false;
104  bool freeze = false;
105  bool full = false;
106  bool disable_page_skipping = false;
107  ListCell *lc;
108 
109  /* Set default value */
112 
113  /* By default parallel vacuum is enabled */
114  params.nworkers = 0;
115 
116  /* Parse options list */
117  foreach(lc, vacstmt->options)
118  {
119  DefElem *opt = (DefElem *) lfirst(lc);
120 
121  /* Parse common options for VACUUM and ANALYZE */
122  if (strcmp(opt->defname, "verbose") == 0)
123  verbose = defGetBoolean(opt);
124  else if (strcmp(opt->defname, "skip_locked") == 0)
125  skip_locked = defGetBoolean(opt);
126  else if (!vacstmt->is_vacuumcmd)
127  ereport(ERROR,
128  (errcode(ERRCODE_SYNTAX_ERROR),
129  errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
130  parser_errposition(pstate, opt->location)));
131 
132  /* Parse options available on VACUUM */
133  else if (strcmp(opt->defname, "analyze") == 0)
134  analyze = defGetBoolean(opt);
135  else if (strcmp(opt->defname, "freeze") == 0)
136  freeze = defGetBoolean(opt);
137  else if (strcmp(opt->defname, "full") == 0)
138  full = defGetBoolean(opt);
139  else if (strcmp(opt->defname, "disable_page_skipping") == 0)
140  disable_page_skipping = defGetBoolean(opt);
141  else if (strcmp(opt->defname, "index_cleanup") == 0)
143  else if (strcmp(opt->defname, "truncate") == 0)
144  params.truncate = get_vacopt_ternary_value(opt);
145  else if (strcmp(opt->defname, "parallel") == 0)
146  {
147  if (opt->arg == NULL)
148  {
149  ereport(ERROR,
150  (errcode(ERRCODE_SYNTAX_ERROR),
151  errmsg("parallel option requires a value between 0 and %d",
153  parser_errposition(pstate, opt->location)));
154  }
155  else
156  {
157  int nworkers;
158 
159  nworkers = defGetInt32(opt);
160  if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
161  ereport(ERROR,
162  (errcode(ERRCODE_SYNTAX_ERROR),
163  errmsg("parallel vacuum degree must be between 0 and %d",
164  MAX_PARALLEL_WORKER_LIMIT),
165  parser_errposition(pstate, opt->location)));
166 
167  /*
168  * Disable parallel vacuum, if user has specified parallel
169  * degree as zero.
170  */
171  if (nworkers == 0)
172  params.nworkers = -1;
173  else
174  params.nworkers = nworkers;
175  }
176  }
177  else
178  ereport(ERROR,
179  (errcode(ERRCODE_SYNTAX_ERROR),
180  errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
181  parser_errposition(pstate, opt->location)));
182  }
183 
184  /* Set vacuum options */
185  params.options =
186  (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
187  (verbose ? VACOPT_VERBOSE : 0) |
188  (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
189  (analyze ? VACOPT_ANALYZE : 0) |
190  (freeze ? VACOPT_FREEZE : 0) |
191  (full ? VACOPT_FULL : 0) |
192  (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
193 
194  /* sanity checks on options */
195  Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
196  Assert((params.options & VACOPT_VACUUM) ||
197  !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
198  Assert(!(params.options & VACOPT_SKIPTOAST));
199 
200  if ((params.options & VACOPT_FULL) && params.nworkers > 0)
201  ereport(ERROR,
202  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
203  errmsg("VACUUM FULL cannot be performed in parallel")));
204 
205  /*
206  * Make sure VACOPT_ANALYZE is specified if any column lists are present.
207  */
208  if (!(params.options & VACOPT_ANALYZE))
209  {
210  ListCell *lc;
211 
212  foreach(lc, vacstmt->rels)
213  {
215 
216  if (vrel->va_cols != NIL)
217  ereport(ERROR,
218  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
219  errmsg("ANALYZE option must be specified when a column list is provided")));
220  }
221  }
222 
223  /*
224  * All freeze ages are zero if the FREEZE option is given; otherwise pass
225  * them as -1 which means to use the default values.
226  */
227  if (params.options & VACOPT_FREEZE)
228  {
229  params.freeze_min_age = 0;
230  params.freeze_table_age = 0;
231  params.multixact_freeze_min_age = 0;
232  params.multixact_freeze_table_age = 0;
233  }
234  else
235  {
236  params.freeze_min_age = -1;
237  params.freeze_table_age = -1;
238  params.multixact_freeze_min_age = -1;
239  params.multixact_freeze_table_age = -1;
240  }
241 
242  /* user-invoked vacuum is never "for wraparound" */
243  params.is_wraparound = false;
244 
245  /* user-invoked vacuum never uses this parameter */
246  params.log_min_duration = -1;
247 
248  /* Now go through the common routine */
249  vacuum(vacstmt->rels, &params, NULL, isTopLevel);
250 }
251 
252 /*
253  * Internal entry point for VACUUM and ANALYZE commands.
254  *
255  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
256  * we process all relevant tables in the database. For each VacuumRelation,
257  * if a valid OID is supplied, the table with that OID is what to process;
258  * otherwise, the VacuumRelation's RangeVar indicates what to process.
259  *
260  * params contains a set of parameters that can be used to customize the
261  * behavior.
262  *
263  * bstrategy is normally given as NULL, but in autovacuum it can be passed
264  * in to use the same buffer strategy object across multiple vacuum() calls.
265  *
266  * isTopLevel should be passed down from ProcessUtility.
267  *
268  * It is the caller's responsibility that all parameters are allocated in a
269  * memory context that will not disappear at transaction commit.
270  */
271 void
272 vacuum(List *relations, VacuumParams *params,
273  BufferAccessStrategy bstrategy, bool isTopLevel)
274 {
275  static bool in_vacuum = false;
276 
277  const char *stmttype;
278  volatile bool in_outer_xact,
279  use_own_xacts;
280 
281  Assert(params != NULL);
282 
283  stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
284 
285  /*
286  * We cannot run VACUUM inside a user transaction block; if we were inside
287  * a transaction, then our commit- and start-transaction-command calls
288  * would not have the intended effect! There are numerous other subtle
289  * dependencies on this, too.
290  *
291  * ANALYZE (without VACUUM) can run either way.
292  */
293  if (params->options & VACOPT_VACUUM)
294  {
295  PreventInTransactionBlock(isTopLevel, stmttype);
296  in_outer_xact = false;
297  }
298  else
299  in_outer_xact = IsInTransactionBlock(isTopLevel);
300 
301  /*
302  * Due to static variables vac_context, anl_context and vac_strategy,
303  * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
304  * calls a hostile index expression that itself calls ANALYZE.
305  */
306  if (in_vacuum)
307  ereport(ERROR,
308  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
309  errmsg("%s cannot be executed from VACUUM or ANALYZE",
310  stmttype)));
311 
312  /*
313  * Sanity check DISABLE_PAGE_SKIPPING option.
314  */
315  if ((params->options & VACOPT_FULL) != 0 &&
316  (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
317  ereport(ERROR,
318  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
319  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
320 
321  /*
322  * Send info about dead objects to the statistics collector, unless we are
323  * in autovacuum --- autovacuum.c does this for itself.
324  */
325  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
327 
328  /*
329  * Create special memory context for cross-transaction storage.
330  *
331  * Since it is a child of PortalContext, it will go away eventually even
332  * if we suffer an error; there's no need for special abort cleanup logic.
333  */
334  vac_context = AllocSetContextCreate(PortalContext,
335  "Vacuum",
337 
338  /*
339  * If caller didn't give us a buffer strategy object, make one in the
340  * cross-transaction memory context.
341  */
342  if (bstrategy == NULL)
343  {
344  MemoryContext old_context = MemoryContextSwitchTo(vac_context);
345 
346  bstrategy = GetAccessStrategy(BAS_VACUUM);
347  MemoryContextSwitchTo(old_context);
348  }
349  vac_strategy = bstrategy;
350 
351  /*
352  * Build list of relation(s) to process, putting any new data in
353  * vac_context for safekeeping.
354  */
355  if (relations != NIL)
356  {
357  List *newrels = NIL;
358  ListCell *lc;
359 
360  foreach(lc, relations)
361  {
363  List *sublist;
364  MemoryContext old_context;
365 
366  sublist = expand_vacuum_rel(vrel, params->options);
367  old_context = MemoryContextSwitchTo(vac_context);
368  newrels = list_concat(newrels, sublist);
369  MemoryContextSwitchTo(old_context);
370  }
371  relations = newrels;
372  }
373  else
374  relations = get_all_vacuum_rels(params->options);
375 
376  /*
377  * Decide whether we need to start/commit our own transactions.
378  *
379  * For VACUUM (with or without ANALYZE): always do so, so that we can
380  * release locks as soon as possible. (We could possibly use the outer
381  * transaction for a one-table VACUUM, but handling TOAST tables would be
382  * problematic.)
383  *
384  * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
385  * start/commit our own transactions. Also, there's no need to do so if
386  * only processing one relation. For multiple relations when not within a
387  * transaction block, and also in an autovacuum worker, use own
388  * transactions so we can release locks sooner.
389  */
390  if (params->options & VACOPT_VACUUM)
391  use_own_xacts = true;
392  else
393  {
394  Assert(params->options & VACOPT_ANALYZE);
396  use_own_xacts = true;
397  else if (in_outer_xact)
398  use_own_xacts = false;
399  else if (list_length(relations) > 1)
400  use_own_xacts = true;
401  else
402  use_own_xacts = false;
403  }
404 
405  /*
406  * vacuum_rel expects to be entered with no transaction active; it will
407  * start and commit its own transaction. But we are called by an SQL
408  * command, and so we are executing inside a transaction already. We
409  * commit the transaction started in PostgresMain() here, and start
410  * another one before exiting to match the commit waiting for us back in
411  * PostgresMain().
412  */
413  if (use_own_xacts)
414  {
415  Assert(!in_outer_xact);
416 
417  /* ActiveSnapshot is not set by autovacuum */
418  if (ActiveSnapshotSet())
420 
421  /* matches the StartTransaction in PostgresMain() */
423  }
424 
425  /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
426  PG_TRY();
427  {
428  ListCell *cur;
429 
430  in_vacuum = true;
432  VacuumCostBalance = 0;
433  VacuumPageHit = 0;
434  VacuumPageMiss = 0;
435  VacuumPageDirty = 0;
437  VacuumSharedCostBalance = NULL;
438  VacuumActiveNWorkers = NULL;
439 
440  /*
441  * Loop to process each selected relation.
442  */
443  foreach(cur, relations)
444  {
446 
447  if (params->options & VACOPT_VACUUM)
448  {
449  if (!vacuum_rel(vrel->oid, vrel->relation, params))
450  continue;
451  }
452 
453  if (params->options & VACOPT_ANALYZE)
454  {
455  /*
456  * If using separate xacts, start one for analyze. Otherwise,
457  * we can use the outer transaction.
458  */
459  if (use_own_xacts)
460  {
462  /* functions in indexes may want a snapshot set */
464  }
465 
466  analyze_rel(vrel->oid, vrel->relation, params,
467  vrel->va_cols, in_outer_xact, vac_strategy);
468 
469  if (use_own_xacts)
470  {
473  }
474  else
475  {
476  /*
477  * If we're not using separate xacts, better separate the
478  * ANALYZE actions with CCIs. This avoids trouble if user
479  * says "ANALYZE t, t".
480  */
482  }
483  }
484  }
485  }
486  PG_FINALLY();
487  {
488  in_vacuum = false;
489  VacuumCostActive = false;
490  }
491  PG_END_TRY();
492 
493  /*
494  * Finish up processing.
495  */
496  if (use_own_xacts)
497  {
498  /* here, we are not in a transaction */
499 
500  /*
501  * This matches the CommitTransaction waiting for us in
502  * PostgresMain().
503  */
505  }
506 
507  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
508  {
509  /*
510  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
511  * (autovacuum.c does this for itself.)
512  */
514  }
515 
516  /*
517  * Clean up working storage --- note we must do this after
518  * StartTransactionCommand, else we might be trying to delete the active
519  * context!
520  */
521  MemoryContextDelete(vac_context);
522  vac_context = NULL;
523 }
524 
525 /*
526  * Check if a given relation can be safely vacuumed or analyzed. If the
527  * user is not the relation owner, issue a WARNING log message and return
528  * false to let the caller decide what to do with this relation. This
529  * routine is used to decide if a relation can be processed for VACUUM or
530  * ANALYZE.
531  */
532 bool
534 {
535  char *relname;
536 
537  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
538 
539  /*
540  * Check permissions.
541  *
542  * We allow the user to vacuum or analyze a table if he is superuser, the
543  * table owner, or the database owner (but in the latter case, only if
544  * it's not a shared relation). pg_class_ownercheck includes the
545  * superuser case.
546  *
547  * Note we choose to treat permissions failure as a WARNING and keep
548  * trying to vacuum or analyze the rest of the DB --- is this appropriate?
549  */
550  if (pg_class_ownercheck(relid, GetUserId()) ||
551  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
552  return true;
553 
554  relname = NameStr(reltuple->relname);
555 
556  if ((options & VACOPT_VACUUM) != 0)
557  {
558  if (reltuple->relisshared)
560  (errmsg("skipping \"%s\" --- only superuser can vacuum it",
561  relname)));
562  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
564  (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
565  relname)));
566  else
568  (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
569  relname)));
570 
571  /*
572  * For VACUUM ANALYZE, both logs could show up, but just generate
573  * information for VACUUM as that would be the first one to be
574  * processed.
575  */
576  return false;
577  }
578 
579  if ((options & VACOPT_ANALYZE) != 0)
580  {
581  if (reltuple->relisshared)
583  (errmsg("skipping \"%s\" --- only superuser can analyze it",
584  relname)));
585  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
587  (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
588  relname)));
589  else
591  (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
592  relname)));
593  }
594 
595  return false;
596 }
597 
598 
599 /*
600  * vacuum_open_relation
601  *
602  * This routine is used for attempting to open and lock a relation which
603  * is going to be vacuumed or analyzed. If the relation cannot be opened
604  * or locked, a log is emitted if possible.
605  */
606 Relation
608  bool verbose, LOCKMODE lmode)
609 {
610  Relation onerel;
611  bool rel_lock = true;
612  int elevel;
613 
614  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
615 
616  /*
617  * Open the relation and get the appropriate lock on it.
618  *
619  * There's a race condition here: the relation may have gone away since
620  * the last time we saw it. If so, we don't need to vacuum or analyze it.
621  *
622  * If we've been asked not to wait for the relation lock, acquire it first
623  * in non-blocking mode, before calling try_relation_open().
624  */
625  if (!(options & VACOPT_SKIP_LOCKED))
626  onerel = try_relation_open(relid, lmode);
627  else if (ConditionalLockRelationOid(relid, lmode))
628  onerel = try_relation_open(relid, NoLock);
629  else
630  {
631  onerel = NULL;
632  rel_lock = false;
633  }
634 
635  /* if relation is opened, leave */
636  if (onerel)
637  return onerel;
638 
639  /*
640  * Relation could not be opened, hence generate if possible a log
641  * informing on the situation.
642  *
643  * If the RangeVar is not defined, we do not have enough information to
644  * provide a meaningful log statement. Chances are that the caller has
645  * intentionally not provided this information so that this logging is
646  * skipped, anyway.
647  */
648  if (relation == NULL)
649  return NULL;
650 
651  /*
652  * Determine the log level.
653  *
654  * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
655  * statements in the permission checks; otherwise, only log if the caller
656  * so requested.
657  */
659  elevel = WARNING;
660  else if (verbose)
661  elevel = LOG;
662  else
663  return NULL;
664 
665  if ((options & VACOPT_VACUUM) != 0)
666  {
667  if (!rel_lock)
668  ereport(elevel,
669  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
670  errmsg("skipping vacuum of \"%s\" --- lock not available",
671  relation->relname)));
672  else
673  ereport(elevel,
675  errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
676  relation->relname)));
677 
678  /*
679  * For VACUUM ANALYZE, both logs could show up, but just generate
680  * information for VACUUM as that would be the first one to be
681  * processed.
682  */
683  return NULL;
684  }
685 
686  if ((options & VACOPT_ANALYZE) != 0)
687  {
688  if (!rel_lock)
689  ereport(elevel,
690  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
691  errmsg("skipping analyze of \"%s\" --- lock not available",
692  relation->relname)));
693  else
694  ereport(elevel,
696  errmsg("skipping analyze of \"%s\" --- relation no longer exists",
697  relation->relname)));
698  }
699 
700  return NULL;
701 }
702 
703 
704 /*
705  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
706  * and optionally add VacuumRelations for partitions of the table.
707  *
708  * If a VacuumRelation does not have an OID supplied and is a partitioned
709  * table, an extra entry will be added to the output for each partition.
710  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
711  * it does not want us to expand partitioned tables.
712  *
713  * We take care not to modify the input data structure, but instead build
714  * new VacuumRelation(s) to return. (But note that they will reference
715  * unmodified parts of the input, eg column lists.) New data structures
716  * are made in vac_context.
717  */
718 static List *
720 {
721  List *vacrels = NIL;
722  MemoryContext oldcontext;
723 
724  /* If caller supplied OID, there's nothing we need do here. */
725  if (OidIsValid(vrel->oid))
726  {
727  oldcontext = MemoryContextSwitchTo(vac_context);
728  vacrels = lappend(vacrels, vrel);
729  MemoryContextSwitchTo(oldcontext);
730  }
731  else
732  {
733  /* Process a specific relation, and possibly partitions thereof */
734  Oid relid;
735  HeapTuple tuple;
736  Form_pg_class classForm;
737  bool include_parts;
738  int rvr_opts;
739 
740  /*
741  * Since autovacuum workers supply OIDs when calling vacuum(), no
742  * autovacuum worker should reach this code.
743  */
745 
746  /*
747  * We transiently take AccessShareLock to protect the syscache lookup
748  * below, as well as find_all_inheritors's expectation that the caller
749  * holds some lock on the starting relation.
750  */
751  rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
752  relid = RangeVarGetRelidExtended(vrel->relation,
754  rvr_opts,
755  NULL, NULL);
756 
757  /*
758  * If the lock is unavailable, emit the same log statement that
759  * vacuum_rel() and analyze_rel() would.
760  */
761  if (!OidIsValid(relid))
762  {
763  if (options & VACOPT_VACUUM)
765  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
766  errmsg("skipping vacuum of \"%s\" --- lock not available",
767  vrel->relation->relname)));
768  else
770  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
771  errmsg("skipping analyze of \"%s\" --- lock not available",
772  vrel->relation->relname)));
773  return vacrels;
774  }
775 
776  /*
777  * To check whether the relation is a partitioned table and its
778  * ownership, fetch its syscache entry.
779  */
780  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
781  if (!HeapTupleIsValid(tuple))
782  elog(ERROR, "cache lookup failed for relation %u", relid);
783  classForm = (Form_pg_class) GETSTRUCT(tuple);
784 
785  /*
786  * Make a returnable VacuumRelation for this rel if user is a proper
787  * owner.
788  */
789  if (vacuum_is_relation_owner(relid, classForm, options))
790  {
791  oldcontext = MemoryContextSwitchTo(vac_context);
792  vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
793  relid,
794  vrel->va_cols));
795  MemoryContextSwitchTo(oldcontext);
796  }
797 
798 
799  include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
800  ReleaseSysCache(tuple);
801 
802  /*
803  * If it is, make relation list entries for its partitions. Note that
804  * the list returned by find_all_inheritors() includes the passed-in
805  * OID, so we have to skip that. There's no point in taking locks on
806  * the individual partitions yet, and doing so would just add
807  * unnecessary deadlock risk. For this last reason we do not check
808  * yet the ownership of the partitions, which get added to the list to
809  * process. Ownership will be checked later on anyway.
810  */
811  if (include_parts)
812  {
813  List *part_oids = find_all_inheritors(relid, NoLock, NULL);
814  ListCell *part_lc;
815 
816  foreach(part_lc, part_oids)
817  {
818  Oid part_oid = lfirst_oid(part_lc);
819 
820  if (part_oid == relid)
821  continue; /* ignore original table */
822 
823  /*
824  * We omit a RangeVar since it wouldn't be appropriate to
825  * complain about failure to open one of these relations
826  * later.
827  */
828  oldcontext = MemoryContextSwitchTo(vac_context);
829  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
830  part_oid,
831  vrel->va_cols));
832  MemoryContextSwitchTo(oldcontext);
833  }
834  }
835 
836  /*
837  * Release lock again. This means that by the time we actually try to
838  * process the table, it might be gone or renamed. In the former case
839  * we'll silently ignore it; in the latter case we'll process it
840  * anyway, but we must beware that the RangeVar doesn't necessarily
841  * identify it anymore. This isn't ideal, perhaps, but there's little
842  * practical alternative, since we're typically going to commit this
843  * transaction and begin a new one between now and then. Moreover,
844  * holding locks on multiple relations would create significant risk
845  * of deadlock.
846  */
848  }
849 
850  return vacrels;
851 }
852 
853 /*
854  * Construct a list of VacuumRelations for all vacuumable rels in
855  * the current database. The list is built in vac_context.
856  */
857 static List *
859 {
860  List *vacrels = NIL;
861  Relation pgclass;
862  TableScanDesc scan;
863  HeapTuple tuple;
864 
865  pgclass = table_open(RelationRelationId, AccessShareLock);
866 
867  scan = table_beginscan_catalog(pgclass, 0, NULL);
868 
869  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
870  {
871  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
872  MemoryContext oldcontext;
873  Oid relid = classForm->oid;
874 
875  /* check permissions of relation */
876  if (!vacuum_is_relation_owner(relid, classForm, options))
877  continue;
878 
879  /*
880  * We include partitioned tables here; depending on which operation is
881  * to be performed, caller will decide whether to process or ignore
882  * them.
883  */
884  if (classForm->relkind != RELKIND_RELATION &&
885  classForm->relkind != RELKIND_MATVIEW &&
886  classForm->relkind != RELKIND_PARTITIONED_TABLE)
887  continue;
888 
889  /*
890  * Build VacuumRelation(s) specifying the table OIDs to be processed.
891  * We omit a RangeVar since it wouldn't be appropriate to complain
892  * about failure to open one of these relations later.
893  */
894  oldcontext = MemoryContextSwitchTo(vac_context);
895  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
896  relid,
897  NIL));
898  MemoryContextSwitchTo(oldcontext);
899  }
900 
901  table_endscan(scan);
902  table_close(pgclass, AccessShareLock);
903 
904  return vacrels;
905 }
906 
907 /*
908  * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
909  *
910  * Input parameters are the target relation, applicable freeze age settings.
911  *
912  * The output parameters are:
913  * - oldestXmin is the cutoff value used to distinguish whether tuples are
914  * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
915  * - freezeLimit is the Xid below which all Xids are replaced by
916  * FrozenTransactionId during vacuum.
917  * - xidFullScanLimit (computed from freeze_table_age parameter)
918  * represents a minimum Xid value; a table whose relfrozenxid is older than
919  * this will have a full-table vacuum applied to it, to freeze tuples across
920  * the whole table. Vacuuming a table younger than this value can use a
921  * partial scan.
922  * - multiXactCutoff is the value below which all MultiXactIds are removed from
923  * Xmax.
924  * - mxactFullScanLimit is a value against which a table's relminmxid value is
925  * compared to produce a full-table vacuum, as with xidFullScanLimit.
926  *
927  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
928  * not interested.
929  */
930 void
932  int freeze_min_age,
933  int freeze_table_age,
934  int multixact_freeze_min_age,
935  int multixact_freeze_table_age,
936  TransactionId *oldestXmin,
937  TransactionId *freezeLimit,
938  TransactionId *xidFullScanLimit,
939  MultiXactId *multiXactCutoff,
940  MultiXactId *mxactFullScanLimit)
941 {
942  int freezemin;
943  int mxid_freezemin;
944  int effective_multixact_freeze_max_age;
945  TransactionId limit;
946  TransactionId safeLimit;
947  MultiXactId oldestMxact;
948  MultiXactId mxactLimit;
949  MultiXactId safeMxactLimit;
950 
951  /*
952  * We can always ignore processes running lazy vacuum. This is because we
953  * use these values only for deciding which tuples we must keep in the
954  * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
955  * XID assigned), it's safe to ignore it. In theory it could be
956  * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
957  * that only one vacuum process can be working on a particular table at
958  * any time, and that each vacuum is always an independent transaction.
959  */
960  *oldestXmin = GetOldestNonRemovableTransactionId(rel);
961 
963  {
964  TransactionId limit_xmin;
965  TimestampTz limit_ts;
966 
967  if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
968  &limit_xmin, &limit_ts))
969  {
970  /*
971  * TODO: We should only set the threshold if we are pruning on the
972  * basis of the increased limits. Not as crucial here as it is
973  * for opportunistic pruning (which often happens at a much higher
974  * frequency), but would still be a significant improvement.
975  */
976  SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
977  *oldestXmin = limit_xmin;
978  }
979  }
980 
981  Assert(TransactionIdIsNormal(*oldestXmin));
982 
983  /*
984  * Determine the minimum freeze age to use: as specified by the caller, or
985  * vacuum_freeze_min_age, but in any case not more than half
986  * autovacuum_freeze_max_age, so that autovacuums to prevent XID
987  * wraparound won't occur too frequently.
988  */
989  freezemin = freeze_min_age;
990  if (freezemin < 0)
991  freezemin = vacuum_freeze_min_age;
992  freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
993  Assert(freezemin >= 0);
994 
995  /*
996  * Compute the cutoff XID, being careful not to generate a "permanent" XID
997  */
998  limit = *oldestXmin - freezemin;
999  if (!TransactionIdIsNormal(limit))
1000  limit = FirstNormalTransactionId;
1001 
1002  /*
1003  * If oldestXmin is very far back (in practice, more than
1004  * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
1005  * freeze age of zero.
1006  */
1008  if (!TransactionIdIsNormal(safeLimit))
1009  safeLimit = FirstNormalTransactionId;
1010 
1011  if (TransactionIdPrecedes(limit, safeLimit))
1012  {
1013  ereport(WARNING,
1014  (errmsg("oldest xmin is far in the past"),
1015  errhint("Close open transactions soon to avoid wraparound problems.\n"
1016  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1017  limit = *oldestXmin;
1018  }
1019 
1020  *freezeLimit = limit;
1021 
1022  /*
1023  * Compute the multixact age for which freezing is urgent. This is
1024  * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1025  * short of multixact member space.
1026  */
1027  effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1028 
1029  /*
1030  * Determine the minimum multixact freeze age to use: as specified by
1031  * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1032  * than half effective_multixact_freeze_max_age, so that autovacuums to
1033  * prevent MultiXact wraparound won't occur too frequently.
1034  */
1035  mxid_freezemin = multixact_freeze_min_age;
1036  if (mxid_freezemin < 0)
1037  mxid_freezemin = vacuum_multixact_freeze_min_age;
1038  mxid_freezemin = Min(mxid_freezemin,
1039  effective_multixact_freeze_max_age / 2);
1040  Assert(mxid_freezemin >= 0);
1041 
1042  /* compute the cutoff multi, being careful to generate a valid value */
1043  oldestMxact = GetOldestMultiXactId();
1044  mxactLimit = oldestMxact - mxid_freezemin;
1045  if (mxactLimit < FirstMultiXactId)
1046  mxactLimit = FirstMultiXactId;
1047 
1048  safeMxactLimit =
1049  ReadNextMultiXactId() - effective_multixact_freeze_max_age;
1050  if (safeMxactLimit < FirstMultiXactId)
1051  safeMxactLimit = FirstMultiXactId;
1052 
1053  if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
1054  {
1055  ereport(WARNING,
1056  (errmsg("oldest multixact is far in the past"),
1057  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
1058  /* Use the safe limit, unless an older mxact is still running */
1059  if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
1060  mxactLimit = oldestMxact;
1061  else
1062  mxactLimit = safeMxactLimit;
1063  }
1064 
1065  *multiXactCutoff = mxactLimit;
1066 
1067  if (xidFullScanLimit != NULL)
1068  {
1069  int freezetable;
1070 
1071  Assert(mxactFullScanLimit != NULL);
1072 
1073  /*
1074  * Determine the table freeze age to use: as specified by the caller,
1075  * or vacuum_freeze_table_age, but in any case not more than
1076  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1077  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1078  * before anti-wraparound autovacuum is launched.
1079  */
1080  freezetable = freeze_table_age;
1081  if (freezetable < 0)
1082  freezetable = vacuum_freeze_table_age;
1083  freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1084  Assert(freezetable >= 0);
1085 
1086  /*
1087  * Compute XID limit causing a full-table vacuum, being careful not to
1088  * generate a "permanent" XID.
1089  */
1090  limit = ReadNewTransactionId() - freezetable;
1091  if (!TransactionIdIsNormal(limit))
1092  limit = FirstNormalTransactionId;
1093 
1094  *xidFullScanLimit = limit;
1095 
1096  /*
1097  * Similar to the above, determine the table freeze age to use for
1098  * multixacts: as specified by the caller, or
1099  * vacuum_multixact_freeze_table_age, but in any case not more than
1100  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
1101  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
1102  * freeze multixacts before anti-wraparound autovacuum is launched.
1103  */
1104  freezetable = multixact_freeze_table_age;
1105  if (freezetable < 0)
1106  freezetable = vacuum_multixact_freeze_table_age;
1107  freezetable = Min(freezetable,
1108  effective_multixact_freeze_max_age * 0.95);
1109  Assert(freezetable >= 0);
1110 
1111  /*
1112  * Compute MultiXact limit causing a full-table vacuum, being careful
1113  * to generate a valid MultiXact value.
1114  */
1115  mxactLimit = ReadNextMultiXactId() - freezetable;
1116  if (mxactLimit < FirstMultiXactId)
1117  mxactLimit = FirstMultiXactId;
1118 
1119  *mxactFullScanLimit = mxactLimit;
1120  }
1121  else
1122  {
1123  Assert(mxactFullScanLimit == NULL);
1124  }
1125 }
1126 
1127 /*
1128  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1129  *
1130  * If we scanned the whole relation then we should just use the count of
1131  * live tuples seen; but if we did not, we should not blindly extrapolate
1132  * from that number, since VACUUM may have scanned a quite nonrandom
1133  * subset of the table. When we have only partial information, we take
1134  * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1135  * of the tuple density in the unscanned pages.
1136  *
1137  * Note: scanned_tuples should count only *live* tuples, since
1138  * pg_class.reltuples is defined that way.
1139  */
1140 double
1142  BlockNumber total_pages,
1143  BlockNumber scanned_pages,
1144  double scanned_tuples)
1145 {
1146  BlockNumber old_rel_pages = relation->rd_rel->relpages;
1147  double old_rel_tuples = relation->rd_rel->reltuples;
1148  double old_density;
1149  double unscanned_pages;
1150  double total_tuples;
1151 
1152  /* If we did scan the whole table, just use the count as-is */
1153  if (scanned_pages >= total_pages)
1154  return scanned_tuples;
1155 
1156  /*
1157  * If scanned_pages is zero but total_pages isn't, keep the existing value
1158  * of reltuples. (Note: we might be returning -1 in this case.)
1159  */
1160  if (scanned_pages == 0)
1161  return old_rel_tuples;
1162 
1163  /*
1164  * If old density is unknown, we can't do much except scale up
1165  * scanned_tuples to match total_pages.
1166  */
1167  if (old_rel_tuples < 0 || old_rel_pages == 0)
1168  return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1169 
1170  /*
1171  * Okay, we've covered the corner cases. The normal calculation is to
1172  * convert the old measurement to a density (tuples per page), then
1173  * estimate the number of tuples in the unscanned pages using that figure,
1174  * and finally add on the number of tuples in the scanned pages.
1175  */
1176  old_density = old_rel_tuples / old_rel_pages;
1177  unscanned_pages = (double) total_pages - (double) scanned_pages;
1178  total_tuples = old_density * unscanned_pages + scanned_tuples;
1179  return floor(total_tuples + 0.5);
1180 }
1181 
1182 
1183 /*
1184  * vac_update_relstats() -- update statistics for one relation
1185  *
1186  * Update the whole-relation statistics that are kept in its pg_class
1187  * row. There are additional stats that will be updated if we are
1188  * doing ANALYZE, but we always update these stats. This routine works
1189  * for both index and heap relation entries in pg_class.
1190  *
1191  * We violate transaction semantics here by overwriting the rel's
1192  * existing pg_class tuple with the new values. This is reasonably
1193  * safe as long as we're sure that the new values are correct whether or
1194  * not this transaction commits. The reason for doing this is that if
1195  * we updated these tuples in the usual way, vacuuming pg_class itself
1196  * wouldn't work very well --- by the time we got done with a vacuum
1197  * cycle, most of the tuples in pg_class would've been obsoleted. Of
1198  * course, this only works for fixed-size not-null columns, but these are.
1199  *
1200  * Another reason for doing it this way is that when we are in a lazy
1201  * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1202  * Somebody vacuuming pg_class might think they could delete a tuple
1203  * marked with xmin = our xid.
1204  *
1205  * In addition to fundamentally nontransactional statistics such as
1206  * relpages and relallvisible, we try to maintain certain lazily-updated
1207  * DDL flags such as relhasindex, by clearing them if no longer correct.
1208  * It's safe to do this in VACUUM, which can't run in parallel with
1209  * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1210  * However, it's *not* safe to do it in an ANALYZE that's within an
1211  * outer transaction, because for example the current transaction might
1212  * have dropped the last index; then we'd think relhasindex should be
1213  * cleared, but if the transaction later rolls back this would be wrong.
1214  * So we refrain from updating the DDL flags if we're inside an outer
1215  * transaction. This is OK since postponing the flag maintenance is
1216  * always allowable.
1217  *
1218  * Note: num_tuples should count only *live* tuples, since
1219  * pg_class.reltuples is defined that way.
1220  *
1221  * This routine is shared by VACUUM and ANALYZE.
1222  */
1223 void
1225  BlockNumber num_pages, double num_tuples,
1226  BlockNumber num_all_visible_pages,
1227  bool hasindex, TransactionId frozenxid,
1228  MultiXactId minmulti,
1229  bool in_outer_xact)
1230 {
1231  Oid relid = RelationGetRelid(relation);
1232  Relation rd;
1233  HeapTuple ctup;
1234  Form_pg_class pgcform;
1235  bool dirty;
1236 
1237  rd = table_open(RelationRelationId, RowExclusiveLock);
1238 
1239  /* Fetch a copy of the tuple to scribble on */
1241  if (!HeapTupleIsValid(ctup))
1242  elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1243  relid);
1244  pgcform = (Form_pg_class) GETSTRUCT(ctup);
1245 
1246  /* Apply statistical updates, if any, to copied tuple */
1247 
1248  dirty = false;
1249  if (pgcform->relpages != (int32) num_pages)
1250  {
1251  pgcform->relpages = (int32) num_pages;
1252  dirty = true;
1253  }
1254  if (pgcform->reltuples != (float4) num_tuples)
1255  {
1256  pgcform->reltuples = (float4) num_tuples;
1257  dirty = true;
1258  }
1259  if (pgcform->relallvisible != (int32) num_all_visible_pages)
1260  {
1261  pgcform->relallvisible = (int32) num_all_visible_pages;
1262  dirty = true;
1263  }
1264 
1265  /* Apply DDL updates, but not inside an outer transaction (see above) */
1266 
1267  if (!in_outer_xact)
1268  {
1269  /*
1270  * If we didn't find any indexes, reset relhasindex.
1271  */
1272  if (pgcform->relhasindex && !hasindex)
1273  {
1274  pgcform->relhasindex = false;
1275  dirty = true;
1276  }
1277 
1278  /* We also clear relhasrules and relhastriggers if needed */
1279  if (pgcform->relhasrules && relation->rd_rules == NULL)
1280  {
1281  pgcform->relhasrules = false;
1282  dirty = true;
1283  }
1284  if (pgcform->relhastriggers && relation->trigdesc == NULL)
1285  {
1286  pgcform->relhastriggers = false;
1287  dirty = true;
1288  }
1289  }
1290 
1291  /*
1292  * Update relfrozenxid, unless caller passed InvalidTransactionId
1293  * indicating it has no new data.
1294  *
1295  * Ordinarily, we don't let relfrozenxid go backwards: if things are
1296  * working correctly, the only way the new frozenxid could be older would
1297  * be if a previous VACUUM was done with a tighter freeze_min_age, in
1298  * which case we don't want to forget the work it already did. However,
1299  * if the stored relfrozenxid is "in the future", then it must be corrupt
1300  * and it seems best to overwrite it with the cutoff we used this time.
1301  * This should match vac_update_datfrozenxid() concerning what we consider
1302  * to be "in the future".
1303  */
1304  if (TransactionIdIsNormal(frozenxid) &&
1305  pgcform->relfrozenxid != frozenxid &&
1306  (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1308  pgcform->relfrozenxid)))
1309  {
1310  pgcform->relfrozenxid = frozenxid;
1311  dirty = true;
1312  }
1313 
1314  /* Similarly for relminmxid */
1315  if (MultiXactIdIsValid(minmulti) &&
1316  pgcform->relminmxid != minmulti &&
1317  (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1318  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1319  {
1320  pgcform->relminmxid = minmulti;
1321  dirty = true;
1322  }
1323 
1324  /* If anything changed, write out the tuple. */
1325  if (dirty)
1326  heap_inplace_update(rd, ctup);
1327 
1329 }
1330 
1331 
1332 /*
1333  * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1334  *
1335  * Update pg_database's datfrozenxid entry for our database to be the
1336  * minimum of the pg_class.relfrozenxid values.
1337  *
1338  * Similarly, update our datminmxid to be the minimum of the
1339  * pg_class.relminmxid values.
1340  *
1341  * If we are able to advance either pg_database value, also try to
1342  * truncate pg_xact and pg_multixact.
1343  *
1344  * We violate transaction semantics here by overwriting the database's
1345  * existing pg_database tuple with the new values. This is reasonably
1346  * safe since the new values are correct whether or not this transaction
1347  * commits. As with vac_update_relstats, this avoids leaving dead tuples
1348  * behind after a VACUUM.
1349  */
1350 void
1352 {
1353  HeapTuple tuple;
1354  Form_pg_database dbform;
1355  Relation relation;
1356  SysScanDesc scan;
1357  HeapTuple classTup;
1358  TransactionId newFrozenXid;
1359  MultiXactId newMinMulti;
1360  TransactionId lastSaneFrozenXid;
1361  MultiXactId lastSaneMinMulti;
1362  bool bogus = false;
1363  bool dirty = false;
1364  ScanKeyData key[1];
1365 
1366  /*
1367  * Restrict this task to one backend per database. This avoids race
1368  * conditions that would move datfrozenxid or datminmxid backward. It
1369  * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1370  * datfrozenxid passed to an earlier vac_truncate_clog() call.
1371  */
1373 
1374  /*
1375  * Initialize the "min" calculation with
1376  * GetOldestNonRemovableTransactionId(), which is a reasonable
1377  * approximation to the minimum relfrozenxid for not-yet-committed
1378  * pg_class entries for new tables; see AddNewRelationTuple(). So we
1379  * cannot produce a wrong minimum by starting with this.
1380  */
1381  newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1382 
1383  /*
1384  * Similarly, initialize the MultiXact "min" with the value that would be
1385  * used on pg_class for new tables. See AddNewRelationTuple().
1386  */
1387  newMinMulti = GetOldestMultiXactId();
1388 
1389  /*
1390  * Identify the latest relfrozenxid and relminmxid values that we could
1391  * validly see during the scan. These are conservative values, but it's
1392  * not really worth trying to be more exact.
1393  */
1394  lastSaneFrozenXid = ReadNewTransactionId();
1395  lastSaneMinMulti = ReadNextMultiXactId();
1396 
1397  /*
1398  * We must seqscan pg_class to find the minimum Xid, because there is no
1399  * index that can help us here.
1400  */
1401  relation = table_open(RelationRelationId, AccessShareLock);
1402 
1403  scan = systable_beginscan(relation, InvalidOid, false,
1404  NULL, 0, NULL);
1405 
1406  while ((classTup = systable_getnext(scan)) != NULL)
1407  {
1408  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1409 
1410  /*
1411  * Only consider relations able to hold unfrozen XIDs (anything else
1412  * should have InvalidTransactionId in relfrozenxid anyway).
1413  */
1414  if (classForm->relkind != RELKIND_RELATION &&
1415  classForm->relkind != RELKIND_MATVIEW &&
1416  classForm->relkind != RELKIND_TOASTVALUE)
1417  {
1418  Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1419  Assert(!MultiXactIdIsValid(classForm->relminmxid));
1420  continue;
1421  }
1422 
1423  /*
1424  * Some table AMs might not need per-relation xid / multixid horizons.
1425  * It therefore seems reasonable to allow relfrozenxid and relminmxid
1426  * to not be set (i.e. set to their respective Invalid*Id)
1427  * independently. Thus validate and compute horizon for each only if
1428  * set.
1429  *
1430  * If things are working properly, no relation should have a
1431  * relfrozenxid or relminmxid that is "in the future". However, such
1432  * cases have been known to arise due to bugs in pg_upgrade. If we
1433  * see any entries that are "in the future", chicken out and don't do
1434  * anything. This ensures we won't truncate clog & multixact SLRUs
1435  * before those relations have been scanned and cleaned up.
1436  */
1437 
1438  if (TransactionIdIsValid(classForm->relfrozenxid))
1439  {
1440  Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1441 
1442  /* check for values in the future */
1443  if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1444  {
1445  bogus = true;
1446  break;
1447  }
1448 
1449  /* determine new horizon */
1450  if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1451  newFrozenXid = classForm->relfrozenxid;
1452  }
1453 
1454  if (MultiXactIdIsValid(classForm->relminmxid))
1455  {
1456  /* check for values in the future */
1457  if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1458  {
1459  bogus = true;
1460  break;
1461  }
1462 
1463  /* determine new horizon */
1464  if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1465  newMinMulti = classForm->relminmxid;
1466  }
1467  }
1468 
1469  /* we're done with pg_class */
1470  systable_endscan(scan);
1471  table_close(relation, AccessShareLock);
1472 
1473  /* chicken out if bogus data found */
1474  if (bogus)
1475  return;
1476 
1477  Assert(TransactionIdIsNormal(newFrozenXid));
1478  Assert(MultiXactIdIsValid(newMinMulti));
1479 
1480  /* Now fetch the pg_database tuple we need to update. */
1481  relation = table_open(DatabaseRelationId, RowExclusiveLock);
1482 
1483  /*
1484  * Get the pg_database tuple to scribble on. Note that this does not
1485  * directly rely on the syscache to avoid issues with flattened toast
1486  * values for the in-place update.
1487  */
1488  ScanKeyInit(&key[0],
1489  Anum_pg_database_oid,
1490  BTEqualStrategyNumber, F_OIDEQ,
1492 
1493  scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1494  NULL, 1, key);
1495  tuple = systable_getnext(scan);
1496  tuple = heap_copytuple(tuple);
1497  systable_endscan(scan);
1498 
1499  if (!HeapTupleIsValid(tuple))
1500  elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1501 
1502  dbform = (Form_pg_database) GETSTRUCT(tuple);
1503 
1504  /*
1505  * As in vac_update_relstats(), we ordinarily don't want to let
1506  * datfrozenxid go backward; but if it's "in the future" then it must be
1507  * corrupt and it seems best to overwrite it.
1508  */
1509  if (dbform->datfrozenxid != newFrozenXid &&
1510  (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1511  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1512  {
1513  dbform->datfrozenxid = newFrozenXid;
1514  dirty = true;
1515  }
1516  else
1517  newFrozenXid = dbform->datfrozenxid;
1518 
1519  /* Ditto for datminmxid */
1520  if (dbform->datminmxid != newMinMulti &&
1521  (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1522  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1523  {
1524  dbform->datminmxid = newMinMulti;
1525  dirty = true;
1526  }
1527  else
1528  newMinMulti = dbform->datminmxid;
1529 
1530  if (dirty)
1531  heap_inplace_update(relation, tuple);
1532 
1533  heap_freetuple(tuple);
1534  table_close(relation, RowExclusiveLock);
1535 
1536  /*
1537  * If we were able to advance datfrozenxid or datminmxid, see if we can
1538  * truncate pg_xact and/or pg_multixact. Also do it if the shared
1539  * XID-wrap-limit info is stale, since this action will update that too.
1540  */
1541  if (dirty || ForceTransactionIdLimitUpdate())
1542  vac_truncate_clog(newFrozenXid, newMinMulti,
1543  lastSaneFrozenXid, lastSaneMinMulti);
1544 }
1545 
1546 
1547 /*
1548  * vac_truncate_clog() -- attempt to truncate the commit log
1549  *
1550  * Scan pg_database to determine the system-wide oldest datfrozenxid,
1551  * and use it to truncate the transaction commit log (pg_xact).
1552  * Also update the XID wrap limit info maintained by varsup.c.
1553  * Likewise for datminmxid.
1554  *
1555  * The passed frozenXID and minMulti are the updated values for my own
1556  * pg_database entry. They're used to initialize the "min" calculations.
1557  * The caller also passes the "last sane" XID and MXID, since it has
1558  * those at hand already.
1559  *
1560  * This routine is only invoked when we've managed to change our
1561  * DB's datfrozenxid/datminmxid values, or we found that the shared
1562  * XID-wrap-limit info is stale.
1563  */
1564 static void
1566  MultiXactId minMulti,
1567  TransactionId lastSaneFrozenXid,
1568  MultiXactId lastSaneMinMulti)
1569 {
1570  TransactionId nextXID = ReadNewTransactionId();
1571  Relation relation;
1572  TableScanDesc scan;
1573  HeapTuple tuple;
1574  Oid oldestxid_datoid;
1575  Oid minmulti_datoid;
1576  bool bogus = false;
1577  bool frozenAlreadyWrapped = false;
1578 
1579  /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1580  LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1581 
1582  /* init oldest datoids to sync with my frozenXID/minMulti values */
1583  oldestxid_datoid = MyDatabaseId;
1584  minmulti_datoid = MyDatabaseId;
1585 
1586  /*
1587  * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1588  *
1589  * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1590  * the values could change while we look at them. Fetch each one just
1591  * once to ensure sane behavior of the comparison logic. (Here, as in
1592  * many other places, we assume that fetching or updating an XID in shared
1593  * storage is atomic.)
1594  *
1595  * Note: we need not worry about a race condition with new entries being
1596  * inserted by CREATE DATABASE. Any such entry will have a copy of some
1597  * existing DB's datfrozenxid, and that source DB cannot be ours because
1598  * of the interlock against copying a DB containing an active backend.
1599  * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1600  * concurrently modify the datfrozenxid's of different databases, the
1601  * worst possible outcome is that pg_xact is not truncated as aggressively
1602  * as it could be.
1603  */
1604  relation = table_open(DatabaseRelationId, AccessShareLock);
1605 
1606  scan = table_beginscan_catalog(relation, 0, NULL);
1607 
1608  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1609  {
1610  volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1611  TransactionId datfrozenxid = dbform->datfrozenxid;
1612  TransactionId datminmxid = dbform->datminmxid;
1613 
1614  Assert(TransactionIdIsNormal(datfrozenxid));
1615  Assert(MultiXactIdIsValid(datminmxid));
1616 
1617  /*
1618  * If things are working properly, no database should have a
1619  * datfrozenxid or datminmxid that is "in the future". However, such
1620  * cases have been known to arise due to bugs in pg_upgrade. If we
1621  * see any entries that are "in the future", chicken out and don't do
1622  * anything. This ensures we won't truncate clog before those
1623  * databases have been scanned and cleaned up. (We will issue the
1624  * "already wrapped" warning if appropriate, though.)
1625  */
1626  if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1627  MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1628  bogus = true;
1629 
1630  if (TransactionIdPrecedes(nextXID, datfrozenxid))
1631  frozenAlreadyWrapped = true;
1632  else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1633  {
1634  frozenXID = datfrozenxid;
1635  oldestxid_datoid = dbform->oid;
1636  }
1637 
1638  if (MultiXactIdPrecedes(datminmxid, minMulti))
1639  {
1640  minMulti = datminmxid;
1641  minmulti_datoid = dbform->oid;
1642  }
1643  }
1644 
1645  table_endscan(scan);
1646 
1647  table_close(relation, AccessShareLock);
1648 
1649  /*
1650  * Do not truncate CLOG if we seem to have suffered wraparound already;
1651  * the computed minimum XID might be bogus. This case should now be
1652  * impossible due to the defenses in GetNewTransactionId, but we keep the
1653  * test anyway.
1654  */
1655  if (frozenAlreadyWrapped)
1656  {
1657  ereport(WARNING,
1658  (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1659  errdetail("You might have already suffered transaction-wraparound data loss.")));
1660  return;
1661  }
1662 
1663  /* chicken out if data is bogus in any other way */
1664  if (bogus)
1665  return;
1666 
1667  /*
1668  * Advance the oldest value for commit timestamps before truncating, so
1669  * that if a user requests a timestamp for a transaction we're truncating
1670  * away right after this point, they get NULL instead of an ugly "file not
1671  * found" error from slru.c. This doesn't matter for xact/multixact
1672  * because they are not subject to arbitrary lookups from users.
1673  */
1674  AdvanceOldestCommitTsXid(frozenXID);
1675 
1676  /*
1677  * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1678  */
1679  TruncateCLOG(frozenXID, oldestxid_datoid);
1680  TruncateCommitTs(frozenXID);
1681  TruncateMultiXact(minMulti, minmulti_datoid);
1682 
1683  /*
1684  * Update the wrap limit for GetNewTransactionId and creation of new
1685  * MultiXactIds. Note: these functions will also signal the postmaster
1686  * for an(other) autovac cycle if needed. XXX should we avoid possibly
1687  * signaling twice?
1688  */
1689  SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1690  SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1691 
1692  LWLockRelease(WrapLimitsVacuumLock);
1693 }
1694 
1695 
1696 /*
1697  * vacuum_rel() -- vacuum one heap relation
1698  *
1699  * relid identifies the relation to vacuum. If relation is supplied,
1700  * use the name therein for reporting any failure to open/lock the rel;
1701  * do not use it once we've successfully opened the rel, since it might
1702  * be stale.
1703  *
1704  * Returns true if it's okay to proceed with a requested ANALYZE
1705  * operation on this table.
1706  *
1707  * Doing one heap at a time incurs extra overhead, since we need to
1708  * check that the heap exists again just before we vacuum it. The
1709  * reason that we do this is so that vacuuming can be spread across
1710  * many small transactions. Otherwise, two-phase locking would require
1711  * us to lock the entire database during one pass of the vacuum cleaner.
1712  *
1713  * At entry and exit, we are not inside a transaction.
1714  */
1715 static bool
1716 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1717 {
1718  LOCKMODE lmode;
1719  Relation onerel;
1720  LockRelId onerelid;
1721  Oid toast_relid;
1722  Oid save_userid;
1723  int save_sec_context;
1724  int save_nestlevel;
1725 
1726  Assert(params != NULL);
1727 
1728  /* Begin a transaction for vacuuming this relation */
1730 
1731  if (!(params->options & VACOPT_FULL))
1732  {
1733  /*
1734  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1735  * other concurrent VACUUMs know that they can ignore this one while
1736  * determining their OldestXmin. (The reason we don't set it during a
1737  * full VACUUM is exactly that we may have to run user-defined
1738  * functions for functional indexes, and we want to make sure that if
1739  * they use the snapshot set above, any tuples it requires can't get
1740  * removed from other tables. An index function that depends on the
1741  * contents of other tables is arguably broken, but we won't break it
1742  * here by violating transaction semantics.)
1743  *
1744  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1745  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1746  * in an emergency.
1747  *
1748  * Note: these flags remain set until CommitTransaction or
1749  * AbortTransaction. We don't want to clear them until we reset
1750  * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
1751  * might appear to go backwards, which is probably Not Good. (We also
1752  * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
1753  * xmin doesn't become visible ahead of setting the flag.)
1754  */
1755  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1757  if (params->is_wraparound)
1760  LWLockRelease(ProcArrayLock);
1761  }
1762 
1763  /*
1764  * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
1765  * cutoff xids in local memory wrapping around, and to have updated xmin
1766  * horizons.
1767  */
1769 
1770  /*
1771  * Check for user-requested abort. Note we want this to be inside a
1772  * transaction, so xact.c doesn't issue useless WARNING.
1773  */
1775 
1776  /*
1777  * Determine the type of lock we want --- hard exclusive lock for a FULL
1778  * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1779  * way, we can be sure that no other backend is vacuuming the same table.
1780  */
1781  lmode = (params->options & VACOPT_FULL) ?
1783 
1784  /* open the relation and get the appropriate lock on it */
1785  onerel = vacuum_open_relation(relid, relation, params->options,
1786  params->log_min_duration >= 0, lmode);
1787 
1788  /* leave if relation could not be opened or locked */
1789  if (!onerel)
1790  {
1793  return false;
1794  }
1795 
1796  /*
1797  * Check if relation needs to be skipped based on ownership. This check
1798  * happens also when building the relation list to vacuum for a manual
1799  * operation, and needs to be done additionally here as VACUUM could
1800  * happen across multiple transactions where relation ownership could have
1801  * changed in-between. Make sure to only generate logs for VACUUM in this
1802  * case.
1803  */
1805  onerel->rd_rel,
1806  params->options & VACOPT_VACUUM))
1807  {
1808  relation_close(onerel, lmode);
1811  return false;
1812  }
1813 
1814  /*
1815  * Check that it's of a vacuumable relkind.
1816  */
1817  if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1818  onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1819  onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1820  onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1821  {
1822  ereport(WARNING,
1823  (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1824  RelationGetRelationName(onerel))));
1825  relation_close(onerel, lmode);
1828  return false;
1829  }
1830 
1831  /*
1832  * Silently ignore tables that are temp tables of other backends ---
1833  * trying to vacuum these will lead to great unhappiness, since their
1834  * contents are probably not up-to-date on disk. (We don't throw a
1835  * warning here; it would just lead to chatter during a database-wide
1836  * VACUUM.)
1837  */
1838  if (RELATION_IS_OTHER_TEMP(onerel))
1839  {
1840  relation_close(onerel, lmode);
1843  return false;
1844  }
1845 
1846  /*
1847  * Silently ignore partitioned tables as there is no work to be done. The
1848  * useful work is on their child partitions, which have been queued up for
1849  * us separately.
1850  */
1851  if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1852  {
1853  relation_close(onerel, lmode);
1856  /* It's OK to proceed with ANALYZE on this table */
1857  return true;
1858  }
1859 
1860  /*
1861  * Get a session-level lock too. This will protect our access to the
1862  * relation across multiple transactions, so that we can vacuum the
1863  * relation's TOAST table (if any) secure in the knowledge that no one is
1864  * deleting the parent relation.
1865  *
1866  * NOTE: this cannot block, even if someone else is waiting for access,
1867  * because the lock manager knows that both lock requests are from the
1868  * same process.
1869  */
1870  onerelid = onerel->rd_lockInfo.lockRelId;
1871  LockRelationIdForSession(&onerelid, lmode);
1872 
1873  /* Set index cleanup option based on reloptions if not yet */
1874  if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1875  {
1876  if (onerel->rd_options == NULL ||
1877  ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
1879  else
1881  }
1882 
1883  /* Set truncate option based on reloptions if not yet */
1884  if (params->truncate == VACOPT_TERNARY_DEFAULT)
1885  {
1886  if (onerel->rd_options == NULL ||
1887  ((StdRdOptions *) onerel->rd_options)->vacuum_truncate)
1888  params->truncate = VACOPT_TERNARY_ENABLED;
1889  else
1891  }
1892 
1893  /*
1894  * Remember the relation's TOAST relation for later, if the caller asked
1895  * us to process it. In VACUUM FULL, though, the toast table is
1896  * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1897  */
1898  if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1899  toast_relid = onerel->rd_rel->reltoastrelid;
1900  else
1901  toast_relid = InvalidOid;
1902 
1903  /*
1904  * Switch to the table owner's userid, so that any index functions are run
1905  * as that user. Also lock down security-restricted operations and
1906  * arrange to make GUC variable changes local to this command. (This is
1907  * unnecessary, but harmless, for lazy VACUUM.)
1908  */
1909  GetUserIdAndSecContext(&save_userid, &save_sec_context);
1910  SetUserIdAndSecContext(onerel->rd_rel->relowner,
1911  save_sec_context | SECURITY_RESTRICTED_OPERATION);
1912  save_nestlevel = NewGUCNestLevel();
1913 
1914  /*
1915  * Do the actual work --- either FULL or "lazy" vacuum
1916  */
1917  if (params->options & VACOPT_FULL)
1918  {
1919  ClusterParams cluster_params = {0};
1920 
1921  /* close relation before vacuuming, but hold lock until commit */
1922  relation_close(onerel, NoLock);
1923  onerel = NULL;
1924 
1925  if ((params->options & VACOPT_VERBOSE) != 0)
1926  cluster_params.options |= CLUOPT_VERBOSE;
1927 
1928  /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1929  cluster_rel(relid, InvalidOid, &cluster_params);
1930  }
1931  else
1932  table_relation_vacuum(onerel, params, vac_strategy);
1933 
1934  /* Roll back any GUC changes executed by index functions */
1935  AtEOXact_GUC(false, save_nestlevel);
1936 
1937  /* Restore userid and security context */
1938  SetUserIdAndSecContext(save_userid, save_sec_context);
1939 
1940  /* all done with this class, but hold lock until commit */
1941  if (onerel)
1942  relation_close(onerel, NoLock);
1943 
1944  /*
1945  * Complete the transaction and free all temporary memory used.
1946  */
1949 
1950  /*
1951  * If the relation has a secondary toast rel, vacuum that too while we
1952  * still hold the session lock on the main table. Note however that
1953  * "analyze" will not get done on the toast table. This is good, because
1954  * the toaster always uses hardcoded index access and statistics are
1955  * totally unimportant for toast relations.
1956  */
1957  if (toast_relid != InvalidOid)
1958  vacuum_rel(toast_relid, NULL, params);
1959 
1960  /*
1961  * Now release the session-level lock on the main table.
1962  */
1963  UnlockRelationIdForSession(&onerelid, lmode);
1964 
1965  /* Report that we really did it. */
1966  return true;
1967 }
1968 
1969 
1970 /*
1971  * Open all the vacuumable indexes of the given relation, obtaining the
1972  * specified kind of lock on each. Return an array of Relation pointers for
1973  * the indexes into *Irel, and the number of indexes into *nindexes.
1974  *
1975  * We consider an index vacuumable if it is marked insertable (indisready).
1976  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1977  * execution, and what we have is too corrupt to be processable. We will
1978  * vacuum even if the index isn't indisvalid; this is important because in a
1979  * unique index, uniqueness checks will be performed anyway and had better not
1980  * hit dangling index pointers.
1981  */
1982 void
1984  int *nindexes, Relation **Irel)
1985 {
1986  List *indexoidlist;
1987  ListCell *indexoidscan;
1988  int i;
1989 
1990  Assert(lockmode != NoLock);
1991 
1992  indexoidlist = RelationGetIndexList(relation);
1993 
1994  /* allocate enough memory for all indexes */
1995  i = list_length(indexoidlist);
1996 
1997  if (i > 0)
1998  *Irel = (Relation *) palloc(i * sizeof(Relation));
1999  else
2000  *Irel = NULL;
2001 
2002  /* collect just the ready indexes */
2003  i = 0;
2004  foreach(indexoidscan, indexoidlist)
2005  {
2006  Oid indexoid = lfirst_oid(indexoidscan);
2007  Relation indrel;
2008 
2009  indrel = index_open(indexoid, lockmode);
2010  if (indrel->rd_index->indisready)
2011  (*Irel)[i++] = indrel;
2012  else
2013  index_close(indrel, lockmode);
2014  }
2015 
2016  *nindexes = i;
2017 
2018  list_free(indexoidlist);
2019 }
2020 
2021 /*
2022  * Release the resources acquired by vac_open_indexes. Optionally release
2023  * the locks (say NoLock to keep 'em).
2024  */
2025 void
2026 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2027 {
2028  if (Irel == NULL)
2029  return;
2030 
2031  while (nindexes--)
2032  {
2033  Relation ind = Irel[nindexes];
2034 
2035  index_close(ind, lockmode);
2036  }
2037  pfree(Irel);
2038 }
2039 
2040 /*
2041  * vacuum_delay_point --- check for interrupts and cost-based delay.
2042  *
2043  * This should be called in each major loop of VACUUM processing,
2044  * typically once per page processed.
2045  */
2046 void
2048 {
2049  double msec = 0;
2050 
2051  /* Always check for interrupts */
2053 
2055  return;
2056 
2057  /*
2058  * For parallel vacuum, the delay is computed based on the shared cost
2059  * balance. See compute_parallel_delay.
2060  */
2061  if (VacuumSharedCostBalance != NULL)
2062  msec = compute_parallel_delay();
2063  else if (VacuumCostBalance >= VacuumCostLimit)
2065 
2066  /* Nap if appropriate */
2067  if (msec > 0)
2068  {
2069  if (msec > VacuumCostDelay * 4)
2070  msec = VacuumCostDelay * 4;
2071 
2073  pg_usleep((long) (msec * 1000));
2075 
2076  VacuumCostBalance = 0;
2077 
2078  /* update balance values for workers */
2080 
2081  /* Might have gotten an interrupt while sleeping */
2083  }
2084 }
2085 
2086 /*
2087  * Computes the vacuum delay for parallel workers.
2088  *
2089  * The basic idea of a cost-based delay for parallel vacuum is to allow each
2090  * worker to sleep in proportion to the share of work it's done. We achieve this
2091  * by allowing all parallel vacuum workers including the leader process to
2092  * have a shared view of cost related parameters (mainly VacuumCostBalance).
2093  * We allow each worker to update it as and when it has incurred any cost and
2094  * then based on that decide whether it needs to sleep. We compute the time
2095  * to sleep for a worker based on the cost it has incurred
2096  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2097  * that amount. This avoids putting to sleep those workers which have done less
2098  * I/O than other workers and therefore ensure that workers
2099  * which are doing more I/O got throttled more.
2100  *
2101  * We allow a worker to sleep only if it has performed I/O above a certain
2102  * threshold, which is calculated based on the number of active workers
2103  * (VacuumActiveNWorkers), and the overall cost balance is more than
2104  * VacuumCostLimit set by the system. Testing reveals that we achieve
2105  * the required throttling if we force a worker that has done more than 50%
2106  * of its share of work to sleep.
2107  */
2108 static double
2110 {
2111  double msec = 0;
2112  uint32 shared_balance;
2113  int nworkers;
2114 
2115  /* Parallel vacuum must be active */
2116  Assert(VacuumSharedCostBalance);
2117 
2118  nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2119 
2120  /* At least count itself */
2121  Assert(nworkers >= 1);
2122 
2123  /* Update the shared cost balance value atomically */
2124  shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2125 
2126  /* Compute the total local balance for the current worker */
2128 
2129  if ((shared_balance >= VacuumCostLimit) &&
2130  (VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers)))
2131  {
2132  /* Compute sleep time based on the local cost balance */
2134  pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2136  }
2137 
2138  /*
2139  * Reset the local balance as we accumulated it into the shared value.
2140  */
2141  VacuumCostBalance = 0;
2142 
2143  return msec;
2144 }
2145 
2146 /*
2147  * A wrapper function of defGetBoolean().
2148  *
2149  * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
2150  * instead of true and false.
2151  */
2152 static VacOptTernaryValue
2154 {
2156 }
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:542
HeapTuple heap_copytuple(HeapTuple tuple)
Definition: heaptuple.c:680
#define NIL
Definition: pg_list.h:65
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:151
void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy)
Definition: analyze.c:119
static double compute_parallel_delay(void)
Definition: vacuum.c:2109
int multixact_freeze_table_age
Definition: vacuum.h:213
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2026
LockRelId lockRelId
Definition: rel.h:44
void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel)
Definition: vacuum.c:272
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
int64 VacuumPageMiss
Definition: globals.c:146
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
int errhint(const char *fmt,...)
Definition: elog.c:1162
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:593
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:74
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1141
int VacuumCostBalance
Definition: globals.c:149
int vacuum_multixact_freeze_table_age
Definition: vacuum.c:64
void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
Definition: clog.c:879
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:185
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
Definition: vacuum.c:1716
RangeVar * relation
Definition: parsenodes.h:3251
uint32 TransactionId
Definition: c.h:575
#define SECURITY_RESTRICTED_OPERATION
Definition: miscadmin.h:300
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
void vac_update_datfrozenxid(void)
Definition: vacuum.c:1351
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:588
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:476
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:199
FormData_pg_database * Form_pg_database
Definition: pg_database.h:81
PGPROC * MyProc
Definition: proc.c:68
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
VacuumRelation * makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols)
Definition: makefuncs.c:809
#define VACOPT_ANALYZE
Definition: vacuum.h:179
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
void LockDatabaseFrozenIds(LOCKMODE lockmode)
Definition: lmgr.c:469
void CommitTransactionCommand(void)
Definition: xact.c:2948
int64 VacuumPageHit
Definition: globals.c:145
#define Min(x, y)
Definition: c.h:974
bool is_vacuumcmd
Definition: parsenodes.h:3238
#define PROC_VACUUM_FOR_WRAPAROUND
Definition: proc.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static void table_relation_vacuum(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:1588
#define AccessShareLock
Definition: lockdefs.h:36
static BufferAccessStrategy vac_strategy
Definition: vacuum.c:69
int32 defGetInt32(DefElem *def)
Definition: define.c:166
struct cursor * cur
Definition: ecpg.c:28
List * list_concat(List *list1, const List *list2)
Definition: list.c:530
int errcode(int sqlerrcode)
Definition: elog.c:704
PROC_HDR * ProcGlobal
Definition: proc.c:80
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:931
int64 VacuumPageDirty
Definition: globals.c:147
#define CLUOPT_VERBOSE
Definition: cluster.h:24
uint8 statusFlags
Definition: proc.h:187
uint32 BlockNumber
Definition: block.h:31
VacOptTernaryValue
Definition: vacuum.h:193
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:110
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
NameData relname
Definition: pg_class.h:38
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:698
void AdvanceOldestCommitTsXid(TransactionId oldestXact)
Definition: commit_ts.c:920
int freeze_table_age
Definition: vacuum.h:210
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:381
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1672
signed int int32
Definition: c.h:417
static List * expand_vacuum_rel(VacuumRelation *vrel, int options)
Definition: vacuum.c:719
MemoryContext PortalContext
Definition: mcxt.c:53
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1808
bool ForceTransactionIdLimitUpdate(void)
Definition: varsup.c:490
char * relname
Definition: primnodes.h:68
void pg_usleep(long microsec)
Definition: signal.c:53
bool defGetBoolean(DefElem *def)
Definition: define.c:111
Form_pg_index rd_index
Definition: rel.h:175
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:500
bits32 options
Definition: vacuum.h:208
void pfree(void *pointer)
Definition: mcxt.c:1057
#define PROC_IN_VACUUM
Definition: proc.h:55
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1913
#define FirstNormalTransactionId
Definition: transam.h:34
void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:382
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:45
Definition: rel.h:36
int VacuumCostLimit
Definition: globals.c:142
void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:369
int autovacuum_freeze_max_age
Definition: autovacuum.c:124
int freeze_min_age
Definition: vacuum.h:209
int vacuum_multixact_freeze_min_age
Definition: vacuum.c:63
#define VACOPT_SKIPTOAST
Definition: vacuum.h:184
TriggerDesc * trigdesc
Definition: rel.h:116
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
bool is_wraparound
Definition: vacuum.h:215
#define lfirst_node(type, lc)
Definition: pg_list.h:172
#define NoLock
Definition: lockdefs.h:34
LockInfoData rd_lockInfo
Definition: rel.h:113
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1304
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:581
int location
Definition: parsenodes.h:736
#define RowExclusiveLock
Definition: lockdefs.h:38
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition: guc.c:6008
int errdetail(const char *fmt,...)
Definition: elog.c:1048
static MemoryContext vac_context
Definition: vacuum.c:68
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3381
#define RelationGetRelationName(relation)
Definition: rel.h:491
void cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
Definition: cluster.c:277
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:236
unsigned int uint32
Definition: c.h:429
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:798
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1512
#define FirstMultiXactId
Definition: multixact.h:25
#define VACOPT_FREEZE
Definition: vacuum.h:181
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3394
void pgstat_vacuum_stat(void)
Definition: pgstat.c:1091
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2825
bool pg_database_ownercheck(Oid db_oid, Oid roleid)
Definition: aclchk.c:5108
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TruncateCommitTs(TransactionId oldestXact)
Definition: commit_ts.c:867
VacOptTernaryValue index_cleanup
Definition: vacuum.h:219
Node * arg
Definition: parsenodes.h:734
List * lappend(List *list, void *datum)
Definition: list.c:336
#define DatabaseOidIndexId
Definition: pg_database.h:90
bool IsInTransactionBlock(bool isTopLevel)
Definition: xact.c:3494
static int verbose
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1983
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1127
float float4
Definition: c.h:552
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
static int elevel
Definition: vacuumlazy.c:333
#define PG_FINALLY()
Definition: elog.h:326
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2503
void CommandCounterIncrement(void)
Definition: xact.c:1021
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
Oid MyDatabaseId
Definition: globals.c:86
bits32 options
Definition: cluster.h:29
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2210
#define InvalidOid
Definition: postgres_ext.h:36
VacOptTernaryValue truncate
Definition: vacuum.h:221
TransactionId datfrozenxid
Definition: pg_database.h:62
#define ereport(elevel,...)
Definition: elog.h:155
TransactionId MultiXactId
Definition: c.h:585
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
uint32 bits32
Definition: c.h:438
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
Definition: vacuum.c:98
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:792
#define VACOPT_VACUUM
Definition: vacuum.h:178
#define lfirst(lc)
Definition: pg_list.h:169
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:594
#define VACOPT_FULL
Definition: vacuum.h:182
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4690
void StartTransactionCommand(void)
Definition: xact.c:2847
RuleLock * rd_rules
Definition: rel.h:114
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1488
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:345
int nworkers
Definition: vacuum.h:229
static int list_length(const List *l)
Definition: pg_list.h:149
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:111
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1206
int vacuum_freeze_min_age
Definition: vacuum.c:61
TransactionId datminmxid
Definition: pg_database.h:65
int log_min_duration
Definition: vacuum.h:216
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
#define VACOPT_VERBOSE
Definition: vacuum.h:180
static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti)
Definition: vacuum.c:1565
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4526
int vacuum_freeze_table_age
Definition: vacuum.c:62
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:308
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:962
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:175
#define AccessExclusiveLock
Definition: lockdefs.h:45
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
Definition: pg_inherits.c:165
int NewGUCNestLevel(void)
Definition: guc.c:5994
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:915
void heap_inplace_update(Relation relation, HeapTuple tuple)
Definition: heapam.c:5858
double VacuumCostDelay
Definition: globals.c:143
#define VACOPT_SKIP_LOCKED
Definition: vacuum.h:183
List * options
Definition: parsenodes.h:3236
void list_free(List *list)
Definition: list.c:1391
#define elog(elevel,...)
Definition: elog.h:228
int i
int pgxactoff
Definition: proc.h:148
#define NameStr(name)
Definition: c.h:669
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
void AutoVacuumUpdateDelay(void)
Definition: autovacuum.c:1779
FormData_pg_database
Definition: pg_database.h:74
char * defname
Definition: parsenodes.h:733
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:100
Relation vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options, bool verbose, LOCKMODE lmode)
Definition: vacuum.c:607
bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options)
Definition: vacuum.c:533
uint8 * statusFlags
Definition: proc.h:331
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define MAX_PARALLEL_WORKER_LIMIT
void vacuum_delay_point(void)
Definition: vacuum.c:2047
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define PG_TRY()
Definition: elog.h:309
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition: multixact.c:2941
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1224
Definition: pg_list.h:50
#define RelationGetRelid(relation)
Definition: rel.h:457
int multixact_freeze_min_age
Definition: vacuum.h:211
static long analyze(struct nfa *nfa)
Definition: regc_nfa.c:2816
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
#define PG_END_TRY()
Definition: elog.h:334
bytea * rd_options
Definition: rel.h:158
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define lfirst_oid(lc)
Definition: pg_list.h:171
bool VacuumCostActive
Definition: globals.c:150
static List * get_all_vacuum_rels(int options)
Definition: vacuum.c:858
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:723
List * rels
Definition: parsenodes.h:3237
static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def)
Definition: vacuum.c:2153