PostgreSQL Source Code  git master
vacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  * The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  * src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/xact.h"
34 #include "catalog/namespace.h"
35 #include "catalog/pg_database.h"
36 #include "catalog/pg_inherits.h"
37 #include "catalog/pg_namespace.h"
38 #include "commands/cluster.h"
39 #include "commands/defrem.h"
40 #include "commands/vacuum.h"
41 #include "miscadmin.h"
42 #include "nodes/makefuncs.h"
43 #include "pgstat.h"
44 #include "postmaster/autovacuum.h"
46 #include "storage/bufmgr.h"
47 #include "storage/lmgr.h"
48 #include "storage/proc.h"
49 #include "storage/procarray.h"
50 #include "utils/acl.h"
51 #include "utils/fmgroids.h"
52 #include "utils/guc.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
56 
57 
58 /*
59  * GUC parameters
60  */
65 
66 
67 /* A few variables that don't seem worth passing around as parameters */
/*
 * Memory context for data that must outlive the individual transactions
 * started and committed while vacuum() runs.  vacuum() creates it as a child
 * of PortalContext and deletes it (resetting this pointer to NULL) just
 * before returning.
 */
68 static MemoryContext vac_context = NULL;
/*
 * NOTE(review): embedded source line 69 is missing from this listing.
 * vacuum() assigns and reads vac_strategy, whose declaration is not visible
 * here -- restore it from the upstream file.
 */
70 
71 
72 /*
73  * Variables for cost-based parallel vacuum. See comments atop
74  * compute_parallel_delay to understand how it works.
75  */
79 
80 /* non-export function prototypes */
/* Resolve one VacuumRelation (filling in its OID, adding partitions). */
81 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
/* Build the list of all vacuumable relations in the current database. */
82 static List *get_all_vacuum_rels(int options);
/*
 * NOTE(review): the definitions of the three functions below are outside the
 * visible part of this listing, so they are not described here.
 */
83 static void vac_truncate_clog(TransactionId frozenXID,
84  MultiXactId minMulti,
85  TransactionId lastSaneFrozenXid,
86  MultiXactId lastSaneMinMulti);
87 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
88 static double compute_parallel_delay(void);
/* NOTE(review): embedded source line 89 is missing from this listing. */
90 
91 /*
92  * Primary entry point for manual VACUUM and ANALYZE commands
93  *
94  * This is mainly a preparation wrapper for the real operations that will
95  * happen in vacuum().
96  */
/*
 * ExecVacuum
 *
 * pstate: parse state; used here only to attach an error cursor position
 *		(parser_errposition) to option errors.
 * vacstmt: the VACUUM/ANALYZE statement; supplies the option list, the
 *		is_vacuumcmd flag and the list of target relations.
 * isTopLevel: passed through unchanged to vacuum().
 *
 * Translates the statement's options into a VacuumParams struct, rejects
 * invalid combinations with ERROR, and hands off to vacuum().
 */
97 void
98 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
99 {
100  VacuumParams params;
101  bool verbose = false;
102  bool skip_locked = false;
103  bool analyze = false;
104  bool freeze = false;
105  bool full = false;
106  bool disable_page_skipping = false;
107  ListCell *lc;
108 
109  /* Set default value */
 /*
  * NOTE(review): embedded source lines 110-111 are missing from this
  * listing, so the default assignments announced by the comment above are
  * not visible.  In the visible code params.truncate is assigned only inside
  * the option loop, and params.index_cleanup is never assigned.
  */
112 
113  /*
  * By default parallel vacuum is enabled: nworkers = 0 is the "enabled"
  * default, while -1 is stored below when the user asks for PARALLEL 0.
  */
114  params.nworkers = 0;
115 
116  /* Parse options list */
117  foreach(lc, vacstmt->options)
118  {
119  DefElem *opt = (DefElem *) lfirst(lc);
120 
121  /* Parse common options for VACUUM and ANALYZE */
122  if (strcmp(opt->defname, "verbose") == 0)
123  verbose = defGetBoolean(opt);
124  else if (strcmp(opt->defname, "skip_locked") == 0)
125  skip_locked = defGetBoolean(opt);
126  else if (!vacstmt->is_vacuumcmd)
127  ereport(ERROR,
128  (errcode(ERRCODE_SYNTAX_ERROR),
129  errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
130  parser_errposition(pstate, opt->location)));
131 
132  /* Parse options available on VACUUM */
133  else if (strcmp(opt->defname, "analyze") == 0)
134  analyze = defGetBoolean(opt);
135  else if (strcmp(opt->defname, "freeze") == 0)
136  freeze = defGetBoolean(opt);
137  else if (strcmp(opt->defname, "full") == 0)
138  full = defGetBoolean(opt);
139  else if (strcmp(opt->defname, "disable_page_skipping") == 0)
140  disable_page_skipping = defGetBoolean(opt);
141  else if (strcmp(opt->defname, "index_cleanup") == 0)
 /*
  * NOTE(review): embedded source line 142 (the statement executed for
  * "index_cleanup") is missing from this listing.  As shown, the "truncate"
  * test below would become the body of this branch -- restore the line from
  * upstream.
  */
143  else if (strcmp(opt->defname, "truncate") == 0)
144  params.truncate = get_vacopt_ternary_value(opt);
145  else if (strcmp(opt->defname, "parallel") == 0)
146  {
 /* PARALLEL given without a value is rejected outright. */
147  if (opt->arg == NULL)
148  {
149  ereport(ERROR,
150  (errcode(ERRCODE_SYNTAX_ERROR),
151  errmsg("parallel option requires a value between 0 and %d",
 /*
  * NOTE(review): embedded source line 152 (the argument for %d and the
  * closing of errmsg) is missing from this listing.
  */
153  parser_errposition(pstate, opt->location)));
154  }
155  else
156  {
157  int nworkers;
158 
159  nworkers = defGetInt32(opt);
160  if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
161  ereport(ERROR,
162  (errcode(ERRCODE_SYNTAX_ERROR),
163  errmsg("parallel vacuum degree must be between 0 and %d",
164  MAX_PARALLEL_WORKER_LIMIT),
165  parser_errposition(pstate, opt->location)));
166 
167  /*
168  * Disable parallel vacuum, if user has specified parallel
169  * degree as zero.
170  */
171  if (nworkers == 0)
172  params.nworkers = -1;
173  else
174  params.nworkers = nworkers;
175  }
176  }
177  else
178  ereport(ERROR,
179  (errcode(ERRCODE_SYNTAX_ERROR),
180  errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
181  parser_errposition(pstate, opt->location)));
182  }
183 
184  /*
  * Set vacuum options: the command type contributes VACOPT_VACUUM or
  * VACOPT_ANALYZE, and each boolean parsed above contributes its flag bit.
  */
185  params.options =
186  (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
187  (verbose ? VACOPT_VERBOSE : 0) |
188  (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
189  (analyze ? VACOPT_ANALYZE : 0) |
190  (freeze ? VACOPT_FREEZE : 0) |
191  (full ? VACOPT_FULL : 0) |
192  (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
193 
194  /* sanity checks on options */
195  Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
196  Assert((params.options & VACOPT_VACUUM) ||
197  !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
198  Assert(!(params.options & VACOPT_SKIPTOAST));
199 
 /* Only an explicit positive worker count conflicts with FULL. */
200  if ((params.options & VACOPT_FULL) && params.nworkers > 0)
201  ereport(ERROR,
202  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
203  errmsg("VACUUM FULL cannot be performed in parallel")));
204 
205  /*
206  * Make sure VACOPT_ANALYZE is specified if any column lists are present.
207  */
208  if (!(params.options & VACOPT_ANALYZE))
209  {
 /* This inner lc shadows the function-level lc declared above. */
210  ListCell *lc;
211 
212  foreach(lc, vacstmt->rels)
213  {
 /*
  * NOTE(review): embedded source line 214 is missing from this listing; it
  * must declare vrel (used just below) from the current list cell.
  */
215 
216  if (vrel->va_cols != NIL)
217  ereport(ERROR,
218  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
219  errmsg("ANALYZE option must be specified when a column list is provided")));
220  }
221  }
222 
223  /*
224  * All freeze ages are zero if the FREEZE option is given; otherwise pass
225  * them as -1 which means to use the default values.
226  */
227  if (params.options & VACOPT_FREEZE)
228  {
229  params.freeze_min_age = 0;
230  params.freeze_table_age = 0;
231  params.multixact_freeze_min_age = 0;
232  params.multixact_freeze_table_age = 0;
233  }
234  else
235  {
236  params.freeze_min_age = -1;
237  params.freeze_table_age = -1;
238  params.multixact_freeze_min_age = -1;
239  params.multixact_freeze_table_age = -1;
240  }
241 
242  /* user-invoked vacuum is never "for wraparound" */
243  params.is_wraparound = false;
244 
245  /* user-invoked vacuum never uses this parameter */
246  params.log_min_duration = -1;
247 
248  /* Now go through the common routine; NULL = let vacuum() make a buffer strategy */
249  vacuum(vacstmt->rels, &params, NULL, isTopLevel);
250 }
251 
252 /*
253  * Internal entry point for VACUUM and ANALYZE commands.
254  *
255  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
256  * we process all relevant tables in the database. For each VacuumRelation,
257  * if a valid OID is supplied, the table with that OID is what to process;
258  * otherwise, the VacuumRelation's RangeVar indicates what to process.
259  *
260  * params contains a set of parameters that can be used to customize the
261  * behavior.
262  *
263  * bstrategy is normally given as NULL, but in autovacuum it can be passed
264  * in to use the same buffer strategy object across multiple vacuum() calls.
265  *
266  * isTopLevel should be passed down from ProcessUtility.
267  *
268  * It is the caller's responsibility that all parameters are allocated in a
269  * memory context that will not disappear at transaction commit.
270  */
/*
 * NOTE(review): this listing of vacuum() has dropped a number of source
 * lines (see the individual notes below); the function cannot be compiled as
 * shown and the missing statements must be restored from upstream.
 */
271 void
272 vacuum(List *relations, VacuumParams *params,
273  BufferAccessStrategy bstrategy, bool isTopLevel)
274 {
 /*
  * Re-entrancy guard: set to true inside the PG_TRY block below and reset in
  * PG_FINALLY, so a nested call is rejected with an ERROR.
  */
275  static bool in_vacuum = false;
276 
277  const char *stmttype;
278  volatile bool in_outer_xact,
279  use_own_xacts;
280 
281  Assert(params != NULL);
282 
 /* Command name used in the error messages raised below. */
283  stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
284 
285  /*
286  * We cannot run VACUUM inside a user transaction block; if we were inside
287  * a transaction, then our commit- and start-transaction-command calls
288  * would not have the intended effect! There are numerous other subtle
289  * dependencies on this, too.
290  *
291  * ANALYZE (without VACUUM) can run either way.
292  */
293  if (params->options & VACOPT_VACUUM)
294  {
295  PreventInTransactionBlock(isTopLevel, stmttype);
296  in_outer_xact = false;
297  }
298  else
299  in_outer_xact = IsInTransactionBlock(isTopLevel);
300 
301  /*
302  * Due to static variables vac_context, anl_context and vac_strategy,
303  * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
304  * calls a hostile index expression that itself calls ANALYZE.
305  */
306  if (in_vacuum)
307  ereport(ERROR,
308  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
309  errmsg("%s cannot be executed from VACUUM or ANALYZE",
310  stmttype)));
311 
312  /*
313  * Sanity check DISABLE_PAGE_SKIPPING option.
314  */
315  if ((params->options & VACOPT_FULL) != 0 &&
316  (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
317  ereport(ERROR,
318  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
319  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
320 
321  /*
322  * Send info about dead objects to the statistics collector, unless we are
323  * in autovacuum --- autovacuum.c does this for itself.
324  */
325  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 /*
  * NOTE(review): embedded source line 326 (the statement guarded by the
  * "if" above) is missing from this listing.
  */
327 
328  /*
329  * Create special memory context for cross-transaction storage.
330  *
331  * Since it is a child of PortalContext, it will go away eventually even
332  * if we suffer an error; there's no need for special abort cleanup logic.
333  */
334  vac_context = AllocSetContextCreate(PortalContext,
335  "Vacuum",
 /*
  * NOTE(review): embedded source line 336 (the remaining arguments and the
  * end of this call) is missing from this listing.
  */
337 
338  /*
339  * If caller didn't give us a buffer strategy object, make one in the
340  * cross-transaction memory context.
341  */
342  if (bstrategy == NULL)
343  {
344  MemoryContext old_context = MemoryContextSwitchTo(vac_context);
345 
346  bstrategy = GetAccessStrategy(BAS_VACUUM);
347  MemoryContextSwitchTo(old_context);
348  }
349  vac_strategy = bstrategy;
350 
351  /*
352  * Build list of relation(s) to process, putting any new data in
353  * vac_context for safekeeping.
354  */
355  if (relations != NIL)
356  {
357  List *newrels = NIL;
358  ListCell *lc;
359 
360  foreach(lc, relations)
361  {
 /*
  * NOTE(review): embedded source line 362 is missing from this listing; it
  * must declare vrel (passed to expand_vacuum_rel below).
  */
363  List *sublist;
364  MemoryContext old_context;
365 
366  sublist = expand_vacuum_rel(vrel, params->options);
 /* Concatenate while in vac_context so the combined list lives there. */
367  old_context = MemoryContextSwitchTo(vac_context);
368  newrels = list_concat(newrels, sublist);
369  MemoryContextSwitchTo(old_context);
370  }
371  relations = newrels;
372  }
373  else
374  relations = get_all_vacuum_rels(params->options);
375 
376  /*
377  * Decide whether we need to start/commit our own transactions.
378  *
379  * For VACUUM (with or without ANALYZE): always do so, so that we can
380  * release locks as soon as possible. (We could possibly use the outer
381  * transaction for a one-table VACUUM, but handling TOAST tables would be
382  * problematic.)
383  *
384  * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
385  * start/commit our own transactions. Also, there's no need to do so if
386  * only processing one relation. For multiple relations when not within a
387  * transaction block, and also in an autovacuum worker, use own
388  * transactions so we can release locks sooner.
389  */
390  if (params->options & VACOPT_VACUUM)
391  use_own_xacts = true;
392  else
393  {
394  Assert(params->options & VACOPT_ANALYZE);
 /*
  * NOTE(review): embedded source line 395 (the first "if" of this chain;
  * per the comment above, presumably the autovacuum-worker test) is missing
  * from this listing.
  */
396  use_own_xacts = true;
397  else if (in_outer_xact)
398  use_own_xacts = false;
399  else if (list_length(relations) > 1)
400  use_own_xacts = true;
401  else
402  use_own_xacts = false;
403  }
404 
405  /*
406  * vacuum_rel expects to be entered with no transaction active; it will
407  * start and commit its own transaction. But we are called by an SQL
408  * command, and so we are executing inside a transaction already. We
409  * commit the transaction started in PostgresMain() here, and start
410  * another one before exiting to match the commit waiting for us back in
411  * PostgresMain().
412  */
413  if (use_own_xacts)
414  {
415  Assert(!in_outer_xact);
416 
417  /* ActiveSnapshot is not set by autovacuum */
418  if (ActiveSnapshotSet())
 /*
  * NOTE(review): embedded source line 419 (the statement guarded by the
  * "if" above) is missing from this listing.
  */
420 
421  /* matches the StartTransaction in PostgresMain() */
 /*
  * NOTE(review): embedded source line 422 (the commit call this comment
  * refers to) is missing from this listing.
  */
423  }
424 
425  /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
426  PG_TRY();
427  {
428  ListCell *cur;
429 
430  in_vacuum = true;
 /* NOTE(review): embedded source line 431 is missing from this listing. */
432  VacuumCostBalance = 0;
433  VacuumPageHit = 0;
434  VacuumPageMiss = 0;
435  VacuumPageDirty = 0;
 /* NOTE(review): embedded source line 436 is missing from this listing. */
437  VacuumSharedCostBalance = NULL;
438  VacuumActiveNWorkers = NULL;
439 
440  /*
441  * Loop to process each selected relation.
442  */
443  foreach(cur, relations)
444  {
 /*
  * NOTE(review): embedded source line 445 is missing from this listing; it
  * must declare vrel (used throughout this loop body).
  */
446 
447  if (params->options & VACOPT_VACUUM)
448  {
 /* A false result from vacuum_rel() also skips the ANALYZE step. */
449  if (!vacuum_rel(vrel->oid, vrel->relation, params))
450  continue;
451  }
452 
453  if (params->options & VACOPT_ANALYZE)
454  {
455  /*
456  * If using separate xacts, start one for analyze. Otherwise,
457  * we can use the outer transaction.
458  */
459  if (use_own_xacts)
460  {
 /* NOTE(review): embedded source line 461 is missing from this listing. */
462  /* functions in indexes may want a snapshot set */
 /* NOTE(review): embedded source line 463 is missing from this listing. */
464  }
465 
466  analyze_rel(vrel->oid, vrel->relation, params,
467  vrel->va_cols, in_outer_xact, vac_strategy);
468 
469  if (use_own_xacts)
470  {
 /*
  * NOTE(review): embedded source lines 471-472 (the body of this branch)
  * are missing from this listing.
  */
473  }
474  else
475  {
476  /*
477  * If we're not using separate xacts, better separate the
478  * ANALYZE actions with CCIs. This avoids trouble if user
479  * says "ANALYZE t, t".
480  */
 /* NOTE(review): embedded source line 481 is missing from this listing. */
482  }
483  }
484  }
485  }
486  PG_FINALLY();
487  {
 /* Runs on both normal exit and error: drop the guard and cost flag. */
488  in_vacuum = false;
489  VacuumCostActive = false;
490  }
491  PG_END_TRY();
492 
493  /*
494  * Finish up processing.
495  */
496  if (use_own_xacts)
497  {
498  /* here, we are not in a transaction */
499 
500  /*
501  * This matches the CommitTransaction waiting for us in
502  * PostgresMain().
503  */
 /* NOTE(review): embedded source line 504 is missing from this listing. */
505  }
506 
507  if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
508  {
509  /*
510  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
511  * (autovacuum.c does this for itself.)
512  */
 /* NOTE(review): embedded source line 513 is missing from this listing. */
514  }
515 
516  /*
517  * Clean up working storage --- note we must do this after
518  * StartTransactionCommand, else we might be trying to delete the active
519  * context!
520  */
521  MemoryContextDelete(vac_context);
522  vac_context = NULL;
523 }
524 
525 /*
526  * Check if a given relation can be safely vacuumed or analyzed. If the
527  * user is not the relation owner, issue a WARNING log message and return
528  * false to let the caller decide what to do with this relation. This
529  * routine is used to decide if a relation can be processed for VACUUM or
530  * ANALYZE.
531  */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * (embedded source line 533) is missing from this listing.  The body reads
 * relid, reltuple and options, and the callers later in this file invoke it
 * as vacuum_is_relation_owner(relid, classForm, options).
 *
 * Returns true when the current user passes the ownership checks below;
 * otherwise a "skipping ..." message is built and false is returned.
 */
532 bool
534 {
535  char *relname;
536 
537  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
538 
539  /*
540  * Check permissions.
541  *
542  * We allow the user to vacuum or analyze a table if they are superuser, the
543  * table owner, or the database owner (but in the latter case, only if
544  * it's not a shared relation). pg_class_ownercheck includes the
545  * superuser case.
546  *
547  * Note we choose to treat permissions failure as a WARNING and keep
548  * trying to vacuum or analyze the rest of the DB --- is this appropriate?
549  */
550  if (pg_class_ownercheck(relid, GetUserId()) ||
551  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
552  return true;
553 
 /* Permission denied from here on; relname is used only in the messages. */
554  relname = NameStr(reltuple->relname);
555 
556  if ((options & VACOPT_VACUUM) != 0)
557  {
 /*
  * NOTE(review): in each of the three branches below, the line that opens
  * the report call (embedded source lines 559, 563 and 567) is missing from
  * this listing; only the errmsg arguments remain.
  */
558  if (reltuple->relisshared)
560  (errmsg("skipping \"%s\" --- only superuser can vacuum it",
561  relname)));
562  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
564  (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
565  relname)));
566  else
568  (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
569  relname)));
570 
571  /*
572  * For VACUUM ANALYZE, both logs could show up, but just generate
573  * information for VACUUM as that would be the first one to be
574  * processed.
575  */
576  return false;
577  }
578 
579  if ((options & VACOPT_ANALYZE) != 0)
580  {
 /*
  * NOTE(review): as above, the lines that open the report calls (embedded
  * source lines 582, 586 and 590) are missing from this listing.
  */
581  if (reltuple->relisshared)
583  (errmsg("skipping \"%s\" --- only superuser can analyze it",
584  relname)));
585  else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
587  (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
588  relname)));
589  else
591  (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
592  relname)));
593  }
594 
595  return false;
596 }
597 
598 
599 /*
600  * vacuum_open_relation
601  *
602  * This routine attempts to open and lock a relation that is going to be
603  * vacuumed or analyzed. If the relation cannot be opened or locked, NULL
604  * is returned and, where possible, a log message explains why.
605  */
/*
 * relid: OID of the relation to open.
 * relation: RangeVar used only to name the relation in log messages; pass
 *		NULL to suppress logging entirely.
 * options: VACOPT_* bits; must include VACOPT_VACUUM or VACOPT_ANALYZE.
 *		VACOPT_SKIP_LOCKED selects a non-blocking lock attempt.
 * verbose: requests LOG-level reporting in the case where WARNING is not
 *		used (see "Determine the log level" below).
 * lmode: lock mode to acquire on the relation.
 *
 * Returns the opened relation, or NULL if it could not be opened or locked.
 */
606 Relation
607 vacuum_open_relation(Oid relid, RangeVar *relation, int options,
608  bool verbose, LOCKMODE lmode)
609 {
610  Relation onerel;
 /* Stays true unless the non-blocking lock attempt below fails. */
611  bool rel_lock = true;
612  int elevel;
613 
614  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
615 
616  /*
617  * Open the relation and get the appropriate lock on it.
618  *
619  * There's a race condition here: the relation may have gone away since
620  * the last time we saw it. If so, we don't need to vacuum or analyze it.
621  *
622  * If we've been asked not to wait for the relation lock, acquire it first
623  * in non-blocking mode, before calling try_relation_open().
624  */
625  if (!(options & VACOPT_SKIP_LOCKED))
626  onerel = try_relation_open(relid, lmode);
627  else if (ConditionalLockRelationOid(relid, lmode))
628  onerel = try_relation_open(relid, NoLock);
629  else
630  {
631  onerel = NULL;
632  rel_lock = false;
633  }
634 
635  /* if relation is opened, leave */
636  if (onerel)
637  return onerel;
638 
639  /*
640  * The relation could not be opened, so emit a log message describing
641  * the situation, if possible.
642  *
643  * If the RangeVar is not defined, we do not have enough information to
644  * provide a meaningful log statement. Chances are that the caller has
645  * intentionally not provided this information so that this logging is
646  * skipped, anyway.
647  */
648  if (relation == NULL)
649  return NULL;
650 
651  /*
652  * Determine the log level.
653  *
654  * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
655  * statements in the permission checks; otherwise, only log if the caller
656  * so requested.
657  */
 /*
  * NOTE(review): embedded source line 658 (the "if" condition selecting the
  * WARNING case) is missing from this listing.
  */
659  elevel = WARNING;
660  else if (verbose)
661  elevel = LOG;
662  else
663  return NULL;
664 
665  if ((options & VACOPT_VACUUM) != 0)
666  {
 /* rel_lock distinguishes "lock not available" from "relation gone". */
667  if (!rel_lock)
668  ereport(elevel,
669  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
670  errmsg("skipping vacuum of \"%s\" --- lock not available",
671  relation->relname)));
672  else
673  ereport(elevel,
 /*
  * NOTE(review): embedded source line 674 (the start of the report
  * argument list) is missing from this listing.
  */
675  errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
676  relation->relname)));
677 
678  /*
679  * For VACUUM ANALYZE, both logs could show up, but just generate
680  * information for VACUUM as that would be the first one to be
681  * processed.
682  */
683  return NULL;
684  }
685 
686  if ((options & VACOPT_ANALYZE) != 0)
687  {
688  if (!rel_lock)
689  ereport(elevel,
690  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
691  errmsg("skipping analyze of \"%s\" --- lock not available",
692  relation->relname)));
693  else
694  ereport(elevel,
 /*
  * NOTE(review): embedded source line 695 (the start of the report
  * argument list) is missing from this listing.
  */
696  errmsg("skipping analyze of \"%s\" --- relation no longer exists",
697  relation->relname)));
698  }
699 
700  return NULL;
701 }
702 
703 
704 /*
705  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
706  * and optionally add VacuumRelations for partitions of the table.
707  *
708  * If a VacuumRelation does not have an OID supplied and is a partitioned
709  * table, an extra entry will be added to the output for each partition.
710  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
711  * it does not want us to expand partitioned tables.
712  *
713  * We take care not to modify the input data structure, but instead build
714  * new VacuumRelation(s) to return. (But note that they will reference
715  * unmodified parts of the input, eg column lists.) New data structures
716  * are made in vac_context.
717  */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * (embedded source line 719) is missing from this listing.  The prototype
 * near the top of the file reads
 *		static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
 * and the body uses exactly those two parameters.
 *
 * Returns a list of VacuumRelation nodes; the list is empty (NIL) when the
 * relation is skipped.
 */
718 static List *
720 {
721  List *vacrels = NIL;
722  MemoryContext oldcontext;
723 
724  /* If caller supplied OID, there's nothing we need do here. */
725  if (OidIsValid(vrel->oid))
726  {
 /* The input node itself is appended; only the list cell is new. */
727  oldcontext = MemoryContextSwitchTo(vac_context);
728  vacrels = lappend(vacrels, vrel);
729  MemoryContextSwitchTo(oldcontext);
730  }
731  else
732  {
733  /* Process a specific relation, and possibly partitions thereof */
734  Oid relid;
735  HeapTuple tuple;
736  Form_pg_class classForm;
737  bool include_parts;
738  int rvr_opts;
739 
740  /*
741  * Since autovacuum workers supply OIDs when calling vacuum(), no
742  * autovacuum worker should reach this code.
743  */
 /*
  * NOTE(review): embedded source line 744 (presumably the assertion backing
  * the comment above) is missing from this listing.
  */
745 
746  /*
747  * We transiently take AccessShareLock to protect the syscache lookup
748  * below, as well as find_all_inheritors's expectation that the caller
749  * holds some lock on the starting relation.
750  */
751  rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
752  relid = RangeVarGetRelidExtended(vrel->relation,
 /*
  * NOTE(review): embedded source line 753 (the lock-mode argument;
  * AccessShareLock according to the comment above) is missing from this
  * listing.
  */
754  rvr_opts,
755  NULL, NULL);
756 
757  /*
758  * If the lock is unavailable, emit the same log statement that
759  * vacuum_rel() and analyze_rel() would.
760  */
761  if (!OidIsValid(relid))
762  {
 /*
  * NOTE(review): the lines that open the two report calls below (embedded
  * source lines 764 and 769) are missing from this listing.
  */
763  if (options & VACOPT_VACUUM)
765  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
766  errmsg("skipping vacuum of \"%s\" --- lock not available",
767  vrel->relation->relname)));
768  else
770  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
771  errmsg("skipping analyze of \"%s\" --- lock not available",
772  vrel->relation->relname)));
 /* vacrels is still NIL here, so the caller gets an empty list. */
773  return vacrels;
774  }
775 
776  /*
777  * To check whether the relation is a partitioned table and its
778  * ownership, fetch its syscache entry.
779  */
780  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
781  if (!HeapTupleIsValid(tuple))
782  elog(ERROR, "cache lookup failed for relation %u", relid);
783  classForm = (Form_pg_class) GETSTRUCT(tuple);
784 
785  /*
786  * Make a returnable VacuumRelation for this rel if user is a proper
787  * owner.
788  */
789  if (vacuum_is_relation_owner(relid, classForm, options))
790  {
791  oldcontext = MemoryContextSwitchTo(vac_context);
792  vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
793  relid,
794  vrel->va_cols));
795  MemoryContextSwitchTo(oldcontext);
796  }
797 
798 
 /* Read relkind before releasing the syscache entry classForm points into. */
799  include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
800  ReleaseSysCache(tuple);
801 
802  /*
803  * If it is, make relation list entries for its partitions. Note that
804  * the list returned by find_all_inheritors() includes the passed-in
805  * OID, so we have to skip that. There's no point in taking locks on
806  * the individual partitions yet, and doing so would just add
807  * unnecessary deadlock risk. For this last reason we do not check
808  * yet the ownership of the partitions, which get added to the list to
809  * process. Ownership will be checked later on anyway.
810  */
811  if (include_parts)
812  {
813  List *part_oids = find_all_inheritors(relid, NoLock, NULL);
814  ListCell *part_lc;
815 
816  foreach(part_lc, part_oids)
817  {
818  Oid part_oid = lfirst_oid(part_lc);
819 
820  if (part_oid == relid)
821  continue; /* ignore original table */
822 
823  /*
824  * We omit a RangeVar since it wouldn't be appropriate to
825  * complain about failure to open one of these relations
826  * later.
827  */
828  oldcontext = MemoryContextSwitchTo(vac_context);
829  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
830  part_oid,
831  vrel->va_cols));
832  MemoryContextSwitchTo(oldcontext);
833  }
834  }
835 
836  /*
837  * Release lock again. This means that by the time we actually try to
838  * process the table, it might be gone or renamed. In the former case
839  * we'll silently ignore it; in the latter case we'll process it
840  * anyway, but we must beware that the RangeVar doesn't necessarily
841  * identify it anymore. This isn't ideal, perhaps, but there's little
842  * practical alternative, since we're typically going to commit this
843  * transaction and begin a new one between now and then. Moreover,
844  * holding locks on multiple relations would create significant risk
845  * of deadlock.
846  */
 /*
  * NOTE(review): embedded source line 847 (the unlock call the comment
  * above describes) is missing from this listing.
  */
848  }
849 
850  return vacrels;
851 }
852 
853 /*
854  * Construct a list of VacuumRelations for all vacuumable rels in
855  * the current database. The list is built in vac_context.
856  */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * (embedded source line 858) is missing from this listing.  The prototype
 * near the top of the file reads
 *		static List *get_all_vacuum_rels(int options);
 * and the body uses that single parameter.
 *
 * Returns a list of VacuumRelation nodes, each carrying only a table OID
 * (no RangeVar and no column list).
 */
857 static List *
859 {
860  List *vacrels = NIL;
861  Relation pgclass;
862  TableScanDesc scan;
863  HeapTuple tuple;
864 
 /* Scan the whole pg_class catalog (no scan keys). */
865  pgclass = table_open(RelationRelationId, AccessShareLock);
866 
867  scan = table_beginscan_catalog(pgclass, 0, NULL);
868 
869  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
870  {
871  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
872  MemoryContext oldcontext;
873  Oid relid = classForm->oid;
874 
875  /*
  * check permissions of relation; note this runs before the relkind
  * filter below, so it is applied to every pg_class row scanned
  */
876  if (!vacuum_is_relation_owner(relid, classForm, options))
877  continue;
878 
879  /*
880  * We include partitioned tables here; depending on which operation is
881  * to be performed, caller will decide whether to process or ignore
882  * them.
883  */
884  if (classForm->relkind != RELKIND_RELATION &&
885  classForm->relkind != RELKIND_MATVIEW &&
886  classForm->relkind != RELKIND_PARTITIONED_TABLE)
887  continue;
888 
889  /*
890  * Build VacuumRelation(s) specifying the table OIDs to be processed.
891  * We omit a RangeVar since it wouldn't be appropriate to complain
892  * about failure to open one of these relations later.
893  */
894  oldcontext = MemoryContextSwitchTo(vac_context);
895  vacrels = lappend(vacrels, makeVacuumRelation(NULL,
896  relid,
897  NIL));
898  MemoryContextSwitchTo(oldcontext);
899  }
900 
901  table_endscan(scan);
902  table_close(pgclass, AccessShareLock);
903 
904  return vacrels;
905 }
906 
907 /*
908  * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
909  *
910  * Input parameters are the target relation, applicable freeze age settings,
911  * and isTopLevel which should be passed down from ProcessUtility.
912  *
913  * The output parameters are:
914  * - oldestXmin is the cutoff value used to distinguish whether tuples are
915  * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
916  * - freezeLimit is the Xid below which all Xids are replaced by
917  * FrozenTransactionId during vacuum.
918  * - xidFullScanLimit (computed from freeze_table_age parameter)
919  * represents a minimum Xid value; a table whose relfrozenxid is older than
920  * this will have a full-table vacuum applied to it, to freeze tuples across
921  * the whole table. Vacuuming a table younger than this value can use a
922  * partial scan.
923  * - multiXactCutoff is the value below which all MultiXactIds are removed from
924  * Xmax.
925  * - mxactFullScanLimit is a value against which a table's relminmxid value is
926  * compared to produce a full-table vacuum, as with xidFullScanLimit.
927  *
928  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
929  * not interested.
930  */
931 void
933  int freeze_min_age,
934  int freeze_table_age,
935  int multixact_freeze_min_age,
936  int multixact_freeze_table_age,
937  bool isTopLevel,
938  TransactionId *oldestXmin,
939  TransactionId *freezeLimit,
940  TransactionId *xidFullScanLimit,
941  MultiXactId *multiXactCutoff,
942  MultiXactId *mxactFullScanLimit)
943 {
944  int freezemin;
945  int mxid_freezemin;
946  int effective_multixact_freeze_max_age;
947  TransactionId limit;
948  TransactionId safeLimit;
949  MultiXactId oldestMxact;
950  MultiXactId mxactLimit;
951  MultiXactId safeMxactLimit;
952 
953  if (RELATION_IS_LOCAL(rel) && !IsInTransactionBlock(isTopLevel))
954  {
955  /*
956  * If we are processing a temp relation (which by prior checks must be
957  * one belonging to our session), and we are not inside any
958  * transaction block, then there can be no tuples in the rel that are
959  * still in-doubt, nor can there be any that are dead but possibly
960  * still interesting to some snapshot our session holds. We don't
961  * need to care whether other sessions could see such tuples, either.
962  * So we can aggressively set the cutoff xmin to be the nextXid.
963  */
964  *oldestXmin = ReadNewTransactionId();
965  }
966  else
967  {
968  /*
969  * Otherwise, calculate the cutoff xmin normally.
970  *
971  * We can always ignore processes running lazy vacuum. This is
972  * because we use these values only for deciding which tuples we must
973  * keep in the tables. Since lazy vacuum doesn't write its XID
974  * anywhere (usually no XID assigned), it's safe to ignore it. In
975  * theory it could be problematic to ignore lazy vacuums in a full
976  * vacuum, but keep in mind that only one vacuum process can be
977  * working on a particular table at any time, and that each vacuum is
978  * always an independent transaction.
979  */
980  *oldestXmin = GetOldestNonRemovableTransactionId(rel);
981 
983  {
984  TransactionId limit_xmin;
985  TimestampTz limit_ts;
986 
987  if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
988  &limit_xmin, &limit_ts))
989  {
990  /*
991  * TODO: We should only set the threshold if we are pruning on
992  * the basis of the increased limits. Not as crucial here as
993  * it is for opportunistic pruning (which often happens at a
994  * much higher frequency), but would still be a significant
995  * improvement.
996  */
997  SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
998  *oldestXmin = limit_xmin;
999  }
1000  }
1001  }
1002 
1003  Assert(TransactionIdIsNormal(*oldestXmin));
1004 
1005  /*
1006  * Determine the minimum freeze age to use: as specified by the caller, or
1007  * vacuum_freeze_min_age, but in any case not more than half
1008  * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1009  * wraparound won't occur too frequently.
1010  */
1011  freezemin = freeze_min_age;
1012  if (freezemin < 0)
1013  freezemin = vacuum_freeze_min_age;
1014  freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
1015  Assert(freezemin >= 0);
1016 
1017  /*
1018  * Compute the cutoff XID, being careful not to generate a "permanent" XID
1019  */
1020  limit = *oldestXmin - freezemin;
1021  if (!TransactionIdIsNormal(limit))
1022  limit = FirstNormalTransactionId;
1023 
1024  /*
1025  * If oldestXmin is very far back (in practice, more than
1026  * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
1027  * freeze age of zero.
1028  */
1030  if (!TransactionIdIsNormal(safeLimit))
1031  safeLimit = FirstNormalTransactionId;
1032 
1033  if (TransactionIdPrecedes(limit, safeLimit))
1034  {
1035  ereport(WARNING,
1036  (errmsg("oldest xmin is far in the past"),
1037  errhint("Close open transactions soon to avoid wraparound problems.\n"
1038  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1039  limit = *oldestXmin;
1040  }
1041 
1042  *freezeLimit = limit;
1043 
1044  /*
1045  * Compute the multixact age for which freezing is urgent. This is
1046  * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1047  * short of multixact member space.
1048  */
1049  effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1050 
1051  /*
1052  * Determine the minimum multixact freeze age to use: as specified by
1053  * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1054  * than half effective_multixact_freeze_max_age, so that autovacuums to
1055  * prevent MultiXact wraparound won't occur too frequently.
1056  */
1057  mxid_freezemin = multixact_freeze_min_age;
1058  if (mxid_freezemin < 0)
1059  mxid_freezemin = vacuum_multixact_freeze_min_age;
1060  mxid_freezemin = Min(mxid_freezemin,
1061  effective_multixact_freeze_max_age / 2);
1062  Assert(mxid_freezemin >= 0);
1063 
1064  /* compute the cutoff multi, being careful to generate a valid value */
1065  oldestMxact = GetOldestMultiXactId();
1066  mxactLimit = oldestMxact - mxid_freezemin;
1067  if (mxactLimit < FirstMultiXactId)
1068  mxactLimit = FirstMultiXactId;
1069 
1070  safeMxactLimit =
1071  ReadNextMultiXactId() - effective_multixact_freeze_max_age;
1072  if (safeMxactLimit < FirstMultiXactId)
1073  safeMxactLimit = FirstMultiXactId;
1074 
1075  if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
1076  {
1077  ereport(WARNING,
1078  (errmsg("oldest multixact is far in the past"),
1079  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
1080  /* Use the safe limit, unless an older mxact is still running */
1081  if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
1082  mxactLimit = oldestMxact;
1083  else
1084  mxactLimit = safeMxactLimit;
1085  }
1086 
1087  *multiXactCutoff = mxactLimit;
1088 
1089  if (xidFullScanLimit != NULL)
1090  {
1091  int freezetable;
1092 
1093  Assert(mxactFullScanLimit != NULL);
1094 
1095  /*
1096  * Determine the table freeze age to use: as specified by the caller,
1097  * or vacuum_freeze_table_age, but in any case not more than
1098  * autovacuum_freeze_max_age * 0.95, so that if you have e.g. nightly
1099  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1100  * before anti-wraparound autovacuum is launched.
1101  */
1102  freezetable = freeze_table_age;
1103  if (freezetable < 0)
1104  freezetable = vacuum_freeze_table_age;
1105  freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1106  Assert(freezetable >= 0);
1107 
1108  /*
1109  * Compute XID limit causing a full-table vacuum, being careful not to
1110  * generate a "permanent" XID.
1111  */
1112  limit = ReadNewTransactionId() - freezetable;
1113  if (!TransactionIdIsNormal(limit))
1114  limit = FirstNormalTransactionId;
1115 
1116  *xidFullScanLimit = limit;
1117 
1118  /*
1119  * Similar to the above, determine the table freeze age to use for
1120  * multixacts: as specified by the caller, or
1121  * vacuum_multixact_freeze_table_age, but in any case not more than
1122  * effective_multixact_freeze_max_age * 0.95, so that if you have
1123  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
1124  * freeze multixacts before anti-wraparound autovacuum is launched.
1125  */
1126  freezetable = multixact_freeze_table_age;
1127  if (freezetable < 0)
1128  freezetable = vacuum_multixact_freeze_table_age;
1129  freezetable = Min(freezetable,
1130  effective_multixact_freeze_max_age * 0.95);
1131  Assert(freezetable >= 0);
1132 
1133  /*
1134  * Compute MultiXact limit causing a full-table vacuum, being careful
1135  * to generate a valid MultiXact value.
1136  */
1137  mxactLimit = ReadNextMultiXactId() - freezetable;
1138  if (mxactLimit < FirstMultiXactId)
1139  mxactLimit = FirstMultiXactId;
1140 
1141  *mxactFullScanLimit = mxactLimit;
1142  }
1143  else
1144  {
1145  Assert(mxactFullScanLimit == NULL);
1146  }
1147 }
1148 
1149 /*
1150  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1151  *
1152  * If we scanned the whole relation then we should just use the count of
1153  * live tuples seen; but if we did not, we should not blindly extrapolate
1154  * from that number, since VACUUM may have scanned a quite nonrandom
1155  * subset of the table. When we have only partial information, we take
1156  * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1157  * of the tuple density in the unscanned pages.
1158  *
1159  * Note: scanned_tuples should count only *live* tuples, since
1160  * pg_class.reltuples is defined that way.
1161  */
1162 double
1164  BlockNumber total_pages,
1165  BlockNumber scanned_pages,
1166  double scanned_tuples)
1167 {
1168  BlockNumber old_rel_pages = relation->rd_rel->relpages;
1169  double old_rel_tuples = relation->rd_rel->reltuples;
1170  double old_density;
1171  double unscanned_pages;
1172  double total_tuples;
1173 
1174  /* If we did scan the whole table, just use the count as-is */
1175  if (scanned_pages >= total_pages)
1176  return scanned_tuples;
1177 
1178  /*
1179  * If scanned_pages is zero but total_pages isn't, keep the existing value
1180  * of reltuples. (Note: we might be returning -1 in this case.)
1181  */
1182  if (scanned_pages == 0)
1183  return old_rel_tuples;
1184 
1185  /*
1186  * If old density is unknown, we can't do much except scale up
1187  * scanned_tuples to match total_pages.
1188  */
1189  if (old_rel_tuples < 0 || old_rel_pages == 0)
1190  return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1191 
1192  /*
1193  * Okay, we've covered the corner cases. The normal calculation is to
1194  * convert the old measurement to a density (tuples per page), then
1195  * estimate the number of tuples in the unscanned pages using that figure,
1196  * and finally add on the number of tuples in the scanned pages.
1197  */
1198  old_density = old_rel_tuples / old_rel_pages;
1199  unscanned_pages = (double) total_pages - (double) scanned_pages;
1200  total_tuples = old_density * unscanned_pages + scanned_tuples;
1201  return floor(total_tuples + 0.5);
1202 }
1203 
1204 
1205 /*
1206  * vac_update_relstats() -- update statistics for one relation
1207  *
1208  * Update the whole-relation statistics that are kept in its pg_class
1209  * row. There are additional stats that will be updated if we are
1210  * doing ANALYZE, but we always update these stats. This routine works
1211  * for both index and heap relation entries in pg_class.
1212  *
1213  * We violate transaction semantics here by overwriting the rel's
1214  * existing pg_class tuple with the new values. This is reasonably
1215  * safe as long as we're sure that the new values are correct whether or
1216  * not this transaction commits. The reason for doing this is that if
1217  * we updated these tuples in the usual way, vacuuming pg_class itself
1218  * wouldn't work very well --- by the time we got done with a vacuum
1219  * cycle, most of the tuples in pg_class would've been obsoleted. Of
1220  * course, this only works for fixed-size not-null columns, but these are.
1221  *
1222  * Another reason for doing it this way is that when we are in a lazy
1223  * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1224  * Somebody vacuuming pg_class might think they could delete a tuple
1225  * marked with xmin = our xid.
1226  *
1227  * In addition to fundamentally nontransactional statistics such as
1228  * relpages and relallvisible, we try to maintain certain lazily-updated
1229  * DDL flags such as relhasindex, by clearing them if no longer correct.
1230  * It's safe to do this in VACUUM, which can't run in parallel with
1231  * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1232  * However, it's *not* safe to do it in an ANALYZE that's within an
1233  * outer transaction, because for example the current transaction might
1234  * have dropped the last index; then we'd think relhasindex should be
1235  * cleared, but if the transaction later rolls back this would be wrong.
1236  * So we refrain from updating the DDL flags if we're inside an outer
1237  * transaction. This is OK since postponing the flag maintenance is
1238  * always allowable.
1239  *
1240  * Note: num_tuples should count only *live* tuples, since
1241  * pg_class.reltuples is defined that way.
1242  *
1243  * This routine is shared by VACUUM and ANALYZE.
1244  */
1245 void
1247  BlockNumber num_pages, double num_tuples,
1248  BlockNumber num_all_visible_pages,
1249  bool hasindex, TransactionId frozenxid,
1250  MultiXactId minmulti,
1251  bool in_outer_xact)
1252 {
1253  Oid relid = RelationGetRelid(relation);
1254  Relation rd;
1255  HeapTuple ctup;
1256  Form_pg_class pgcform;
1257  bool dirty;
1258 
1259  rd = table_open(RelationRelationId, RowExclusiveLock);
1260 
1261  /* Fetch a copy of the tuple to scribble on */
1263  if (!HeapTupleIsValid(ctup))
1264  elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1265  relid);
1266  pgcform = (Form_pg_class) GETSTRUCT(ctup);
1267 
1268  /* Apply statistical updates, if any, to copied tuple */
1269 
1270  dirty = false;
1271  if (pgcform->relpages != (int32) num_pages)
1272  {
1273  pgcform->relpages = (int32) num_pages;
1274  dirty = true;
1275  }
1276  if (pgcform->reltuples != (float4) num_tuples)
1277  {
1278  pgcform->reltuples = (float4) num_tuples;
1279  dirty = true;
1280  }
1281  if (pgcform->relallvisible != (int32) num_all_visible_pages)
1282  {
1283  pgcform->relallvisible = (int32) num_all_visible_pages;
1284  dirty = true;
1285  }
1286 
1287  /* Apply DDL updates, but not inside an outer transaction (see above) */
1288 
1289  if (!in_outer_xact)
1290  {
1291  /*
1292  * If we didn't find any indexes, reset relhasindex.
1293  */
1294  if (pgcform->relhasindex && !hasindex)
1295  {
1296  pgcform->relhasindex = false;
1297  dirty = true;
1298  }
1299 
1300  /* We also clear relhasrules and relhastriggers if needed */
1301  if (pgcform->relhasrules && relation->rd_rules == NULL)
1302  {
1303  pgcform->relhasrules = false;
1304  dirty = true;
1305  }
1306  if (pgcform->relhastriggers && relation->trigdesc == NULL)
1307  {
1308  pgcform->relhastriggers = false;
1309  dirty = true;
1310  }
1311  }
1312 
1313  /*
1314  * Update relfrozenxid, unless caller passed InvalidTransactionId
1315  * indicating it has no new data.
1316  *
1317  * Ordinarily, we don't let relfrozenxid go backwards: if things are
1318  * working correctly, the only way the new frozenxid could be older would
1319  * be if a previous VACUUM was done with a tighter freeze_min_age, in
1320  * which case we don't want to forget the work it already did. However,
1321  * if the stored relfrozenxid is "in the future", then it must be corrupt
1322  * and it seems best to overwrite it with the cutoff we used this time.
1323  * This should match vac_update_datfrozenxid() concerning what we consider
1324  * to be "in the future".
1325  */
1326  if (TransactionIdIsNormal(frozenxid) &&
1327  pgcform->relfrozenxid != frozenxid &&
1328  (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1330  pgcform->relfrozenxid)))
1331  {
1332  pgcform->relfrozenxid = frozenxid;
1333  dirty = true;
1334  }
1335 
1336  /* Similarly for relminmxid */
1337  if (MultiXactIdIsValid(minmulti) &&
1338  pgcform->relminmxid != minmulti &&
1339  (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1340  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1341  {
1342  pgcform->relminmxid = minmulti;
1343  dirty = true;
1344  }
1345 
1346  /* If anything changed, write out the tuple. */
1347  if (dirty)
1348  heap_inplace_update(rd, ctup);
1349 
1351 }
1352 
1353 
1354 /*
1355  * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1356  *
1357  * Update pg_database's datfrozenxid entry for our database to be the
1358  * minimum of the pg_class.relfrozenxid values.
1359  *
1360  * Similarly, update our datminmxid to be the minimum of the
1361  * pg_class.relminmxid values.
1362  *
1363  * If we are able to advance either pg_database value, also try to
1364  * truncate pg_xact and pg_multixact.
1365  *
1366  * We violate transaction semantics here by overwriting the database's
1367  * existing pg_database tuple with the new values. This is reasonably
1368  * safe since the new values are correct whether or not this transaction
1369  * commits. As with vac_update_relstats, this avoids leaving dead tuples
1370  * behind after a VACUUM.
1371  */
1372 void
1374 {
1375  HeapTuple tuple;
1376  Form_pg_database dbform;
1377  Relation relation;
1378  SysScanDesc scan;
1379  HeapTuple classTup;
1380  TransactionId newFrozenXid;
1381  MultiXactId newMinMulti;
1382  TransactionId lastSaneFrozenXid;
1383  MultiXactId lastSaneMinMulti;
1384  bool bogus = false;
1385  bool dirty = false;
1386 
1387  /*
1388  * Restrict this task to one backend per database. This avoids race
1389  * conditions that would move datfrozenxid or datminmxid backward. It
1390  * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1391  * datfrozenxid passed to an earlier vac_truncate_clog() call.
1392  */
1394 
1395  /*
1396  * Initialize the "min" calculation with
1397  * GetOldestNonRemovableTransactionId(), which is a reasonable
1398  * approximation to the minimum relfrozenxid for not-yet-committed
1399  * pg_class entries for new tables; see AddNewRelationTuple(). So we
1400  * cannot produce a wrong minimum by starting with this.
1401  */
1402  newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1403 
1404  /*
1405  * Similarly, initialize the MultiXact "min" with the value that would be
1406  * used on pg_class for new tables. See AddNewRelationTuple().
1407  */
1408  newMinMulti = GetOldestMultiXactId();
1409 
1410  /*
1411  * Identify the latest relfrozenxid and relminmxid values that we could
1412  * validly see during the scan. These are conservative values, but it's
1413  * not really worth trying to be more exact.
1414  */
1415  lastSaneFrozenXid = ReadNewTransactionId();
1416  lastSaneMinMulti = ReadNextMultiXactId();
1417 
1418  /*
1419  * We must seqscan pg_class to find the minimum Xid, because there is no
1420  * index that can help us here.
1421  */
1422  relation = table_open(RelationRelationId, AccessShareLock);
1423 
1424  scan = systable_beginscan(relation, InvalidOid, false,
1425  NULL, 0, NULL);
1426 
1427  while ((classTup = systable_getnext(scan)) != NULL)
1428  {
1429  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1430 
1431  /*
1432  * Only consider relations able to hold unfrozen XIDs (anything else
1433  * should have InvalidTransactionId in relfrozenxid anyway).
1434  */
1435  if (classForm->relkind != RELKIND_RELATION &&
1436  classForm->relkind != RELKIND_MATVIEW &&
1437  classForm->relkind != RELKIND_TOASTVALUE)
1438  {
1439  Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1440  Assert(!MultiXactIdIsValid(classForm->relminmxid));
1441  continue;
1442  }
1443 
1444  /*
1445  * Some table AMs might not need per-relation xid / multixid horizons.
1446  * It therefore seems reasonable to allow relfrozenxid and relminmxid
1447  * to not be set (i.e. set to their respective Invalid*Id)
1448  * independently. Thus validate and compute horizon for each only if
1449  * set.
1450  *
1451  * If things are working properly, no relation should have a
1452  * relfrozenxid or relminmxid that is "in the future". However, such
1453  * cases have been known to arise due to bugs in pg_upgrade. If we
1454  * see any entries that are "in the future", chicken out and don't do
1455  * anything. This ensures we won't truncate clog & multixact SLRUs
1456  * before those relations have been scanned and cleaned up.
1457  */
1458 
1459  if (TransactionIdIsValid(classForm->relfrozenxid))
1460  {
1461  Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1462 
1463  /* check for values in the future */
1464  if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1465  {
1466  bogus = true;
1467  break;
1468  }
1469 
1470  /* determine new horizon */
1471  if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1472  newFrozenXid = classForm->relfrozenxid;
1473  }
1474 
1475  if (MultiXactIdIsValid(classForm->relminmxid))
1476  {
1477  /* check for values in the future */
1478  if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1479  {
1480  bogus = true;
1481  break;
1482  }
1483 
1484  /* determine new horizon */
1485  if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1486  newMinMulti = classForm->relminmxid;
1487  }
1488  }
1489 
1490  /* we're done with pg_class */
1491  systable_endscan(scan);
1492  table_close(relation, AccessShareLock);
1493 
1494  /* chicken out if bogus data found */
1495  if (bogus)
1496  return;
1497 
1498  Assert(TransactionIdIsNormal(newFrozenXid));
1499  Assert(MultiXactIdIsValid(newMinMulti));
1500 
1501  /* Now fetch the pg_database tuple we need to update. */
1502  relation = table_open(DatabaseRelationId, RowExclusiveLock);
1503 
1504  /* Fetch a copy of the tuple to scribble on */
1506  if (!HeapTupleIsValid(tuple))
1507  elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1508  dbform = (Form_pg_database) GETSTRUCT(tuple);
1509 
1510  /*
1511  * As in vac_update_relstats(), we ordinarily don't want to let
1512  * datfrozenxid go backward; but if it's "in the future" then it must be
1513  * corrupt and it seems best to overwrite it.
1514  */
1515  if (dbform->datfrozenxid != newFrozenXid &&
1516  (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1517  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1518  {
1519  dbform->datfrozenxid = newFrozenXid;
1520  dirty = true;
1521  }
1522  else
1523  newFrozenXid = dbform->datfrozenxid;
1524 
1525  /* Ditto for datminmxid */
1526  if (dbform->datminmxid != newMinMulti &&
1527  (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1528  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1529  {
1530  dbform->datminmxid = newMinMulti;
1531  dirty = true;
1532  }
1533  else
1534  newMinMulti = dbform->datminmxid;
1535 
1536  if (dirty)
1537  heap_inplace_update(relation, tuple);
1538 
1539  heap_freetuple(tuple);
1540  table_close(relation, RowExclusiveLock);
1541 
1542  /*
1543  * If we were able to advance datfrozenxid or datminmxid, see if we can
1544  * truncate pg_xact and/or pg_multixact. Also do it if the shared
1545  * XID-wrap-limit info is stale, since this action will update that too.
1546  */
1547  if (dirty || ForceTransactionIdLimitUpdate())
1548  vac_truncate_clog(newFrozenXid, newMinMulti,
1549  lastSaneFrozenXid, lastSaneMinMulti);
1550 }
1551 
1552 
1553 /*
1554  * vac_truncate_clog() -- attempt to truncate the commit log
1555  *
1556  * Scan pg_database to determine the system-wide oldest datfrozenxid,
1557  * and use it to truncate the transaction commit log (pg_xact).
1558  * Also update the XID wrap limit info maintained by varsup.c.
1559  * Likewise for datminmxid.
1560  *
1561  * The passed frozenXID and minMulti are the updated values for my own
1562  * pg_database entry. They're used to initialize the "min" calculations.
1563  * The caller also passes the "last sane" XID and MXID, since it has
1564  * those at hand already.
1565  *
1566  * This routine is only invoked when we've managed to change our
1567  * DB's datfrozenxid/datminmxid values, or we found that the shared
1568  * XID-wrap-limit info is stale.
1569  */
1570 static void
1572  MultiXactId minMulti,
1573  TransactionId lastSaneFrozenXid,
1574  MultiXactId lastSaneMinMulti)
1575 {
1576  TransactionId nextXID = ReadNewTransactionId();
1577  Relation relation;
1578  TableScanDesc scan;
1579  HeapTuple tuple;
1580  Oid oldestxid_datoid;
1581  Oid minmulti_datoid;
1582  bool bogus = false;
1583  bool frozenAlreadyWrapped = false;
1584 
1585  /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1586  LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1587 
1588  /* init oldest datoids to sync with my frozenXID/minMulti values */
1589  oldestxid_datoid = MyDatabaseId;
1590  minmulti_datoid = MyDatabaseId;
1591 
1592  /*
1593  * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1594  *
1595  * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1596  * the values could change while we look at them. Fetch each one just
1597  * once to ensure sane behavior of the comparison logic. (Here, as in
1598  * many other places, we assume that fetching or updating an XID in shared
1599  * storage is atomic.)
1600  *
1601  * Note: we need not worry about a race condition with new entries being
1602  * inserted by CREATE DATABASE. Any such entry will have a copy of some
1603  * existing DB's datfrozenxid, and that source DB cannot be ours because
1604  * of the interlock against copying a DB containing an active backend.
1605  * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1606  * concurrently modify the datfrozenxid's of different databases, the
1607  * worst possible outcome is that pg_xact is not truncated as aggressively
1608  * as it could be.
1609  */
1610  relation = table_open(DatabaseRelationId, AccessShareLock);
1611 
1612  scan = table_beginscan_catalog(relation, 0, NULL);
1613 
1614  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1615  {
1616  volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1617  TransactionId datfrozenxid = dbform->datfrozenxid;
1618  TransactionId datminmxid = dbform->datminmxid;
1619 
1620  Assert(TransactionIdIsNormal(datfrozenxid));
1621  Assert(MultiXactIdIsValid(datminmxid));
1622 
1623  /*
1624  * If things are working properly, no database should have a
1625  * datfrozenxid or datminmxid that is "in the future". However, such
1626  * cases have been known to arise due to bugs in pg_upgrade. If we
1627  * see any entries that are "in the future", chicken out and don't do
1628  * anything. This ensures we won't truncate clog before those
1629  * databases have been scanned and cleaned up. (We will issue the
1630  * "already wrapped" warning if appropriate, though.)
1631  */
1632  if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1633  MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1634  bogus = true;
1635 
1636  if (TransactionIdPrecedes(nextXID, datfrozenxid))
1637  frozenAlreadyWrapped = true;
1638  else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1639  {
1640  frozenXID = datfrozenxid;
1641  oldestxid_datoid = dbform->oid;
1642  }
1643 
1644  if (MultiXactIdPrecedes(datminmxid, minMulti))
1645  {
1646  minMulti = datminmxid;
1647  minmulti_datoid = dbform->oid;
1648  }
1649  }
1650 
1651  table_endscan(scan);
1652 
1653  table_close(relation, AccessShareLock);
1654 
1655  /*
1656  * Do not truncate CLOG if we seem to have suffered wraparound already;
1657  * the computed minimum XID might be bogus. This case should now be
1658  * impossible due to the defenses in GetNewTransactionId, but we keep the
1659  * test anyway.
1660  */
1661  if (frozenAlreadyWrapped)
1662  {
1663  ereport(WARNING,
1664  (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1665  errdetail("You might have already suffered transaction-wraparound data loss.")));
1666  return;
1667  }
1668 
1669  /* chicken out if data is bogus in any other way */
1670  if (bogus)
1671  return;
1672 
1673  /*
1674  * Advance the oldest value for commit timestamps before truncating, so
1675  * that if a user requests a timestamp for a transaction we're truncating
1676  * away right after this point, they get NULL instead of an ugly "file not
1677  * found" error from slru.c. This doesn't matter for xact/multixact
1678  * because they are not subject to arbitrary lookups from users.
1679  */
1680  AdvanceOldestCommitTsXid(frozenXID);
1681 
1682  /*
1683  * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1684  */
1685  TruncateCLOG(frozenXID, oldestxid_datoid);
1686  TruncateCommitTs(frozenXID);
1687  TruncateMultiXact(minMulti, minmulti_datoid);
1688 
1689  /*
1690  * Update the wrap limit for GetNewTransactionId and creation of new
1691  * MultiXactIds. Note: these functions will also signal the postmaster
1692  * for an(other) autovac cycle if needed. XXX should we avoid possibly
1693  * signaling twice?
1694  */
1695  SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1696  SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1697 
1698  LWLockRelease(WrapLimitsVacuumLock);
1699 }
1700 
1701 
1702 /*
1703  * vacuum_rel() -- vacuum one heap relation
1704  *
1705  * relid identifies the relation to vacuum. If relation is supplied,
1706  * use the name therein for reporting any failure to open/lock the rel;
1707  * do not use it once we've successfully opened the rel, since it might
1708  * be stale.
1709  *
1710  * Returns true if it's okay to proceed with a requested ANALYZE
1711  * operation on this table.
1712  *
1713  * Doing one heap at a time incurs extra overhead, since we need to
1714  * check that the heap exists again just before we vacuum it. The
1715  * reason that we do this is so that vacuuming can be spread across
1716  * many small transactions. Otherwise, two-phase locking would require
1717  * us to lock the entire database during one pass of the vacuum cleaner.
1718  *
1719  * At entry and exit, we are not inside a transaction.
1720  */
1721 static bool
1722 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1723 {
1724  LOCKMODE lmode;
1725  Relation onerel;
1726  LockRelId onerelid;
1727  Oid toast_relid;
1728  Oid save_userid;
1729  int save_sec_context;
1730  int save_nestlevel;
1731 
1732  Assert(params != NULL);
1733 
1734  /* Begin a transaction for vacuuming this relation */
1736 
1737  /*
1738  * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
1739  * cutoff xids in local memory wrapping around, and to have updated xmin
1740  * horizons.
1741  */
1743 
1744  if (!(params->options & VACOPT_FULL))
1745  {
1746  /*
1747  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1748  * other concurrent VACUUMs know that they can ignore this one while
1749  * determining their OldestXmin. (The reason we don't set it during a
1750  * full VACUUM is exactly that we may have to run user-defined
1751  * functions for functional indexes, and we want to make sure that if
1752  * they use the snapshot set above, any tuples it requires can't get
1753  * removed from other tables. An index function that depends on the
1754  * contents of other tables is arguably broken, but we won't break it
1755  * here by violating transaction semantics.)
1756  *
1757  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1758  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1759  * in an emergency.
1760  *
1761  * Note: these flags remain set until CommitTransaction or
1762  * AbortTransaction. We don't want to clear them until we reset
1763  * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
1764  * might appear to go backwards, which is probably Not Good.
1765  */
1766  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1768  if (params->is_wraparound)
1771  LWLockRelease(ProcArrayLock);
1772  }
1773 
1774  /*
1775  * Check for user-requested abort. Note we want this to be inside a
1776  * transaction, so xact.c doesn't issue useless WARNING.
1777  */
1779 
1780  /*
1781  * Determine the type of lock we want --- hard exclusive lock for a FULL
1782  * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1783  * way, we can be sure that no other backend is vacuuming the same table.
1784  */
1785  lmode = (params->options & VACOPT_FULL) ?
1787 
1788  /* open the relation and get the appropriate lock on it */
1789  onerel = vacuum_open_relation(relid, relation, params->options,
1790  params->log_min_duration >= 0, lmode);
1791 
1792  /* leave if relation could not be opened or locked */
1793  if (!onerel)
1794  {
1797  return false;
1798  }
1799 
1800  /*
1801  * Check if relation needs to be skipped based on ownership. This check
1802  * happens also when building the relation list to vacuum for a manual
1803  * operation, and needs to be done additionally here as VACUUM could
1804  * happen across multiple transactions where relation ownership could have
1805  * changed in-between. Make sure to only generate logs for VACUUM in this
1806  * case.
1807  */
1809  onerel->rd_rel,
1810  params->options & VACOPT_VACUUM))
1811  {
1812  relation_close(onerel, lmode);
1815  return false;
1816  }
1817 
1818  /*
1819  * Check that it's of a vacuumable relkind.
1820  */
1821  if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1822  onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1823  onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1824  onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1825  {
1826  ereport(WARNING,
1827  (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1828  RelationGetRelationName(onerel))));
1829  relation_close(onerel, lmode);
1832  return false;
1833  }
1834 
1835  /*
1836  * Silently ignore tables that are temp tables of other backends ---
1837  * trying to vacuum these will lead to great unhappiness, since their
1838  * contents are probably not up-to-date on disk. (We don't throw a
1839  * warning here; it would just lead to chatter during a database-wide
1840  * VACUUM.)
1841  */
1842  if (RELATION_IS_OTHER_TEMP(onerel))
1843  {
1844  relation_close(onerel, lmode);
1847  return false;
1848  }
1849 
1850  /*
1851  * Silently ignore partitioned tables as there is no work to be done. The
1852  * useful work is on their child partitions, which have been queued up for
1853  * us separately.
1854  */
1855  if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1856  {
1857  relation_close(onerel, lmode);
1860  /* It's OK to proceed with ANALYZE on this table */
1861  return true;
1862  }
1863 
1864  /*
1865  * Get a session-level lock too. This will protect our access to the
1866  * relation across multiple transactions, so that we can vacuum the
1867  * relation's TOAST table (if any) secure in the knowledge that no one is
1868  * deleting the parent relation.
1869  *
1870  * NOTE: this cannot block, even if someone else is waiting for access,
1871  * because the lock manager knows that both lock requests are from the
1872  * same process.
1873  */
1874  onerelid = onerel->rd_lockInfo.lockRelId;
1875  LockRelationIdForSession(&onerelid, lmode);
1876 
1877  /* Set index cleanup option based on reloptions if not yet */
1878  if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1879  {
1880  if (onerel->rd_options == NULL ||
1881  ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
1883  else
1885  }
1886 
1887  /* Set truncate option based on reloptions if not yet */
1888  if (params->truncate == VACOPT_TERNARY_DEFAULT)
1889  {
1890  if (onerel->rd_options == NULL ||
1891  ((StdRdOptions *) onerel->rd_options)->vacuum_truncate)
1892  params->truncate = VACOPT_TERNARY_ENABLED;
1893  else
1895  }
1896 
1897  /*
1898  * Remember the relation's TOAST relation for later, if the caller asked
1899  * us to process it. In VACUUM FULL, though, the toast table is
1900  * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1901  */
1902  if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1903  toast_relid = onerel->rd_rel->reltoastrelid;
1904  else
1905  toast_relid = InvalidOid;
1906 
1907  /*
1908  * Switch to the table owner's userid, so that any index functions are run
1909  * as that user. Also lock down security-restricted operations and
1910  * arrange to make GUC variable changes local to this command. (This is
1911  * unnecessary, but harmless, for lazy VACUUM.)
1912  */
1913  GetUserIdAndSecContext(&save_userid, &save_sec_context);
1914  SetUserIdAndSecContext(onerel->rd_rel->relowner,
1915  save_sec_context | SECURITY_RESTRICTED_OPERATION);
1916  save_nestlevel = NewGUCNestLevel();
1917 
1918  /*
1919  * Do the actual work --- either FULL or "lazy" vacuum
1920  */
1921  if (params->options & VACOPT_FULL)
1922  {
1923  int cluster_options = 0;
1924 
1925  /* close relation before vacuuming, but hold lock until commit */
1926  relation_close(onerel, NoLock);
1927  onerel = NULL;
1928 
1929  if ((params->options & VACOPT_VERBOSE) != 0)
1930  cluster_options |= CLUOPT_VERBOSE;
1931 
1932  /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1933  cluster_rel(relid, InvalidOid, cluster_options, true);
1934  }
1935  else
1936  table_relation_vacuum(onerel, params, vac_strategy);
1937 
1938  /* Roll back any GUC changes executed by index functions */
1939  AtEOXact_GUC(false, save_nestlevel);
1940 
1941  /* Restore userid and security context */
1942  SetUserIdAndSecContext(save_userid, save_sec_context);
1943 
1944  /* all done with this class, but hold lock until commit */
1945  if (onerel)
1946  relation_close(onerel, NoLock);
1947 
1948  /*
1949  * Complete the transaction and free all temporary memory used.
1950  */
1953 
1954  /*
1955  * If the relation has a secondary toast rel, vacuum that too while we
1956  * still hold the session lock on the main table. Note however that
1957  * "analyze" will not get done on the toast table. This is good, because
1958  * the toaster always uses hardcoded index access and statistics are
1959  * totally unimportant for toast relations.
1960  */
1961  if (toast_relid != InvalidOid)
1962  vacuum_rel(toast_relid, NULL, params);
1963 
1964  /*
1965  * Now release the session-level lock on the main table.
1966  */
1967  UnlockRelationIdForSession(&onerelid, lmode);
1968 
1969  /* Report that we really did it. */
1970  return true;
1971 }
1972 
1973 
1974 /*
1975  * Open all the vacuumable indexes of the given relation, obtaining the
1976  * specified kind of lock on each. Return an array of Relation pointers for
1977  * the indexes into *Irel, and the number of indexes into *nindexes.
1978  *
1979  * We consider an index vacuumable if it is marked insertable (indisready).
1980  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1981  * execution, and what we have is too corrupt to be processable. We will
1982  * vacuum even if the index isn't indisvalid; this is important because in a
1983  * unique index, uniqueness checks will be performed anyway and had better not
1984  * hit dangling index pointers.
1985  */
1986 void
1988  int *nindexes, Relation **Irel)
1989 {
1990  List *indexoidlist;
1991  ListCell *indexoidscan;
1992  int i;
1993 
1994  Assert(lockmode != NoLock);
1995 
1996  indexoidlist = RelationGetIndexList(relation);
1997 
1998  /* allocate enough memory for all indexes */
1999  i = list_length(indexoidlist);
2000 
2001  if (i > 0)
2002  *Irel = (Relation *) palloc(i * sizeof(Relation));
2003  else
2004  *Irel = NULL;
2005 
2006  /* collect just the ready indexes */
2007  i = 0;
2008  foreach(indexoidscan, indexoidlist)
2009  {
2010  Oid indexoid = lfirst_oid(indexoidscan);
2011  Relation indrel;
2012 
2013  indrel = index_open(indexoid, lockmode);
2014  if (indrel->rd_index->indisready)
2015  (*Irel)[i++] = indrel;
2016  else
2017  index_close(indrel, lockmode);
2018  }
2019 
2020  *nindexes = i;
2021 
2022  list_free(indexoidlist);
2023 }
2024 
2025 /*
2026  * Release the resources acquired by vac_open_indexes. Optionally release
2027  * the locks (say NoLock to keep 'em).
2028  */
2029 void
2030 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2031 {
2032  if (Irel == NULL)
2033  return;
2034 
2035  while (nindexes--)
2036  {
2037  Relation ind = Irel[nindexes];
2038 
2039  index_close(ind, lockmode);
2040  }
2041  pfree(Irel);
2042 }
2043 
2044 /*
2045  * vacuum_delay_point --- check for interrupts and cost-based delay.
2046  *
2047  * This should be called in each major loop of VACUUM processing,
2048  * typically once per page processed.
2049  */
2050 void
2052 {
2053  double msec = 0;
2054 
2055  /* Always check for interrupts */
2057 
2059  return;
2060 
2061  /*
2062  * For parallel vacuum, the delay is computed based on the shared cost
2063  * balance. See compute_parallel_delay.
2064  */
2065  if (VacuumSharedCostBalance != NULL)
2066  msec = compute_parallel_delay();
2067  else if (VacuumCostBalance >= VacuumCostLimit)
2069 
2070  /* Nap if appropriate */
2071  if (msec > 0)
2072  {
2073  if (msec > VacuumCostDelay * 4)
2074  msec = VacuumCostDelay * 4;
2075 
2077  pg_usleep((long) (msec * 1000));
2079 
2080  VacuumCostBalance = 0;
2081 
2082  /* update balance values for workers */
2084 
2085  /* Might have gotten an interrupt while sleeping */
2087  }
2088 }
2089 
2090 /*
2091  * Computes the vacuum delay for parallel workers.
2092  *
2093  * The basic idea of a cost-based delay for parallel vacuum is to allow each
2094  * worker to sleep in proportion to the share of work it's done. We achieve this
2095  * by allowing all parallel vacuum workers including the leader process to
2096  * have a shared view of cost related parameters (mainly VacuumCostBalance).
2097  * We allow each worker to update it as and when it has incurred any cost and
2098  * then based on that decide whether it needs to sleep. We compute the time
2099  * to sleep for a worker based on the cost it has incurred
2100  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2101  * that amount. This avoids putting to sleep those workers which have done less
2102  * I/O than other workers and therefore ensure that workers
2103  * which are doing more I/O got throttled more.
2104  *
2105  * We allow a worker to sleep only if it has performed I/O above a certain
2106  * threshold, which is calculated based on the number of active workers
2107  * (VacuumActiveNWorkers), and the overall cost balance is more than
2108  * VacuumCostLimit set by the system. Testing reveals that we achieve
2109  * the required throttling if we force a worker that has done more than 50%
2110  * of its share of work to sleep.
2111  */
2112 static double
2114 {
2115  double msec = 0;
2116  uint32 shared_balance;
2117  int nworkers;
2118 
2119  /* Parallel vacuum must be active */
2120  Assert(VacuumSharedCostBalance);
2121 
2122  nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2123 
2124  /* At least count itself */
2125  Assert(nworkers >= 1);
2126 
2127  /* Update the shared cost balance value atomically */
2128  shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2129 
2130  /* Compute the total local balance for the current worker */
2132 
2133  if ((shared_balance >= VacuumCostLimit) &&
2134  (VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers)))
2135  {
2136  /* Compute sleep time based on the local cost balance */
2138  pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2140  }
2141 
2142  /*
2143  * Reset the local balance as we accumulated it into the shared value.
2144  */
2145  VacuumCostBalance = 0;
2146 
2147  return msec;
2148 }
2149 
2150 /*
2151  * A wrapper function of defGetBoolean().
2152  *
2153  * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
2154  * instead of true and false.
2155  */
2156 static VacOptTernaryValue
2158 {
2160 }
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:542
#define NIL
Definition: pg_list.h:65
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:151
void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy)
Definition: analyze.c:119
static double compute_parallel_delay(void)
Definition: vacuum.c:2113
int multixact_freeze_table_age
Definition: vacuum.h:215
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2030
LockRelId lockRelId
Definition: rel.h:44
void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel)
Definition: vacuum.c:272
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
int64 VacuumPageMiss
Definition: globals.c:145
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
int errhint(const char *fmt,...)
Definition: elog.c:1068
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:569
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ERRCODE_UNDEFINED_TABLE
Definition: pgbench.c:74
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1163
int VacuumCostBalance
Definition: globals.c:148
int vacuum_multixact_freeze_table_age
Definition: vacuum.c:64
void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
Definition: clog.c:878
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, bool isTopLevel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:932
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
Definition: vacuum.c:1722
RangeVar * relation
Definition: parsenodes.h:3242
uint32 TransactionId
Definition: c.h:521
#define SECURITY_RESTRICTED_OPERATION
Definition: miscadmin.h:299
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
void vac_update_datfrozenxid(void)
Definition: vacuum.c:1373
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:588
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:476
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:199
FormData_pg_database * Form_pg_database
Definition: pg_database.h:81
PGPROC * MyProc
Definition: proc.c:67
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
VacuumRelation * makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols)
Definition: makefuncs.c:808
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
void LockDatabaseFrozenIds(LOCKMODE lockmode)
Definition: lmgr.c:469
void CommitTransactionCommand(void)
Definition: xact.c:2947
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:583
int64 VacuumPageHit
Definition: globals.c:144
#define Min(x, y)
Definition: c.h:928
bool is_vacuumcmd
Definition: parsenodes.h:3229
#define PROC_VACUUM_FOR_WRAPAROUND
Definition: proc.h:56
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static void table_relation_vacuum(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:1482
#define AccessShareLock
Definition: lockdefs.h:36
static BufferAccessStrategy vac_strategy
Definition: vacuum.c:69
int32 defGetInt32(DefElem *def)
Definition: define.c:166
struct cursor * cur
Definition: ecpg.c:28
List * list_concat(List *list1, const List *list2)
Definition: list.c:515
int errcode(int sqlerrcode)
Definition: elog.c:610
PROC_HDR * ProcGlobal
Definition: proc.c:79
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
int64 VacuumPageDirty
Definition: globals.c:146
uint32 BlockNumber
Definition: block.h:31
VacOptTernaryValue
Definition: vacuum.h:195
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
NameData relname
Definition: pg_class.h:38
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:652
void AdvanceOldestCommitTsXid(TransactionId oldestXact)
Definition: commit_ts.c:919
int freeze_table_age
Definition: vacuum.h:212
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:357
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1672
signed int int32
Definition: c.h:363
static List * expand_vacuum_rel(VacuumRelation *vrel, int options)
Definition: vacuum.c:719
MemoryContext PortalContext
Definition: mcxt.c:53
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1812
bool ForceTransactionIdLimitUpdate(void)
Definition: varsup.c:490
char * relname
Definition: primnodes.h:68
void pg_usleep(long microsec)
Definition: signal.c:53
bool defGetBoolean(DefElem *def)
Definition: define.c:111
Form_pg_index rd_index
Definition: rel.h:174
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:476
void pfree(void *pointer)
Definition: mcxt.c:1057
#define PROC_IN_VACUUM
Definition: proc.h:55
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1870
#define FirstNormalTransactionId
Definition: transam.h:34
void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:382
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
Definition: rel.h:36
int VacuumCostLimit
Definition: globals.c:141
void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
Definition: lmgr.c:369
int autovacuum_freeze_max_age
Definition: autovacuum.c:124
int freeze_min_age
Definition: vacuum.h:211
int vacuum_multixact_freeze_min_age
Definition: vacuum.c:63
TriggerDesc * trigdesc
Definition: rel.h:115
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
bool is_wraparound
Definition: vacuum.h:217
#define lfirst_node(type, lc)
Definition: pg_list.h:172
bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options)
Definition: vacuum.c:533
#define NoLock
Definition: lockdefs.h:34
LockInfoData rd_lockInfo
Definition: rel.h:112
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1286
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
Relation vacuum_open_relation(Oid relid, RangeVar *relation, int options, bool verbose, LOCKMODE lmode)
Definition: vacuum.c:607
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:581
int location
Definition: parsenodes.h:736
#define RowExclusiveLock
Definition: lockdefs.h:38
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition: guc.c:5956
int errdetail(const char *fmt,...)
Definition: elog.c:954
static MemoryContext vac_context
Definition: vacuum.c:68
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3380
#define RelationGetRelationName(relation)
Definition: rel.h:490
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:236
unsigned int uint32
Definition: c.h:375
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:798
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1460
#define FirstMultiXactId
Definition: multixact.h:25
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3325
void pgstat_vacuum_stat(void)
Definition: pgstat.c:1066
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2823
bool pg_database_ownercheck(Oid db_oid, Oid roleid)
Definition: aclchk.c:5105
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TruncateCommitTs(TransactionId oldestXact)
Definition: commit_ts.c:866
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
Node * arg
Definition: parsenodes.h:734
List * lappend(List *list, void *datum)
Definition: list.c:321
bool IsInTransactionBlock(bool isTopLevel)
Definition: xact.c:3493
static int verbose
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1987
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
float float4
Definition: c.h:498
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
static int elevel
Definition: vacuumlazy.c:333
#define PG_FINALLY()
Definition: elog.h:312
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2501
void CommandCounterIncrement(void)
Definition: xact.c:1021
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Oid MyDatabaseId
Definition: globals.c:85
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2208
#define InvalidOid
Definition: postgres_ext.h:36
VacOptTernaryValue truncate
Definition: vacuum.h:223
TransactionId datfrozenxid
Definition: pg_database.h:62
#define ereport(elevel,...)
Definition: elog.h:144
TransactionId MultiXactId
Definition: c.h:531
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
Definition: vacuum.c:98
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:746
#define lfirst(lc)
Definition: pg_list.h:169
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:593
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4687
void StartTransactionCommand(void)
Definition: xact.c:2846
RuleLock * rd_rules
Definition: rel.h:113
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1436
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:345
int nworkers
Definition: vacuum.h:231
static int list_length(const List *l)
Definition: pg_list.h:149
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:110
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1208
int vacuum_freeze_min_age
Definition: vacuum.c:61
TransactionId datminmxid
Definition: pg_database.h:65
int log_min_duration
Definition: vacuum.h:218
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
void cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
Definition: cluster.c:257
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3146
static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti)
Definition: vacuum.c:1571
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4514
int vacuum_freeze_table_age
Definition: vacuum.c:62
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:308
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:863
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:174
#define AccessExclusiveLock
Definition: lockdefs.h:45
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
Definition: pg_inherits.c:165
int NewGUCNestLevel(void)
Definition: guc.c:5942
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:821
void heap_inplace_update(Relation relation, HeapTuple tuple)
Definition: heapam.c:5715
double VacuumCostDelay
Definition: globals.c:142
List * options
Definition: parsenodes.h:3227
void list_free(List *list)
Definition: list.c:1376
#define elog(elevel,...)
Definition: elog.h:214
int i
int options
Definition: vacuum.h:210
int pgxactoff
Definition: proc.h:139
uint8 * vacuumFlags
Definition: proc.h:321
#define NameStr(name)
Definition: c.h:623
void AutoVacuumUpdateDelay(void)
Definition: autovacuum.c:1775
FormData_pg_database
Definition: pg_database.h:74
uint8 vacuumFlags
Definition: proc.h:178
char * defname
Definition: parsenodes.h:733
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define MAX_PARALLEL_WORKER_LIMIT
void vacuum_delay_point(void)
Definition: vacuum.c:2051
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define PG_TRY()
Definition: elog.h:295
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition: multixact.c:2939
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1246
Definition: pg_list.h:50
#define RelationGetRelid(relation)
Definition: rel.h:456
int multixact_freeze_min_age
Definition: vacuum.h:213
static long analyze(struct nfa *nfa)
Definition: regc_nfa.c:2816
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
#define PG_END_TRY()
Definition: elog.h:320
bytea * rd_options
Definition: rel.h:157
#define lfirst_oid(lc)
Definition: pg_list.h:171
bool VacuumCostActive
Definition: globals.c:149
static List * get_all_vacuum_rels(int options)
Definition: vacuum.c:858
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:723
List * rels
Definition: parsenodes.h:3228
static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def)
Definition: vacuum.c:2157