PostgreSQL Source Code  git master
autovacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * autovacuum.c
4  *
5  * PostgreSQL Integrated Autovacuum Daemon
6  *
7  * The autovacuum system is structured in two different kinds of processes: the
8  * autovacuum launcher and the autovacuum worker. The launcher is an
9  * always-running process, started by the postmaster when the autovacuum GUC
10  * parameter is set. The launcher schedules autovacuum workers to be started
11  * when appropriate. The workers are the processes which execute the actual
12  * vacuuming; they connect to a database as determined in the launcher, and
13  * once connected they examine the catalogs to select the tables to vacuum.
14  *
15  * The autovacuum launcher cannot start the worker processes by itself,
16  * because doing so would cause robustness issues (namely, failure to shut
17  * them down on exceptional conditions, and also, since the launcher is
18  * connected to shared memory and is thus subject to corruption there, it is
19  * not as robust as the postmaster). So it leaves that task to the postmaster.
20  *
21  * There is an autovacuum shared memory area, where the launcher stores
22  * information about the database it wants vacuumed. When it wants a new
23  * worker to start, it sets a flag in shared memory and sends a signal to the
24  * postmaster. The postmaster knows nothing more than that it must start a worker;
25  * so it forks a new child, which turns into a worker. This new process
26  * connects to shared memory, and there it can inspect the information that the
27  * launcher has set up.
28  *
29  * If the fork() call fails in the postmaster, it sets a flag in the shared
30  * memory area, and sends a signal to the launcher. The launcher, upon
31  * noticing the flag, can try starting the worker again by resending the
32  * signal. Note that the failure can only be transient (fork failure due to
33  * high load, memory pressure, too many processes, etc); more permanent
34  * problems, like failure to connect to a database, are detected later in the
35  * worker and dealt with just by having the worker exit normally. The launcher
36  * will launch a new worker again later, per schedule.
37  *
38  * When the worker is done vacuuming it sends SIGUSR2 to the launcher. The
39  * launcher then wakes up and is able to launch another worker, if the schedule
40  * is so tight that a new worker is needed immediately. At this time the
41  * launcher can also balance the settings for the various remaining workers'
42  * cost-based vacuum delay feature.
43  *
44  * Note that there can be more than one worker in a database concurrently.
45  * They will store the table they are currently vacuuming in shared memory, so
46  * that other workers avoid being blocked waiting for the vacuum lock for that
47  * table. They will also reload the pgstats data just before vacuuming each
48  * table, to avoid vacuuming a table that was just finished being vacuumed by
49  * another worker and thus is no longer noted in shared memory. However,
50  * there is a window (caused by pgstat delay) in which a worker may choose a
51  * table that was already vacuumed; this is a bug in the current design.
52  *
53  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
54  * Portions Copyright (c) 1994, Regents of the University of California
55  *
56  *
57  * IDENTIFICATION
58  * src/backend/postmaster/autovacuum.c
59  *
60  *-------------------------------------------------------------------------
61  */
62 #include "postgres.h"
63 
64 #include <signal.h>
65 #include <sys/time.h>
66 #include <unistd.h>
67 
68 #include "access/heapam.h"
69 #include "access/htup_details.h"
70 #include "access/multixact.h"
71 #include "access/reloptions.h"
72 #include "access/tableam.h"
73 #include "access/transam.h"
74 #include "access/xact.h"
75 #include "catalog/dependency.h"
76 #include "catalog/namespace.h"
77 #include "catalog/pg_database.h"
78 #include "commands/dbcommands.h"
79 #include "commands/vacuum.h"
80 #include "lib/ilist.h"
81 #include "libpq/pqsignal.h"
82 #include "miscadmin.h"
83 #include "nodes/makefuncs.h"
84 #include "pgstat.h"
85 #include "postmaster/autovacuum.h"
87 #include "postmaster/interrupt.h"
88 #include "postmaster/postmaster.h"
89 #include "storage/bufmgr.h"
90 #include "storage/ipc.h"
91 #include "storage/latch.h"
92 #include "storage/lmgr.h"
93 #include "storage/pmsignal.h"
94 #include "storage/proc.h"
95 #include "storage/procsignal.h"
96 #include "storage/sinvaladt.h"
97 #include "storage/smgr.h"
98 #include "tcop/tcopprot.h"
99 #include "utils/fmgroids.h"
100 #include "utils/fmgrprotos.h"
101 #include "utils/lsyscache.h"
102 #include "utils/memutils.h"
103 #include "utils/ps_status.h"
104 #include "utils/rel.h"
105 #include "utils/snapmgr.h"
106 #include "utils/syscache.h"
107 #include "utils/timeout.h"
108 #include "utils/timestamp.h"
109 
110 
111 /*
112  * GUC parameters
113  */
126 
129 
131 
132 /* how long to keep pgstat data in the launcher, in milliseconds */
133 #define STATS_READ_DELAY 1000
134 
135 /* the minimum allowed time between two awakenings of the launcher */
136 #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
137 #define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
138 
139 /* Flags to tell if we are in an autovacuum process */
140 static bool am_autovacuum_launcher = false;
141 static bool am_autovacuum_worker = false;
142 
143 /* Flags set by signal handlers */
144 static volatile sig_atomic_t got_SIGUSR2 = false;
145 
146 /* Comparison points for determining whether freeze_max_age is exceeded */
149 
150 /* Default freeze ages to use for autovacuum (varies by database) */
155 
156 /* Memory context for long-lived data */
158 
159 /* struct to keep track of databases in launcher */
160 typedef struct avl_dbase
161 {
162  Oid adl_datid; /* hash key -- must be first */
166 } avl_dbase;
167 
168 /* struct to keep track of databases in worker */
169 typedef struct avw_dbase
170 {
172  char *adw_name;
176 } avw_dbase;
177 
178 /* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
179 typedef struct av_relation
180 {
181  Oid ar_toastrelid; /* hash key - must be first */
184  AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's
185  * reloptions, or NULL if none */
186 } av_relation;
187 
188 /* struct to keep track of tables to vacuum and/or analyze, after rechecking */
189 typedef struct autovac_table
190 {
197  char *at_relname;
198  char *at_nspname;
199  char *at_datname;
200 } autovac_table;
201 
202 /*-------------
203  * This struct holds information about a single worker's whereabouts. We keep
204  * an array of these in shared memory, sized according to
205  * autovacuum_max_workers.
206  *
207  * wi_links entry into free list or running list
208  * wi_dboid OID of the database this worker is supposed to work on
209  * wi_tableoid OID of the table currently being vacuumed, if any
210  * wi_sharedrel flag indicating whether table is marked relisshared
211  * wi_proc pointer to PGPROC of the running worker, NULL if not started
212  * wi_launchtime Time at which this worker was launched
213  * wi_cost_* Vacuum cost-based delay parameters current in this worker
214  *
215  * All fields are protected by AutovacuumLock, except for wi_tableoid and
216  * wi_sharedrel which are protected by AutovacuumScheduleLock (note these
217  * two fields are read-only for everyone except that worker itself).
218  *-------------
219  */
220 typedef struct WorkerInfoData
221 {
233 
234 typedef struct WorkerInfoData *WorkerInfo;
235 
236 /*
237  * Possible signals received by the launcher from remote processes. These are
238  * stored atomically in shared memory so that other processes can set them
239  * without locking.
240  */
241 typedef enum
242 {
243  AutoVacForkFailed, /* failed trying to start a worker */
244  AutoVacRebalance, /* rebalance the cost limits */
245  AutoVacNumSignals /* must be last */
247 
248 /*
249  * Autovacuum workitem array, stored in AutoVacuumShmem->av_workItems. This
250  * list is mostly protected by AutovacuumLock, except that if an item is
251  * marked 'active' other processes must not modify the work-identifying
252  * members.
253  */
254 typedef struct AutoVacuumWorkItem
255 {
257  bool avw_used; /* below data is valid */
258  bool avw_active; /* being processed */
263 
264 #define NUM_WORKITEMS 256
265 
266 /*-------------
267  * The main autovacuum shmem struct. On shared memory we store this main
268  * struct and the array of WorkerInfo structs. This struct keeps:
269  *
270  * av_signal set by other processes to indicate various conditions
271  * av_launcherpid the PID of the autovacuum launcher
272  * av_freeWorkers the WorkerInfo freelist
273  * av_runningWorkers the WorkerInfo non-free queue
274  * av_startingWorker pointer to WorkerInfo currently being started (cleared by
275  * the worker itself as soon as it's up and running)
276  * av_workItems work item array
277  *
278  * This struct is protected by AutovacuumLock, except for av_signal and parts
279  * of the worker list (see above).
280  *-------------
281  */
282 typedef struct
283 {
284  sig_atomic_t av_signal[AutoVacNumSignals];
288  WorkerInfo av_startingWorker;
291 
293 
294 /*
295  * the database list (of avl_dbase elements) in the launcher, and the context
296  * that contains it
297  */
300 
301 /* Pointer to my own WorkerInfo, valid on each worker */
302 static WorkerInfo MyWorkerInfo = NULL;
303 
304 /* PID of launcher, valid only in worker while shutting down */
306 
307 #ifdef EXEC_BACKEND
308 static pid_t avlauncher_forkexec(void);
309 static pid_t avworker_forkexec(void);
310 #endif
311 NON_EXEC_STATIC void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn();
312 NON_EXEC_STATIC void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn();
313 
314 static Oid do_start_worker(void);
315 static void HandleAutoVacLauncherInterrupts(void);
317 static void launcher_determine_sleep(bool canlaunch, bool recursing,
318  struct timeval *nap);
319 static void launch_worker(TimestampTz now);
320 static List *get_database_list(void);
321 static void rebuild_database_list(Oid newdb);
322 static int db_comparator(const void *a, const void *b);
323 static void autovac_balance_cost(void);
324 
325 static void do_autovacuum(void);
326 static void FreeWorkerInfo(int code, Datum arg);
327 
328 static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
329  TupleDesc pg_class_desc,
330  int effective_multixact_freeze_max_age);
331 static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
332  Form_pg_class classForm,
333  PgStat_StatTabEntry *tabentry,
334  int effective_multixact_freeze_max_age,
335  bool *dovacuum, bool *doanalyze, bool *wraparound);
336 
338  BufferAccessStrategy bstrategy);
340  TupleDesc pg_class_desc);
341 static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
342  PgStat_StatDBEntry *shared,
343  PgStat_StatDBEntry *dbentry);
344 static void perform_work_item(AutoVacuumWorkItem *workitem);
345 static void autovac_report_activity(autovac_table *tab);
346 static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
347  const char *nspname, const char *relname);
348 static void avl_sigusr2_handler(SIGNAL_ARGS);
349 static void autovac_refresh_stats(void);
350 
351 
352 
353 /********************************************************************
354  * AUTOVACUUM LAUNCHER CODE
355  ********************************************************************/
356 
357 #ifdef EXEC_BACKEND
358 /*
359  * forkexec routine for the autovacuum launcher process.
360  *
361  * Format up the arglist, then fork and exec.
362  */
363 static pid_t
364 avlauncher_forkexec(void)
365 {
366  char *av[10];
367  int ac = 0;
368 
369  av[ac++] = "postgres";
370  av[ac++] = "--forkavlauncher";
371  av[ac++] = NULL; /* filled in by postmaster_forkexec */
372  av[ac] = NULL;
373 
374  Assert(ac < lengthof(av));
375 
376  return postmaster_forkexec(ac, av);
377 }
378 
379 /*
380  * We need this set from the outside, before InitProcess is called
381  */
382 void
383 AutovacuumLauncherIAm(void)
384 {
385  am_autovacuum_launcher = true;
386 }
387 #endif
388 
389 /*
390  * Main entry point for autovacuum launcher process, to be called from the
391  * postmaster.
392  */
393 int
395 {
396  pid_t AutoVacPID;
397 
398 #ifdef EXEC_BACKEND
399  switch ((AutoVacPID = avlauncher_forkexec()))
400 #else
401  switch ((AutoVacPID = fork_process()))
402 #endif
403  {
404  case -1:
405  ereport(LOG,
406  (errmsg("could not fork autovacuum launcher process: %m")));
407  return 0;
408 
409 #ifndef EXEC_BACKEND
410  case 0:
411  /* in postmaster child ... */
413 
414  /* Close the postmaster's sockets */
415  ClosePostmasterPorts(false);
416 
417  AutoVacLauncherMain(0, NULL);
418  break;
419 #endif
420  default:
421  return (int) AutoVacPID;
422  }
423 
424  /* shouldn't get here */
425  return 0;
426 }
427 
428 /*
429  * Main loop for the autovacuum launcher process.
430  */
431 NON_EXEC_STATIC void
432 AutoVacLauncherMain(int argc, char *argv[])
433 {
434  sigjmp_buf local_sigjmp_buf;
435 
436  am_autovacuum_launcher = true;
437 
439  init_ps_display(NULL);
440 
441  ereport(DEBUG1,
442  (errmsg("autovacuum launcher started")));
443 
444  if (PostAuthDelay)
445  pg_usleep(PostAuthDelay * 1000000L);
446 
448 
449  /*
450  * Set up signal handlers. We operate on databases much like a regular
451  * backend, so we use the same signal handling. See equivalent code in
452  * tcop/postgres.c.
453  */
457  /* SIGQUIT handler was already set up by InitPostmasterChild */
458 
459  InitializeTimeouts(); /* establishes SIGALRM handler */
460 
466 
467  /* Early initialization */
468  BaseInit();
469 
470  /*
471  * Create a per-backend PGPROC struct in shared memory, except in the
472  * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
473  * this before we can use LWLocks (and in the EXEC_BACKEND case we already
474  * had to do some stuff with LWLocks).
475  */
476 #ifndef EXEC_BACKEND
477  InitProcess();
478 #endif
479 
480  InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL, false);
481 
483 
484  /*
485  * Create a memory context that we will do all our work in. We do this so
486  * that we can reset the context during error recovery and thereby avoid
487  * possible memory leaks.
488  */
489  AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
490  "Autovacuum Launcher",
492  MemoryContextSwitchTo(AutovacMemCxt);
493 
494  /*
495  * If an exception is encountered, processing resumes here.
496  *
497  * This code is a stripped down version of PostgresMain error recovery.
498  *
499  * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask
500  * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus,
501  * signals other than SIGQUIT will be blocked until we complete error
502  * recovery. It might seem that this policy makes the HOLD_INTERRUPTS()
503  * call redundant, but it is not since InterruptPending might be set
504  * already.
505  */
506  if (sigsetjmp(local_sigjmp_buf, 1) != 0)
507  {
508  /* since not using PG_TRY, must reset error stack by hand */
509  error_context_stack = NULL;
510 
511  /* Prevents interrupts while cleaning up */
512  HOLD_INTERRUPTS();
513 
514  /* Forget any pending QueryCancel or timeout request */
515  disable_all_timeouts(false);
516  QueryCancelPending = false; /* second to avoid race condition */
517 
518  /* Report the error to the server log */
519  EmitErrorReport();
520 
521  /* Abort the current transaction in order to recover */
523 
524  /*
525  * Release any other resources, for the case where we were not in a
526  * transaction.
527  */
530  AbortBufferIO();
531  UnlockBuffers();
532  /* this is probably dead code, but let's be safe: */
535  AtEOXact_Buffers(false);
536  AtEOXact_SMgr();
537  AtEOXact_Files(false);
538  AtEOXact_HashTables(false);
539 
540  /*
541  * Now return to normal top-level context and clear ErrorContext for
542  * next time.
543  */
544  MemoryContextSwitchTo(AutovacMemCxt);
545  FlushErrorState();
546 
547  /* Flush any leaked data in the top-level context */
549 
550  /* don't leave dangling pointers to freed memory */
551  DatabaseListCxt = NULL;
552  dlist_init(&DatabaseList);
553 
554  /*
555  * Make sure pgstat also considers our stat data as gone. Note: we
556  * mustn't use autovac_refresh_stats here.
557  */
559 
560  /* Now we can allow interrupts again */
562 
563  /* if in shutdown mode, no need for anything further; just go away */
566 
567  /*
568  * Sleep at least 1 second after any error. We don't want to be
569  * filling the error logs as fast as we can.
570  */
571  pg_usleep(1000000L);
572  }
573 
574  /* We can now handle ereport(ERROR) */
575  PG_exception_stack = &local_sigjmp_buf;
576 
577  /* must unblock signals before calling rebuild_database_list */
579 
580  /*
581  * Set always-secure search path. Launcher doesn't connect to a database,
582  * so this has no effect.
583  */
584  SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
585 
586  /*
587  * Force zero_damaged_pages OFF in the autovac process, even if it is set
588  * in postgresql.conf. We don't really want such a dangerous option being
589  * applied non-interactively.
590  */
591  SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
592 
593  /*
594  * Force settable timeouts off to avoid letting these settings prevent
595  * regular maintenance from being executed.
596  */
597  SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
598  SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
599  SetConfigOption("idle_in_transaction_session_timeout", "0",
601 
602  /*
603  * Force default_transaction_isolation to READ COMMITTED. We don't want
604  * to pay the overhead of serializable mode, nor add any risk of causing
605  * deadlocks or delaying other transactions.
606  */
607  SetConfigOption("default_transaction_isolation", "read committed",
609 
610  /*
611  * In emergency mode, just start a worker (unless shutdown was requested)
612  * and go away.
613  */
614  if (!AutoVacuumingActive())
615  {
617  do_start_worker();
618  proc_exit(0); /* done */
619  }
620 
621  AutoVacuumShmem->av_launcherpid = MyProcPid;
622 
623  /*
624  * Create the initial database list. The invariant we want this list to
625  * keep is that it's ordered by decreasing next_time. As soon as an entry
626  * is updated to a higher time, it will be moved to the front (which is
627  * correct because the only operation is to add autovacuum_naptime to the
628  * entry, and time always increases).
629  */
631 
632  /* loop until shutdown request */
633  while (!ShutdownRequestPending)
634  {
635  struct timeval nap;
636  TimestampTz current_time = 0;
637  bool can_launch;
638 
639  /*
640  * This loop is a bit different from the normal use of WaitLatch,
641  * because we'd like to sleep before the first launch of a child
642  * process. So it's WaitLatch, then ResetLatch, then check for
643  * wakening conditions.
644  */
645 
647  false, &nap);
648 
649  /*
650  * Wait until naptime expires or we get some type of signal (all the
651  * signal handlers will wake us by calling SetLatch).
652  */
653  (void) WaitLatch(MyLatch,
655  (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
657 
659 
661 
662  /*
663  * a worker finished, or postmaster signaled failure to start a worker
664  */
665  if (got_SIGUSR2)
666  {
667  got_SIGUSR2 = false;
668 
669  /* rebalance cost limits, if needed */
670  if (AutoVacuumShmem->av_signal[AutoVacRebalance])
671  {
672  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
673  AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
675  LWLockRelease(AutovacuumLock);
676  }
677 
678  if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
679  {
680  /*
681  * If the postmaster failed to start a new worker, we sleep
682  * for a little while and resend the signal. The new worker's
683  * state is still in memory, so this is sufficient. After
684  * that, we restart the main loop.
685  *
686  * XXX should we put a limit to the number of times we retry?
687  * I don't think it makes much sense, because a future start
688  * of a worker will continue to fail in the same way.
689  */
690  AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
691  pg_usleep(1000000L); /* 1s */
693  continue;
694  }
695  }
696 
697  /*
698  * There are some conditions that we need to check before trying to
699  * start a worker. First, we need to make sure that there is a worker
700  * slot available. Second, we need to make sure that no other worker
701  * failed while starting up.
702  */
703 
704  current_time = GetCurrentTimestamp();
705  LWLockAcquire(AutovacuumLock, LW_SHARED);
706 
707  can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
708 
709  if (AutoVacuumShmem->av_startingWorker != NULL)
710  {
711  int waittime;
712  WorkerInfo worker = AutoVacuumShmem->av_startingWorker;
713 
714  /*
715  * We can't launch another worker when another one is still
716  * starting up (or failed while doing so), so just sleep for a bit
717  * more; that worker will wake us up again as soon as it's ready.
718  * We will only wait autovacuum_naptime seconds (up to a maximum
719  * of 60 seconds) for this to happen however. Note that failure
720  * to connect to a particular database is not a problem here,
721  * because the worker removes itself from the startingWorker
722  * pointer before trying to connect. Problems detected by the
723  * postmaster (like fork() failure) are also reported and handled
724  * differently. The only problems that may cause this code to
725  * fire are errors in the earlier sections of AutoVacWorkerMain,
726  * before the worker removes the WorkerInfo from the
727  * startingWorker pointer.
728  */
729  waittime = Min(autovacuum_naptime, 60) * 1000;
730  if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
731  waittime))
732  {
733  LWLockRelease(AutovacuumLock);
734  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
735 
736  /*
737  * No other process can put a worker in starting mode, so if
738  * startingWorker is still non-NULL after exchanging our lock,
739  * we assume it's the same one we saw above (so we don't
740  * recheck the launch time).
741  */
742  if (AutoVacuumShmem->av_startingWorker != NULL)
743  {
744  worker = AutoVacuumShmem->av_startingWorker;
745  worker->wi_dboid = InvalidOid;
746  worker->wi_tableoid = InvalidOid;
747  worker->wi_sharedrel = false;
748  worker->wi_proc = NULL;
749  worker->wi_launchtime = 0;
750  dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
751  &worker->wi_links);
752  AutoVacuumShmem->av_startingWorker = NULL;
753  elog(WARNING, "worker took too long to start; canceled");
754  }
755  }
756  else
757  can_launch = false;
758  }
759  LWLockRelease(AutovacuumLock); /* either shared or exclusive */
760 
761  /* if we can't do anything, just go back to sleep */
762  if (!can_launch)
763  continue;
764 
765  /* We're OK to start a new worker */
766 
767  if (dlist_is_empty(&DatabaseList))
768  {
769  /*
770  * Special case when the list is empty: start a worker right away.
771  * This covers the initial case, when no database is in pgstats
772  * (thus the list is empty). Note that the constraints in
773  * launcher_determine_sleep keep us from starting workers too
774  * quickly (at most once every autovacuum_naptime when the list is
775  * empty).
776  */
777  launch_worker(current_time);
778  }
779  else
780  {
781  /*
782  * because rebuild_database_list constructs a list with most
783  * distant adl_next_worker first, we obtain our database from the
784  * tail of the list.
785  */
786  avl_dbase *avdb;
787 
788  avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
789 
790  /*
791  * launch a worker if next_worker is right now or it is in the
792  * past
793  */
795  current_time, 0))
796  launch_worker(current_time);
797  }
798  }
799 
801 }
802 
803 /*
804  * Process any new interrupts.
805  */
806 static void
808 {
809  /* the normal shutdown case */
812 
814  {
815  ConfigReloadPending = false;
817 
818  /* shutdown requested in config file? */
819  if (!AutoVacuumingActive())
821 
822  /* rebalance in case the default cost parameters changed */
823  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
825  LWLockRelease(AutovacuumLock);
826 
827  /* rebuild the list in case the naptime changed */
829  }
830 
831  /* Process barrier events */
834 
835  /* Process sinval catchup interrupts that happened while sleeping */
837 }
838 
839 /*
840  * Perform a normal exit from the autovac launcher.
841  */
842 static void
844 {
845  ereport(DEBUG1,
846  (errmsg("autovacuum launcher shutting down")));
847  AutoVacuumShmem->av_launcherpid = 0;
848 
849  proc_exit(0); /* done */
850 }
851 
852 /*
853  * Determine the time to sleep, based on the database list.
854  *
855  * The "canlaunch" parameter indicates whether we can start a worker right now;
856  * it is false when, for example, all the workers are busy. If it is false, we
857  * will cause a long sleep, which will be interrupted when a worker exits.
858  */
859 static void
860 launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval *nap)
861 {
862  /*
863  * We sleep until the next scheduled vacuum. We trust that when the
864  * database list was built, care was taken so that no entries have times
865  * in the past; if the first entry has too close a next_worker value, or a
866  * time in the past, we will sleep a small nominal time.
867  */
868  if (!canlaunch)
869  {
870  nap->tv_sec = autovacuum_naptime;
871  nap->tv_usec = 0;
872  }
873  else if (!dlist_is_empty(&DatabaseList))
874  {
875  TimestampTz current_time = GetCurrentTimestamp();
876  TimestampTz next_wakeup;
877  avl_dbase *avdb;
878  long secs;
879  int usecs;
880 
881  avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
882 
883  next_wakeup = avdb->adl_next_worker;
884  TimestampDifference(current_time, next_wakeup, &secs, &usecs);
885 
886  nap->tv_sec = secs;
887  nap->tv_usec = usecs;
888  }
889  else
890  {
891  /* list is empty, sleep for whole autovacuum_naptime seconds */
892  nap->tv_sec = autovacuum_naptime;
893  nap->tv_usec = 0;
894  }
895 
896  /*
897  * If the result is exactly zero, it means a database had an entry with
898  * time in the past. Rebuild the list so that the databases are evenly
899  * distributed again, and recalculate the time to sleep. This can happen
900  * if there are more tables needing vacuum than workers, and they all take
901  * longer to vacuum than autovacuum_naptime.
902  *
903  * We only recurse once. rebuild_database_list should always return times
904  * in the future, but it seems best not to rely too much on that.
905  */
906  if (nap->tv_sec == 0 && nap->tv_usec == 0 && !recursing)
907  {
909  launcher_determine_sleep(canlaunch, true, nap);
910  return;
911  }
912 
913  /* The smallest time we'll allow the launcher to sleep. */
914  if (nap->tv_sec <= 0 && nap->tv_usec <= MIN_AUTOVAC_SLEEPTIME * 1000)
915  {
916  nap->tv_sec = 0;
917  nap->tv_usec = MIN_AUTOVAC_SLEEPTIME * 1000;
918  }
919 
920  /*
921  * If the sleep time is too large, clamp it to an arbitrary maximum (plus
922  * any fractional seconds, for simplicity). This avoids an essentially
923  * infinite sleep in strange cases like the system clock going backwards a
924  * few years.
925  */
926  if (nap->tv_sec > MAX_AUTOVAC_SLEEPTIME)
927  nap->tv_sec = MAX_AUTOVAC_SLEEPTIME;
928 }
929 
930 /*
931  * Build an updated DatabaseList. It must only contain databases that appear
932  * in pgstats, and must be sorted by next_worker from highest to lowest,
933  * distributed regularly across the next autovacuum_naptime interval.
934  *
935  * Receives the Oid of the database that made this list be generated (we call
936  * this the "new" database, because when the database was already present on
937  * the list, we expect that this function is not called at all). The
938  * preexisting list, if any, will be used to preserve the order of the
939  * databases in the autovacuum_naptime period. The new database is put at the
940  * end of the interval. The actual values are not saved, which should not be
941  * much of a problem.
942  */
943 static void
945 {
946  List *dblist;
947  ListCell *cell;
948  MemoryContext newcxt;
949  MemoryContext oldcxt;
950  MemoryContext tmpcxt;
951  HASHCTL hctl;
952  int score;
953  int nelems;
954  HTAB *dbhash;
955  dlist_iter iter;
956 
957  /* use fresh stats */
959 
960  newcxt = AllocSetContextCreate(AutovacMemCxt,
961  "AV dblist",
963  tmpcxt = AllocSetContextCreate(newcxt,
964  "tmp AV dblist",
966  oldcxt = MemoryContextSwitchTo(tmpcxt);
967 
968  /*
969  * Implementing this is not as simple as it sounds, because we need to put
970  * the new database at the end of the list; next the databases that were
971  * already on the list, and finally (at the tail of the list) all the
972  * other databases that are not on the existing list.
973  *
974  * To do this, we build an empty hash table of scored databases. We will
975  * start with the lowest score (zero) for the new database, then
976  * increasing scores for the databases in the existing list, in order, and
977  * lastly increasing scores for all databases gotten via
978  * get_database_list() that are not already on the hash.
979  *
980  * Then we will put all the hash elements into an array, sort the array by
981  * score, and finally put the array elements into the new doubly linked
982  * list.
983  */
984  hctl.keysize = sizeof(Oid);
985  hctl.entrysize = sizeof(avl_dbase);
986  hctl.hcxt = tmpcxt;
987  dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
989 
990  /* start by inserting the new database */
991  score = 0;
992  if (OidIsValid(newdb))
993  {
994  avl_dbase *db;
995  PgStat_StatDBEntry *entry;
996 
997  /* only consider this database if it has a pgstat entry */
998  entry = pgstat_fetch_stat_dbentry(newdb);
999  if (entry != NULL)
1000  {
1001  /* we assume it isn't found because the hash was just created */
1002  db = hash_search(dbhash, &newdb, HASH_ENTER, NULL);
1003 
1004  /* hash_search already filled in the key */
1005  db->adl_score = score++;
1006  /* next_worker is filled in later */
1007  }
1008  }
1009 
1010  /* Now insert the databases from the existing list */
1011  dlist_foreach(iter, &DatabaseList)
1012  {
1014  avl_dbase *db;
1015  bool found;
1016  PgStat_StatDBEntry *entry;
1017 
1018  /*
1019  * skip databases with no stat entries -- in particular, this gets rid
1020  * of dropped databases
1021  */
1022  entry = pgstat_fetch_stat_dbentry(avdb->adl_datid);
1023  if (entry == NULL)
1024  continue;
1025 
1026  db = hash_search(dbhash, &(avdb->adl_datid), HASH_ENTER, &found);
1027 
1028  if (!found)
1029  {
1030  /* hash_search already filled in the key */
1031  db->adl_score = score++;
1032  /* next_worker is filled in later */
1033  }
1034  }
1035 
1036  /* finally, insert all qualifying databases not previously inserted */
1037  dblist = get_database_list();
1038  foreach(cell, dblist)
1039  {
1040  avw_dbase *avdb = lfirst(cell);
1041  avl_dbase *db;
1042  bool found;
1043  PgStat_StatDBEntry *entry;
1044 
1045  /* only consider databases with a pgstat entry */
1046  entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
1047  if (entry == NULL)
1048  continue;
1049 
1050  db = hash_search(dbhash, &(avdb->adw_datid), HASH_ENTER, &found);
1051  /* only update the score if the database was not already on the hash */
1052  if (!found)
1053  {
1054  /* hash_search already filled in the key */
1055  db->adl_score = score++;
1056  /* next_worker is filled in later */
1057  }
1058  }
1059  nelems = score;
1060 
1061  /* from here on, the allocated memory belongs to the new list */
1062  MemoryContextSwitchTo(newcxt);
1063  dlist_init(&DatabaseList);
1064 
1065  if (nelems > 0)
1066  {
1067  TimestampTz current_time;
1068  int millis_increment;
1069  avl_dbase *dbary;
1070  avl_dbase *db;
1071  HASH_SEQ_STATUS seq;
1072  int i;
1073 
1074  /* put all the hash elements into an array */
1075  dbary = palloc(nelems * sizeof(avl_dbase));
1076 
1077  i = 0;
1078  hash_seq_init(&seq, dbhash);
1079  while ((db = hash_seq_search(&seq)) != NULL)
1080  memcpy(&(dbary[i++]), db, sizeof(avl_dbase));
1081 
1082  /* sort the array */
1083  qsort(dbary, nelems, sizeof(avl_dbase), db_comparator);
1084 
1085  /*
1086  * Determine the time interval between databases in the schedule. If
1087  * we see that the configured naptime would take us to sleep times
1088  * lower than our min sleep time (which launcher_determine_sleep is
1089  * coded not to allow), silently use a larger naptime (but don't touch
1090  * the GUC variable).
1091  */
1092  millis_increment = 1000.0 * autovacuum_naptime / nelems;
1093  if (millis_increment <= MIN_AUTOVAC_SLEEPTIME)
1094  millis_increment = MIN_AUTOVAC_SLEEPTIME * 1.1;
1095 
1096  current_time = GetCurrentTimestamp();
1097 
1098  /*
1099  * move the elements from the array into the dlist, setting the
1100  * next_worker while walking the array
1101  */
1102  for (i = 0; i < nelems; i++)
1103  {
1104  avl_dbase *db = &(dbary[i]);
1105 
1106  current_time = TimestampTzPlusMilliseconds(current_time,
1107  millis_increment);
1108  db->adl_next_worker = current_time;
1109 
1110  /* later elements should go closer to the head of the list */
1111  dlist_push_head(&DatabaseList, &db->adl_node);
1112  }
1113  }
1114 
1115  /* all done, clean up memory */
1116  if (DatabaseListCxt != NULL)
1117  MemoryContextDelete(DatabaseListCxt);
1118  MemoryContextDelete(tmpcxt);
1119  DatabaseListCxt = newcxt;
1120  MemoryContextSwitchTo(oldcxt);
1121 }
1122 
1123 /* qsort comparator for avl_dbase, using adl_score */
1124 static int
1125 db_comparator(const void *a, const void *b)
1126 {
1127  if (((const avl_dbase *) a)->adl_score == ((const avl_dbase *) b)->adl_score)
1128  return 0;
1129  else
1130  return (((const avl_dbase *) a)->adl_score < ((const avl_dbase *) b)->adl_score) ? 1 : -1;
1131 }
1132 
1133 /*
1134  * do_start_worker
1135  *
1136  * Bare-bones procedure for starting an autovacuum worker from the launcher.
1137  * It determines what database to work on, sets up shared memory stuff and
1138  * signals postmaster to start the worker. It fails gracefully if invoked when
1139  * autovacuum_workers are already active.
1140  *
1141  * Return value is the OID of the database that the worker is going to process,
1142  * or InvalidOid if no worker was actually started.
1143  */
/*
 * Candidate priority implemented by the loop below:
 *   1. databases whose adw_frozenxid precedes xidForceLimit;
 *   2. otherwise, databases whose adw_minmulti precedes multiForceLimit;
 *   3. otherwise, databases that have a pgstat entry and are not listed in
 *      DatabaseList as due within the next autovacuum_naptime.
 *
 * NOTE(review): this listing omits several source lines of this function
 * (its embedded numbering has gaps, e.g. 1144 -> 1146), so some statements
 * below appear truncated.
 */
1144 static Oid
1146 {
1147  List *dblist;
1148  ListCell *cell;
1149  TransactionId xidForceLimit;
1150  MultiXactId multiForceLimit;
1151  bool for_xid_wrap;
1152  bool for_multi_wrap;
1153  avw_dbase *avdb;
1154  TimestampTz current_time;
1155  bool skipit = false;
1156  Oid retval = InvalidOid;
1157  MemoryContext tmpcxt,
1158  oldcxt;
1159 
1160  /* return quickly when there are no free workers */
1161  LWLockAcquire(AutovacuumLock, LW_SHARED);
1162  if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
1163  {
1164  LWLockRelease(AutovacuumLock);
1165  return InvalidOid;
1166  }
1167  LWLockRelease(AutovacuumLock);
1168 
1169  /*
1170  * Create and switch to a temporary context to avoid leaking the memory
1171  * allocated for the database list.
1172  */
1174  "Start worker tmp cxt",
1176  oldcxt = MemoryContextSwitchTo(tmpcxt);
1177 
1178  /* use fresh stats */
1180 
1181  /* Get a list of databases */
1182  dblist = get_database_list();
1183 
1184  /*
1185  * Determine the oldest datfrozenxid/relfrozenxid that we will allow to
1186  * pass without forcing a vacuum. (This limit can be tightened for
1187  * particular tables, but not loosened.)
1188  */
1190  xidForceLimit = recentXid - autovacuum_freeze_max_age;
1191  /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
1192  /* this can cause the limit to go backwards by 3, but that's OK */
 /* presumably relies on unsigned wraparound of TransactionId -- confirm */
1193  if (xidForceLimit < FirstNormalTransactionId)
1194  xidForceLimit -= FirstNormalTransactionId;
1195 
1196  /* Also determine the oldest datminmxid we will consider. */
1198  multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
 /* same kind of adjustment as for xidForceLimit above */
1199  if (multiForceLimit < FirstMultiXactId)
1200  multiForceLimit -= FirstMultiXactId;
1201 
1202  /*
1203  * Choose a database to connect to. We pick the database that was least
1204  * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
1205  * wraparound-related data loss. If any db at risk of Xid wraparound is
1206  * found, we pick the one with oldest datfrozenxid, independently of
1207  * autovacuum times; similarly we pick the one with the oldest datminmxid
1208  * if any is in MultiXactId wraparound. Note that those in Xid wraparound
1209  * danger are given more priority than those in multi wraparound danger.
1210  *
1211  * Note that a database with no stats entry is not considered, except for
1212  * Xid wraparound purposes. The theory is that if no one has ever
1213  * connected to it since the stats were last initialized, it doesn't need
1214  * vacuuming.
1215  *
1216  * XXX This could be improved if we had more info about whether it needs
1217  * vacuuming before connecting to it. Perhaps look through the pgstats
1218  * data for the database's tables? One idea is to keep track of the
1219  * number of new and dead tuples per database in pgstats. However it
1220  * isn't clear how to construct a metric that measures that and not cause
1221  * starvation for less busy databases.
1222  */
1223  avdb = NULL;
1224  for_xid_wrap = false;
1225  for_multi_wrap = false;
1226  current_time = GetCurrentTimestamp();
1227  foreach(cell, dblist)
1228  {
1229  avw_dbase *tmp = lfirst(cell);
1230  dlist_iter iter;
1231 
1232  /* Check to see if this one is at risk of wraparound */
1233  if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1234  {
1235  if (avdb == NULL ||
1237  avdb->adw_frozenxid))
1238  avdb = tmp;
 /* from now on, databases not at risk of Xid wraparound are ignored */
1239  for_xid_wrap = true;
1240  continue;
1241  }
1242  else if (for_xid_wrap)
1243  continue; /* ignore not-at-risk DBs */
1244  else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1245  {
1246  if (avdb == NULL ||
1248  avdb = tmp;
 /* from now on, only wraparound-endangered databases are considered */
1249  for_multi_wrap = true;
1250  continue;
1251  }
1252  else if (for_multi_wrap)
1253  continue; /* ignore not-at-risk DBs */
1254 
1255  /* Find pgstat entry if any */
1257 
1258  /*
1259  * Skip a database with no pgstat entry; it means it hasn't seen any
1260  * activity.
1261  */
1262  if (!tmp->adw_entry)
1263  continue;
1264 
1265  /*
1266  * Also, skip a database that appears on the database list as having
1267  * been processed recently (less than autovacuum_naptime seconds ago).
1268  * We do this so that we don't select a database which we just
1269  * selected, but that pgstat hasn't gotten around to updating the last
1270  * autovacuum time yet.
1271  */
 /*
  * skipit is reset for every database that reaches this point; the value
  * left over after the loop is what the "else if (skipit)" branch further
  * down tests.
  */
1272  skipit = false;
1273 
1274  dlist_reverse_foreach(iter, &DatabaseList)
1275  {
1277 
1278  if (dbp->adl_datid == tmp->adw_datid)
1279  {
1280  /*
1281  * Skip this database if its next_worker value falls between
1282  * the current time and the current time plus naptime.
1283  */
1285  current_time, 0) &&
1286  !TimestampDifferenceExceeds(current_time,
1287  dbp->adl_next_worker,
1288  autovacuum_naptime * 1000))
1289  skipit = true;
1290 
 /* matching list entry found; stop scanning DatabaseList */
1291  break;
1292  }
1293  }
1294  if (skipit)
1295  continue;
1296 
1297  /*
1298  * Remember the db with oldest autovac time. (If we are here, both
1299  * tmp->entry and db->entry must be non-null.)
1300  */
1301  if (avdb == NULL ||
1303  avdb = tmp;
1304  }
1305 
1306  /* Found a database -- process it */
1307  if (avdb != NULL)
1308  {
1309  WorkerInfo worker;
1310  dlist_node *wptr;
1311 
1312  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1313 
1314  /*
1315  * Get a worker entry from the freelist. We checked above, so there
1316  * really should be a free slot.
1317  */
1318  wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
1319 
1320  worker = dlist_container(WorkerInfoData, wi_links, wptr);
1321  worker->wi_dboid = avdb->adw_datid;
 /* wi_proc is filled in by the worker itself in AutoVacWorkerMain */
1322  worker->wi_proc = NULL;
1323  worker->wi_launchtime = GetCurrentTimestamp();
1324 
 /* publish the slot so the new worker process can pick it up */
1325  AutoVacuumShmem->av_startingWorker = worker;
1326 
1327  LWLockRelease(AutovacuumLock);
1328 
1330 
1331  retval = avdb->adw_datid;
1332  }
1333  else if (skipit)
1334  {
1335  /*
1336  * If we skipped all databases on the list, rebuild it, because it
1337  * probably contains a dropped database.
1338  */
1340  }
1341 
 /* drop everything allocated while choosing a database */
1342  MemoryContextSwitchTo(oldcxt);
1343  MemoryContextDelete(tmpcxt);
1344 
1345  return retval;
1346 }
1347 
1348 /*
1349  * launch_worker
1350  *
1351  * Wrapper for starting a worker from the launcher. Besides actually starting
1352  * it, update the database list to reflect the next time that another one will
1353  * need to be started on the selected database. The actual database choice is
1354  * left to do_start_worker.
1355  *
1356  * This routine is also expected to insert an entry into the database list if
1357  * the selected database was previously absent from the list.
1358  */
/*
 * NOTE(review): this listing omits some source lines of this function (its
 * embedded numbering has gaps, e.g. 1359 -> 1361), so a few statements below
 * appear truncated.
 */
1359 static void
1361 {
1362  Oid dbid;
1363  dlist_iter iter;
1364 
 /* InvalidOid means no worker was started, so there is nothing to update */
1365  dbid = do_start_worker();
1366  if (OidIsValid(dbid))
1367  {
1368  bool found = false;
1369 
1370  /*
1371  * Walk the database list and update the corresponding entry. If the
1372  * database is not on the list, we'll recreate the list.
1373  */
1374  dlist_foreach(iter, &DatabaseList)
1375  {
1377 
1378  if (avdb->adl_datid == dbid)
1379  {
1380  found = true;
1381 
1382  /*
1383  * add autovacuum_naptime seconds to the current time, and use
1384  * that as the new "next_worker" field for this database.
1385  */
1386  avdb->adl_next_worker =
1388 
 /* the entry just rescheduled goes to the head of DatabaseList */
1389  dlist_move_head(&DatabaseList, iter.cur);
1390  break;
1391  }
1392  }
1393 
1394  /*
1395  * If the database was not present in the database list, we rebuild
1396  * the list. It's possible that the database does not get into the
1397  * list anyway, for example if it's a database that doesn't have a
1398  * pgstat entry, but this is not a problem because we don't want to
1399  * schedule workers regularly into those in any case.
1400  */
1401  if (!found)
1402  rebuild_database_list(dbid);
1403  }
1404 }
1405 
1406 /*
1407  * Called from postmaster to signal a failure to fork a process to become
1408  * worker. The postmaster should kill(SIGUSR2) the launcher shortly
1409  * after calling this function.
1410  */
/*
 * The function only raises the AutoVacForkFailed flag in the autovacuum
 * shared memory area; no lock is acquired here.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded numbering jumps 1411 -> 1413).
 */
1411 void
1413 {
1414  AutoVacuumShmem->av_signal[AutoVacForkFailed] = true;
1415 }
1416 
1417 /* SIGUSR2: a worker is up and running, or just finished, or failed to fork */
/*
 * The handler only records that the signal arrived (got_SIGUSR2) and sets
 * MyLatch.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded numbering jumps 1418 -> 1420).
 */
1418 static void
1420 {
 /* errno is saved here and restored on exit */
1421  int save_errno = errno;
1422 
1423  got_SIGUSR2 = true;
1424  SetLatch(MyLatch);
1425 
1426  errno = save_errno;
1427 }
1428 
1429 
1430 /********************************************************************
1431  * AUTOVACUUM WORKER CODE
1432  ********************************************************************/
1433 
1434 #ifdef EXEC_BACKEND
/*
 * avworker_forkexec
 *		EXEC_BACKEND launch path for the autovacuum worker.
 *
 * Assemble the child's argument vector and hand it to postmaster_forkexec(),
 * returning whatever that call returns.
 */
static pid_t
avworker_forkexec(void)
{
	char	   *child_argv[10];
	int			nargs = 0;

	child_argv[nargs++] = "postgres";
	child_argv[nargs++] = "--forkavworker";
	child_argv[nargs++] = NULL; /* filled in by postmaster_forkexec */

	/* terminate the vector; the terminator is not counted in nargs */
	child_argv[nargs] = NULL;

	Assert(nargs < lengthof(child_argv));

	return postmaster_forkexec(nargs, child_argv);
}
1455 
/*
 * AutovacuumWorkerIAm
 *		Mark the current process as an autovacuum worker by setting
 *		am_autovacuum_worker.
 */
1456 /*
1457  * We need this set from the outside, before InitProcess is called
1458  */
1459 void
1460 AutovacuumWorkerIAm(void)
1461 {
1462  am_autovacuum_worker = true;
1463 }
1464 #endif
1465 
1466 /*
1467  * Main entry point for autovacuum worker process.
1468  *
1469  * This code is heavily based on pgarch.c, q.v.
1470  */
/*
 * Returns 0 when the fork fails (after logging the failure) and the child's
 * PID, cast to int, in the parent.
 *
 * NOTE(review): this listing omits some source lines of this function (its
 * embedded numbering has gaps, e.g. 1471 -> 1473 and 1489 -> 1491).
 */
1471 int
1473 {
1474  pid_t worker_pid;
1475 
1476 #ifdef EXEC_BACKEND
1477  switch ((worker_pid = avworker_forkexec()))
1478 #else
1479  switch ((worker_pid = fork_process()))
1480 #endif
1481  {
1482  case -1:
1483  ereport(LOG,
1484  (errmsg("could not fork autovacuum worker process: %m")));
1485  return 0;
1486 
1487 #ifndef EXEC_BACKEND
1488  case 0:
1489  /* in postmaster child ... */
1491 
1492  /* Close the postmaster's sockets */
1493  ClosePostmasterPorts(false);
1494 
 /* AutoVacWorkerMain ends in proc_exit(), so the break is a formality */
1495  AutoVacWorkerMain(0, NULL);
1496  break;
1497 #endif
1498  default:
1499  return (int) worker_pid;
1500  }
1501 
1502  /* shouldn't get here */
1503  return 0;
1504 }
1505 
1506 /*
1507  * AutoVacWorkerMain
1508  */
/*
 * Body of an autovacuum worker process: set up signal handling and error
 * recovery, force a handful of GUCs to safe values, claim the WorkerInfo
 * slot published in av_startingWorker, connect to the database named there
 * and run do_autovacuum().  Both the error-recovery branch and the normal
 * path end in proc_exit(0).
 *
 * argc and argv are not referenced in the lines visible here.
 *
 * NOTE(review): this listing omits a number of source lines of this function
 * (its embedded numbering has gaps, e.g. 1515 -> 1518 and 1538 -> 1544), so
 * some statements below appear truncated.
 */
1509 NON_EXEC_STATIC void
1510 AutoVacWorkerMain(int argc, char *argv[])
1511 {
1512  sigjmp_buf local_sigjmp_buf;
1513  Oid dbid;
1514 
1515  am_autovacuum_worker = true;
1516 
1518  init_ps_display(NULL);
1519 
1521 
1522  /*
1523  * Set up signal handlers. We operate on databases much like a regular
1524  * backend, so we use the same signal handling. See equivalent code in
1525  * tcop/postgres.c.
1526  */
1528 
1529  /*
1530  * SIGINT is used to signal canceling the current table's vacuum; SIGTERM
1531  * means abort and exit cleanly, and SIGQUIT means abandon ship.
1532  */
1534  pqsignal(SIGTERM, die);
1535  /* SIGQUIT handler was already set up by InitPostmasterChild */
1536 
1537  InitializeTimeouts(); /* establishes SIGALRM handler */
1538 
1544 
1545  /* Early initialization */
1546  BaseInit();
1547 
1548  /*
1549  * Create a per-backend PGPROC struct in shared memory, except in the
1550  * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
1551  * this before we can use LWLocks (and in the EXEC_BACKEND case we already
1552  * had to do some stuff with LWLocks).
1553  */
1554 #ifndef EXEC_BACKEND
1555  InitProcess();
1556 #endif
1557 
1558  /*
1559  * If an exception is encountered, processing resumes here.
1560  *
1561  * Unlike most auxiliary processes, we don't attempt to continue
1562  * processing after an error; we just clean up and exit. The autovac
1563  * launcher is responsible for spawning another worker later.
1564  *
1565  * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask
1566  * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus,
1567  * signals other than SIGQUIT will be blocked until we exit. It might
1568  * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but
1569  * it is not since InterruptPending might be set already.
1570  */
1571  if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1572  {
1573  /* since not using PG_TRY, must reset error stack by hand */
1574  error_context_stack = NULL;
1575 
1576  /* Prevents interrupts while cleaning up */
1577  HOLD_INTERRUPTS();
1578 
1579  /* Report the error to the server log */
1580  EmitErrorReport();
1581 
1582  /*
1583  * We can now go away. Note that because we called InitProcess, a
1584  * callback was registered to do ProcKill, which will clean up
1585  * necessary state.
1586  */
1587  proc_exit(0);
1588  }
1589 
1590  /* We can now handle ereport(ERROR) */
1591  PG_exception_stack = &local_sigjmp_buf;
1592 
1594 
1595  /*
1596  * Set always-secure search path, so malicious users can't redirect user
1597  * code (e.g. pg_index.indexprs). (That code runs in a
1598  * SECURITY_RESTRICTED_OPERATION sandbox, so malicious users could not
1599  * take control of the entire autovacuum worker in any case.)
1600  */
1601  SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1602 
1603  /*
1604  * Force zero_damaged_pages OFF in the autovac process, even if it is set
1605  * in postgresql.conf. We don't really want such a dangerous option being
1606  * applied non-interactively.
1607  */
1608  SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
1609 
1610  /*
1611  * Force settable timeouts off to avoid letting these settings prevent
1612  * regular maintenance from being executed.
1613  */
1614  SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1615  SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1616  SetConfigOption("idle_in_transaction_session_timeout", "0",
1618 
1619  /*
1620  * Force default_transaction_isolation to READ COMMITTED. We don't want
1621  * to pay the overhead of serializable mode, nor add any risk of causing
1622  * deadlocks or delaying other transactions.
1623  */
1624  SetConfigOption("default_transaction_isolation", "read committed",
1626 
1627  /*
1628  * Force synchronous replication off to allow regular maintenance even if
1629  * we are waiting for standbys to connect. This is important to ensure we
1630  * aren't blocked from performing anti-wraparound tasks.
1631  */
1633  SetConfigOption("synchronous_commit", "local",
1635 
1636  /*
1637  * Get the info about the database we're going to work on.
1638  */
1639  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1640 
1641  /*
1642  * beware of startingWorker being INVALID; this should normally not
1643  * happen, but if a worker fails after forking and before this, the
1644  * launcher might have decided to remove it from the queue and start
1645  * again.
1646  */
1647  if (AutoVacuumShmem->av_startingWorker != NULL)
1648  {
 /* claim the slot the launcher prepared in do_start_worker */
1649  MyWorkerInfo = AutoVacuumShmem->av_startingWorker;
1650  dbid = MyWorkerInfo->wi_dboid;
1651  MyWorkerInfo->wi_proc = MyProc;
1652 
1653  /* insert into the running list */
1654  dlist_push_head(&AutoVacuumShmem->av_runningWorkers,
1655  &MyWorkerInfo->wi_links);
1656 
1657  /*
1658  * remove from the "starting" pointer, so that the launcher can start
1659  * a new worker if required
1660  */
1661  AutoVacuumShmem->av_startingWorker = NULL;
1662  LWLockRelease(AutovacuumLock);
1663 
1665 
1666  /* wake up the launcher */
 /* (the signal is sent only after AutovacuumLock has been released) */
1667  if (AutoVacuumShmem->av_launcherpid != 0)
1668  kill(AutoVacuumShmem->av_launcherpid, SIGUSR2);
1669  }
1670  else
1671  {
1672  /* no worker entry for me, go away */
1673  elog(WARNING, "autovacuum worker started without a worker entry");
 /* an invalid dbid makes the database-processing section below a no-op */
1674  dbid = InvalidOid;
1675  LWLockRelease(AutovacuumLock);
1676  }
1677 
1678  if (OidIsValid(dbid))
1679  {
1680  char dbname[NAMEDATALEN];
1681 
1682  /*
1683  * Report autovac startup to the stats collector. We deliberately do
1684  * this before InitPostgres, so that the last_autovac_time will get
1685  * updated even if the connection attempt fails. This is to prevent
1686  * autovac from getting "stuck" repeatedly selecting an unopenable
1687  * database, rather than making any progress on stuff it can connect
1688  * to.
1689  */
1690  pgstat_report_autovac(dbid);
1691 
1692  /*
1693  * Connect to the selected database
1694  *
1695  * Note: if we have selected a just-deleted database (due to using
1696  * stale stats info), we'll fail and exit here.
1697  */
1698  InitPostgres(NULL, dbid, NULL, InvalidOid, dbname, false);
1700  set_ps_display(dbname);
1701  ereport(DEBUG1,
1702  (errmsg("autovacuum: processing database \"%s\"", dbname)));
1703 
 /* presumably PostAuthDelay is in seconds, scaled to microseconds -- confirm */
1704  if (PostAuthDelay)
1705  pg_usleep(PostAuthDelay * 1000000L);
1706 
1707  /* And do an appropriate amount of work */
1710  do_autovacuum();
1711  }
1712 
1713  /*
1714  * The launcher will be notified of my death in ProcKill, *if* we managed
1715  * to get a worker slot at all
1716  */
1717 
1718  /* All done, go away */
1719  proc_exit(0);
1720 }
1721 
1722 /*
1723  * Return a WorkerInfo to the free list
1724  */
/*
 * Does nothing when MyWorkerInfo is NULL.  Otherwise, under AutovacuumLock
 * held exclusively, the entry is unlinked from the list it is on, its fields
 * are reset, it is pushed onto av_freeWorkers, and the AutoVacRebalance flag
 * is raised.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded numbering jumps 1725 -> 1727).
 */
1725 static void
1727 {
1728  if (MyWorkerInfo != NULL)
1729  {
1730  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1731 
1732  /*
1733  * Wake the launcher up so that it can launch a new worker immediately
1734  * if required. We only save the launcher's PID in local memory here;
1735  * the actual signal will be sent when the PGPROC is recycled. Note
1736  * that we always do this, so that the launcher can rebalance the cost
1737  * limit setting of the remaining workers.
1738  *
1739  * We somewhat ignore the risk that the launcher changes its PID
1740  * between us reading it and the actual kill; we expect ProcKill to be
1741  * called shortly after us, and we assume that PIDs are not reused too
1742  * quickly after a process exits.
1743  */
1744  AutovacuumLauncherPid = AutoVacuumShmem->av_launcherpid;
1745 
 /* unlink the entry, then reset every field before it is reused */
1746  dlist_delete(&MyWorkerInfo->wi_links);
1747  MyWorkerInfo->wi_dboid = InvalidOid;
1748  MyWorkerInfo->wi_tableoid = InvalidOid;
1749  MyWorkerInfo->wi_sharedrel = false;
1750  MyWorkerInfo->wi_proc = NULL;
1751  MyWorkerInfo->wi_launchtime = 0;
1752  MyWorkerInfo->wi_dobalance = false;
1753  MyWorkerInfo->wi_cost_delay = 0;
1754  MyWorkerInfo->wi_cost_limit = 0;
1755  MyWorkerInfo->wi_cost_limit_base = 0;
1756  dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
1757  &MyWorkerInfo->wi_links);
1758  /* not mine anymore */
1759  MyWorkerInfo = NULL;
1760 
1761  /*
1762  * now that we're inactive, cause a rebalancing of the surviving
1763  * workers
1764  */
1765  AutoVacuumShmem->av_signal[AutoVacRebalance] = true;
1766  LWLockRelease(AutovacuumLock);
1767  }
1768 }
1769 
1770 /*
1771  * Update the cost-based delay parameters, so that multiple workers consume
1772  * each a fraction of the total available I/O.
1773  */
/*
 * Copies this worker's wi_cost_delay and wi_cost_limit into VacuumCostDelay
 * and VacuumCostLimit.  Does nothing when MyWorkerInfo is not set.  No lock
 * is acquired in this function.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing (embedded numbering jumps 1774 -> 1776).
 */
1774 void
1776 {
1777  if (MyWorkerInfo)
1778  {
1779  VacuumCostDelay = MyWorkerInfo->wi_cost_delay;
1780  VacuumCostLimit = MyWorkerInfo->wi_cost_limit;
1781  }
1782 }
1783 
1784 /*
1785  * autovac_balance_cost
1786  * Recalculate the cost limit setting for each active worker.
1787  *
1788  * Caller must hold the AutovacuumLock in exclusive mode.
1789  */
/*
 * Works in two passes over av_runningWorkers: the first sums
 * wi_cost_limit_base / wi_cost_delay over the participating workers, the
 * second assigns each participating worker a wi_cost_limit proportional to
 * its base limit.  A worker participates when it has a wi_proc, has
 * wi_dobalance set, and has positive wi_cost_limit_base and wi_cost_delay.
 *
 * NOTE(review): this listing omits some source lines of this function (its
 * embedded numbering has gaps, e.g. 1790 -> 1792 and 1801 -> 1803), so the
 * two initializers below appear truncated.
 */
1790 static void
1792 {
1793  /*
1794  * The idea here is that we ration out I/O equally. The amount of I/O
1795  * that a worker can consume is determined by cost_limit/cost_delay, so we
1796  * try to equalize those ratios rather than the raw limit settings.
1797  *
1798  * note: in cost_limit, zero also means use value from elsewhere, because
1799  * zero is not a valid value.
1800  */
1801  int vac_cost_limit = (autovacuum_vac_cost_limit > 0 ?
1803  double vac_cost_delay = (autovacuum_vac_cost_delay >= 0 ?
1805  double cost_total;
1806  double cost_avail;
1807  dlist_iter iter;
1808 
1809  /* not set? nothing to do */
1810  if (vac_cost_limit <= 0 || vac_cost_delay <= 0)
1811  return;
1812 
1813  /* calculate the total base cost limit of participating active workers */
1814  cost_total = 0.0;
1815  dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1816  {
1817  WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1818 
1819  if (worker->wi_proc != NULL &&
1820  worker->wi_dobalance &&
1821  worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1822  cost_total +=
1823  (double) worker->wi_cost_limit_base / worker->wi_cost_delay;
1824  }
1825 
1826  /* there are no cost limits -- nothing to do */
1827  if (cost_total <= 0)
1828  return;
1829 
1830  /*
1831  * Adjust cost limit of each active worker to balance the total of cost
1832  * limit to autovacuum_vacuum_cost_limit.
1833  */
1834  cost_avail = (double) vac_cost_limit / vac_cost_delay;
1835  dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1836  {
1837  WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1838 
 /* same participation test as in the summing pass above */
1839  if (worker->wi_proc != NULL &&
1840  worker->wi_dobalance &&
1841  worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1842  {
1843  int limit = (int)
1844  (cost_avail * worker->wi_cost_limit_base / cost_total);
1845 
1846  /*
1847  * We put a lower bound of 1 on the cost_limit, to avoid division-
1848  * by-zero in the vacuum code. Also, in case of roundoff trouble
1849  * in these calculations, let's be sure we don't ever set
1850  * cost_limit to more than the base value.
1851  */
1852  worker->wi_cost_limit = Max(Min(limit,
1853  worker->wi_cost_limit_base),
1854  1);
1855  }
1856 
 /*
  * Every worker that has a wi_proc is logged, whether or not it took part
  * in the balancing.
  * NOTE(review): pid is printed with %u -- confirm this matches the
  * declared type of wi_proc->pid.
  */
1857  if (worker->wi_proc != NULL)
1858  elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, dobalance=%s cost_limit=%d, cost_limit_base=%d, cost_delay=%g)",
1859  worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid,
1860  worker->wi_dobalance ? "yes" : "no",
1861  worker->wi_cost_limit, worker->wi_cost_limit_base,
1862  worker->wi_cost_delay);
1863  }
1864 }
1865 
1866 /*
1867  * get_database_list
1868  * Return a list of all databases found in pg_database.
1869  *
1870  * The list and associated data is allocated in the caller's memory context,
1871  * which is in charge of ensuring that it's properly cleaned up afterwards.
1872  *
1873  * Note: this is the only function in which the autovacuum launcher uses a
1874  * transaction. Although we aren't attached to any particular database and
1875  * therefore can't access most catalogs, we do have enough infrastructure
1876  * to do a seqscan on pg_database.
1877  */
/*
 * Each list element is a palloc'd avw_dbase holding the database OID, a copy
 * of its name, datfrozenxid and datminmxid; adw_entry is left NULL here.
 *
 * NOTE(review): this listing omits some source lines of this function (its
 * embedded numbering has gaps, e.g. 1878 -> 1880, 1900 -> 1902 and
 * 1934 -> 1936 -> 1938).
 */
1878 static List *
1880 {
1881  List *dblist = NIL;
1882  Relation rel;
1883  TableScanDesc scan;
1884  HeapTuple tup;
1885  MemoryContext resultcxt;
1886 
1887  /* This is the context that we will allocate our output data in */
1888  resultcxt = CurrentMemoryContext;
1889 
1890  /*
1891  * Start a transaction so we can access pg_database, and get a snapshot.
1892  * We don't have a use for the snapshot itself, but we're interested in
1893  * the secondary effect that it sets RecentGlobalXmin. (This is critical
1894  * for anything that reads heap pages, because HOT may decide to prune
1895  * them even if the process doesn't attempt to modify any tuples.)
1896  *
1897  * FIXME: This comment is inaccurate / the code buggy. A snapshot that is
1898  * not pushed/active does not reliably prevent HOT pruning (->xmin could
1899  * e.g. be cleared when cache invalidations are processed).
1900  */
1902  (void) GetTransactionSnapshot();
1903 
1904  rel = table_open(DatabaseRelationId, AccessShareLock);
1905  scan = table_beginscan_catalog(rel, 0, NULL);
1906 
1907  while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
1908  {
1909  Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);
1910  avw_dbase *avdb;
1911  MemoryContext oldcxt;
1912 
1913  /*
1914  * Allocate our results in the caller's context, not the
1915  * transaction's. We do this inside the loop, and restore the original
1916  * context at the end, so that leaky things like heap_getnext() are
1917  * not called in a potentially long-lived context.
1918  */
1919  oldcxt = MemoryContextSwitchTo(resultcxt);
1920 
1921  avdb = (avw_dbase *) palloc(sizeof(avw_dbase));
1922 
1923  avdb->adw_datid = pgdatabase->oid;
 /* the name is copied with pstrdup, not pointed into the scanned tuple */
1924  avdb->adw_name = pstrdup(NameStr(pgdatabase->datname));
1925  avdb->adw_frozenxid = pgdatabase->datfrozenxid;
1926  avdb->adw_minmulti = pgdatabase->datminmxid;
1927  /* this gets set later: */
1928  avdb->adw_entry = NULL;
1929 
1930  dblist = lappend(dblist, avdb);
1931  MemoryContextSwitchTo(oldcxt);
1932  }
1933 
1934  table_endscan(scan);
1936 
1938 
1939  return dblist;
1940 }
1941 
1942 /*
1943  * Process a database table-by-table
1944  *
1945  * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
1946  * order not to ignore shutdown commands for too long.
1947  */
1948 static void
1950 {
1951  Relation classRel;
1952  HeapTuple tuple;
1953  TableScanDesc relScan;
1954  Form_pg_database dbForm;
1955  List *table_oids = NIL;
1956  List *orphan_oids = NIL;
1957  HASHCTL ctl;
1958  HTAB *table_toast_map;
1959  ListCell *volatile cell;
1960  PgStat_StatDBEntry *shared;
1961  PgStat_StatDBEntry *dbentry;
1962  BufferAccessStrategy bstrategy;
1963  ScanKeyData key;
1964  TupleDesc pg_class_desc;
1965  int effective_multixact_freeze_max_age;
1966  bool did_vacuum = false;
1967  bool found_concurrent_worker = false;
1968  int i;
1969 
1970  /*
1971  * StartTransactionCommand and CommitTransactionCommand will automatically
1972  * switch to other contexts. We need this one to keep the list of
1973  * relations to vacuum/analyze across transactions.
1974  */
1975  AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
1976  "AV worker",
1978  MemoryContextSwitchTo(AutovacMemCxt);
1979 
1980  /*
1981  * may be NULL if we couldn't find an entry (only happens if we are
1982  * forcing a vacuum for anti-wrap purposes).
1983  */
1985 
1986  /* Start a transaction so our commands have one to play into. */
1988 
1989  /*
1990  * Clean up any dead statistics collector entries for this DB. We always
1991  * want to do this exactly once per DB-processing cycle, even if we find
1992  * nothing worth vacuuming in the database.
1993  */
1995 
1996  /*
1997  * Compute the multixact age for which freezing is urgent. This is
1998  * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1999  * short of multixact member space.
2000  */
2001  effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
2002 
2003  /*
2004  * Find the pg_database entry and select the default freeze ages. We use
2005  * zero in template and nonconnectable databases, else the system-wide
2006  * default.
2007  */
2009  if (!HeapTupleIsValid(tuple))
2010  elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
2011  dbForm = (Form_pg_database) GETSTRUCT(tuple);
2012 
2013  if (dbForm->datistemplate || !dbForm->datallowconn)
2014  {
2019  }
2020  else
2021  {
2026  }
2027 
2028  ReleaseSysCache(tuple);
2029 
2030  /* StartTransactionCommand changed elsewhere */
2031  MemoryContextSwitchTo(AutovacMemCxt);
2032 
2033  /* The database hash where pgstat keeps shared relations */
2035 
2036  classRel = table_open(RelationRelationId, AccessShareLock);
2037 
2038  /* create a copy so we can use it after closing pg_class */
2039  pg_class_desc = CreateTupleDescCopy(RelationGetDescr(classRel));
2040 
2041  /* create hash table for toast <-> main relid mapping */
2042  MemSet(&ctl, 0, sizeof(ctl));
2043  ctl.keysize = sizeof(Oid);
2044  ctl.entrysize = sizeof(av_relation);
2045 
2046  table_toast_map = hash_create("TOAST to main relid map",
2047  100,
2048  &ctl,
2049  HASH_ELEM | HASH_BLOBS);
2050 
2051  /*
2052  * Scan pg_class to determine which tables to vacuum.
2053  *
2054  * We do this in two passes: on the first one we collect the list of plain
2055  * relations and materialized views, and on the second one we collect
2056  * TOAST tables. The reason for doing the second pass is that during it we
2057  * want to use the main relation's pg_class.reloptions entry if the TOAST
2058  * table does not have any, and we cannot obtain it unless we know
2059  * beforehand what's the main table OID.
2060  *
2061  * We need to check TOAST tables separately because in cases with short,
2062  * wide tables there might be proportionally much more activity in the
2063  * TOAST table than in its parent.
2064  */
2065  relScan = table_beginscan_catalog(classRel, 0, NULL);
2066 
2067  /*
2068  * On the first pass, we collect main tables to vacuum, and also the main
2069  * table relid to TOAST relid mapping.
2070  */
2071  while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2072  {
2073  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2074  PgStat_StatTabEntry *tabentry;
2075  AutoVacOpts *relopts;
2076  Oid relid;
2077  bool dovacuum;
2078  bool doanalyze;
2079  bool wraparound;
2080 
2081  if (classForm->relkind != RELKIND_RELATION &&
2082  classForm->relkind != RELKIND_MATVIEW)
2083  continue;
2084 
2085  relid = classForm->oid;
2086 
2087  /*
2088  * Check if it is a temp table (presumably, of some other backend's).
2089  * We cannot safely process other backends' temp tables.
2090  */
2091  if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2092  {
2093  /*
2094  * We just ignore it if the owning backend is still active and
2095  * using the temporary schema. Also, for safety, ignore it if the
2096  * namespace doesn't exist or isn't a temp namespace after all.
2097  */
2098  if (checkTempNamespaceStatus(classForm->relnamespace) == TEMP_NAMESPACE_IDLE)
2099  {
2100  /*
2101  * The table seems to be orphaned -- although it might be that
2102  * the owning backend has already deleted it and exited; our
2103  * pg_class scan snapshot is not necessarily up-to-date
2104  * anymore, so we could be looking at a committed-dead entry.
2105  * Remember it so we can try to delete it later.
2106  */
2107  orphan_oids = lappend_oid(orphan_oids, relid);
2108  }
2109  continue;
2110  }
2111 
2112  /* Fetch reloptions and the pgstat entry for this table */
2113  relopts = extract_autovac_opts(tuple, pg_class_desc);
2114  tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2115  shared, dbentry);
2116 
2117  /* Check if it needs vacuum or analyze */
2118  relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2119  effective_multixact_freeze_max_age,
2120  &dovacuum, &doanalyze, &wraparound);
2121 
2122  /* Relations that need work are added to table_oids */
2123  if (dovacuum || doanalyze)
2124  table_oids = lappend_oid(table_oids, relid);
2125 
2126  /*
2127  * Remember TOAST associations for the second pass. Note: we must do
2128  * this whether or not the table is going to be vacuumed, because we
2129  * don't automatically vacuum toast tables along with the parent table.
2130  */
2131  if (OidIsValid(classForm->reltoastrelid))
2132  {
2133  av_relation *hentry;
2134  bool found;
2135 
2136  hentry = hash_search(table_toast_map,
2137  &classForm->reltoastrelid,
2138  HASH_ENTER, &found);
2139 
2140  if (!found)
2141  {
2142  /* hash_search already filled in the key */
2143  hentry->ar_relid = relid;
2144  hentry->ar_hasrelopts = false;
2145  if (relopts != NULL)
2146  {
2147  hentry->ar_hasrelopts = true;
2148  memcpy(&hentry->ar_reloptions, relopts,
2149  sizeof(AutoVacOpts));
2150  }
2151  }
2152  }
2153  }
2154 
2155  table_endscan(relScan);
2156 
2157  /* second pass: check TOAST tables */
2158  ScanKeyInit(&key,
2159  Anum_pg_class_relkind,
2160  BTEqualStrategyNumber, F_CHAREQ,
2161  CharGetDatum(RELKIND_TOASTVALUE));
2162 
2163  relScan = table_beginscan_catalog(classRel, 1, &key);
2164  while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2165  {
2166  Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2167  PgStat_StatTabEntry *tabentry;
2168  Oid relid;
2169  AutoVacOpts *relopts = NULL;
2170  bool dovacuum;
2171  bool doanalyze;
2172  bool wraparound;
2173 
2174  /*
2175  * We cannot safely process other backends' temp tables, so skip 'em.
2176  */
2177  if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2178  continue;
2179 
2180  relid = classForm->oid;
2181 
2182  /*
2183  * fetch reloptions -- if this toast table does not have them, try the
2184  * main rel
2185  */
2186  relopts = extract_autovac_opts(tuple, pg_class_desc);
2187  if (relopts == NULL)
2188  {
2189  av_relation *hentry;
2190  bool found;
2191 
2192  hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2193  if (found && hentry->ar_hasrelopts)
2194  relopts = &hentry->ar_reloptions;
2195  }
2196 
2197  /* Fetch the pgstat entry for this table */
2198  tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2199  shared, dbentry);
2200 
2201  relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2202  effective_multixact_freeze_max_age,
2203  &dovacuum, &doanalyze, &wraparound);
2204 
2205  /* ignore analyze for toast tables */
2206  if (dovacuum)
2207  table_oids = lappend_oid(table_oids, relid);
2208  }
2209 
2210  table_endscan(relScan);
2211  table_close(classRel, AccessShareLock);
2212 
2213  /*
2214  * Recheck orphan temporary tables, and if they still seem orphaned, drop
2215  * them. We'll eat a transaction per dropped table, which might seem
2216  * excessive, but we should only need to do anything as a result of a
2217  * previous backend crash, so this should not happen often enough to
2218  * justify "optimizing". Using separate transactions ensures that we
2219  * don't bloat the lock table if there are many temp tables to be dropped,
2220  * and it ensures that we don't lose work if a deletion attempt fails.
2221  */
2222  foreach(cell, orphan_oids)
2223  {
2224  Oid relid = lfirst_oid(cell);
2225  Form_pg_class classForm;
2226  ObjectAddress object;
2227 
2228  /*
2229  * Check for user-requested abort.
2230  */
2232 
2233  /*
2234  * Try to lock the table. If we can't get the lock immediately,
2235  * somebody else is using (or dropping) the table, so it's not our
2236  * concern anymore. Having the lock prevents race conditions below.
2237  */
2239  continue;
2240 
2241  /*
2242  * Re-fetch the pg_class tuple and re-check whether it still seems to
2243  * be an orphaned temp table. If it's not there or no longer the same
2244  * relation, ignore it.
2245  */
2246  tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2247  if (!HeapTupleIsValid(tuple))
2248  {
2249  /* be sure to drop useless lock so we don't bloat lock table */
2251  continue;
2252  }
2253  classForm = (Form_pg_class) GETSTRUCT(tuple);
2254 
2255  /*
2256  * Make all the same tests made in the loop above. In event of OID
2257  * counter wraparound, the pg_class entry we have now might be
2258  * completely unrelated to the one we saw before.
2259  */
2260  if (!((classForm->relkind == RELKIND_RELATION ||
2261  classForm->relkind == RELKIND_MATVIEW) &&
2262  classForm->relpersistence == RELPERSISTENCE_TEMP))
2263  {
2265  continue;
2266  }
2267 
2268  if (checkTempNamespaceStatus(classForm->relnamespace) != TEMP_NAMESPACE_IDLE)
2269  {
2271  continue;
2272  }
2273 
2274  /* OK, let's delete it */
2275  ereport(LOG,
2276  (errmsg("autovacuum: dropping orphan temp table \"%s.%s.%s\"",
2278  get_namespace_name(classForm->relnamespace),
2279  NameStr(classForm->relname))));
2280 
2281  object.classId = RelationRelationId;
2282  object.objectId = relid;
2283  object.objectSubId = 0;
2284  performDeletion(&object, DROP_CASCADE,
2288 
2289  /*
2290  * To commit the deletion, end current transaction and start a new
2291  * one. Note this also releases the lock we took.
2292  */
2295 
2296  /* StartTransactionCommand changed current memory context */
2297  MemoryContextSwitchTo(AutovacMemCxt);
2298  }
2299 
2300  /*
2301  * Create a buffer access strategy object for VACUUM to use. We want to
2302  * use the same one across all the vacuum operations we perform, since the
2303  * point is for VACUUM not to blow out the shared cache.
2304  */
2305  bstrategy = GetAccessStrategy(BAS_VACUUM);
2306 
2307  /*
2308  * create a memory context to act as fake PortalContext, so that the
2309  * contexts created in the vacuum code are cleaned up for each table.
2310  */
2311  PortalContext = AllocSetContextCreate(AutovacMemCxt,
2312  "Autovacuum Portal",
2314 
2315  /*
2316  * Perform operations on collected tables.
2317  */
2318  foreach(cell, table_oids)
2319  {
2320  Oid relid = lfirst_oid(cell);
2321  HeapTuple classTup;
2322  autovac_table *tab;
2323  bool isshared;
2324  bool skipit;
2325  double stdVacuumCostDelay;
2326  int stdVacuumCostLimit;
2327  dlist_iter iter;
2328 
2330 
2331  /*
2332  * Check for config changes before processing each collected table.
2333  */
2334  if (ConfigReloadPending)
2335  {
2336  ConfigReloadPending = false;
2338 
2339  /*
2340  * You might be tempted to bail out if we see autovacuum is now
2341  * disabled. Must resist that temptation -- this might be a
2342  * for-wraparound emergency worker, in which case that would be
2343  * entirely inappropriate.
2344  */
2345  }
2346 
2347  /*
2348  * Find out whether the table is shared or not. (It's slightly
2349  * annoying to fetch the syscache entry just for this, but in typical
2350  * cases it adds little cost because table_recheck_autovac would
2351  * refetch the entry anyway. We could buy that back by copying the
2352  * tuple here and passing it to table_recheck_autovac, but that
2353  * increases the odds of that function working with stale data.)
2354  */
2355  classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
2356  if (!HeapTupleIsValid(classTup))
2357  continue; /* somebody deleted the rel, forget it */
2358  isshared = ((Form_pg_class) GETSTRUCT(classTup))->relisshared;
2359  ReleaseSysCache(classTup);
2360 
2361  /*
2362  * Hold schedule lock from here until we've claimed the table. We
2363  * also need the AutovacuumLock to walk the worker array, but that one
2364  * can just be a shared lock.
2365  */
2366  LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2367  LWLockAcquire(AutovacuumLock, LW_SHARED);
2368 
2369  /*
2370  * Check whether the table is being vacuumed concurrently by another
2371  * worker.
2372  */
2373  skipit = false;
2374  dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
2375  {
2376  WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
2377 
2378  /* ignore myself */
2379  if (worker == MyWorkerInfo)
2380  continue;
2381 
2382  /* ignore workers in other databases (unless table is shared) */
2383  if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId)
2384  continue;
2385 
2386  if (worker->wi_tableoid == relid)
2387  {
2388  skipit = true;
2389  found_concurrent_worker = true;
2390  break;
2391  }
2392  }
2393  LWLockRelease(AutovacuumLock);
2394  if (skipit)
2395  {
2396  LWLockRelease(AutovacuumScheduleLock);
2397  continue;
2398  }
2399 
2400  /*
2401  * Store the table's OID in shared memory before releasing the
2402  * schedule lock, so that other workers don't try to vacuum it
2403  * concurrently. (We claim it here so as not to hold
2404  * AutovacuumScheduleLock while rechecking the stats.)
2405  */
2406  MyWorkerInfo->wi_tableoid = relid;
2407  MyWorkerInfo->wi_sharedrel = isshared;
2408  LWLockRelease(AutovacuumScheduleLock);
2409 
2410  /*
2411  * Check whether pgstat data still says we need to vacuum this table.
2412  * It could have changed if something else processed the table while
2413  * we weren't looking.
2414  *
2415  * Note: we have a special case in pgstat code to ensure that the
2416  * stats we read are as up-to-date as possible, to avoid the problem
2417  * that somebody just finished vacuuming this table. The window to
2418  * the race condition is not closed but it is very small.
2419  */
2420  MemoryContextSwitchTo(AutovacMemCxt);
2421  tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
2422  effective_multixact_freeze_max_age);
2423  if (tab == NULL)
2424  {
2425  /* someone else vacuumed the table, or it went away */
2426  LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2427  MyWorkerInfo->wi_tableoid = InvalidOid;
2428  MyWorkerInfo->wi_sharedrel = false;
2429  LWLockRelease(AutovacuumScheduleLock);
2430  continue;
2431  }
2432 
2433  /*
2434  * Remember the prevailing values of the vacuum cost GUCs. We have to
2435  * restore these at the bottom of the loop, else we'll compute wrong
2436  * values in the next iteration of autovac_balance_cost().
2437  */
2438  stdVacuumCostDelay = VacuumCostDelay;
2439  stdVacuumCostLimit = VacuumCostLimit;
2440 
2441  /* Must hold AutovacuumLock while mucking with cost balance info */
2442  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2443 
2444  /* advertise my cost delay parameters for the balancing algorithm */
2445  MyWorkerInfo->wi_dobalance = tab->at_dobalance;
2446  MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
2447  MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
2448  MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
2449 
2450  /* do a balance */
2452 
2453  /* set the active cost parameters from the result of that */
2455 
2456  /* done */
2457  LWLockRelease(AutovacuumLock);
2458 
2459  /* clean up memory before each iteration */
2461 
2462  /*
2463  * Save the relation name for a possible error message, to avoid a
2464  * catalog lookup in case of an error. If any of these return NULL,
2465  * then the relation has been dropped since last we checked; skip it.
2466  * Note: they must live in a long-lived memory context because we call
2467  * vacuum and analyze in different transactions.
2468  */
2469 
2470  tab->at_relname = get_rel_name(tab->at_relid);
2473  if (!tab->at_relname || !tab->at_nspname || !tab->at_datname)
2474  goto deleted;
2475 
2476  /*
2477  * We will abort vacuuming the current table if something errors out,
2478  * and continue with the next one in schedule; in particular, this
2479  * happens if we are interrupted with SIGINT.
2480  */
2481  PG_TRY();
2482  {
2483  /* Use PortalContext for any per-table allocations */
2485 
2486  /* have at it */
2487  autovacuum_do_vac_analyze(tab, bstrategy);
2488 
2489  /*
2490  * Clear a possible query-cancel signal, to avoid a late reaction
2491  * to an automatically-sent signal because of vacuuming the
2492  * current table (we're done with it, so it would make no sense to
2493  * cancel at this point.)
2494  */
2495  QueryCancelPending = false;
2496  }
2497  PG_CATCH();
2498  {
2499  /*
2500  * Abort the transaction, start a new one, and proceed with the
2501  * next table in our list.
2502  */
2503  HOLD_INTERRUPTS();
2504  if (tab->at_params.options & VACOPT_VACUUM)
2505  errcontext("automatic vacuum of table \"%s.%s.%s\"",
2506  tab->at_datname, tab->at_nspname, tab->at_relname);
2507  else
2508  errcontext("automatic analyze of table \"%s.%s.%s\"",
2509  tab->at_datname, tab->at_nspname, tab->at_relname);
2510  EmitErrorReport();
2511 
2512  /* this resets ProcGlobal->statusFlags[i] too */
2514  FlushErrorState();
2516 
2517  /* restart our transaction for the following operations */
2520  }
2521  PG_END_TRY();
2522 
2523  /* Make sure we're back in AutovacMemCxt */
2524  MemoryContextSwitchTo(AutovacMemCxt);
2525 
2526  did_vacuum = true;
2527 
2528  /* ProcGlobal->statusFlags[i] are reset at the next end of xact */
2529 
2530  /* be tidy */
2531 deleted:
2532  if (tab->at_datname != NULL)
2533  pfree(tab->at_datname);
2534  if (tab->at_nspname != NULL)
2535  pfree(tab->at_nspname);
2536  if (tab->at_relname != NULL)
2537  pfree(tab->at_relname);
2538  pfree(tab);
2539 
2540  /*
2541  * Remove my info from shared memory. We could, but intentionally
2542  * don't, clear wi_cost_limit and friends --- this is on the
2543  * assumption that we probably have more to do with similar cost
2544  * settings, so we don't want to give up our share of I/O for a very
2545  * short interval and thereby thrash the global balance.
2546  */
2547  LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2548  MyWorkerInfo->wi_tableoid = InvalidOid;
2549  MyWorkerInfo->wi_sharedrel = false;
2550  LWLockRelease(AutovacuumScheduleLock);
2551 
2552  /* restore vacuum cost GUCs for the next iteration */
2553  VacuumCostDelay = stdVacuumCostDelay;
2554  VacuumCostLimit = stdVacuumCostLimit;
2555  }
2556 
2557  /*
2558  * Perform additional work items, as requested by backends.
2559  */
2560  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2561  for (i = 0; i < NUM_WORKITEMS; i++)
2562  {
2563  AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
2564 
2565  if (!workitem->avw_used)
2566  continue;
2567  if (workitem->avw_active)
2568  continue;
2569  if (workitem->avw_database != MyDatabaseId)
2570  continue;
2571 
2572  /* claim this one, and release lock while performing it */
2573  workitem->avw_active = true;
2574  LWLockRelease(AutovacuumLock);
2575 
2576  perform_work_item(workitem);
2577 
2578  /*
2579  * Check for config changes before acquiring lock for further jobs.
2580  */
2582  if (ConfigReloadPending)
2583  {
2584  ConfigReloadPending = false;
2586  }
2587 
2588  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2589 
2590  /* and mark it done */
2591  workitem->avw_active = false;
2592  workitem->avw_used = false;
2593  }
2594  LWLockRelease(AutovacuumLock);
2595 
2596  /*
2597  * We leak table_toast_map here (among other things), but since we're
2598  * going away soon, it's not a problem.
2599  */
2600 
2601  /*
2602  * Update pg_database.datfrozenxid, and truncate pg_xact if possible. We
2603  * only need to do this once, not after each table.
2604  *
2605  * Even if we didn't vacuum anything, it may still be important to do
2606  * this, because one indirect effect of vac_update_datfrozenxid() is to
2607  * update ShmemVariableCache->xidVacLimit. That might need to be done
2608  * even if we haven't vacuumed anything, because relations with older
2609  * relfrozenxid values or other databases with older datfrozenxid values
2610  * might have been dropped, allowing xidVacLimit to advance.
2611  *
2612  * However, it's also important not to do this blindly in all cases,
2613  * because when autovacuum=off this will restart the autovacuum launcher.
2614  * If we're not careful, an infinite loop can result, where workers find
2615  * no work to do and restart the launcher, which starts another worker in
2616  * the same database that finds no work to do. To prevent that, we skip
2617  * this if (1) we found no work to do and (2) we skipped at least one
2618  * table due to concurrent autovacuum activity. In that case, the other
2619  * worker has already done it, or will do so when it finishes.
2620  */
2621  if (did_vacuum || !found_concurrent_worker)
2623 
2624  /* Finally close out the last transaction. */
2626 }
2627 
2628 /*
2629  * Execute a previously registered work item.
2630  */
2631 static void
2633 {
2634  char *cur_datname = NULL;
2635  char *cur_nspname = NULL;
2636  char *cur_relname = NULL;
2637 
2638  /*
2639  * Note we do not store table info in MyWorkerInfo, since this is not
2640  * vacuuming proper.
2641  */
2642 
2643  /*
2644  * Save the relation name for a possible error message, to avoid a catalog
2645  * lookup in case of an error. If any of these return NULL, then the
2646  * relation has been dropped since last we checked; skip it.
2647  */
2648  Assert(CurrentMemoryContext == AutovacMemCxt);
2649 
2650  cur_relname = get_rel_name(workitem->avw_relation);
2651  cur_nspname = get_namespace_name(get_rel_namespace(workitem->avw_relation));
2652  cur_datname = get_database_name(MyDatabaseId);
2653  if (!cur_relname || !cur_nspname || !cur_datname)
2654  goto deleted2;
2655 
2656  autovac_report_workitem(workitem, cur_nspname, cur_relname);
2657 
2658  /* clean up memory before each work item */
2660 
2661  /*
2662  * We will abort the current work item if something errors out, and
2663  * continue with the next one; in particular, this happens if we are
2664  * interrupted with SIGINT. Note that this means that the work item list
2665  * can be lossy.
2666  */
2667  PG_TRY();
2668  {
2669  /* Use PortalContext for any per-work-item allocations */
2671 
2672  /* have at it */
2673  switch (workitem->avw_type)
2674  {
2677  ObjectIdGetDatum(workitem->avw_relation),
2678  Int64GetDatum((int64) workitem->avw_blockNumber));
2679  break;
2680  default:
2681  elog(WARNING, "unrecognized work item found: type %d",
2682  workitem->avw_type);
2683  break;
2684  }
2685 
2686  /*
2687  * Clear a possible query-cancel signal, to avoid a late reaction to
2688  * an automatically-sent signal because of vacuuming the current table
2689  * (we're done with it, so it would make no sense to cancel at this
2690  * point.)
2691  */
2692  QueryCancelPending = false;
2693  }
2694  PG_CATCH();
2695  {
2696  /*
2697  * Abort the transaction, start a new one, and proceed with the next
2698  * work item.
2699  */
2700  HOLD_INTERRUPTS();
2701  errcontext("processing work entry for relation \"%s.%s.%s\"",
2702  cur_datname, cur_nspname, cur_relname);
2703  EmitErrorReport();
2704 
2705  /* this resets ProcGlobal->statusFlags[i] too */
2707  FlushErrorState();
2709 
2710  /* restart our transaction for the following operations */
2713  }
2714  PG_END_TRY();
2715 
2716  /* Make sure we're back in AutovacMemCxt */
2717  MemoryContextSwitchTo(AutovacMemCxt);
2718 
2719  /* We intentionally do not set did_vacuum here */
2720 
2721  /* be tidy */
2722 deleted2:
2723  if (cur_datname)
2724  pfree(cur_datname);
2725  if (cur_nspname)
2726  pfree(cur_nspname);
2727  if (cur_relname)
2728  pfree(cur_relname);
2729 }
2730 
2731 /*
2732  * extract_autovac_opts
2733  *
2734  * Given a relation's pg_class tuple, return the AutoVacOpts portion of
2735  * reloptions, if set; otherwise, return NULL.
2736  */
2737 static AutoVacOpts *
2739 {
2740  bytea *relopts;
2741  AutoVacOpts *av;
2742 
2743  Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
2744  ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
2745  ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
2746 
2747  relopts = extractRelOptions(tup, pg_class_desc, NULL);
2748  if (relopts == NULL)
2749  return NULL;
2750 
2751  av = palloc(sizeof(AutoVacOpts));
2752  memcpy(av, &(((StdRdOptions *) relopts)->autovacuum), sizeof(AutoVacOpts));
2753  pfree(relopts);
2754 
2755  return av;
2756 }
2757 
2758 /*
2759  * get_pgstat_tabentry_relid
2760  *
2761  * Fetch the pgstat entry of a table, either local to a database or shared.
2762  */
2763 static PgStat_StatTabEntry *
2764 get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
2765  PgStat_StatDBEntry *dbentry)
2766 {
2767  PgStat_StatTabEntry *tabentry = NULL;
2768 
2769  if (isshared)
2770  {
2771  if (PointerIsValid(shared))
2772  tabentry = hash_search(shared->tables, &relid,
2773  HASH_FIND, NULL);
2774  }
2775  else if (PointerIsValid(dbentry))
2776  tabentry = hash_search(dbentry->tables, &relid,
2777  HASH_FIND, NULL);
2778 
2779  return tabentry;
2780 }
2781 
2782 /*
2783  * table_recheck_autovac
2784  *
2785  * Recheck whether a table still needs vacuum or analyze. Return value is a
2786  * valid autovac_table pointer if it does, NULL otherwise.
2787  *
2788  * Note that the returned autovac_table does not have the name fields set.
2789  */
2790 static autovac_table *
2791 table_recheck_autovac(Oid relid, HTAB *table_toast_map,
2792  TupleDesc pg_class_desc,
2793  int effective_multixact_freeze_max_age)
2794 {
2795  Form_pg_class classForm;
2796  HeapTuple classTup;
2797  bool dovacuum;
2798  bool doanalyze;
2799  autovac_table *tab = NULL;
2800  PgStat_StatTabEntry *tabentry;
2801  PgStat_StatDBEntry *shared;
2802  PgStat_StatDBEntry *dbentry;
2803  bool wraparound;
2804  AutoVacOpts *avopts;
2805 
2806  /* use fresh stats */
2808 
2811 
2812  /* fetch a copy of the relation's pg_class tuple from the syscache */
2813  classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2814  if (!HeapTupleIsValid(classTup))
2815  return NULL;
2816  classForm = (Form_pg_class) GETSTRUCT(classTup);
2817 
2818  /*
2819  * Get the applicable reloptions. If it is a TOAST table, try to get the
2820  * main table reloptions if the toast table itself doesn't have.
2821  */
2822  avopts = extract_autovac_opts(classTup, pg_class_desc);
2823  if (classForm->relkind == RELKIND_TOASTVALUE &&
2824  avopts == NULL && table_toast_map != NULL)
2825  {
2826  av_relation *hentry;
2827  bool found;
2828 
2829  hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2830  if (found && hentry->ar_hasrelopts)
2831  avopts = &hentry->ar_reloptions;
2832  }
2833 
2834  /* fetch the pgstat table entry */
2835  tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2836  shared, dbentry);
2837 
2838  relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
2839  effective_multixact_freeze_max_age,
2840  &dovacuum, &doanalyze, &wraparound);
2841 
2842  /* ignore ANALYZE for toast tables */
2843  if (classForm->relkind == RELKIND_TOASTVALUE)
2844  doanalyze = false;
2845 
2846  /* OK, it needs something done */
2847  if (doanalyze || dovacuum)
2848  {
2849  int freeze_min_age;
2850  int freeze_table_age;
2851  int multixact_freeze_min_age;
2852  int multixact_freeze_table_age;
2853  int vac_cost_limit;
2854  double vac_cost_delay;
2855  int log_min_duration;
2856 
2857  /*
2858  * Calculate the vacuum cost parameters and the freeze ages. If there
2859  * are options set in pg_class.reloptions, use them; in the case of a
2860  * toast table, try the main table too. Otherwise use the GUC
2861  * defaults, autovacuum's own first and plain vacuum second.
2862  */
2863 
2864  /* -1 in autovac setting means use plain vacuum_cost_delay */
2865  vac_cost_delay = (avopts && avopts->vacuum_cost_delay >= 0)
2866  ? avopts->vacuum_cost_delay
2867  : (autovacuum_vac_cost_delay >= 0)
2869  : VacuumCostDelay;
2870 
2871  /* 0 or -1 in autovac setting means use plain vacuum_cost_limit */
2872  vac_cost_limit = (avopts && avopts->vacuum_cost_limit > 0)
2873  ? avopts->vacuum_cost_limit
2876  : VacuumCostLimit;
2877 
2878  /* -1 in autovac setting means use log_autovacuum_min_duration */
2879  log_min_duration = (avopts && avopts->log_min_duration >= 0)
2880  ? avopts->log_min_duration
2882 
2883  /* these do not have autovacuum-specific settings */
2884  freeze_min_age = (avopts && avopts->freeze_min_age >= 0)
2885  ? avopts->freeze_min_age
2887 
2888  freeze_table_age = (avopts && avopts->freeze_table_age >= 0)
2889  ? avopts->freeze_table_age
2891 
2892  multixact_freeze_min_age = (avopts &&
2893  avopts->multixact_freeze_min_age >= 0)
2894  ? avopts->multixact_freeze_min_age
2896 
2897  multixact_freeze_table_age = (avopts &&
2898  avopts->multixact_freeze_table_age >= 0)
2899  ? avopts->multixact_freeze_table_age
2901 
2902  tab = palloc(sizeof(autovac_table));
2903  tab->at_relid = relid;
2904  tab->at_sharedrel = classForm->relisshared;
2906  (dovacuum ? VACOPT_VACUUM : 0) |
2907  (doanalyze ? VACOPT_ANALYZE : 0) |
2908  (!wraparound ? VACOPT_SKIP_LOCKED : 0);
2911  /* As of now, we don't support parallel vacuum for autovacuum */
2912  tab->at_params.nworkers = -1;
2913  tab->at_params.freeze_min_age = freeze_min_age;
2914  tab->at_params.freeze_table_age = freeze_table_age;
2915  tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;
2916  tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age;
2917  tab->at_params.is_wraparound = wraparound;
2918  tab->at_params.log_min_duration = log_min_duration;
2919  tab->at_vacuum_cost_limit = vac_cost_limit;
2920  tab->at_vacuum_cost_delay = vac_cost_delay;
2921  tab->at_relname = NULL;
2922  tab->at_nspname = NULL;
2923  tab->at_datname = NULL;
2924 
2925  /*
2926  * If any of the cost delay parameters has been set individually for
2927  * this table, disable the balancing algorithm.
2928  */
2929  tab->at_dobalance =
2930  !(avopts && (avopts->vacuum_cost_limit > 0 ||
2931  avopts->vacuum_cost_delay > 0));
2932  }
2933 
2934  heap_freetuple(classTup);
2935 
2936  return tab;
2937 }
2938 
2939 /*
2940  * relation_needs_vacanalyze
2941  *
2942  * Check whether a relation needs to be vacuumed or analyzed; return each into
2943  * "dovacuum" and "doanalyze", respectively. Also return whether the vacuum is
2944  * being forced because of Xid or multixact wraparound.
2945  *
2946  * relopts is a pointer to the AutoVacOpts options (either for itself in the
2947  * case of a plain table, or for either itself or its parent table in the case
2948  * of a TOAST table), NULL if none; tabentry is the pgstats entry, which can be
2949  * NULL.
2950  *
2951  * A table needs to be vacuumed if the number of dead tuples exceeds a
2952  * threshold. This threshold is calculated as
2953  *
2954  * threshold = vac_base_thresh + vac_scale_factor * reltuples
2955  *
2956  * For analyze, the analysis done is that the number of tuples inserted,
2957  * deleted and updated since the last analyze exceeds a threshold calculated
2958  * in the same fashion as above. Note that the collector actually stores
2959  * the number of tuples (both live and dead) that there were as of the last
2960  * analyze. This is asymmetric to the VACUUM case.
2961  *
2962  * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
2963  * transactions back, and if its relminmxid is more than
2964  * multixact_freeze_max_age multixacts back.
2965  *
2966  * A table whose autovacuum_enabled option is false is
2967  * automatically skipped (unless we have to vacuum it due to freeze_max_age).
2968  * Thus autovacuum can be disabled for specific tables. Also, when the stats
2969  * collector does not have data about a table, it will be skipped.
2970  *
2971  * A table whose vac_base_thresh value is < 0 takes the base value from the
2972  * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
2973  * value < 0 is substituted with the value of
2974  * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
2975  */
2976 static void
2978  AutoVacOpts *relopts,
2979  Form_pg_class classForm,
2980  PgStat_StatTabEntry *tabentry,
2981  int effective_multixact_freeze_max_age,
2982  /* output params below */
2983  bool *dovacuum,
2984  bool *doanalyze,
2985  bool *wraparound)
2986 {
2987  bool force_vacuum;
2988  bool av_enabled;
2989  float4 reltuples; /* pg_class.reltuples */
2990 
2991  /* constants from reloptions or GUC variables */
2992  int vac_base_thresh,
2993  vac_ins_base_thresh,
2994  anl_base_thresh;
2995  float4 vac_scale_factor,
2996  vac_ins_scale_factor,
2997  anl_scale_factor;
2998 
2999  /* thresholds calculated from above constants */
3000  float4 vacthresh,
3001  vacinsthresh,
3002  anlthresh;
3003 
3004  /* number of vacuum (resp. analyze) tuples at this time */
3005  float4 vactuples,
3006  instuples,
3007  anltuples;
3008 
3009  /* freeze parameters */
3010  int freeze_max_age;
3011  int multixact_freeze_max_age;
3012  TransactionId xidForceLimit;
3013  MultiXactId multiForceLimit;
3014 
3015  AssertArg(classForm != NULL);
3016  AssertArg(OidIsValid(relid));
3017 
3018  /*
3019  * Determine vacuum/analyze equation parameters. We have two possible
3020  * sources: the passed reloptions (which could be a main table or a toast
3021  * table), or the autovacuum GUC variables.
3022  */
3023 
3024  /* -1 in autovac setting means use plain vacuum_scale_factor */
3025  vac_scale_factor = (relopts && relopts->vacuum_scale_factor >= 0)
3026  ? relopts->vacuum_scale_factor
3028 
3029  vac_base_thresh = (relopts && relopts->vacuum_threshold >= 0)
3030  ? relopts->vacuum_threshold
3032 
3033  vac_ins_scale_factor = (relopts && relopts->vacuum_ins_scale_factor >= 0)
3034  ? relopts->vacuum_ins_scale_factor
3036 
3037  /* -1 is used to disable insert vacuums */
3038  vac_ins_base_thresh = (relopts && relopts->vacuum_ins_threshold >= -1)
3039  ? relopts->vacuum_ins_threshold
3041 
3042  anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
3043  ? relopts->analyze_scale_factor
3045 
3046  anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
3047  ? relopts->analyze_threshold
3049 
3050  freeze_max_age = (relopts && relopts->freeze_max_age >= 0)
3053 
3054  multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0)
3055  ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age)
3056  : effective_multixact_freeze_max_age;
3057 
3058  av_enabled = (relopts ? relopts->enabled : true);
3059 
3060  /* Force vacuum if table is at risk of wraparound */
3061  xidForceLimit = recentXid - freeze_max_age;
3062  if (xidForceLimit < FirstNormalTransactionId)
3063  xidForceLimit -= FirstNormalTransactionId;
3064  force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
3065  TransactionIdPrecedes(classForm->relfrozenxid,
3066  xidForceLimit));
3067  if (!force_vacuum)
3068  {
3069  multiForceLimit = recentMulti - multixact_freeze_max_age;
3070  if (multiForceLimit < FirstMultiXactId)
3071  multiForceLimit -= FirstMultiXactId;
3072  force_vacuum = MultiXactIdIsValid(classForm->relminmxid) &&
3073  MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit);
3074  }
3075  *wraparound = force_vacuum;
3076 
3077  /* User disabled it in pg_class.reloptions? (But ignore if at risk) */
3078  if (!av_enabled && !force_vacuum)
3079  {
3080  *doanalyze = false;
3081  *dovacuum = false;
3082  return;
3083  }
3084 
3085  /*
3086  * If we found the table in the stats hash, and autovacuum is currently
3087  * enabled, make a threshold-based decision whether to vacuum and/or
3088  * analyze. If autovacuum is currently disabled, we must be here for
3089  * anti-wraparound vacuuming only, so don't vacuum (or analyze) anything
3090  * that's not being forced.
3091  */
3092  if (PointerIsValid(tabentry) && AutoVacuumingActive())
3093  {
3094  reltuples = classForm->reltuples;
3095  vactuples = tabentry->n_dead_tuples;
3096  instuples = tabentry->inserts_since_vacuum;
3097  anltuples = tabentry->changes_since_analyze;
3098 
3099  /* If the table hasn't yet been vacuumed, take reltuples as zero */
3100  if (reltuples < 0)
3101  reltuples = 0;
3102 
3103  vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
3104  vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples;
3105  anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
3106 
3107  /*
3108  * Note that we don't need to take special consideration for stat
3109  * reset, because if that happens, the last vacuum and analyze counts
3110  * will be reset too.
3111  */
3112  if (vac_ins_base_thresh >= 0)
3113  elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
3114  NameStr(classForm->relname),
3115  vactuples, vacthresh, instuples, vacinsthresh, anltuples, anlthresh);
3116  else
3117  elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: (disabled), anl: %.0f (threshold %.0f)",
3118  NameStr(classForm->relname),
3119  vactuples, vacthresh, anltuples, anlthresh);
3120 
3121  /* Determine if this table needs vacuum or analyze. */
3122  *dovacuum = force_vacuum || (vactuples > vacthresh) ||
3123  (vac_ins_base_thresh >= 0 && instuples > vacinsthresh);
3124  *doanalyze = (anltuples > anlthresh);
3125  }
3126  else
3127  {
3128  /*
3129  * Skip a table not found in stat hash, unless we have to force vacuum
3130  * for anti-wrap purposes. If it's not acted upon, there's no need to
3131  * vacuum it.
3132  */
3133  *dovacuum = force_vacuum;
3134  *doanalyze = false;
3135  }
3136 
3137  /* ANALYZE refuses to work with pg_statistic */
3138  if (relid == StatisticRelationId)
3139  *doanalyze = false;
3140 }
3141 
3142 /*
3143  * autovacuum_do_vac_analyze
3144  * Vacuum and/or analyze the specified table
3145  */
3146 static void
3148 {
3149  RangeVar *rangevar;
3150  VacuumRelation *rel;
3151  List *rel_list;
3152 
3153  /* Let pgstat know what we're doing */
3155 
3156  /* Set up one VacuumRelation target, identified by OID, for vacuum() */
3157  rangevar = makeRangeVar(tab->at_nspname, tab->at_relname, -1);
3158  rel = makeVacuumRelation(rangevar, tab->at_relid, NIL);
3159  rel_list = list_make1(rel);
3160 
3161  vacuum(rel_list, &tab->at_params, bstrategy, true);
3162 }
3163 
3164 /*
3165  * autovac_report_activity
3166  * Report to pgstat what autovacuum is doing
3167  *
3168  * We send a SQL string corresponding to what the user would see if the
3169  * equivalent command was to be issued manually.
3170  *
3171  * Note we assume that we are going to report the next command as soon as we're
3172  * done with the current one, and exit right after the last one, so we don't
3173  * bother to report "<IDLE>" or some such.
3174  */
3175 static void
3177 {
3178 #define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 56)
3179  char activity[MAX_AUTOVAC_ACTIV_LEN];
3180  int len;
3181 
3182  /* Report the command and possible options */
3183  if (tab->at_params.options & VACOPT_VACUUM)
3184  snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3185  "autovacuum: VACUUM%s",
3186  tab->at_params.options & VACOPT_ANALYZE ? " ANALYZE" : "");
3187  else
3188  snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3189  "autovacuum: ANALYZE");
3190 
3191  /*
3192  * Report the qualified name of the relation.
3193  */
3194  len = strlen(activity);
3195 
3196  snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3197  " %s.%s%s", tab->at_nspname, tab->at_relname,
3198  tab->at_params.is_wraparound ? " (to prevent wraparound)" : "");
3199 
3200  /* Set statement_timestamp() to current time for pg_stat_activity */
3202 
3204 }
3205 
3206 /*
3207  * autovac_report_workitem
3208  * Report to pgstat that autovacuum is processing a work item
3209  */
3210 static void
3212  const char *nspname, const char *relname)
3213 {
3214  char activity[MAX_AUTOVAC_ACTIV_LEN + 12 + 2];
3215  char blk[12 + 2];
3216  int len;
3217 
3218  switch (workitem->avw_type)
3219  {
3221  snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3222  "autovacuum: BRIN summarize");
3223  break;
3224  }
3225 
3226  /*
3227  * Report the qualified name of the relation, and the block number if any
3228  */
3229  len = strlen(activity);
3230 
3231  if (BlockNumberIsValid(workitem->avw_blockNumber))
3232  snprintf(blk, sizeof(blk), " %u", workitem->avw_blockNumber);
3233  else
3234  blk[0] = '\0';
3235 
3236  snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3237  " %s.%s%s", nspname, relname, blk);
3238 
3239  /* Set statement_timestamp() to current time for pg_stat_activity */
3241 
3243 }
3244 
3245 /*
3246  * AutoVacuumingActive
3247  * Check GUC vars and report whether the autovacuum process should be
3248  * running.
3249  */
3250 bool
3252 {
3254  return false;
3255  return true;
3256 }
3257 
3258 /*
3259  * Request one work item to the next autovacuum run processing our database.
3260  * Return false if the request can't be recorded.
3261  */
3262 bool
3264  BlockNumber blkno)
3265 {
3266  int i;
3267  bool result = false;
3268 
3269  LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
3270 
3271  /*
3272  * Locate an unused work item and fill it with the given data.
3273  */
3274  for (i = 0; i < NUM_WORKITEMS; i++)
3275  {
3276  AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
3277 
3278  if (workitem->avw_used)
3279  continue;
3280 
3281  workitem->avw_used = true;
3282  workitem->avw_active = false;
3283  workitem->avw_type = type;
3284  workitem->avw_database = MyDatabaseId;
3285  workitem->avw_relation = relationId;
3286  workitem->avw_blockNumber = blkno;
3287  result = true;
3288 
3289  /* done */
3290  break;
3291  }
3292 
3293  LWLockRelease(AutovacuumLock);
3294 
3295  return result;
3296 }
3297 
3298 /*
3299  * autovac_init
3300  * This is called at postmaster initialization.
3301  *
3302  * All we do here is annoy the user if he got it wrong.
3303  */
3304 void
3306 {
3308  ereport(WARNING,
3309  (errmsg("autovacuum not started because of misconfiguration"),
3310  errhint("Enable the \"track_counts\" option.")));
3311 }
3312 
3313 /*
3314  * IsAutoVacuum functions
3315  * Return whether this is either a launcher autovacuum process or a worker
3316  * process.
3317  */
3318 bool
3320 {
3321  return am_autovacuum_launcher;
3322 }
3323 
3324 bool
3326 {
3327  return am_autovacuum_worker;
3328 }
3329 
3330 
3331 /*
3332  * AutoVacuumShmemSize
3333  * Compute space needed for autovacuum-related shared memory
3334  */
3335 Size
3337 {
3338  Size size;
3339 
3340  /*
3341  * Need the fixed struct and the array of WorkerInfoData.
3342  */
3343  size = sizeof(AutoVacuumShmemStruct);
3344  size = MAXALIGN(size);
3346  sizeof(WorkerInfoData)));
3347  return size;
3348 }
3349 
3350 /*
3351  * AutoVacuumShmemInit
3352  * Allocate and initialize autovacuum-related shared memory
3353  */
3354 void
3356 {
3357  bool found;
3358 
3359  AutoVacuumShmem = (AutoVacuumShmemStruct *)
3360  ShmemInitStruct("AutoVacuum Data",
3362  &found);
3363 
3364  if (!IsUnderPostmaster)
3365  {
3366  WorkerInfo worker;
3367  int i;
3368 
3369  Assert(!found);
3370 
3371  AutoVacuumShmem->av_launcherpid = 0;
3372  dlist_init(&AutoVacuumShmem->av_freeWorkers);
3373  dlist_init(&AutoVacuumShmem->av_runningWorkers);
3374  AutoVacuumShmem->av_startingWorker = NULL;
3375  memset(AutoVacuumShmem->av_workItems, 0,
3376  sizeof(AutoVacuumWorkItem) * NUM_WORKITEMS);
3377 
3378  worker = (WorkerInfo) ((char *) AutoVacuumShmem +
3379  MAXALIGN(sizeof(AutoVacuumShmemStruct)));
3380 
3381  /* initialize the WorkerInfo free list */
3382  for (i = 0; i < autovacuum_max_workers; i++)
3383  dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
3384  &worker[i].wi_links);
3385  }
3386  else
3387  Assert(found);
3388 }
3389 
3390 /*
3391  * autovac_refresh_stats
3392  * Refresh pgstats data for an autovacuum process
3393  *
3394  * Cause the next pgstats read operation to obtain fresh data, but throttle
3395  * such refreshing in the autovacuum launcher. This is mostly to avoid
3396  * rereading the pgstats files too many times in quick succession when there
3397  * are many databases.
3398  *
3399  * Note: we avoid throttling in the autovac worker, as it would be
3400  * counterproductive in the recheck logic.
3401  */
3402 static void
3404 {
3406  {
3407  static TimestampTz last_read = 0;
3408  TimestampTz current_time;
3409 
3410  current_time = GetCurrentTimestamp();
3411 
3412  if (!TimestampDifferenceExceeds(last_read, current_time,
3414  return;
3415 
3416  last_read = current_time;
3417  }
3418 
3420 }
AutoVacuumWorkItemType avw_type
Definition: autovacuum.c:256
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:542
int autovacuum_work_mem
Definition: autovacuum.c:116
void ProcessCatchupInterrupt(void)
Definition: sinval.c:176
static bool am_autovacuum_worker
Definition: autovacuum.c:141
void InitializeTimeouts(void)
Definition: timeout.c:346
#define NIL
Definition: pg_list.h:65
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:151
bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber blkno)
Definition: autovacuum.c:3263
int multixact_freeze_table_age
Definition: vacuum.h:215
static int default_freeze_table_age
Definition: autovacuum.c:152
int autovacuum_naptime
Definition: autovacuum.c:117
void AbortCurrentTransaction(void)
Definition: xact.c:3212
#define pg_attribute_noreturn()
Definition: c.h:167
double autovacuum_anl_scale
Definition: autovacuum.c:123
TupleDesc CreateTupleDescCopy(TupleDesc tupdesc)
Definition: tupdesc.c:110
TimestampTz adl_next_worker
Definition: autovacuum.c:163
struct avw_dbase avw_dbase
void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel)
Definition: vacuum.c:272
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
MultiXactId adw_minmulti
Definition: autovacuum.c:174
volatile sig_atomic_t QueryCancelPending
Definition: globals.c:31
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
AutoVacuumWorkItemType
Definition: autovacuum.h:23
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:1149
#define NUM_WORKITEMS
Definition: autovacuum.c:264
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define MAX_AUTOVAC_SLEEPTIME
Definition: autovacuum.c:137
static void autovac_balance_cost(void)
Definition: autovacuum.c:1791
WorkerInfo av_startingWorker
Definition: autovacuum.c:288
sig_atomic_t av_signal[AutoVacNumSignals]
Definition: autovacuum.c:284
int vacuum_multixact_freeze_table_age
Definition: vacuum.c:64
static bool am_autovacuum_launcher
Definition: autovacuum.c:140
#define HASH_CONTEXT
Definition: hsearch.h:91
#define HASH_ELEM
Definition: hsearch.h:85
#define WL_TIMEOUT
Definition: latch.h:127
void ProcessConfigFile(GucContext context)
bool autovacuum_start_daemon
Definition: autovacuum.c:114
dlist_head av_freeWorkers
Definition: autovacuum.c:286
int autovacuum_vac_ins_thresh
Definition: autovacuum.c:120
static AutoVacOpts * extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
Definition: autovacuum.c:2738
uint32 TransactionId
Definition: c.h:575
int waittime
Definition: pg_standby.c:42
void vac_update_datfrozenxid(void)
Definition: vacuum.c:1351
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
static int default_multixact_freeze_min_age
Definition: autovacuum.c:153
void InitPostmasterChild(void)
Definition: miscinit.c:93
MemoryContext hcxt
Definition: hsearch.h:77
NON_EXEC_STATIC void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn()
Definition: autovacuum.c:432
#define RelationGetDescr(relation)
Definition: rel.h:483
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
#define DEBUG3
Definition: elog.h:23
#define SIGUSR1
Definition: win32_port.h:171
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:199
BackendType MyBackendType
Definition: miscinit.c:62
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1578
float8 vacuum_scale_factor
Definition: rel.h:293
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3278
static void do_autovacuum(void)
Definition: autovacuum.c:1949
FormData_pg_database * Form_pg_database
Definition: pg_database.h:81
#define SIGCHLD
Definition: win32_port.h:169
PGPROC * MyProc
Definition: proc.c:67
int64 TimestampTz
Definition: timestamp.h:39
VacuumRelation * makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols)
Definition: makefuncs.c:809
struct WorkerInfoData * WorkerInfo
Definition: autovacuum.c:234
pid_t fork_process(void)
Definition: fork_process.c:29
int freeze_min_age
Definition: rel.h:285
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
void AtEOXact_Buffers(bool isCommit)
Definition: bufmgr.c:2480
int adl_score
Definition: autovacuum.c:164
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:56
char * pstrdup(const char *in)
Definition: mcxt.c:1187
void CommitTransactionCommand(void)
Definition: xact.c:2948
void ReleaseAuxProcessResources(bool isCommit)
Definition: resowner.c:879
TimestampTz wi_launchtime
Definition: autovacuum.c:226
static MemoryContext AutovacMemCxt
Definition: autovacuum.c:157
double wi_cost_delay
Definition: autovacuum.c:229
#define Min(x, y)
Definition: c.h:982
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:452
Oid get_rel_namespace(Oid relid)
Definition: lsyscache.c:1869
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static MultiXactId recentMulti
Definition: autovacuum.c:148
#define AccessShareLock
Definition: lockdefs.h:36
struct WorkerInfoData WorkerInfoData
Size entrysize
Definition: hsearch.h:72
struct avl_dbase avl_dbase
int autovacuum_multixact_freeze_max_age
Definition: autovacuum.c:125
void proc_exit(int code)
Definition: ipc.c:104
static void autovac_refresh_stats(void)
Definition: autovacuum.c:3403
#define MemSet(start, val, len)
Definition: c.h:1004
PgStat_StatDBEntry * pgstat_fetch_stat_dbentry(Oid dbid)
Definition: pgstat.c:2595
#define SetProcessingMode(mode)
Definition: miscadmin.h:399
#define kill(pid, sig)
Definition: win32_port.h:454
void BaseInit(void)
Definition: postinit.c:547
dlist_node wi_links
Definition: autovacuum.c:222
uint32 BlockNumber
Definition: block.h:31
#define MIN_AUTOVAC_SLEEPTIME
Definition: autovacuum.c:136
#define lengthof(array)
Definition: c.h:730
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:919
#define SIGPIPE
Definition: win32_port.h:164
char * at_datname
Definition: autovacuum.c:199
#define SIGUSR2
Definition: win32_port.h:172
#define LOG
Definition: elog.h:26
#define STATS_READ_DELAY
Definition: autovacuum.c:133
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
NameData relname
Definition: pg_class.h:38
unsigned int Oid
Definition: postgres_ext.h:31
void SetLatch(Latch *latch)
Definition: latch.c:505
NON_EXEC_STATIC void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn()
Definition: autovacuum.c:1510
List * lappend_oid(List *list, Oid datum)
Definition: list.c:357
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1709
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
void ClosePostmasterPorts(bool am_syslogger)
Definition: postmaster.c:2566
#define OidIsValid(objectId)
Definition: c.h:706
void AbortOutOfAnyTransaction(void)
Definition: xact.c:4581
int freeze_table_age
Definition: vacuum.h:212
int autovacuum_vac_cost_limit
Definition: autovacuum.c:128
void FlushErrorState(void)
Definition: elog.c:1646
void ResetLatch(Latch *latch)
Definition: latch.c:588
#define PG_SETMASK(mask)
Definition: pqsignal.h:19
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:145
void StatementCancelHandler(SIGNAL_ARGS)
Definition: postgres.c:2857
void AtEOXact_Files(bool isCommit)
Definition: fd.c:2931
void autovac_init(void)
Definition: autovacuum.c:3305
Oid ar_toastrelid
Definition: autovacuum.c:181
AutoVacuumSignal
Definition: autovacuum.c:241
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:390
double autovacuum_vac_ins_scale
Definition: autovacuum.c:121
MemoryContext PortalContext
Definition: mcxt.c:53
bool at_sharedrel
Definition: autovacuum.c:196
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1811
TempNamespaceStatus checkTempNamespaceStatus(Oid namespaceId)
Definition: namespace.c:3236
Datum brin_summarize_range(PG_FUNCTION_ARGS)
Definition: brin.c:861
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:118
ErrorContextCallback * error_context_stack
Definition: elog.c:92
#define list_make1(x1)
Definition: pg_list.h:206
#define NAMEDATALEN
void set_ps_display(const char *activity)
Definition: ps_status.c:349
int freeze_table_age
Definition: rel.h:287
int at_vacuum_cost_limit
Definition: autovacuum.c:194
PgStat_Counter inserts_since_vacuum
Definition: pgstat.h:730
void pg_usleep(long microsec)
Definition: signal.c:53
static void HandleAutoVacLauncherInterrupts(void)
Definition: autovacuum.c:807
Definition: dynahash.c:218
char * at_relname
Definition: autovacuum.c:197
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void AtEOXact_SMgr(void)
Definition: smgr.c:661
bool pgstat_track_counts
Definition: pgstat.c:126
void pfree(void *pointer)
Definition: mcxt.c:1057
#define dlist_tail_element(type, membername, lhead)
Definition: ilist.h:496
void disable_all_timeouts(bool keep_indicators)
Definition: timeout.c:598
#define FirstNormalTransactionId
Definition: transam.h:34
bool AutoVacuumingActive(void)
Definition: autovacuum.c:3251
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
int AutovacuumLauncherPid
Definition: autovacuum.c:305
struct AutoVacuumWorkItem AutoVacuumWorkItem
int VacuumCostLimit
Definition: globals.c:141
int autovacuum_freeze_max_age
Definition: autovacuum.c:124
int StartAutoVacWorker(void)
Definition: autovacuum.c:1472
int freeze_min_age
Definition: vacuum.h:211
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:392
int vacuum_multixact_freeze_min_age
Definition: vacuum.c:63
Definition: guc.h:75
Oid adl_datid
Definition: autovacuum.c:162
PgStat_Counter n_dead_tuples
Definition: pgstat.h:728
float8 vacuum_cost_delay
Definition: rel.h:292
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
bool is_wraparound
Definition: vacuum.h:217
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
AutoVacuumWorkItem av_workItems[NUM_WORKITEMS]
Definition: autovacuum.c:289
#define DEBUG2
Definition: elog.h:24
void InitProcess(void)
Definition: proc.c:301
_stringlist * dblist
Definition: pg_regress.c:75
TimestampTz last_autovac_time
Definition: pgstat.h:683
double autovacuum_vac_cost_delay
Definition: autovacuum.c:127
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3196
int analyze_threshold
Definition: rel.h:283
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:7808
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1286
int autovacuum_vac_thresh
Definition: autovacuum.c:118
bool IsUnderPostmaster
Definition: globals.c:109
#define SIGHUP
Definition: win32_port.h:159
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:313
static void autovac_report_workitem(AutoVacuumWorkItem *workitem, const char *nspname, const char *relname)
Definition: autovacuum.c:3211
static void perform_work_item(AutoVacuumWorkItem *workitem)
Definition: autovacuum.c:2632
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
sigset_t UnBlockSig
Definition: pqsignal.c:22
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1466
#define FirstMultiXactId
Definition: multixact.h:25
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
static WorkerInfo MyWorkerInfo
Definition: autovacuum.c:302
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, int effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound)
Definition: autovacuum.c:2977
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1701
static dlist_head DatabaseList
Definition: autovacuum.c:298
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3325
int synchronous_commit
Definition: xact.c:83
void pgstat_vacuum_stat(void)
Definition: pgstat.c:1066
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2823
int wi_cost_limit_base
Definition: autovacuum.c:231
char * adw_name
Definition: autovacuum.c:172
#define AssertArg(condition)
Definition: c.h:802
MemoryContext TopMemoryContext
Definition: mcxt.c:44
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
volatile sig_atomic_t ShutdownRequestPending
Definition: interrupt.c:27
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
Definition: guc.h:72
List * lappend(List *list, void *datum)
Definition: list.c:321
void SignalHandlerForShutdownRequest(SIGNAL_ARGS)
Definition: interrupt.c:104
static void avl_sigusr2_handler(SIGNAL_ARGS)
Definition: autovacuum.c:1419
#define SIG_IGN
Definition: win32_port.h:156
static void launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval *nap)
Definition: autovacuum.c:860
TransactionId adw_frozenxid
Definition: autovacuum.c:173
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
static int db_comparator(const void *a, const void *b)
Definition: autovacuum.c:1125
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1115
float float4
Definition: c.h:552
void UnlockBuffers(void)
Definition: bufmgr.c:3724
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:67
static void autovac_report_activity(autovac_table *tab)
Definition: autovacuum.c:3176
#define HASH_BLOBS
Definition: hsearch.h:86
char * at_nspname
Definition: autovacuum.c:198
int multixact_freeze_table_age
Definition: rel.h:290
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
int vacuum_threshold
Definition: rel.h:281
PgStat_StatDBEntry * adw_entry
Definition: autovacuum.c:175
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:326
uintptr_t Datum
Definition: postgres.h:367
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1163
struct @17::@18 av[32]
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
Oid adw_datid
Definition: autovacuum.c:171
Oid MyDatabaseId
Definition: globals.c:85
int PostAuthDelay
Definition: postgres.c:102
double at_vacuum_cost_delay
Definition: autovacuum.c:193
struct autovac_table autovac_table
#define MAX_AUTOVAC_ACTIV_LEN
Size AutoVacuumShmemSize(void)
Definition: autovacuum.c:3336
BlockNumber avw_blockNumber
Definition: autovacuum.c:261
Size keysize
Definition: hsearch.h:71
static TransactionId recentXid
Definition: autovacuum.c:147
dlist_node * cur
Definition: ilist.h:161
void EmitErrorReport(void)
Definition: elog.c:1501
PgStat_Counter changes_since_analyze
Definition: pgstat.h:729
bytea * extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, amoptions_function amoptions)
Definition: reloptions.c:1357
int autovacuum_max_workers
Definition: autovacuum.c:115
#define InvalidOid
Definition: postgres_ext.h:36
VacOptTernaryValue truncate
Definition: vacuum.h:223
static PgStat_StatTabEntry * get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, PgStat_StatDBEntry *dbentry)
Definition: autovacuum.c:2764
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:56
static void dlist_init(dlist_head *head)
Definition: ilist.h:278
double autovacuum_vac_scale
Definition: autovacuum.c:119
VacuumParams at_params
Definition: autovacuum.c:192
#define ereport(elevel,...)
Definition: elog.h:155
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, char *out_dbname, bool override_allow_connections)
Definition: postinit.c:589
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
static volatile sig_atomic_t got_SIGUSR2
Definition: autovacuum.c:144
float8 analyze_scale_factor
Definition: rel.h:295
TransactionId MultiXactId
Definition: c.h:585
bool IsAutoVacuumLauncherProcess(void)
Definition: autovacuum.c:3319
#define PG_CATCH()
Definition: elog.h:319
#define Max(x, y)
Definition: c.h:976
int freeze_max_age
Definition: rel.h:286
int vacuum_cost_limit
Definition: rel.h:284
#define SIG_DFL
Definition: win32_port.h:154
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define SIGNAL_ARGS
Definition: c.h:1329
#define Assert(condition)
Definition: c.h:800
#define lfirst(lc)
Definition: pg_list.h:169
#define PERFORM_DELETION_QUIETLY
Definition: dependency.h:136
bool enabled
Definition: rel.h:280
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:35
int multixact_freeze_min_age
Definition: rel.h:288
void StartTransactionCommand(void)
Definition: xact.c:2847
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
void pgstat_clear_snapshot(void)
Definition: pgstat.c:6141
size_t Size
Definition: c.h:528
char * dbname
Definition: streamutil.c:51
static void FreeWorkerInfo(int code, Datum arg)
Definition: autovacuum.c:1726
int nworkers
Definition: vacuum.h:231
static AutoVacuumShmemStruct * AutoVacuumShmem
Definition: autovacuum.c:292
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1207
int vacuum_freeze_min_age
Definition: vacuum.c:61
void AbortBufferIO(void)
Definition: bufmgr.c:4202
#define MAXALIGN(LEN)
Definition: c.h:753
int log_min_duration
Definition: vacuum.h:218
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3146
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1401
void AutoVacWorkerFailed(void)
Definition: autovacuum.c:1412
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1391
int vacuum_freeze_table_age
Definition: vacuum.c:62
int log_min_duration
Definition: rel.h:291
sigjmp_buf * PG_exception_stack
Definition: elog.c:94
static List * get_database_list(void)
Definition: autovacuum.c:1879
#define CharGetDatum(X)
Definition: postgres.h:416
int StartAutoVacLauncher(void)
dlist_node adl_node
Definition: autovacuum.c:165
static void dlist_move_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:385
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:308
static void rebuild_database_list(Oid newdb)
Definition: autovacuum.c:944
int Log_autovacuum_min_duration
Definition: autovacuum.c:130
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:863
static pid_t AutoVacPID
Definition: postmaster.c:251
FormData_pg_class * Form_pg_class
Definition: pg_class.h:153
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:174
AutoVacOpts ar_reloptions
Definition: autovacuum.c:184
#define AccessExclusiveLock
Definition: lockdefs.h:45
static void autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
Definition: autovacuum.c:3147
void SetCurrentStatementStartTimestamp(void)
Definition: xact.c:833
static void AutoVacLauncherShutdown(void)
Definition: autovacuum.c:316
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:902
double VacuumCostDelay
Definition: globals.c:142
int vacuum_ins_threshold
Definition: rel.h:282
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition: ilist.h:368
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:116
#define elog(elevel,...)
Definition: elog.h:228
static MemoryContext DatabaseListCxt
Definition: autovacuum.c:299
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:26
int i
void FloatExceptionHandler(SIGNAL_ARGS)
Definition: postgres.c:2878
int options
Definition: vacuum.h:210
#define errcontext
Definition: elog.h:199
#define NameStr(name)
Definition: c.h:677
struct av_relation av_relation
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
int multixact_freeze_max_age
Definition: rel.h:289
void * arg
void AutoVacuumUpdateDelay(void)
Definition: autovacuum.c:1775
struct Latch * MyLatch
Definition: globals.c:54
Definition: c.h:617
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
PGPROC * wi_proc
Definition: autovacuum.c:225
static void launch_worker(TimestampTz now)
Definition: autovacuum.c:1360
float8 vacuum_ins_scale_factor
Definition: rel.h:294
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:146
static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, int effective_multixact_freeze_max_age)
Definition: autovacuum.c:2791
void init_ps_display(const char *fixed_part)
Definition: ps_status.c:258
void LWLockReleaseAll(void)
Definition: lwlock.c:1910
#define qsort(a, b, c, d)
Definition: port.h:497
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:551
void AtEOXact_HashTables(bool isCommit)
Definition: dynahash.c:1848
void AutoVacuumShmemInit(void)
Definition: autovacuum.c:3355
dlist_head av_runningWorkers
Definition: autovacuum.c:287
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1654
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define PG_TRY()
Definition: elog.h:309
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define PERFORM_DELETION_SKIP_EXTENSIONS
Definition: dependency.h:138
Definition: proc.h:120
bool ar_hasrelopts
Definition: autovacuum.c:183
Definition: pg_list.h:50
char * get_rel_name(Oid relid)
Definition: lsyscache.c:1845
#define PointerIsValid(pointer)
Definition: c.h:694
#define snprintf
Definition: port.h:215
int pid
Definition: proc.h:145
static int default_multixact_freeze_table_age
Definition: autovacuum.c:154
#define WL_LATCH_SET
Definition: latch.h:124
void pgstat_report_autovac(Oid dboid)
Definition: pgstat.c:1512
static Oid do_start_worker(void)
Definition: autovacuum.c:1145
int multixact_freeze_min_age
Definition: vacuum.h:213
#define dlist_reverse_foreach(iter, lhead)
Definition: ilist.h:538
static int default_freeze_min_age
Definition: autovacuum.c:151
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1542
RangeVar * makeRangeVar(char *schemaname, char *relname, int location)
Definition: makefuncs.c:422
#define die(msg)
Definition: pg_test_fsync.c:97
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:628
#define PG_END_TRY()
Definition: elog.h:334
#define BTEqualStrategyNumber
Definition: stratnum.h:31
bool at_dobalance
Definition: autovacuum.c:195
#define lfirst_oid(lc)
Definition: pg_list.h:171
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
#define PERFORM_DELETION_INTERNAL
Definition: dependency.h:134
int autovacuum_anl_thresh
Definition: autovacuum.c:122
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:723
#define NON_EXEC_STATIC
Definition: c.h:1347