PostgreSQL Source Code  git master
vacuumparallel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumparallel.c
4  * Support routines for parallel vacuum execution.
5  *
6  * This file contains routines that are intended to support setting up, using,
7  * and tearing down a ParallelVacuumState.
8  *
9  * In a parallel vacuum, we perform both index bulk deletion and index cleanup
10  * with parallel worker processes. Individual indexes are processed by one
11  * vacuum process. ParallelVacuumState contains shared information as well as
12  * the memory space for storing dead items allocated in the DSM segment. We
13  * launch parallel worker processes at the start of parallel index
14  * bulk-deletion and index cleanup and once all indexes are processed, the
15  * parallel worker processes exit. Each time we process indexes in parallel,
16  * the parallel context is re-initialized so that the same DSM can be used for
17  * multiple passes of index bulk-deletion and index cleanup.
18  *
19  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  * IDENTIFICATION
23  * src/backend/commands/vacuumparallel.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 #include "postgres.h"
28 
29 #include "access/amapi.h"
30 #include "access/table.h"
31 #include "access/xact.h"
32 #include "catalog/index.h"
33 #include "commands/vacuum.h"
34 #include "optimizer/paths.h"
35 #include "pgstat.h"
36 #include "storage/bufmgr.h"
37 #include "tcop/tcopprot.h"
38 #include "utils/lsyscache.h"
39 #include "utils/rel.h"
40 
41 /*
42  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
43  * we don't need to worry about DSM keys conflicting with plan_node_id we can
44  * use small integers. Each key identifies one chunk kept in the DSM segment.
45  */
46 #define PARALLEL_VACUUM_KEY_SHARED 1 /* PVShared */
47 #define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2 /* VacDeadItems */
48 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3 /* copy of debug_query_string */
49 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4 /* BufferUsage, one per worker */
50 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5 /* WalUsage, one per worker */
51 #define PARALLEL_VACUUM_KEY_INDEX_STATS 6 /* PVIndStats, one per index */
52 
53 /*
54  * Shared information among parallel workers, so it is allocated in the DSM
55  * segment.
56  */
57 typedef struct PVShared
58 {
59  /*
60  * Target table relid and log level (for messages about parallel workers
61  * launched during VACUUM VERBOSE). These fields are not modified during
62  * the parallel vacuum.
63  */
65  int elevel;
66 
67  /*
68  * Fields for both index vacuum and cleanup.
69  *
70  * reltuples is the total number of input heap tuples. We set either old
71  * live tuples in the index vacuum case or the new live tuples in the
72  * index cleanup case.
73  *
74  * estimated_count is true if reltuples is an estimated value. (Note that
75  * reltuples could be -1 in this case, indicating we have no idea.)
76  */
77  double reltuples;
79 
80  /*
81  * In single process vacuum we could consume more memory during index
82  * vacuuming or cleanup apart from the memory for heap scanning. In
83  * parallel vacuum, since individual vacuum workers can consume memory
84  * equal to maintenance_work_mem, the new maintenance_work_mem for each
85  * worker is set such that the parallel operation doesn't consume more
86  * memory than single process vacuum.
87  */
89 
90  /*
91  * Shared vacuum cost balance. During parallel vacuum,
92  * VacuumSharedCostBalance points to this value and it accumulates the
93  * balance of each parallel vacuum worker.
94  */
96 
97  /*
98  * Number of active parallel workers. This is used for computing the
99  * minimum threshold of the vacuum cost balance before a worker sleeps for
100  * cost-based delay.
101  */
103 
104  /* Counter for vacuuming and cleanup */
107 
108 /* Status used during parallel index vacuum or cleanup */
109 typedef enum PVIndVacStatus
110 {
116 
117 /*
118  * Struct for index vacuum statistics of an index that is used for parallel vacuum.
119  * This includes the status of parallel index vacuum as well as index statistics.
120  */
121 typedef struct PVIndStats
122 {
123  /*
124  * The following two fields are set by leader process before executing
125  * parallel index vacuum or parallel index cleanup. These fields are not
126  * fixed for the entire VACUUM operation. They are only fixed for an
127  * individual parallel index vacuum and cleanup.
128  *
129  * parallel_workers_can_process is true if both leader and worker can
130  * process the index, otherwise only leader can process it.
131  */
134 
135  /*
136  * Individual worker or leader stores the result of index vacuum or
137  * cleanup.
138  */
139  bool istat_updated; /* are the stats updated? */
142 
143 /*
144  * Struct for maintaining a parallel vacuum state. typedef appears in vacuum.h.
145  */
147 {
148  /* NULL for worker processes */
150 
151  /* Target indexes */
153  int nindexes;
154 
155  /* Shared information among parallel vacuum workers */
157 
158  /*
159  * Shared index statistics among parallel vacuum workers. The array
160  * element is allocated for every index, even those indexes where parallel
161  * index vacuuming is unsafe or not worthwhile (e.g.,
162  * will_parallel_vacuum[] is false). During parallel vacuum,
163  * IndexBulkDeleteResult of each index is kept in DSM and is copied into
164  * local memory at the end of parallel vacuum.
165  */
167 
168  /* Shared dead items space among parallel vacuum workers */
170 
171  /* Points to buffer usage area in DSM */
173 
174  /* Points to WAL usage area in DSM */
176 
177  /*
178  * False if the index is totally unsuitable target for all parallel
179  * processing. For example, the index could be <
180  * min_parallel_index_scan_size cutoff.
181  */
183 
184  /*
185  * The number of indexes that support parallel index bulk-deletion and
186  * parallel index cleanup respectively.
187  */
191 
192  /* Buffer access strategy used by leader process */
194 
195  /*
196  * Error reporting state. The error callback is set only for worker
197  * processes during parallel index vacuum.
198  */
200  char *relname;
201  char *indname;
203 };
204 
205 static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
206  bool *will_parallel_vacuum);
207 static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
208  bool vacuum);
212  PVIndStats *indstats);
213 static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
214  bool vacuum);
215 static void parallel_vacuum_error_callback(void *arg);
216 
217 /*
218  * Try to enter parallel mode and create a parallel context. Then initialize
219  * shared memory state.
220  *
221  * On success, return parallel vacuum state. Return NULL if no worker is usable.
222  */
224 parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
225  int nrequested_workers, int max_items,
226  int elevel, BufferAccessStrategy bstrategy)
227 {
228  ParallelVacuumState *pvs;
229  ParallelContext *pcxt;
230  PVShared *shared;
231  VacDeadItems *dead_items;
232  PVIndStats *indstats;
233  BufferUsage *buffer_usage;
234  WalUsage *wal_usage;
235  bool *will_parallel_vacuum;
236  Size est_indstats_len;
237  Size est_shared_len;
238  Size est_dead_items_len;
239  int nindexes_mwm = 0;
240  int parallel_workers = 0;
241  int querylen;
242 
243  /*
244  * A parallel vacuum must be requested and there must be indexes on the
245  * relation
246  */
247  Assert(nrequested_workers >= 0);
248  Assert(nindexes > 0);
249 
250  /*
251  * Compute the number of parallel vacuum workers to launch; this also fills will_parallel_vacuum[]
252  */
253  will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
254  parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes,
255  nrequested_workers,
256  will_parallel_vacuum);
257  if (parallel_workers <= 0)
258  {
259  /* Can't perform vacuum in parallel -- return NULL */
260  pfree(will_parallel_vacuum);
261  return NULL;
262  }
263 
265  pvs->indrels = indrels;
266  pvs->nindexes = nindexes;
267  pvs->will_parallel_vacuum = will_parallel_vacuum;
268  pvs->bstrategy = bstrategy;
269 
271  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
272  parallel_workers);
273  Assert(pcxt->nworkers > 0);
274  pvs->pcxt = pcxt;
275 
276  /* Estimate size for index vacuum stats -- PARALLEL_VACUUM_KEY_INDEX_STATS */
277  est_indstats_len = mul_size(sizeof(PVIndStats), nindexes);
278  shm_toc_estimate_chunk(&pcxt->estimator, est_indstats_len);
279  shm_toc_estimate_keys(&pcxt->estimator, 1);
280 
281  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
282  est_shared_len = sizeof(PVShared);
283  shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
284  shm_toc_estimate_keys(&pcxt->estimator, 1);
285 
286  /* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
287  est_dead_items_len = vac_max_items_to_alloc_size(max_items);
288  shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
289  shm_toc_estimate_keys(&pcxt->estimator, 1);
290 
291  /*
292  * Estimate space for BufferUsage and WalUsage --
293  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
294  *
295  * If there are no extensions loaded that care, we could skip this. We
296  * have no way of knowing whether anyone's looking at pgBufferUsage or
297  * pgWalUsage, so do it unconditionally.
298  */
300  mul_size(sizeof(BufferUsage), pcxt->nworkers));
301  shm_toc_estimate_keys(&pcxt->estimator, 1);
303  mul_size(sizeof(WalUsage), pcxt->nworkers));
304  shm_toc_estimate_keys(&pcxt->estimator, 1);
305 
306  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
307  if (debug_query_string)
308  {
309  querylen = strlen(debug_query_string);
310  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
311  shm_toc_estimate_keys(&pcxt->estimator, 1);
312  }
313  else
314  querylen = 0; /* keep compiler quiet */
315 
316  InitializeParallelDSM(pcxt);
317 
318  /* Prepare index vacuum stats */
319  indstats = (PVIndStats *) shm_toc_allocate(pcxt->toc, est_indstats_len);
320  for (int i = 0; i < nindexes; i++)
321  {
322  Relation indrel = indrels[i];
323  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
324 
325  /*
326  * Cleanup option should be either disabled, always performing in
327  * parallel or conditionally performing in parallel; that is, the two cleanup flags must not both be set.
328  */
329  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
330  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
331  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
332 
333  if (!will_parallel_vacuum[i])
334  continue;
335 
336  if (indrel->rd_indam->amusemaintenanceworkmem)
337  nindexes_mwm++;
338 
339  /*
340  * Remember the number of indexes that support parallel operation for
341  * each phase.
342  */
343  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
345  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
347  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
349  }
351  pvs->indstats = indstats;
352 
353  /* Prepare shared information */
354  shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
355  MemSet(shared, 0, est_shared_len);
356  shared->relid = RelationGetRelid(rel);
357  shared->elevel = elevel;
359  (nindexes_mwm > 0) ?
360  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
362 
363  pg_atomic_init_u32(&(shared->cost_balance), 0);
364  pg_atomic_init_u32(&(shared->active_nworkers), 0);
365  pg_atomic_init_u32(&(shared->idx), 0);
366 
368  pvs->shared = shared;
369 
370  /* Prepare the dead_items space */
371  dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
372  est_dead_items_len);
373  dead_items->max_items = max_items;
374  dead_items->num_items = 0;
375  MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
377  pvs->dead_items = dead_items;
378 
379  /*
380  * Allocate space for each worker's BufferUsage and WalUsage; no need to
381  * initialize
382  */
383  buffer_usage = shm_toc_allocate(pcxt->toc,
384  mul_size(sizeof(BufferUsage), pcxt->nworkers));
386  pvs->buffer_usage = buffer_usage;
387  wal_usage = shm_toc_allocate(pcxt->toc,
388  mul_size(sizeof(WalUsage), pcxt->nworkers));
390  pvs->wal_usage = wal_usage;
391 
392  /* Store query string for workers (only when the leader has one) */
393  if (debug_query_string)
394  {
395  char *sharedquery;
396 
397  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
398  memcpy(sharedquery, debug_query_string, querylen + 1);
399  sharedquery[querylen] = '\0'; /* the memcpy above already copied the terminator */
400  shm_toc_insert(pcxt->toc,
401  PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
402  }
403 
404  /* Success -- return parallel vacuum state */
405  return pvs;
406 }
407 
408 /*
409  * Destroy the parallel context, and end parallel mode.
410  *
411  * Since writes are not allowed during parallel mode, copy the
412  * updated index statistics from DSM into local memory and then later use that
413  * to update the index statistics. One might think that we can exit from
414  * parallel mode, update the index statistics and then destroy parallel
415  * context, but that won't be safe (see ExitParallelMode).
416  */
417 void
419 {
421 
422  /* Copy the updated statistics; istats[i] stays NULL for an index with none */
423  for (int i = 0; i < pvs->nindexes; i++)
424  {
425  PVIndStats *indstats = &(pvs->indstats[i]);
426 
427  if (indstats->istat_updated)
428  {
429  istats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
430  memcpy(istats[i], &indstats->istat, sizeof(IndexBulkDeleteResult));
431  }
432  else
433  istats[i] = NULL;
434  }
435 
438 
440  pfree(pvs);
441 }
442 
443 /* Returns the dead items space (pvs->dead_items) of the given parallel vacuum state */
444 VacDeadItems *
446 {
447  return pvs->dead_items;
448 }
449 
450 /*
451  * Do parallel index bulk-deletion with parallel workers.
452  */
453 void
455  int num_index_scans)
456 {
458 
459  /*
460  * We can only provide an approximate value of num_heap_tuples, at least
461  * for now, so estimated_count is always set to true here.
462  */
463  pvs->shared->reltuples = num_table_tuples;
464  pvs->shared->estimated_count = true;
465 
466  parallel_vacuum_process_all_indexes(pvs, num_index_scans, true);
467 }
468 
469 /*
470  * Do parallel index cleanup with parallel workers.
471  */
472 void
474  int num_index_scans, bool estimated_count)
475 {
477 
478  /*
479  * We can provide a better estimate of total number of surviving tuples
480  * (we assume indexes are more interested in that than in the number of
481  * nominally live tuples). The caller's estimated_count flag is passed on.
482  */
483  pvs->shared->reltuples = num_table_tuples;
484  pvs->shared->estimated_count = estimated_count;
485 
486  parallel_vacuum_process_all_indexes(pvs, num_index_scans, false);
487 }
488 
489 /*
490  * Compute the number of parallel worker processes to request. Both index
491  * vacuum and index cleanup can be executed with parallel workers.
492  * The index is eligible for parallel vacuum iff its size is greater than
493  * min_parallel_index_scan_size as invoking workers for very small indexes
494  * can hurt performance.
495  *
496  * nrequested is the number of parallel workers that user requested. If
497  * nrequested is 0, we compute the parallel degree based on nindexes, that is
498  * the number of indexes that support parallel vacuum. This function also
499  * sets will_parallel_vacuum[i] to true for each index that participates in
500  * parallel vacuum. Returns 0 when parallel vacuum cannot be used.
501  */
502 static int
503 parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
504  bool *will_parallel_vacuum)
505 {
506  int nindexes_parallel = 0;
507  int nindexes_parallel_bulkdel = 0;
508  int nindexes_parallel_cleanup = 0;
509  int parallel_workers;
510 
511  /*
512  * We don't allow performing parallel operation in standalone backend or
513  * when parallelism is disabled.
514  */
516  return 0;
517 
518  /*
519  * Compute the number of indexes that can participate in parallel vacuum.
520  */
521  for (int i = 0; i < nindexes; i++)
522  {
523  Relation indrel = indrels[i];
524  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
525 
526  /* Skip index that is not a suitable target for parallel index vacuum */
527  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
529  continue;
530 
531  will_parallel_vacuum[i] = true;
532 
533  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
534  nindexes_parallel_bulkdel++;
535  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
536  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
537  nindexes_parallel_cleanup++;
538  }
539 
540  nindexes_parallel = Max(nindexes_parallel_bulkdel,
541  nindexes_parallel_cleanup);
542 
543  /* The leader process takes one index, so workers are needed for one fewer */
544  nindexes_parallel--;
545 
546  /* Nothing is left for workers once the leader has taken its index */
547  if (nindexes_parallel <= 0)
548  return 0;
549 
550  /* Compute the parallel degree; a positive nrequested acts as an upper bound */
551  parallel_workers = (nrequested > 0) ?
552  Min(nrequested, nindexes_parallel) : nindexes_parallel;
553 
554  /* Cap by max_parallel_maintenance_workers */
555  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
556 
557  return parallel_workers;
558 }
559 
560 /*
561  * Perform index vacuum or index cleanup with parallel workers. This function
562  * must be used by the parallel vacuum leader process.
563  */
564 static void
566  bool vacuum)
567 {
568  int nworkers;
569  PVIndVacStatus new_status;
570 
572 
573  if (vacuum)
574  {
576 
577  /* Determine the number of parallel workers to launch */
578  nworkers = pvs->nindexes_parallel_bulkdel;
579  }
580  else
581  {
583 
584  /* Determine the number of parallel workers to launch */
585  nworkers = pvs->nindexes_parallel_cleanup;
586 
587  /* Add conditionally parallel-aware indexes if no index scan has happened yet */
588  if (num_index_scans == 0)
589  nworkers += pvs->nindexes_parallel_condcleanup;
590  }
591 
592  /* The leader process will participate, so one fewer worker is needed */
593  nworkers--;
594 
595  /*
596  * It is possible that parallel context is initialized with fewer workers
597  * than the number of indexes that need a separate worker in the current
598  * phase, so we need to consider it. See
599  * parallel_vacuum_compute_workers().
600  */
601  nworkers = Min(nworkers, pvs->pcxt->nworkers);
602 
603  /*
604  * Set index vacuum status and mark whether parallel vacuum worker can
605  * process it.
606  */
607  for (int i = 0; i < pvs->nindexes; i++)
608  {
609  PVIndStats *indstats = &(pvs->indstats[i]);
610 
612  indstats->status = new_status;
613  indstats->parallel_workers_can_process =
614  (pvs->will_parallel_vacuum[i] &
616  num_index_scans,
617  vacuum));
618  }
619 
620  /* Reset the parallel index processing counter */
621  pg_atomic_write_u32(&(pvs->shared->idx), 0);
622 
623  /* Setup the shared cost-based vacuum delay and launch workers */
624  if (nworkers > 0)
625  {
626  /* Reinitialize parallel context to relaunch parallel workers */
627  if (num_index_scans > 0)
629 
630  /*
631  * Set up shared cost balance and the number of active workers for
632  * vacuum delay. We need to do this before launching workers as
633  * otherwise, they might not see the updated values for these
634  * parameters.
635  */
638 
639  /*
640  * The number of workers can vary between bulkdelete and cleanup
641  * phase.
642  */
643  ReinitializeParallelWorkers(pvs->pcxt, nworkers);
644 
646 
647  if (pvs->pcxt->nworkers_launched > 0)
648  {
649  /*
650  * Reset the local cost values for leader backend as we have
651  * already accumulated the remaining balance of heap.
652  */
653  VacuumCostBalance = 0;
655 
656  /* Enable shared cost balance for leader backend */
659  }
660 
661  if (vacuum)
662  ereport(pvs->shared->elevel,
663  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
664  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
665  pvs->pcxt->nworkers_launched),
666  pvs->pcxt->nworkers_launched, nworkers)));
667  else
668  ereport(pvs->shared->elevel,
669  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
670  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
671  pvs->pcxt->nworkers_launched),
672  pvs->pcxt->nworkers_launched, nworkers)));
673  }
674 
675  /* Vacuum the indexes that can be processed by only leader process */
677 
678  /*
679  * Join as a parallel worker. The leader alone processes all
680  * parallel-safe indexes in the case where no workers are launched.
681  */
683 
684  /*
685  * Next, accumulate buffer and WAL usage. (This must wait for the workers
686  * to finish, or we might get incomplete data.)
687  */
688  if (nworkers > 0)
689  {
690  /* Wait for all vacuum workers to finish */
692 
693  for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
695  }
696 
697  /*
698  * Reset all index status back to initial (while checking that we have
699  * vacuumed all indexes).
700  */
701  for (int i = 0; i < pvs->nindexes; i++)
702  {
703  PVIndStats *indstats = &(pvs->indstats[i]);
704 
705  if (indstats->status != PARALLEL_INDVAC_STATUS_COMPLETED)
706  elog(ERROR, "parallel index vacuum on index \"%s\" is not completed",
708 
710  }
711 
712  /*
713  * Carry the shared balance value to heap scan and disable shared costing
714  */
716  {
719  VacuumActiveNWorkers = NULL;
720  }
721 }
722 
723 /*
724  * Index vacuum/cleanup routine used by the leader process and parallel
725  * vacuum worker processes to vacuum the indexes in parallel.
726  */
727 static void
729 {
730  /*
731  * Increment the active worker count if we are able to launch any worker.
732  */
735 
736  /* Loop until all indexes are vacuumed */
737  for (;;)
738  {
739  int idx;
740  PVIndStats *indstats;
741 
742  /* Claim the next index number to process from the shared counter */
743  idx = pg_atomic_fetch_add_u32(&(pvs->shared->idx), 1);
744 
745  /* Done for all indexes? */
746  if (idx >= pvs->nindexes)
747  break;
748 
749  indstats = &(pvs->indstats[idx]);
750 
751  /*
752  * Skip vacuuming an index that is unsafe for workers or is an
753  * unsuitable target for parallel index vacuum (this is vacuumed in
754  * parallel_vacuum_process_unsafe_indexes() by the leader).
755  */
756  if (!indstats->parallel_workers_can_process)
757  continue;
758 
759  /* Do vacuum or cleanup of the index */
760  parallel_vacuum_process_one_index(pvs, pvs->indrels[idx], indstats);
761  }
762 
763  /*
764  * We have completed the index vacuum so decrement the active worker
765  * count.
766  */
769 }
770 
771 /*
772  * Perform vacuuming of indexes in the leader process only.
773  *
774  * Handles index vacuuming (or index cleanup) for indexes that are not
775  * parallel safe. It's possible that this will vary for a given index, based
776  * on details like whether we're performing index cleanup right now.
777  *
778  * Also performs vacuuming of smaller indexes that fell under the size cutoff
779  * enforced by parallel_vacuum_compute_workers().
780  */
781 static void
783 {
785 
786  /*
787  * Increment the active worker count if we are able to launch any worker.
788  */
791 
792  for (int i = 0; i < pvs->nindexes; i++)
793  {
794  PVIndStats *indstats = &(pvs->indstats[i]);
795 
796  /* Skip indexes that are safe for workers */
797  if (indstats->parallel_workers_can_process)
798  continue;
799 
800  /* Do vacuum or cleanup of the index */
801  parallel_vacuum_process_one_index(pvs, pvs->indrels[i], indstats);
802  }
803 
804  /*
805  * We have completed the index vacuum so decrement the active worker
806  * count.
807  */
810 }
811 
812 /*
813  * Vacuum or clean up an index, either in the leader process or in one of the
814  * worker processes. After vacuuming the index this function copies the index
815  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
816  * segment.
817  */
818 static void
820  PVIndStats *indstats)
821 {
822  IndexBulkDeleteResult *istat = NULL;
823  IndexBulkDeleteResult *istat_res;
824  IndexVacuumInfo ivinfo;
825 
826  /*
827  * Update the pointer to the corresponding bulk-deletion result if someone
828  * has already updated it; otherwise istat stays NULL
829  */
830  if (indstats->istat_updated)
831  istat = &(indstats->istat);
832 
833  ivinfo.index = indrel;
834  ivinfo.analyze_only = false;
835  ivinfo.report_progress = false;
836  ivinfo.message_level = DEBUG2;
837  ivinfo.estimated_count = pvs->shared->estimated_count;
838  ivinfo.num_heap_tuples = pvs->shared->reltuples;
839  ivinfo.strategy = pvs->bstrategy;
840 
841  /* Update error traceback information (read by the error context callback) */
842  pvs->indname = pstrdup(RelationGetRelationName(indrel));
843  pvs->status = indstats->status;
844 
845  switch (indstats->status)
846  {
848  istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
849  break;
851  istat_res = vac_cleanup_one_index(&ivinfo, istat);
852  break;
853  default:
854  elog(ERROR, "unexpected parallel vacuum index status %d for index \"%s\"",
855  indstats->status,
856  RelationGetRelationName(indrel));
857  }
858 
859  /*
860  * Copy the index bulk-deletion result returned from ambulkdelete and
861  * amvacuumcleanup to the DSM segment if it's the first cycle because they
862  * allocate locally and it's possible that an index will be vacuumed by a
863  * different vacuum process the next cycle. Copying the result normally
864  * happens only the first time an index is vacuumed. For any additional
865  * vacuum pass, we directly point to the result on the DSM segment and
866  * pass it to vacuum index APIs so that workers can update it directly.
867  *
868  * Since all vacuum workers write the bulk-deletion result at different
869  * slots we can write them without locking.
870  */
871  if (!indstats->istat_updated && istat_res != NULL)
872  {
873  memcpy(&(indstats->istat), istat_res, sizeof(IndexBulkDeleteResult));
874  indstats->istat_updated = true;
875 
876  /* Free the locally-allocated bulk-deletion result */
877  pfree(istat_res);
878  }
879 
880  /*
881  * Update the status to completed. No need to lock here since each worker
882  * touches different indexes.
883  */
885 
886  /* Reset error traceback information */
888  pfree(pvs->indname);
889  pvs->indname = NULL;
890 }
891 
892 /*
893  * Returns false if the given index can't participate in the next execution of
894  * parallel index vacuum or parallel index cleanup.
895  */
896 static bool
898  bool vacuum)
899 {
900  uint8 vacoptions;
901 
902  vacoptions = indrel->rd_indam->amparallelvacuumoptions;
903 
904  /* In the bulk-deletion case (vacuum is true), check if it supports parallel bulk-deletion */
905  if (vacuum)
906  return ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0);
907 
908  /* Not safe, if the index does not support parallel cleanup in either form */
909  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
910  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
911  return false;
912 
913  /*
914  * Not safe, if the index supports parallel cleanup conditionally, but we
915  * have already processed the index (for bulkdelete). We do this to avoid
916  * the need to invoke workers when parallel index cleanup doesn't need to
917  * scan the index. See the comments for option
918  * VACUUM_OPTION_PARALLEL_COND_CLEANUP to know when indexes support
919  * parallel cleanup conditionally.
920  */
921  if (num_index_scans > 0 &&
922  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
923  return false;
924 
925  return true;
926 }
927 
928 /*
929  * Perform work within a launched parallel process.
930  *
931  * Since parallel vacuum workers perform only index vacuum or index cleanup,
932  * we don't need to report progress information.
933  */
934 void
936 {
938  Relation rel;
939  Relation *indrels;
940  PVIndStats *indstats;
941  PVShared *shared;
942  VacDeadItems *dead_items;
943  BufferUsage *buffer_usage;
944  WalUsage *wal_usage;
945  int nindexes;
946  char *sharedquery;
947  ErrorContextCallback errcallback;
948 
949  /*
950  * A parallel vacuum worker must have only PROC_IN_VACUUM flag since we
951  * don't support parallel vacuum for autovacuum as of now.
952  */
954 
955  elog(DEBUG1, "starting parallel vacuum worker");
956 
957  shared = (PVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, false);
958 
959  /* Set debug_query_string for individual workers (the leader stores it only when it has one) */
960  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
961  debug_query_string = sharedquery;
963 
964  /*
965  * Open table. The lock mode is the same as the leader process. It's
966  * okay because the lock mode does not conflict among the parallel
967  * workers.
968  */
969  rel = table_open(shared->relid, ShareUpdateExclusiveLock);
970 
971  /*
972  * Open all indexes. indrels are sorted in order by OID, which should
973  * match the leader's ordering.
974  */
975  vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
976  Assert(nindexes > 0);
977 
978  if (shared->maintenance_work_mem_worker > 0)
980 
981  /* Set index statistics */
982  indstats = (PVIndStats *) shm_toc_lookup(toc,
984  false);
985 
986  /* Set dead_items space */
987  dead_items = (VacDeadItems *) shm_toc_lookup(toc,
989  false);
990 
991  /* Set cost-based vacuum delay */
993  VacuumCostBalance = 0;
994  VacuumPageHit = 0;
995  VacuumPageMiss = 0;
996  VacuumPageDirty = 0;
1000 
1001  /* Set parallel vacuum state */
1002  pvs.indrels = indrels;
1003  pvs.nindexes = nindexes;
1004  pvs.indstats = indstats;
1005  pvs.shared = shared;
1006  pvs.dead_items = dead_items;
1009 
1010  /* These fields will be filled during index vacuum or cleanup */
1011  pvs.indname = NULL;
1013 
1014  /* Each parallel VACUUM worker gets its own access strategy */
1016 
1017  /* Setup error traceback support for ereport() */
1019  errcallback.arg = &pvs;
1020  errcallback.previous = error_context_stack;
1021  error_context_stack = &errcallback;
1022 
1023  /* Prepare to track buffer usage during parallel execution */
1025 
1026  /* Process indexes to perform vacuum/cleanup */
1028 
1029  /* Report buffer/WAL usage during parallel execution into this worker's slot */
1030  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
1031  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
1033  &wal_usage[ParallelWorkerNumber]);
1034 
1035  /* Pop the error context stack */
1036  error_context_stack = errcallback.previous;
1037 
1038  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
1041 }
1042 
1043 /*
1044  * Error context callback for errors occurring during parallel index vacuum
1045  * (arg is the ParallelVacuumState). The context messages should match those
1046  * set in the lazy vacuum error context. If you change this function, change
1047  * vacuum_error_callback() as well.
1048  */
1049 static void
1051 {
1052  ParallelVacuumState *errinfo = arg;
1053 
1054  switch (errinfo->status)
1055  {
1057  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
1058  errinfo->indname,
1059  errinfo->relnamespace,
1060  errinfo->relname);
1061  break;
1063  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
1064  errinfo->indname,
1065  errinfo->relnamespace,
1066  errinfo->relname);
1067  break;
1070  default:
1071  return;
1072  }
1073 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int min_parallel_index_scan_size
Definition: allpaths.c:66
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
int ParallelWorkerNumber
Definition: parallel.c:112
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:762
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:539
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:475
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:916
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:525
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_RUNNING
@ BAS_VACUUM
Definition: bufmgr.h:33
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:216
#define Min(x, y)
Definition: c.h:986
#define ngettext(s, p, n)
Definition: c.h:1179
#define Max(x, y)
Definition: c.h:980
unsigned char uint8
Definition: c.h:439
#define MemSet(start, val, len)
Definition: c.h:1008
size_t Size
Definition: c.h:540
ErrorContextCallback * error_context_stack
Definition: elog.c:93
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define errcontext
Definition: elog.h:190
#define DEBUG2
Definition: elog.h:23
#define DEBUG1
Definition: elog.h:24
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:596
int64 VacuumPageHit
Definition: globals.c:148
int max_parallel_maintenance_workers
Definition: globals.c:128
int64 VacuumPageMiss
Definition: globals.c:149
bool VacuumCostActive
Definition: globals.c:153
bool IsUnderPostmaster
Definition: globals.c:113
int64 VacuumPageDirty
Definition: globals.c:150
int VacuumCostBalance
Definition: globals.c:152
int maintenance_work_mem
Definition: globals.c:127
double VacuumCostDelay
Definition: globals.c:146
#define IsParallelWorker()
Definition: parallel.h:61
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:218
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:208
void InstrStartParallelQuery(void)
Definition: instrument.c:200
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3326
char * pstrdup(const char *in)
Definition: mcxt.c:1305
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc0(Size size)
Definition: mcxt.c:1099
void * arg
const char * debug_query_string
Definition: postgres.c:89
unsigned int Oid
Definition: postgres_ext.h:31
#define PROC_IN_VACUUM
Definition: proc.h:55
#define RelationGetRelid(relation)
Definition: rel.h:489
#define RelationGetRelationName(relation)
Definition: rel.h:523
#define RelationGetNamespace(relation)
Definition: rel.h:530
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
PGPROC * MyProc
Definition: proc.c:68
struct ErrorContextCallback * previous
Definition: elog.h:232
void(* callback)(void *arg)
Definition: elog.h:233
bool amusemaintenanceworkmem
Definition: amapi.h:246
uint8 amparallelvacuumoptions
Definition: amapi.h:250
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:51
bool analyze_only
Definition: genam.h:47
BufferAccessStrategy strategy
Definition: genam.h:52
bool report_progress
Definition: genam.h:48
int message_level
Definition: genam.h:50
bool estimated_count
Definition: genam.h:49
uint8 statusFlags
Definition: proc.h:228
bool istat_updated
IndexBulkDeleteResult istat
bool parallel_workers_can_process
PVIndVacStatus status
double reltuples
pg_atomic_uint32 cost_balance
int maintenance_work_mem_worker
pg_atomic_uint32 active_nworkers
pg_atomic_uint32 idx
bool estimated_count
shm_toc_estimator estimator
Definition: parallel.h:42
shm_toc * toc
Definition: parallel.h:45
int nworkers_launched
Definition: parallel.h:38
BufferAccessStrategy bstrategy
BufferUsage * buffer_usage
ParallelContext * pcxt
PVIndStats * indstats
VacDeadItems * dead_items
PVIndVacStatus status
struct IndexAmRoutine * rd_indam
Definition: rel.h:202
ItemPointerData items[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuum.h:247
int max_items
Definition: vacuum.h:243
int num_items
Definition: vacuum.h:244
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, VacDeadItems *dead_items)
Definition: vacuum.c:2326
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:84
Size vac_max_items_to_alloc_size(int max_items)
Definition: vacuum.c:2372
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2143
int VacuumCostBalanceLocal
Definition: vacuum.c:85
void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, bool isTopLevel)
Definition: vacuum.c:298
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2186
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:83
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2347
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:62
#define VACUUM_OPTION_NO_PARALLEL
Definition: vacuum.h:41
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:47
#define VACUUM_OPTION_MAX_VALID_VALUE
Definition: vacuum.h:65
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:54
static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans, bool vacuum)
static void parallel_vacuum_error_callback(void *arg)
static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested, bool *will_parallel_vacuum)
#define PARALLEL_VACUUM_KEY_INDEX_STATS
#define PARALLEL_VACUUM_KEY_QUERY_TEXT
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE
VacDeadItems * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
#define PARALLEL_VACUUM_KEY_SHARED
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
#define PARALLEL_VACUUM_KEY_WAL_USAGE
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
static void parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs)
static void parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel, PVIndStats *indstats)
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
struct PVShared PVShared
static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans, bool vacuum)
static void parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs)
#define PARALLEL_VACUUM_KEY_DEAD_ITEMS
struct PVIndStats PVIndStats
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int max_items, int elevel, BufferAccessStrategy bstrategy)
PVIndVacStatus
@ PARALLEL_INDVAC_STATUS_NEED_CLEANUP
@ PARALLEL_INDVAC_STATUS_INITIAL
@ PARALLEL_INDVAC_STATUS_NEED_BULKDELETE
@ PARALLEL_INDVAC_STATUS_COMPLETED
void ExitParallelMode(void)
Definition: xact.c:1045
void EnterParallelMode(void)
Definition: xact.c:1032