1 /* ----------
2  * pgstat.c
3  *
4  * All the statistics collector stuff hacked up in one big, ugly file.
5  *
6  * TODO: - Separate collector, postmaster and backend stuff
7  * into different files.
8  *
9  * - Add some automatic call for pgstat vacuuming.
10  *
11  * - Add a pgstat config column to pg_database, so this
12  * entire thing can be enabled/disabled on a per db basis.
13  *
14  * Copyright (c) 2001-2017, PostgreSQL Global Development Group
15  *
16  * src/backend/postmaster/pgstat.c
17  * ----------
18  */
19 #include "postgres.h"
20 
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/param.h>
24 #include <sys/time.h>
25 #include <sys/socket.h>
26 #include <netdb.h>
27 #include <netinet/in.h>
28 #include <arpa/inet.h>
29 #include <signal.h>
30 #include <time.h>
31 #ifdef HAVE_SYS_SELECT_H
32 #include <sys/select.h>
33 #endif
34 
35 #include "pgstat.h"
36 
37 #include "access/heapam.h"
38 #include "access/htup_details.h"
39 #include "access/transam.h"
40 #include "access/twophase_rmgr.h"
41 #include "access/xact.h"
42 #include "catalog/pg_database.h"
43 #include "catalog/pg_proc.h"
44 #include "common/ip.h"
45 #include "libpq/libpq.h"
46 #include "libpq/pqsignal.h"
47 #include "mb/pg_wchar.h"
48 #include "miscadmin.h"
49 #include "pg_trace.h"
50 #include "postmaster/autovacuum.h"
51 #include "postmaster/fork_process.h"
52 #include "postmaster/postmaster.h"
53 #include "storage/backendid.h"
54 #include "storage/dsm.h"
55 #include "storage/fd.h"
56 #include "storage/ipc.h"
57 #include "storage/latch.h"
58 #include "storage/lmgr.h"
59 #include "storage/pg_shmem.h"
60 #include "storage/procsignal.h"
61 #include "storage/sinvaladt.h"
62 #include "utils/ascii.h"
63 #include "utils/guc.h"
64 #include "utils/memutils.h"
65 #include "utils/ps_status.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 #include "utils/timestamp.h"
69 #include "utils/tqual.h"
70 
71 
72 /* ----------
73  * Timer definitions.
74  * ----------
75  */
76 #define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
77  * updates; in milliseconds. */
78 
79 #define PGSTAT_RETRY_DELAY 10 /* How long to wait between checks for
80  * a new file; in milliseconds. */
81 
82 #define PGSTAT_MAX_WAIT_TIME 10000 /* Maximum time to wait for a stats
83  * file update; in milliseconds. */
84 
85 #define PGSTAT_INQ_INTERVAL 640 /* How often to ping the collector for
86  * a new file; in milliseconds. */
87 
88 #define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a
89  * failed statistics collector; in
90  * seconds. */
91 
92 #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
93 #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
94 
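/*
 * Illustrative sketch (not part of the original file): how the timer and
 * loop-count macros above are meant to combine.  A reader of the stats
 * file sleeps PGSTAT_RETRY_DELAY ms per iteration, gives up after
 * PGSTAT_POLL_LOOP_COUNT iterations (PGSTAT_MAX_WAIT_TIME in total), and
 * re-pings the collector every PGSTAT_INQ_LOOP_COUNT iterations (i.e.
 * every PGSTAT_INQ_INTERVAL ms).  file_is_fresh() and the guard macro
 * PGSTAT_ILLUSTRATION_ONLY are hypothetical; pgstat_send_inquiry() is the
 * file's own helper, defined further down.
 */
#ifdef PGSTAT_ILLUSTRATION_ONLY
static void
wait_for_fresh_stats_file(void)
{
	int			count;

	for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
	{
		if (file_is_fresh())	/* hypothetical freshness check */
			break;
		if (count % PGSTAT_INQ_LOOP_COUNT == 0)
			pgstat_send_inquiry(GetCurrentTimestamp(),
								GetCurrentTimestamp(), MyDatabaseId);
		pg_usleep(PGSTAT_RETRY_DELAY * 1000L);	/* ms -> usec */
	}
}
#endif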
95 
96 /* ----------
97  * The initial size hints for the hash tables used in the collector.
98  * ----------
99  */
100 #define PGSTAT_DB_HASH_SIZE 16
101 #define PGSTAT_TAB_HASH_SIZE 512
102 #define PGSTAT_FUNCTION_HASH_SIZE 512
103 
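/*
 * Illustrative sketch (not part of the original file): the size hints
 * above are passed as the nelem argument of hash_create() when the
 * collector builds its dynahash tables, roughly as below for the
 * per-database table keyed by OID.  The guard macro and function name are
 * hypothetical.
 */
#ifdef PGSTAT_ILLUSTRATION_ONLY
static HTAB *
make_db_hash_example(void)
{
	HASHCTL		hash_ctl;

	memset(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(Oid);
	hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
	return hash_create("Databases hash", PGSTAT_DB_HASH_SIZE,
					   &hash_ctl, HASH_ELEM | HASH_BLOBS);
}
#endif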
104 
105 /* ----------
106  * GUC parameters
107  * ----------
108  */
109 bool pgstat_track_activities = false;
110 bool pgstat_track_counts = false;
111 int pgstat_track_functions = TRACK_FUNC_OFF;
112 int pgstat_track_activity_query_size = 1024;
113 
114 /* ----------
115  * Built from GUC parameter
116  * ----------
117  */
118 char *pgstat_stat_directory = NULL;
119 char *pgstat_stat_filename = NULL;
120 char *pgstat_stat_tmpname = NULL;
121 
122 /*
123  * BgWriter global statistics counters (unused in other processes).
124  * Stored directly in a stats message structure so it can be sent
125  * without needing to copy things around. We assume this inits to zeroes.
126  */
127 PgStat_MsgBgWriter BgWriterStats;
128 
129 /* ----------
130  * Local data
131  * ----------
132  */
133 NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
134 
135 static struct sockaddr_storage pgStatAddr;
136 
137 static time_t last_pgstat_start_time;
138 
139 static bool pgStatRunningInCollector = false;
140 
141 /*
142  * Structures in which backends store per-table info that's waiting to be
143  * sent to the collector.
144  *
145  * NOTE: once allocated, TabStatusArray structures are never moved or deleted
146  * for the life of the backend. Also, we zero out the t_id fields of the
147  * contained PgStat_TableStatus structs whenever they are not actively in use.
148  * This allows relcache pgstat_info pointers to be treated as long-lived data,
149  * avoiding repeated searches in pgstat_initstats() when a relation is
150  * repeatedly opened during a transaction.
151  */
152 #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
153 
154 typedef struct TabStatusArray
155 {
156  struct TabStatusArray *tsa_next; /* link to next array, if any */
157  int tsa_used; /* # entries currently used */
158  PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */
159 } TabStatusArray;
160 
161 static TabStatusArray *pgStatTabList = NULL;
162 
163 /*
164  * Backends store per-function info that's waiting to be sent to the collector
165  * in this hash table (indexed by function OID).
166  */
167 static HTAB *pgStatFunctions = NULL;
168 
169 /*
170  * Indicates if backend has some function stats that it hasn't yet
171  * sent to the collector.
172  */
173 static bool have_function_stats = false;
174 
175 /*
176  * Tuple insertion/deletion counts for an open transaction can't be propagated
177  * into PgStat_TableStatus counters until we know if it is going to commit
178  * or abort. Hence, we keep these counts in per-subxact structs that live
179  * in TopTransactionContext. This data structure is designed on the assumption
180  * that subxacts won't usually modify very many tables.
181  */
182 typedef struct PgStat_SubXactStatus
183 {
184  int nest_level; /* subtransaction nest level */
185  struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
186  PgStat_TableXactStatus *first; /* head of list for this subxact */
187 } PgStat_SubXactStatus;
188 
189 static PgStat_SubXactStatus *pgStatXactStack = NULL;
190 
191 static int pgStatXactCommit = 0;
192 static int pgStatXactRollback = 0;
193 PgStat_Counter pgStatBlockReadTime = 0;
194 PgStat_Counter pgStatBlockWriteTime = 0;
195 
196 /* Record that's written to 2PC state file when pgstat state is persisted */
197 typedef struct TwoPhasePgStatRecord
198 {
199  PgStat_Counter tuples_inserted; /* tuples inserted in xact */
200  PgStat_Counter tuples_updated; /* tuples updated in xact */
201  PgStat_Counter tuples_deleted; /* tuples deleted in xact */
202  PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
203  PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
204  PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
205  Oid t_id; /* table's OID */
206  bool t_shared; /* is it a shared catalog? */
207  bool t_truncated; /* was the relation truncated? */
208 } TwoPhasePgStatRecord;
209 
210 /*
211  * Info about current "snapshot" of stats file
212  */
213 static MemoryContext pgStatLocalContext = NULL;
214 static HTAB *pgStatDBHash = NULL;
215 static LocalPgBackendStatus *localBackendStatusTable = NULL;
216 static int localNumBackends = 0;
217 
218 /*
219  * Cluster wide statistics, kept in the stats collector.
220  * Contains statistics that are not collected per database
221  * or per table.
222  */
223 static PgStat_ArchiverStats archiverStats;
224 static PgStat_GlobalStats globalStats;
225 
226 /*
227  * List of OIDs of databases we need to write out. If an entry is InvalidOid,
228  * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
229  * will write both that DB's data and the shared stats.
230  */
231 static List *pending_write_requests = NIL;
232 
233 /* Signal handler flags */
234 static volatile bool need_exit = false;
235 static volatile bool got_SIGHUP = false;
236 
237 /*
238  * Total time charged to functions so far in the current backend.
239  * We use this to help separate "self" and "other" time charges.
240  * (We assume this initializes to zero.)
241  */
242 static instr_time total_func_time;
243 
244 
245 /* ----------
246  * Local function forward declarations
247  * ----------
248  */
249 #ifdef EXEC_BACKEND
250 static pid_t pgstat_forkexec(void);
251 #endif
252 
253 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
254 static void pgstat_exit(SIGNAL_ARGS);
255 static void pgstat_beshutdown_hook(int code, Datum arg);
256 static void pgstat_sighup_handler(SIGNAL_ARGS);
257 
258 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
259 static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
260  Oid tableoid, bool create);
261 static void pgstat_write_statsfiles(bool permanent, bool allDbs);
262 static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent);
263 static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
264 static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
265 static void backend_read_statsfile(void);
266 static void pgstat_read_current_status(void);
267 
268 static bool pgstat_write_statsfile_needed(void);
269 static bool pgstat_db_requested(Oid databaseid);
270 
271 static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
272 static void pgstat_send_funcstats(void);
273 static HTAB *pgstat_collect_oids(Oid catalogid);
274 
275 static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
276 
277 static void pgstat_setup_memcxt(void);
278 
279 static const char *pgstat_get_wait_activity(WaitEventActivity w);
280 static const char *pgstat_get_wait_client(WaitEventClient w);
281 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
282 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
283 static const char *pgstat_get_wait_io(WaitEventIO w);
284 
285 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
286 static void pgstat_send(void *msg, int len);
287 
288 static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
289 static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
290 static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
291 static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
292 static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
293 static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len);
294 static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len);
295 static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
296 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
297 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
298 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
299 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
300 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
301 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
302 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
303 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
304 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
305 
306 /* ------------------------------------------------------------
307  * Public functions called from postmaster follow
308  * ------------------------------------------------------------
309  */
310 
311 /* ----------
312  * pgstat_init() -
313  *
314  * Called from postmaster at startup. Create the resources required
315  * by the statistics collector process. If unable to do so, do not
316  * fail --- better to let the postmaster start with stats collection
317  * disabled.
318  * ----------
319  */
320 void
321 pgstat_init(void)
322 {
323  ACCEPT_TYPE_ARG3 alen;
324  struct addrinfo *addrs = NULL,
325  *addr,
326  hints;
327  int ret;
328  fd_set rset;
329  struct timeval tv;
330  char test_byte;
331  int sel_res;
332  int tries = 0;
333 
334 #define TESTBYTEVAL ((char) 199)
335 
336  /*
337  * This static assertion verifies that we didn't mess up the calculations
338  * involved in selecting maximum payload sizes for our UDP messages.
339  * Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
340  * be silent performance loss from fragmentation, it seems worth having a
341  * compile-time cross-check that we didn't.
342  */
343  StaticAssertStmt(sizeof(PgStat_Msg) <= PGSTAT_MAX_MSG_SIZE,
344  "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");
345 
346  /*
347  * Create the UDP socket for sending and receiving statistic messages
348  */
349  hints.ai_flags = AI_PASSIVE;
350  hints.ai_family = AF_UNSPEC;
351  hints.ai_socktype = SOCK_DGRAM;
352  hints.ai_protocol = 0;
353  hints.ai_addrlen = 0;
354  hints.ai_addr = NULL;
355  hints.ai_canonname = NULL;
356  hints.ai_next = NULL;
357  ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
358  if (ret || !addrs)
359  {
360  ereport(LOG,
361  (errmsg("could not resolve \"localhost\": %s",
362  gai_strerror(ret))));
363  goto startup_failed;
364  }
365 
366  /*
367  * On some platforms, pg_getaddrinfo_all() may return multiple addresses
368  * only one of which will actually work (eg, both IPv6 and IPv4 addresses
369  * when kernel will reject IPv6). Worse, the failure may occur at the
370  * bind() or perhaps even connect() stage. So we must loop through the
371  * results till we find a working combination. We will generate LOG
372  * messages, but no error, for bogus combinations.
373  */
374  for (addr = addrs; addr; addr = addr->ai_next)
375  {
376 #ifdef HAVE_UNIX_SOCKETS
377  /* Ignore AF_UNIX sockets, if any are returned. */
378  if (addr->ai_family == AF_UNIX)
379  continue;
380 #endif
381 
382  if (++tries > 1)
383  ereport(LOG,
384  (errmsg("trying another address for the statistics collector")));
385 
386  /*
387  * Create the socket.
388  */
389  if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) == PGINVALID_SOCKET)
390  {
391  ereport(LOG,
393  errmsg("could not create socket for statistics collector: %m")));
394  continue;
395  }
396 
397  /*
398  * Bind it to a kernel assigned port on localhost and get the assigned
399  * port via getsockname().
400  */
401  if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
402  {
403  ereport(LOG,
405  errmsg("could not bind socket for statistics collector: %m")));
408  continue;
409  }
410 
411  alen = sizeof(pgStatAddr);
412  if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
413  {
414  ereport(LOG,
416  errmsg("could not get address of socket for statistics collector: %m")));
419  continue;
420  }
421 
422  /*
423  * Connect the socket to its own address. This saves a few cycles by
424  * not having to respecify the target address on every send. This also
425  * provides a kernel-level check that only packets from this same
426  * address will be received.
427  */
428  if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
429  {
430  ereport(LOG,
432  errmsg("could not connect socket for statistics collector: %m")));
435  continue;
436  }
437 
438  /*
439  * Try to send and receive a one-byte test message on the socket. This
440  * is to catch situations where the socket can be created but will not
441  * actually pass data (for instance, because kernel packet filtering
442  * rules prevent it).
443  */
444  test_byte = TESTBYTEVAL;
445 
446 retry1:
447  if (send(pgStatSock, &test_byte, 1, 0) != 1)
448  {
449  if (errno == EINTR)
450  goto retry1; /* if interrupted, just retry */
451  ereport(LOG,
453  errmsg("could not send test message on socket for statistics collector: %m")));
456  continue;
457  }
458 
459  /*
460  * There could possibly be a little delay before the message can be
461  * received. We arbitrarily allow up to half a second before deciding
462  * it's broken.
463  */
464  for (;;) /* need a loop to handle EINTR */
465  {
466  FD_ZERO(&rset);
467  FD_SET(pgStatSock, &rset);
468 
469  tv.tv_sec = 0;
470  tv.tv_usec = 500000;
471  sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
472  if (sel_res >= 0 || errno != EINTR)
473  break;
474  }
475  if (sel_res < 0)
476  {
477  ereport(LOG,
479  errmsg("select() failed in statistics collector: %m")));
482  continue;
483  }
484  if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
485  {
486  /*
487  * This is the case we actually think is likely, so take pains to
488  * give a specific message for it.
489  *
490  * errno will not be set meaningfully here, so don't use it.
491  */
492  ereport(LOG,
493  (errcode(ERRCODE_CONNECTION_FAILURE),
494  errmsg("test message did not get through on socket for statistics collector")));
497  continue;
498  }
499 
500  test_byte++; /* just make sure variable is changed */
501 
502 retry2:
503  if (recv(pgStatSock, &test_byte, 1, 0) != 1)
504  {
505  if (errno == EINTR)
506  goto retry2; /* if interrupted, just retry */
507  ereport(LOG,
509  errmsg("could not receive test message on socket for statistics collector: %m")));
512  continue;
513  }
514 
515  if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
516  {
517  ereport(LOG,
518  (errcode(ERRCODE_INTERNAL_ERROR),
519  errmsg("incorrect test message transmission on socket for statistics collector")));
522  continue;
523  }
524 
525  /* If we get here, we have a working socket */
526  break;
527  }
528 
529  /* Did we find a working address? */
530  if (!addr || pgStatSock == PGINVALID_SOCKET)
531  goto startup_failed;
532 
533  /*
534  * Set the socket to non-blocking IO. This ensures that if the collector
535  * falls behind, statistics messages will be discarded; backends won't
536  * block waiting to send messages to the collector.
537  */
538  if (!pg_set_noblock(pgStatSock))
539  {
540  ereport(LOG,
542  errmsg("could not set statistics collector socket to nonblocking mode: %m")));
543  goto startup_failed;
544  }
545 
546  pg_freeaddrinfo_all(hints.ai_family, addrs);
547 
548  return;
549 
550 startup_failed:
551  ereport(LOG,
552  (errmsg("disabling statistics collector for lack of working socket")));
553 
554  if (addrs)
555  pg_freeaddrinfo_all(hints.ai_family, addrs);
556 
557  if (pgStatSock != PGINVALID_SOCKET)
558  closesocket(pgStatSock);
559  pgStatSock = PGINVALID_SOCKET;
560 
561  /*
562  * Adjust GUC variables to suppress useless activity, and for debugging
563  * purposes (seeing track_counts off is a clue that we failed here). We
564  * use PGC_S_OVERRIDE because there is no point in trying to turn it back
565  * on from postgresql.conf without a restart.
566  */
567  SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
568 }
569 
570 /*
571  * subroutine for pgstat_reset_all
572  */
573 static void
574 pgstat_reset_remove_files(const char *directory)
575 {
576  DIR *dir;
577  struct dirent *entry;
578  char fname[MAXPGPATH];
579 
580  dir = AllocateDir(directory);
581  while ((entry = ReadDir(dir, directory)) != NULL)
582  {
583  int nchars;
584  Oid tmp_oid;
585 
586  /*
587  * Skip directory entries that don't match the file names we write.
588  * See get_dbstat_filename for the database-specific pattern.
589  */
590  if (strncmp(entry->d_name, "global.", 7) == 0)
591  nchars = 7;
592  else
593  {
594  nchars = 0;
595  (void) sscanf(entry->d_name, "db_%u.%n",
596  &tmp_oid, &nchars);
597  if (nchars <= 0)
598  continue;
599  /* %u allows leading whitespace, so reject that */
600  if (strchr("0123456789", entry->d_name[3]) == NULL)
601  continue;
602  }
603 
604  if (strcmp(entry->d_name + nchars, "tmp") != 0 &&
605  strcmp(entry->d_name + nchars, "stat") != 0)
606  continue;
607 
608  snprintf(fname, MAXPGPATH, "%s/%s", directory,
609  entry->d_name);
610  unlink(fname);
611  }
612  FreeDir(dir);
613 }
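/*
 * Worked example of the name matching above (illustrative, not from the
 * original file): "global.stat", "global.tmp", "db_16384.stat" and
 * "db_16384.tmp" all match and are unlinked; "db_ 16384.stat" is skipped
 * because the leading-whitespace check on d_name[3] rejects it even
 * though %u would accept it, and unrelated names such as
 * "pgstat_backup.old" fall through the sscanf() test with nchars == 0.
 */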
614 
615 /*
616  * pgstat_reset_all() -
617  *
618  * Remove the stats files. This is currently used only if WAL
619  * recovery is needed after a crash.
620  */
621 void
622 pgstat_reset_all(void)
623 {
624  pgstat_reset_remove_files(pgstat_stat_directory);
625  pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
626 }
627 
628 #ifdef EXEC_BACKEND
629 
630 /*
631  * pgstat_forkexec() -
632  *
633  * Format up the arglist for, then fork and exec, statistics collector process
634  */
635 static pid_t
636 pgstat_forkexec(void)
637 {
638  char *av[10];
639  int ac = 0;
640 
641  av[ac++] = "postgres";
642  av[ac++] = "--forkcol";
643  av[ac++] = NULL; /* filled in by postmaster_forkexec */
644 
645  av[ac] = NULL;
646  Assert(ac < lengthof(av));
647 
648  return postmaster_forkexec(ac, av);
649 }
650 #endif /* EXEC_BACKEND */
651 
652 
653 /*
654  * pgstat_start() -
655  *
656  * Called from postmaster at startup or after an existing collector
657  * died. Attempt to fire up a fresh statistics collector.
658  *
659  * Returns PID of child process, or 0 if fail.
660  *
661  * Note: if fail, we will be called again from the postmaster main loop.
662  */
663 int
664 pgstat_start(void)
665 {
666  time_t curtime;
667  pid_t pgStatPid;
668 
669  /*
670  * Check that the socket is there, else pgstat_init failed and we can do
671  * nothing useful.
672  */
673  if (pgStatSock == PGINVALID_SOCKET)
674  return 0;
675 
676  /*
677  * Do nothing if too soon since last collector start. This is a safety
678  * valve to protect against continuous respawn attempts if the collector
679  * is dying immediately at launch. Note that since we will be re-called
680  * from the postmaster main loop, we will get another chance later.
681  */
682  curtime = time(NULL);
683  if ((unsigned int) (curtime - last_pgstat_start_time) <
684  (unsigned int) PGSTAT_RESTART_INTERVAL)
685  return 0;
686  last_pgstat_start_time = curtime;
687 
688  /*
689  * Okay, fork off the collector.
690  */
691 #ifdef EXEC_BACKEND
692  switch ((pgStatPid = pgstat_forkexec()))
693 #else
694  switch ((pgStatPid = fork_process()))
695 #endif
696  {
697  case -1:
698  ereport(LOG,
699  (errmsg("could not fork statistics collector: %m")));
700  return 0;
701 
702 #ifndef EXEC_BACKEND
703  case 0:
704  /* in postmaster child ... */
705  InitPostmasterChild();
706 
707  /* Close the postmaster's sockets */
708  ClosePostmasterPorts(false);
709 
710  /* Drop our connection to postmaster's shared memory, as well */
711  dsm_detach_all();
712  PGSharedMemoryDetach();
713 
714  PgstatCollectorMain(0, NULL);
715  break;
716 #endif
717 
718  default:
719  return (int) pgStatPid;
720  }
721 
722  /* shouldn't get here */
723  return 0;
724 }
725 
726 void
727 allow_immediate_pgstat_restart(void)
728 {
729  last_pgstat_start_time = 0;
730 }
731 
732 /* ------------------------------------------------------------
733  * Public functions used by backends follow
734  *------------------------------------------------------------
735  */
736 
737 
738 /* ----------
739  * pgstat_report_stat() -
740  *
741  * Called from tcop/postgres.c to send the so far collected per-table
742  * and function usage statistics to the collector. Note that this is
743  * called only when not within a transaction, so it is fair to use
744  * transaction stop time as an approximation of current time.
745  * ----------
746  */
747 void
748 pgstat_report_stat(bool force)
749 {
750  /* we assume this inits to all zeroes: */
751  static const PgStat_TableCounts all_zeroes;
752  static TimestampTz last_report = 0;
753 
754  TimestampTz now;
755  PgStat_MsgTabstat regular_msg;
756  PgStat_MsgTabstat shared_msg;
757  TabStatusArray *tsa;
758  int i;
759 
760  /* Don't expend a clock check if nothing to do */
761  if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
762  pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
763  !have_function_stats)
764  return;
765 
766  /*
767  * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
768  * msec since we last sent one, or the caller wants to force stats out.
769  */
770  now = GetCurrentTransactionStopTimestamp();
771  if (!force &&
772  !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
773  return;
774  last_report = now;
775 
776  /*
777  * Scan through the TabStatusArray struct(s) to find tables that actually
778  * have counts, and build messages to send. We have to separate shared
779  * relations from regular ones because the databaseid field in the message
780  * header has to depend on that.
781  */
782  regular_msg.m_databaseid = MyDatabaseId;
783  shared_msg.m_databaseid = InvalidOid;
784  regular_msg.m_nentries = 0;
785  shared_msg.m_nentries = 0;
786 
787  for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
788  {
789  for (i = 0; i < tsa->tsa_used; i++)
790  {
791  PgStat_TableStatus *entry = &tsa->tsa_entries[i];
792  PgStat_MsgTabstat *this_msg;
793  PgStat_TableEntry *this_ent;
794 
795  /* Shouldn't have any pending transaction-dependent counts */
796  Assert(entry->trans == NULL);
797 
798  /*
799  * Ignore entries that didn't accumulate any actual counts, such
800  * as indexes that were opened by the planner but not used.
801  */
802  if (memcmp(&entry->t_counts, &all_zeroes,
803  sizeof(PgStat_TableCounts)) == 0)
804  continue;
805 
806  /*
807  * OK, insert data into the appropriate message, and send if full.
808  */
809  this_msg = entry->t_shared ? &shared_msg : &regular_msg;
810  this_ent = &this_msg->m_entry[this_msg->m_nentries];
811  this_ent->t_id = entry->t_id;
812  memcpy(&this_ent->t_counts, &entry->t_counts,
813  sizeof(PgStat_TableCounts));
814  if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
815  {
816  pgstat_send_tabstat(this_msg);
817  this_msg->m_nentries = 0;
818  }
819  }
820  /* zero out TableStatus structs after use */
821  MemSet(tsa->tsa_entries, 0,
822  tsa->tsa_used * sizeof(PgStat_TableStatus));
823  tsa->tsa_used = 0;
824  }
825 
826  /*
827  * Send partial messages. Make sure that any pending xact commit/abort
828  * gets counted, even if there are no table stats to send.
829  */
830  if (regular_msg.m_nentries > 0 ||
831  pgStatXactCommit > 0 || pgStatXactRollback > 0)
832  pgstat_send_tabstat(&regular_msg);
833  if (shared_msg.m_nentries > 0)
834  pgstat_send_tabstat(&shared_msg);
835 
836  /* Now, send function statistics */
837  pgstat_send_funcstats();
838 }
839 
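/*
 * Illustrative sketch (not part of the original file): the typical call
 * pattern for pgstat_report_stat().  tcop/postgres.c invokes it between
 * transactions with force = false, so the PGSTAT_STAT_INTERVAL throttle
 * applies; backend shutdown paths pass force = true to flush whatever is
 * still pending.  The guard macro and function name are hypothetical.
 */
#ifdef PGSTAT_ILLUSTRATION_ONLY
static void
report_stat_usage_example(void)
{
	/* ... after the previous transaction has fully ended ... */
	pgstat_report_stat(false);	/* throttled, may be a no-op */

	/* ... and from a backend shutdown hook ... */
	pgstat_report_stat(true);	/* force out any remaining counts */
}
#endif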
840 /*
841  * Subroutine for pgstat_report_stat: finish and send a tabstat message
842  */
843 static void
844 pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
845 {
846  int n;
847  int len;
848 
849  /* It's unlikely we'd get here with no socket, but maybe not impossible */
850  if (pgStatSock == PGINVALID_SOCKET)
851  return;
852 
853  /*
854  * Report and reset accumulated xact commit/rollback and I/O timings
855  * whenever we send a normal tabstat message
856  */
857  if (OidIsValid(tsmsg->m_databaseid))
858  {
859  tsmsg->m_xact_commit = pgStatXactCommit;
860  tsmsg->m_xact_rollback = pgStatXactRollback;
861  tsmsg->m_block_read_time = pgStatBlockReadTime;
862  tsmsg->m_block_write_time = pgStatBlockWriteTime;
863  pgStatXactCommit = 0;
864  pgStatXactRollback = 0;
865  pgStatBlockReadTime = 0;
866  pgStatBlockWriteTime = 0;
867  }
868  else
869  {
870  tsmsg->m_xact_commit = 0;
871  tsmsg->m_xact_rollback = 0;
872  tsmsg->m_block_read_time = 0;
873  tsmsg->m_block_write_time = 0;
874  }
875 
876  n = tsmsg->m_nentries;
877  len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
878  n * sizeof(PgStat_TableEntry);
879 
880  pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
881  pgstat_send(tsmsg, len);
882 }
883 
884 /*
885  * Subroutine for pgstat_report_stat: populate and send a function stat message
886  */
887 static void
888 pgstat_send_funcstats(void)
889 {
890  /* we assume this inits to all zeroes: */
891  static const PgStat_FunctionCounts all_zeroes;
892 
893  PgStat_MsgFuncstat msg;
894  PgStat_BackendFunctionEntry *entry;
895  HASH_SEQ_STATUS fstat;
896 
897  if (pgStatFunctions == NULL)
898  return;
899 
900  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_FUNCSTAT);
901  msg.m_databaseid = MyDatabaseId;
902  msg.m_nentries = 0;
903 
904  hash_seq_init(&fstat, pgStatFunctions);
905  while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
906  {
907  PgStat_FunctionEntry *m_ent;
908 
909  /* Skip it if no counts accumulated since last time */
910  if (memcmp(&entry->f_counts, &all_zeroes,
911  sizeof(PgStat_FunctionCounts)) == 0)
912  continue;
913 
914  /* need to convert format of time accumulators */
915  m_ent = &msg.m_entry[msg.m_nentries];
916  m_ent->f_id = entry->f_id;
917  m_ent->f_numcalls = entry->f_counts.f_numcalls;
920 
921  if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
922  {
923  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
924  msg.m_nentries * sizeof(PgStat_FunctionEntry));
925  msg.m_nentries = 0;
926  }
927 
928  /* reset the entry's counts */
929  MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
930  }
931 
932  if (msg.m_nentries > 0)
933  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
934  msg.m_nentries * sizeof(PgStat_FunctionEntry));
935 
936  have_function_stats = false;
937 }
938 
939 
940 /* ----------
941  * pgstat_vacuum_stat() -
942  *
943  * Will tell the collector about objects it can get rid of.
944  * ----------
945  */
946 void
947 pgstat_vacuum_stat(void)
948 {
949  HTAB *htab;
950  PgStat_MsgTabpurge msg;
951  PgStat_MsgFuncpurge f_msg;
952  HASH_SEQ_STATUS hstat;
953  PgStat_StatDBEntry *dbentry;
954  PgStat_StatTabEntry *tabentry;
955  PgStat_StatFuncEntry *funcentry;
956  int len;
957 
958  if (pgStatSock == PGINVALID_SOCKET)
959  return;
960 
961  /*
962  * If not done for this transaction, read the statistics collector stats
963  * file into some hash tables.
964  */
965  backend_read_statsfile();
966 
967  /*
968  * Read pg_database and make a list of OIDs of all existing databases
969  */
970  htab = pgstat_collect_oids(DatabaseRelationId);
971 
972  /*
973  * Search the database hash table for dead databases and tell the
974  * collector to drop them.
975  */
976  hash_seq_init(&hstat, pgStatDBHash);
977  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
978  {
979  Oid dbid = dbentry->databaseid;
980 
981  CHECK_FOR_INTERRUPTS();
982 
983  /* the DB entry for shared tables (with InvalidOid) is never dropped */
984  if (OidIsValid(dbid) &&
985  hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
986  pgstat_drop_database(dbid);
987  }
988 
989  /* Clean up */
990  hash_destroy(htab);
991 
992  /*
993  * Lookup our own database entry; if not found, nothing more to do.
994  */
995  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
996  (void *) &MyDatabaseId,
997  HASH_FIND, NULL);
998  if (dbentry == NULL || dbentry->tables == NULL)
999  return;
1000 
1001  /*
1002  * Similarly to above, make a list of all known relations in this DB.
1003  */
1004  htab = pgstat_collect_oids(RelationRelationId);
1005 
1006  /*
1007  * Initialize our messages table counter to zero
1008  */
1009  msg.m_nentries = 0;
1010 
1011  /*
1012  * Check for all tables listed in stats hashtable if they still exist.
1013  */
1014  hash_seq_init(&hstat, dbentry->tables);
1015  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
1016  {
1017  Oid tabid = tabentry->tableid;
1018 
1019  CHECK_FOR_INTERRUPTS();
1020 
1021  if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
1022  continue;
1023 
1024  /*
1025  * Not there, so add this table's Oid to the message
1026  */
1027  msg.m_tableid[msg.m_nentries++] = tabid;
1028 
1029  /*
1030  * If the message is full, send it out and reinitialize to empty
1031  */
1032  if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
1033  {
1034  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1035  +msg.m_nentries * sizeof(Oid);
1036 
1037  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
1038  msg.m_databaseid = MyDatabaseId;
1039  pgstat_send(&msg, len);
1040 
1041  msg.m_nentries = 0;
1042  }
1043  }
1044 
1045  /*
1046  * Send the rest
1047  */
1048  if (msg.m_nentries > 0)
1049  {
1050  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1051  +msg.m_nentries * sizeof(Oid);
1052 
1053  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
1054  msg.m_databaseid = MyDatabaseId;
1055  pgstat_send(&msg, len);
1056  }
1057 
1058  /* Clean up */
1059  hash_destroy(htab);
1060 
1061  /*
1062  * Now repeat the above steps for functions. However, we needn't bother
1063  * in the common case where no function stats are being collected.
1064  */
1065  if (dbentry->functions != NULL &&
1066  hash_get_num_entries(dbentry->functions) > 0)
1067  {
1068  htab = pgstat_collect_oids(ProcedureRelationId);
1069 
1070  pgstat_setheader(&f_msg.m_hdr, PGSTAT_MTYPE_FUNCPURGE);
1071  f_msg.m_databaseid = MyDatabaseId;
1072  f_msg.m_nentries = 0;
1073 
1074  hash_seq_init(&hstat, dbentry->functions);
1075  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
1076  {
1077  Oid funcid = funcentry->functionid;
1078 
1079  CHECK_FOR_INTERRUPTS();
1080 
1081  if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
1082  continue;
1083 
1084  /*
1085  * Not there, so add this function's Oid to the message
1086  */
1087  f_msg.m_functionid[f_msg.m_nentries++] = funcid;
1088 
1089  /*
1090  * If the message is full, send it out and reinitialize to empty
1091  */
1092  if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
1093  {
1094  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1095  +f_msg.m_nentries * sizeof(Oid);
1096 
1097  pgstat_send(&f_msg, len);
1098 
1099  f_msg.m_nentries = 0;
1100  }
1101  }
1102 
1103  /*
1104  * Send the rest
1105  */
1106  if (f_msg.m_nentries > 0)
1107  {
1108  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1109  +f_msg.m_nentries * sizeof(Oid);
1110 
1111  pgstat_send(&f_msg, len);
1112  }
1113 
1114  hash_destroy(htab);
1115  }
1116 }
1117 
1118 
1119 /* ----------
1120  * pgstat_collect_oids() -
1121  *
1122  * Collect the OIDs of all objects listed in the specified system catalog
1123  * into a temporary hash table. Caller should hash_destroy the result
1124  * when done with it. (However, we make the table in CurrentMemoryContext
1125  * so that it will be freed properly in event of an error.)
1126  * ----------
1127  */
1128 static HTAB *
1129 pgstat_collect_oids(Oid catalogid)
1130 {
1131  HTAB *htab;
1132  HASHCTL hash_ctl;
1133  Relation rel;
1134  HeapScanDesc scan;
1135  HeapTuple tup;
1136  Snapshot snapshot;
1137 
1138  memset(&hash_ctl, 0, sizeof(hash_ctl));
1139  hash_ctl.keysize = sizeof(Oid);
1140  hash_ctl.entrysize = sizeof(Oid);
1141  hash_ctl.hcxt = CurrentMemoryContext;
1142  htab = hash_create("Temporary table of OIDs",
1143  PGSTAT_TAB_HASH_SIZE,
1144  &hash_ctl,
1145  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1146 
1147  rel = heap_open(catalogid, AccessShareLock);
1148  snapshot = RegisterSnapshot(GetLatestSnapshot());
1149  scan = heap_beginscan(rel, snapshot, 0, NULL);
1150  while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
1151  {
1152  Oid thisoid = HeapTupleGetOid(tup);
1153 
1154  CHECK_FOR_INTERRUPTS();
1155 
1156  (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
1157  }
1158  heap_endscan(scan);
1159  UnregisterSnapshot(snapshot);
1160  heap_close(rel, AccessShareLock);
1161 
1162  return htab;
1163 }
1164 
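/*
 * Illustrative sketch (not part of the original file): how the helper
 * above is used by pgstat_vacuum_stat().  Build the OID set from a
 * catalog, probe it while scanning the stats hash, then destroy it.  The
 * guard macro and function name are hypothetical.
 */
#ifdef PGSTAT_ILLUSTRATION_ONLY
static void
collect_oids_usage_example(Oid dbid)
{
	HTAB	   *htab = pgstat_collect_oids(DatabaseRelationId);

	if (hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
		pgstat_drop_database(dbid);		/* no longer in pg_database */

	hash_destroy(htab);
}
#endif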
1165 
1166 /* ----------
1167  * pgstat_drop_database() -
1168  *
1169  * Tell the collector that we just dropped a database.
1170  * (If the message gets lost, we will still clean the dead DB eventually
1171  * via future invocations of pgstat_vacuum_stat().)
1172  * ----------
1173  */
1174 void
1175 pgstat_drop_database(Oid databaseid)
1176 {
1177  PgStat_MsgDropdb msg;
1178 
1180  return;
1181 
1183  msg.m_databaseid = databaseid;
1184  pgstat_send(&msg, sizeof(msg));
1185 }
1186 
1187 
1188 /* ----------
1189  * pgstat_drop_relation() -
1190  *
1191  * Tell the collector that we just dropped a relation.
1192  * (If the message gets lost, we will still clean the dead entry eventually
1193  * via future invocations of pgstat_vacuum_stat().)
1194  *
1195  * Currently not used for lack of any good place to call it; we rely
1196  * entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
1197  * ----------
1198  */
1199 #ifdef NOT_USED
1200 void
1201 pgstat_drop_relation(Oid relid)
1202 {
1203  PgStat_MsgTabpurge msg;
1204  int len;
1205 
1207  return;
1208 
1209  msg.m_tableid[0] = relid;
1210  msg.m_nentries = 1;
1211 
1212  len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +sizeof(Oid);
1213 
1215  msg.m_databaseid = MyDatabaseId;
1216  pgstat_send(&msg, len);
1217 }
1218 #endif /* NOT_USED */
1219 
1220 
1221 /* ----------
1222  * pgstat_reset_counters() -
1223  *
1224  * Tell the statistics collector to reset counters for our database.
1225  *
1226  * Permission checking for this function is managed through the normal
1227  * GRANT system.
1228  * ----------
1229  */
1230 void
1231 pgstat_reset_counters(void)
1232 {
1234 
1236  return;
1237 
1239  msg.m_databaseid = MyDatabaseId;
1240  pgstat_send(&msg, sizeof(msg));
1241 }
1242 
1243 /* ----------
1244  * pgstat_reset_shared_counters() -
1245  *
1246  * Tell the statistics collector to reset cluster-wide shared counters.
1247  *
1248  * Permission checking for this function is managed through the normal
1249  * GRANT system.
1250  * ----------
1251  */
1252 void
1253 pgstat_reset_shared_counters(const char *target)
1254 {
1256 
1258  return;
1259 
1260  if (strcmp(target, "archiver") == 0)
1261  msg.m_resettarget = RESET_ARCHIVER;
1262  else if (strcmp(target, "bgwriter") == 0)
1263  msg.m_resettarget = RESET_BGWRITER;
1264  else
1265  ereport(ERROR,
1266  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1267  errmsg("unrecognized reset target: \"%s\"", target),
1268  errhint("Target must be \"archiver\" or \"bgwriter\".")));
1269 
1271  pgstat_send(&msg, sizeof(msg));
1272 }
1273 
1274 /* ----------
1275  * pgstat_reset_single_counter() -
1276  *
1277  * Tell the statistics collector to reset a single counter.
1278  *
1279  * Permission checking for this function is managed through the normal
1280  * GRANT system.
1281  * ----------
1282  */
1283 void
1284 pgstat_reset_single_counter(Oid objoid, PgStat_Single_Reset_Type type)
1285 {
1287 
1289  return;
1290 
1292  msg.m_databaseid = MyDatabaseId;
1293  msg.m_resettype = type;
1294  msg.m_objectid = objoid;
1295 
1296  pgstat_send(&msg, sizeof(msg));
1297 }
1298 
1299 /* ----------
1300  * pgstat_report_autovac() -
1301  *
1302  * Called from autovacuum.c to report startup of an autovacuum process.
1303  * We are called before InitPostgres is done, so can't rely on MyDatabaseId;
1304  * the db OID must be passed in, instead.
1305  * ----------
1306  */
1307 void
1308 pgstat_report_autovac(Oid dboid)
1309 {
1311 
1313  return;
1314 
1316  msg.m_databaseid = dboid;
1318 
1319  pgstat_send(&msg, sizeof(msg));
1320 }
1321 
1322 
1323 /* ---------
1324  * pgstat_report_vacuum() -
1325  *
1326  * Tell the collector about the table we just vacuumed.
1327  * ---------
1328  */
1329 void
1330 pgstat_report_vacuum(Oid tableoid, bool shared,
1331  PgStat_Counter livetuples, PgStat_Counter deadtuples)
1332 {
1333  PgStat_MsgVacuum msg;
1334 
1336  return;
1337 
1339  msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
1340  msg.m_tableoid = tableoid;
1343  msg.m_live_tuples = livetuples;
1344  msg.m_dead_tuples = deadtuples;
1345  pgstat_send(&msg, sizeof(msg));
1346 }
1347 
1348 /* --------
1349  * pgstat_report_analyze() -
1350  *
1351  * Tell the collector about the table we just analyzed.
1352  *
1353  * Caller must provide new live- and dead-tuples estimates, as well as a
1354  * flag indicating whether to reset the changes_since_analyze counter.
1355  * --------
1356  */
1357 void
1358 pgstat_report_analyze(Relation rel,
1359  PgStat_Counter livetuples, PgStat_Counter deadtuples,
1360  bool resetcounter)
1361 {
1362  PgStat_MsgAnalyze msg;
1363 
1365  return;
1366 
1367  /*
1368  * Unlike VACUUM, ANALYZE might be running inside a transaction that has
1369  * already inserted and/or deleted rows in the target table. ANALYZE will
1370  * have counted such rows as live or dead respectively. Because we will
1371  * report our counts of such rows at transaction end, we should subtract
1372  * off these counts from what we send to the collector now, else they'll
1373  * be double-counted after commit. (This approach also ensures that the
1374  * collector ends up with the right numbers if we abort instead of
1375  * committing.)
1376  */
1377  if (rel->pgstat_info != NULL)
1378  {
1379  PgStat_TableXactStatus *trans;
1380 
1381  for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
1382  {
1383  livetuples -= trans->tuples_inserted - trans->tuples_deleted;
1384  deadtuples -= trans->tuples_updated + trans->tuples_deleted;
1385  }
1386  /* count stuff inserted by already-aborted subxacts, too */
1387  deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
1388  /* Since ANALYZE's counts are estimates, we could have underflowed */
1389  livetuples = Max(livetuples, 0);
1390  deadtuples = Max(deadtuples, 0);
1391  }
1392 
1394  msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
1395  msg.m_tableoid = RelationGetRelid(rel);
1397  msg.m_resetcounter = resetcounter;
1399  msg.m_live_tuples = livetuples;
1400  msg.m_dead_tuples = deadtuples;
1401  pgstat_send(&msg, sizeof(msg));
1402 }
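/*
 * Worked example of the adjustment above (illustrative, not from the
 * original file): suppose the current transaction has inserted 10 rows
 * and deleted 2 pre-existing rows of the table, and ANALYZE then measures
 * 110 live and 7 dead tuples.  Those 10 inserts and 2 deletes will be
 * reported again at transaction end, so we send livetuples =
 * 110 - (10 - 2) = 102 and deadtuples = 7 - (0 + 2) = 5 to the collector
 * now; the commit-time report brings the totals back in line without
 * double-counting.
 */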
1403 
1404 /* --------
1405  * pgstat_report_recovery_conflict() -
1406  *
1407  * Tell the collector about a Hot Standby recovery conflict.
1408  * --------
1409  */
1410 void
1411 pgstat_report_recovery_conflict(int reason)
1412 {
1414 
1416  return;
1417 
1419  msg.m_databaseid = MyDatabaseId;
1420  msg.m_reason = reason;
1421  pgstat_send(&msg, sizeof(msg));
1422 }
1423 
1424 /* --------
1425  * pgstat_report_deadlock() -
1426  *
1427  * Tell the collector about a deadlock detected.
1428  * --------
1429  */
1430 void
1431 pgstat_report_deadlock(void)
1432 {
1433  PgStat_MsgDeadlock msg;
1434 
1436  return;
1437 
1439  msg.m_databaseid = MyDatabaseId;
1440  pgstat_send(&msg, sizeof(msg));
1441 }
1442 
1443 /* --------
1444  * pgstat_report_tempfile() -
1445  *
1446  * Tell the collector about a temporary file.
1447  * --------
1448  */
1449 void
1450 pgstat_report_tempfile(size_t filesize)
1451 {
1452  PgStat_MsgTempFile msg;
1453 
1455  return;
1456 
1458  msg.m_databaseid = MyDatabaseId;
1459  msg.m_filesize = filesize;
1460  pgstat_send(&msg, sizeof(msg));
1461 }
1462 
1463 
1464 /* ----------
1465  * pgstat_ping() -
1466  *
1467  * Send some junk data to the collector to increase traffic.
1468  * ----------
1469  */
1470 void
1471 pgstat_ping(void)
1472 {
1473  PgStat_MsgDummy msg;
1474 
1476  return;
1477 
1479  pgstat_send(&msg, sizeof(msg));
1480 }
1481 
1482 /* ----------
1483  * pgstat_send_inquiry() -
1484  *
1485  * Notify collector that we need fresh data.
1486  * ----------
1487  */
1488 static void
1489 pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
1490 {
1491  PgStat_MsgInquiry msg;
1492 
1494  msg.clock_time = clock_time;
1495  msg.cutoff_time = cutoff_time;
1496  msg.databaseid = databaseid;
1497  pgstat_send(&msg, sizeof(msg));
1498 }
1499 
1500 
1501 /*
1502  * Initialize function call usage data.
1503  * Called by the executor before invoking a function.
1504  */
1505 void
1506 pgstat_init_function_usage(FunctionCallInfoData *fcinfo,
1507  PgStat_FunctionCallUsage *fcu)
1508 {
1509  PgStat_BackendFunctionEntry *htabent;
1510  bool found;
1511 
1512  if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
1513  {
1514  /* stats not wanted */
1515  fcu->fs = NULL;
1516  return;
1517  }
1518 
1519  if (!pgStatFunctions)
1520  {
1521  /* First time through - initialize function stat table */
1522  HASHCTL hash_ctl;
1523 
1524  memset(&hash_ctl, 0, sizeof(hash_ctl));
1525  hash_ctl.keysize = sizeof(Oid);
1526  hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
1527  pgStatFunctions = hash_create("Function stat entries",
1528  PGSTAT_FUNCTION_HASH_SIZE,
1529  &hash_ctl,
1530  HASH_ELEM | HASH_BLOBS);
1531  }
1532 
1533  /* Get the stats entry for this function, create if necessary */
1534  htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
1535  HASH_ENTER, &found);
1536  if (!found)
1537  MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));
1538 
1539  fcu->fs = &htabent->f_counts;
1540 
1541  /* save stats for this function, later used to compensate for recursion */
1542  fcu->save_f_total_time = htabent->f_counts.f_total_time;
1543 
1544  /* save current backend-wide total time */
1545  fcu->save_total = total_func_time;
1546 
1547  /* get clock time as of function start */
1548  INSTR_TIME_SET_CURRENT(fcu->f_start);
1549 }
1550 
1551 /*
1552  * find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
1553  * for specified function
1554  *
1555  * If no entry, return NULL, don't create a new one
1556  */
1557 PgStat_BackendFunctionEntry *
1558 find_funcstat_entry(Oid func_id)
1559 {
1560  if (pgStatFunctions == NULL)
1561  return NULL;
1562 
1563  return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
1564  (void *) &func_id,
1565  HASH_FIND, NULL);
1566 }
1567 
1568 /*
1569  * Calculate function call usage and update stat counters.
1570  * Called by the executor after invoking a function.
1571  *
1572  * In the case of a set-returning function that runs in value-per-call mode,
1573  * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
1574  * calls for what the user considers a single call of the function. The
1575  * finalize flag should be TRUE on the last call.
1576  */
1577 void
1578 pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize)
1579 {
1580  PgStat_FunctionCounts *fs = fcu->fs;
1581  instr_time f_total;
1582  instr_time f_others;
1583  instr_time f_self;
1584 
1585  /* stats not wanted? */
1586  if (fs == NULL)
1587  return;
1588 
1589  /* total elapsed time in this function call */
1590  INSTR_TIME_SET_CURRENT(f_total);
1591  INSTR_TIME_SUBTRACT(f_total, fcu->f_start);
1592 
1593  /* self usage: elapsed minus anything already charged to other calls */
1594  f_others = total_func_time;
1595  INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
1596  f_self = f_total;
1597  INSTR_TIME_SUBTRACT(f_self, f_others);
1598 
1599  /* update backend-wide total time */
1600  INSTR_TIME_ADD(total_func_time, f_self);
1601 
1602  /*
1603  * Compute the new f_total_time as the total elapsed time added to the
1604  * pre-call value of f_total_time. This is necessary to avoid
1605  * double-counting any time taken by recursive calls of myself. (We do
1606  * not need any similar kluge for self time, since that already excludes
1607  * any recursive calls.)
1608  */
1609  INSTR_TIME_ADD(f_total, fcu->save_f_total_time);
1610 
1611  /* update counters in function stats table */
1612  if (finalize)
1613  fs->f_numcalls++;
1614  fs->f_total_time = f_total;
1615  INSTR_TIME_ADD(fs->f_self_time, f_self);
1616 
1617  /* indicate that we have something to send */
1618  have_function_stats = true;
1619 }
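/*
 * Worked example of the timing bookkeeping above (illustrative, not from
 * the original file): if f() runs for 10 ms in total and spends 4 ms
 * inside a nested call of g(), then by the time f() reaches
 * pgstat_end_function_usage the backend-wide total_func_time has already
 * absorbed g()'s 4 ms of self time.  So f_others = 4 ms and
 * f_self = 10 - 4 = 6 ms, while f()'s f_total_time ends up as the
 * pre-call value saved in fcu->save_f_total_time plus the full 10 ms,
 * which is what keeps recursive calls of f() from inflating the total
 * twice.
 */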
1620 
1621 
1622 /* ----------
1623  * pgstat_initstats() -
1624  *
1625  * Initialize a relcache entry to count access statistics.
1626  * Called whenever a relation is opened.
1627  *
1628  * We assume that a relcache entry's pgstat_info field is zeroed by
1629  * relcache.c when the relcache entry is made; thereafter it is long-lived
1630  * data. We can avoid repeated searches of the TabStatus arrays when the
1631  * same relation is touched repeatedly within a transaction.
1632  * ----------
1633  */
1634 void
1635 pgstat_initstats(Relation rel)
1636 {
1637  Oid rel_id = rel->rd_id;
1638  char relkind = rel->rd_rel->relkind;
1639 
1640  /* We only count stats for things that have storage */
1641  if (!(relkind == RELKIND_RELATION ||
1642  relkind == RELKIND_MATVIEW ||
1643  relkind == RELKIND_INDEX ||
1644  relkind == RELKIND_TOASTVALUE ||
1645  relkind == RELKIND_SEQUENCE))
1646  {
1647  rel->pgstat_info = NULL;
1648  return;
1649  }
1650 
1651  if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1652  {
1653  /* We're not counting at all */
1654  rel->pgstat_info = NULL;
1655  return;
1656  }
1657 
1658  /*
1659  * If we already set up this relation in the current transaction, nothing
1660  * to do.
1661  */
1662  if (rel->pgstat_info != NULL &&
1663  rel->pgstat_info->t_id == rel_id)
1664  return;
1665 
1666  /* Else find or make the PgStat_TableStatus entry, and update link */
1667  rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
1668 }
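/*
 * Illustrative sketch (not part of the original file): relation_open()
 * runs pgstat_initstats() on every open, so the early-return path above
 * is what makes repeated opens cheap.  MY_TABLE_OID and the guard macro
 * are hypothetical, for illustration only.
 */
#ifdef PGSTAT_ILLUSTRATION_ONLY
static void
initstats_caching_example(void)
{
	Relation	rel = heap_open(MY_TABLE_OID, AccessShareLock);

	heap_close(rel, AccessShareLock);

	/*
	 * Re-opening the same relation later in the transaction calls
	 * pgstat_initstats() again, but it finds rel->pgstat_info already
	 * pointing at the entry for this rel_id and returns without another
	 * search of the TabStatus arrays.
	 */
	rel = heap_open(MY_TABLE_OID, AccessShareLock);
	heap_close(rel, AccessShareLock);
}
#endif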
1669 
1670 /*
1671  * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
1672  */
1673 static PgStat_TableStatus *
1674 get_tabstat_entry(Oid rel_id, bool isshared)
1675 {
1676  PgStat_TableStatus *entry;
1677  TabStatusArray *tsa;
1678  TabStatusArray *prev_tsa;
1679  int i;
1680 
1681  /*
1682  * Search the already-used tabstat slots for this relation.
1683  */
1684  prev_tsa = NULL;
1685  for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
1686  {
1687  for (i = 0; i < tsa->tsa_used; i++)
1688  {
1689  entry = &tsa->tsa_entries[i];
1690  if (entry->t_id == rel_id)
1691  return entry;
1692  }
1693 
1694  if (tsa->tsa_used < TABSTAT_QUANTUM)
1695  {
1696  /*
1697  * It must not be present, but we found a free slot instead. Fine,
1698  * let's use this one. We assume the entry was already zeroed,
1699  * either at creation or after last use.
1700  */
1701  entry = &tsa->tsa_entries[tsa->tsa_used++];
1702  entry->t_id = rel_id;
1703  entry->t_shared = isshared;
1704  return entry;
1705  }
1706  }
1707 
1708  /*
1709  * We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
1710  */
1711  tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
1712  sizeof(TabStatusArray));
1713  if (prev_tsa)
1714  prev_tsa->tsa_next = tsa;
1715  else
1716  pgStatTabList = tsa;
1717 
1718  /*
1719  * Use the first entry of the new TabStatusArray.
1720  */
1721  entry = &tsa->tsa_entries[tsa->tsa_used++];
1722  entry->t_id = rel_id;
1723  entry->t_shared = isshared;
1724  return entry;
1725 }
1726 
1727 /*
1728  * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
1729  *
1730  * If no entry, return NULL, don't create a new one
1731  */
1732 PgStat_TableStatus *
1733 find_tabstat_entry(Oid rel_id)
1734 {
1735  PgStat_TableStatus *entry;
1736  TabStatusArray *tsa;
1737  int i;
1738 
1739  for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
1740  {
1741  for (i = 0; i < tsa->tsa_used; i++)
1742  {
1743  entry = &tsa->tsa_entries[i];
1744  if (entry->t_id == rel_id)
1745  return entry;
1746  }
1747  }
1748 
1749  /* Not present */
1750  return NULL;
1751 }
1752 
1753 /*
1754  * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
1755  */
1756 static PgStat_SubXactStatus *
1757 get_tabstat_stack_level(int nest_level)
1758 {
1759  PgStat_SubXactStatus *xact_state;
1760 
1761  xact_state = pgStatXactStack;
1762  if (xact_state == NULL || xact_state->nest_level != nest_level)
1763  {
1764  xact_state = (PgStat_SubXactStatus *)
1765  MemoryContextAlloc(TopTransactionContext,
1766  sizeof(PgStat_SubXactStatus));
1767  xact_state->nest_level = nest_level;
1768  xact_state->prev = pgStatXactStack;
1769  xact_state->first = NULL;
1770  pgStatXactStack = xact_state;
1771  }
1772  return xact_state;
1773 }
1774 
1775 /*
1776  * add_tabstat_xact_level - add a new (sub)transaction state record
1777  */
1778 static void
1779 add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
1780 {
1781  PgStat_SubXactStatus *xact_state;
1782  PgStat_TableXactStatus *trans;
1783 
1784  /*
1785  * If this is the first rel to be modified at the current nest level, we
1786  * first have to push a transaction stack entry.
1787  */
1788  xact_state = get_tabstat_stack_level(nest_level);
1789 
1790  /* Now make a per-table stack entry */
1791  trans = (PgStat_TableXactStatus *)
1792  MemoryContextAllocZero(TopTransactionContext,
1793  sizeof(PgStat_TableXactStatus));
1794  trans->nest_level = nest_level;
1795  trans->upper = pgstat_info->trans;
1796  trans->parent = pgstat_info;
1797  trans->next = xact_state->first;
1798  xact_state->first = trans;
1799  pgstat_info->trans = trans;
1800 }
1801 
1802 /*
1803  * pgstat_count_heap_insert - count a tuple insertion of n tuples
1804  */
1805 void
1806 pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
1807 {
1808  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1809 
1810  if (pgstat_info != NULL)
1811  {
1812  /* We have to log the effect at the proper transactional level */
1813  int nest_level = GetCurrentTransactionNestLevel();
1814 
1815  if (pgstat_info->trans == NULL ||
1816  pgstat_info->trans->nest_level != nest_level)
1817  add_tabstat_xact_level(pgstat_info, nest_level);
1818 
1819  pgstat_info->trans->tuples_inserted += n;
1820  }
1821 }
1822 
1823 /*
1824  * pgstat_count_heap_update - count a tuple update
1825  */
1826 void
1827 pgstat_count_heap_update(Relation rel, bool hot)
1828 {
1829  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1830 
1831  if (pgstat_info != NULL)
1832  {
1833  /* We have to log the effect at the proper transactional level */
1834  int nest_level = GetCurrentTransactionNestLevel();
1835 
1836  if (pgstat_info->trans == NULL ||
1837  pgstat_info->trans->nest_level != nest_level)
1838  add_tabstat_xact_level(pgstat_info, nest_level);
1839 
1840  pgstat_info->trans->tuples_updated++;
1841 
1842  /* t_tuples_hot_updated is nontransactional, so just advance it */
1843  if (hot)
1844  pgstat_info->t_counts.t_tuples_hot_updated++;
1845  }
1846 }
1847 
1848 /*
1849  * pgstat_count_heap_delete - count a tuple deletion
1850  */
1851 void
1852 pgstat_count_heap_delete(Relation rel)
1853 {
1854  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1855 
1856  if (pgstat_info != NULL)
1857  {
1858  /* We have to log the effect at the proper transactional level */
1859  int nest_level = GetCurrentTransactionNestLevel();
1860 
1861  if (pgstat_info->trans == NULL ||
1862  pgstat_info->trans->nest_level != nest_level)
1863  add_tabstat_xact_level(pgstat_info, nest_level);
1864 
1865  pgstat_info->trans->tuples_deleted++;
1866  }
1867 }
1868 
1869 /*
1870  * pgstat_truncate_save_counters
1871  *
1872  * Whenever a table is truncated, we save its i/u/d counters so that they can
1873  * be cleared, and if the (sub)xact that executed the truncate later aborts,
1874  * the counters can be restored to the saved (pre-truncate) values. Note we do
1875  * this on the first truncate in any particular subxact level only.
1876  */
1877 static void
1878 pgstat_truncate_save_counters(PgStat_TableXactStatus *trans)
1879 {
1880  if (!trans->truncated)
1881  {
1882  trans->inserted_pre_trunc = trans->tuples_inserted;
1883  trans->updated_pre_trunc = trans->tuples_updated;
1884  trans->deleted_pre_trunc = trans->tuples_deleted;
1885  trans->truncated = true;
1886  }
1887 }
1888 
1889 /*
1890  * pgstat_truncate_restore_counters - restore counters when a truncate aborts
1891  */
1892 static void
1893 pgstat_truncate_restore_counters(PgStat_TableXactStatus *trans)
1894 {
1895  if (trans->truncated)
1896  {
1897  trans->tuples_inserted = trans->inserted_pre_trunc;
1898  trans->tuples_updated = trans->updated_pre_trunc;
1899  trans->tuples_deleted = trans->deleted_pre_trunc;
1900  }
1901 }
1902 
1903 /*
1904  * pgstat_count_truncate - update tuple counters due to truncate
1905  */
1906 void
1907 pgstat_count_truncate(Relation rel)
1908 {
1909  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1910 
1911  if (pgstat_info != NULL)
1912  {
1913  /* We have to log the effect at the proper transactional level */
1914  int nest_level = GetCurrentTransactionNestLevel();
1915 
1916  if (pgstat_info->trans == NULL ||
1917  pgstat_info->trans->nest_level != nest_level)
1918  add_tabstat_xact_level(pgstat_info, nest_level);
1919 
1920  pgstat_truncate_save_counters(pgstat_info->trans);
1921  pgstat_info->trans->tuples_inserted = 0;
1922  pgstat_info->trans->tuples_updated = 0;
1923  pgstat_info->trans->tuples_deleted = 0;
1924  }
1925 }
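/*
 * Worked example of the truncate bookkeeping (illustrative, not from the
 * original file): in a subxact that has inserted 5 tuples, TRUNCATE first
 * saves (5, 0, 0) into the *_pre_trunc fields, sets truncated = true, and
 * zeroes the live counters, so inserts after the TRUNCATE are counted
 * from zero.  If the subxact later aborts,
 * pgstat_truncate_restore_counters() puts the saved (5, 0, 0) back and
 * the upper level still sees the pre-truncate activity; if it commits,
 * the recounted values stand and the truncated flag tells
 * AtEOXact_PgStat to discard the backend's earlier live/dead deltas.
 */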
1926 
1927 /*
1928  * pgstat_update_heap_dead_tuples - update dead-tuples count
1929  *
1930  * The semantics of this are that we are reporting the nontransactional
1931  * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
1932  * rather than increasing, and the change goes straight into the per-table
1933  * counter, not into transactional state.
1934  */
1935 void
1936 pgstat_update_heap_dead_tuples(Relation rel, int delta)
1937 {
1938  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1939 
1940  if (pgstat_info != NULL)
1941  pgstat_info->t_counts.t_delta_dead_tuples -= delta;
1942 }
1943 
1944 
1945 /* ----------
1946  * AtEOXact_PgStat
1947  *
1948  * Called from access/transam/xact.c at top-level transaction commit/abort.
1949  * ----------
1950  */
1951 void
1952 AtEOXact_PgStat(bool isCommit)
1953 {
1954  PgStat_SubXactStatus *xact_state;
1955 
1956  /*
1957  * Count transaction commit or abort. (We use counters, not just bools,
1958  * in case the reporting message isn't sent right away.)
1959  */
1960  if (isCommit)
1961  pgStatXactCommit++;
1962  else
1963  pgStatXactRollback++;
1964 
1965  /*
1966  * Transfer transactional insert/update counts into the base tabstat
1967  * entries. We don't bother to free any of the transactional state, since
1968  * it's all in TopTransactionContext and will go away anyway.
1969  */
1970  xact_state = pgStatXactStack;
1971  if (xact_state != NULL)
1972  {
1973  PgStat_TableXactStatus *trans;
1974 
1975  Assert(xact_state->nest_level == 1);
1976  Assert(xact_state->prev == NULL);
1977  for (trans = xact_state->first; trans != NULL; trans = trans->next)
1978  {
1979  PgStat_TableStatus *tabstat;
1980 
1981  Assert(trans->nest_level == 1);
1982  Assert(trans->upper == NULL);
1983  tabstat = trans->parent;
1984  Assert(tabstat->trans == trans);
1985  /* restore pre-truncate stats (if any) in case of aborted xact */
1986  if (!isCommit)
1987  pgstat_truncate_restore_counters(trans);
1988  /* count attempted actions regardless of commit/abort */
1989  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
1990  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
1991  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
1992  if (isCommit)
1993  {
1994  tabstat->t_counts.t_truncated = trans->truncated;
1995  if (trans->truncated)
1996  {
1997  /* forget live/dead stats seen by backend thus far */
1998  tabstat->t_counts.t_delta_live_tuples = 0;
1999  tabstat->t_counts.t_delta_dead_tuples = 0;
2000  }
2001  /* insert adds a live tuple, delete removes one */
2002  tabstat->t_counts.t_delta_live_tuples +=
2003  trans->tuples_inserted - trans->tuples_deleted;
2004  /* update and delete each create a dead tuple */
2005  tabstat->t_counts.t_delta_dead_tuples +=
2006  trans->tuples_updated + trans->tuples_deleted;
2007  /* insert, update, delete each count as one change event */
2008  tabstat->t_counts.t_changed_tuples +=
2009  trans->tuples_inserted + trans->tuples_updated +
2010  trans->tuples_deleted;
2011  }
2012  else
2013  {
2014  /* inserted tuples are dead, deleted tuples are unaffected */
2015  tabstat->t_counts.t_delta_dead_tuples +=
2016  trans->tuples_inserted + trans->tuples_updated;
2017  /* an aborted xact generates no changed_tuple events */
2018  }
2019  tabstat->trans = NULL;
2020  }
2021  }
2022  pgStatXactStack = NULL;
2023 
2024  /* Make sure any stats snapshot is thrown away */
2025  pgstat_clear_snapshot();
2026 }
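/*
 * Worked example of the commit-time math above (illustrative, not from
 * the original file): a transaction that inserted 8, updated 3 and
 * deleted 2 tuples of one table (no truncate) commits with
 * t_tuples_inserted += 8, t_tuples_updated += 3, t_tuples_deleted += 2,
 * t_delta_live_tuples += 8 - 2 = 6, t_delta_dead_tuples += 3 + 2 = 5 and
 * t_changed_tuples += 8 + 3 + 2 = 13.  Had it aborted instead, only the
 * attempted-action counters would grow, plus
 * t_delta_dead_tuples += 8 + 3 = 11 for the rows it created or updated
 * and then rolled back.
 */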
2027 
2028 /* ----------
2029  * AtEOSubXact_PgStat
2030  *
2031  * Called from access/transam/xact.c at subtransaction commit/abort.
2032  * ----------
2033  */
2034 void
2035 AtEOSubXact_PgStat(bool isCommit, int nestDepth)
2036 {
2037  PgStat_SubXactStatus *xact_state;
2038 
2039  /*
2040  * Transfer transactional insert/update counts into the next higher
2041  * subtransaction state.
2042  */
2043  xact_state = pgStatXactStack;
2044  if (xact_state != NULL &&
2045  xact_state->nest_level >= nestDepth)
2046  {
2047  PgStat_TableXactStatus *trans;
2048  PgStat_TableXactStatus *next_trans;
2049 
2050  /* delink xact_state from stack immediately to simplify reuse case */
2051  pgStatXactStack = xact_state->prev;
2052 
2053  for (trans = xact_state->first; trans != NULL; trans = next_trans)
2054  {
2055  PgStat_TableStatus *tabstat;
2056 
2057  next_trans = trans->next;
2058  Assert(trans->nest_level == nestDepth);
2059  tabstat = trans->parent;
2060  Assert(tabstat->trans == trans);
2061  if (isCommit)
2062  {
2063  if (trans->upper && trans->upper->nest_level == nestDepth - 1)
2064  {
2065  if (trans->truncated)
2066  {
2067  /* propagate the truncate status one level up */
2068  pgstat_truncate_save_counters(trans->upper);
2069  /* replace upper xact stats with ours */
2070  trans->upper->tuples_inserted = trans->tuples_inserted;
2071  trans->upper->tuples_updated = trans->tuples_updated;
2072  trans->upper->tuples_deleted = trans->tuples_deleted;
2073  }
2074  else
2075  {
2076  trans->upper->tuples_inserted += trans->tuples_inserted;
2077  trans->upper->tuples_updated += trans->tuples_updated;
2078  trans->upper->tuples_deleted += trans->tuples_deleted;
2079  }
2080  tabstat->trans = trans->upper;
2081  pfree(trans);
2082  }
2083  else
2084  {
2085  /*
2086  * When there isn't an immediate parent state, we can just
2087  * reuse the record instead of going through a
2088  * palloc/pfree pushup (this works since it's all in
2089  * TopTransactionContext anyway). We have to re-link it
2090  * into the parent level, though, and that might mean
2091  * pushing a new entry into the pgStatXactStack.
2092  */
2093  PgStat_SubXactStatus *upper_xact_state;
2094 
2095  upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
2096  trans->next = upper_xact_state->first;
2097  upper_xact_state->first = trans;
2098  trans->nest_level = nestDepth - 1;
2099  }
2100  }
2101  else
2102  {
2103  /*
2104  * On abort, update top-level tabstat counts, then forget the
2105  * subtransaction
2106  */
2107 
2108  /* first restore values obliterated by truncate */
2110  /* count attempted actions regardless of commit/abort */
2111  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2112  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2113  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2114  /* inserted and updated tuples are dead, deleted tuples are unaffected */
2115  tabstat->t_counts.t_delta_dead_tuples +=
2116  trans->tuples_inserted + trans->tuples_updated;
2117  tabstat->trans = trans->upper;
2118  pfree(trans);
2119  }
2120  }
2121  pfree(xact_state);
2122  }
2123 }
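
The commit/abort arithmetic above is easier to follow in isolation. The following is a minimal, stand-alone sketch (not part of pgstat.c; the struct and function names are invented) of how the transactional insert/update/delete counters turn into live/dead tuple deltas at commit or abort, mirroring AtEOXact_PgStat and the abort branch of AtEOSubXact_PgStat:

#include <stdint.h>

typedef struct ExampleCounts
{
	int64_t		delta_live;		/* net change in live tuples */
	int64_t		delta_dead;		/* net change in dead tuples */
} ExampleCounts;

static void
example_merge_at_eoxact(ExampleCounts *c, int64_t inserted, int64_t updated,
						int64_t deleted, int isCommit)
{
	if (isCommit)
	{
		/* inserts add live tuples, deletes remove them */
		c->delta_live += inserted - deleted;
		/* each update or delete leaves one dead tuple version behind */
		c->delta_dead += updated + deleted;
	}
	else
	{
		/* on abort, inserted and updated versions all become dead */
		c->delta_dead += inserted + updated;
	}
}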
2124 
2125 
2126 /*
2127  * AtPrepare_PgStat
2128  * Save the transactional stats state at 2PC transaction prepare.
2129  *
2130  * In this phase we just generate 2PC records for all the pending
2131  * transaction-dependent stats work.
2132  */
2133 void
2134 AtPrepare_PgStat(void)
2135 {
2136  PgStat_SubXactStatus *xact_state;
2137 
2138  xact_state = pgStatXactStack;
2139  if (xact_state != NULL)
2140  {
2141  PgStat_TableXactStatus *trans;
2142 
2143  Assert(xact_state->nest_level == 1);
2144  Assert(xact_state->prev == NULL);
2145  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2146  {
2147  PgStat_TableStatus *tabstat;
2148  TwoPhasePgStatRecord record;
2149 
2150  Assert(trans->nest_level == 1);
2151  Assert(trans->upper == NULL);
2152  tabstat = trans->parent;
2153  Assert(tabstat->trans == trans);
2154 
2155  record.tuples_inserted = trans->tuples_inserted;
2156  record.tuples_updated = trans->tuples_updated;
2157  record.tuples_deleted = trans->tuples_deleted;
2158  record.inserted_pre_trunc = trans->inserted_pre_trunc;
2159  record.updated_pre_trunc = trans->updated_pre_trunc;
2160  record.deleted_pre_trunc = trans->deleted_pre_trunc;
2161  record.t_id = tabstat->t_id;
2162  record.t_shared = tabstat->t_shared;
2163  record.t_truncated = trans->truncated;
2164 
2166  &record, sizeof(TwoPhasePgStatRecord));
2167  }
2168  }
2169 }
2170 
2171 /*
2172  * PostPrepare_PgStat
2173  * Clean up after successful PREPARE.
2174  *
2175  * All we need do here is unlink the transaction stats state from the
2176  * nontransactional state. The nontransactional action counts will be
2177  * reported to the stats collector immediately, while the effects on live
2178  * and dead tuple counts are preserved in the 2PC state file.
2179  *
2180  * Note: AtEOXact_PgStat is not called during PREPARE.
2181  */
2182 void
2183 PostPrepare_PgStat(void)
2184 {
2185  PgStat_SubXactStatus *xact_state;
2186 
2187  /*
2188  * We don't bother to free any of the transactional state, since it's all
2189  * in TopTransactionContext and will go away anyway.
2190  */
2191  xact_state = pgStatXactStack;
2192  if (xact_state != NULL)
2193  {
2194  PgStat_TableXactStatus *trans;
2195 
2196  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2197  {
2198  PgStat_TableStatus *tabstat;
2199 
2200  tabstat = trans->parent;
2201  tabstat->trans = NULL;
2202  }
2203  }
2204  pgStatXactStack = NULL;
2205 
2206  /* Make sure any stats snapshot is thrown away */
2207  pgstat_clear_snapshot();
2208 }
2209 
2210 /*
2211  * 2PC processing routine for COMMIT PREPARED case.
2212  *
2213  * Load the saved counts into our local pgstats state.
2214  */
2215 void
2216 pgstat_twophase_postcommit(TransactionId xid, uint16 info,
2217  void *recdata, uint32 len)
2218 {
2219  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2220  PgStat_TableStatus *pgstat_info;
2221 
2222  /* Find or create a tabstat entry for the rel */
2223  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2224 
2225  /* Same math as in AtEOXact_PgStat, commit case */
2226  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2227  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2228  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2229  pgstat_info->t_counts.t_truncated = rec->t_truncated;
2230  if (rec->t_truncated)
2231  {
2232  /* forget live/dead stats seen by backend thus far */
2233  pgstat_info->t_counts.t_delta_live_tuples = 0;
2234  pgstat_info->t_counts.t_delta_dead_tuples = 0;
2235  }
2236  pgstat_info->t_counts.t_delta_live_tuples +=
2237  rec->tuples_inserted - rec->tuples_deleted;
2238  pgstat_info->t_counts.t_delta_dead_tuples +=
2239  rec->tuples_updated + rec->tuples_deleted;
2240  pgstat_info->t_counts.t_changed_tuples +=
2241  rec->tuples_inserted + rec->tuples_updated +
2242  rec->tuples_deleted;
2243 }
2244 
2245 /*
2246  * 2PC processing routine for ROLLBACK PREPARED case.
2247  *
2248  * Load the saved counts into our local pgstats state, but treat them
2249  * as aborted.
2250  */
2251 void
2252 pgstat_twophase_postabort(TransactionId xid, uint16 info,
2253  void *recdata, uint32 len)
2254 {
2255  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2256  PgStat_TableStatus *pgstat_info;
2257 
2258  /* Find or create a tabstat entry for the rel */
2259  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2260 
2261  /* Same math as in AtEOXact_PgStat, abort case */
2262  if (rec->t_truncated)
2263  {
2264  rec->tuples_inserted = rec->inserted_pre_trunc;
2265  rec->tuples_updated = rec->updated_pre_trunc;
2266  rec->tuples_deleted = rec->deleted_pre_trunc;
2267  }
2268  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2269  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2270  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2271  pgstat_info->t_counts.t_delta_dead_tuples +=
2272  rec->tuples_inserted + rec->tuples_updated;
2273 }
2274 
2275 
2276 /* ----------
2277  * pgstat_fetch_stat_dbentry() -
2278  *
2279  * Support function for the SQL-callable pgstat* functions. Returns
2280  * the collected statistics for one database or NULL. NULL doesn't mean
2281  * that the database doesn't exist; it is just not yet known to the
2282  * collector, so the caller is better off reporting zero instead.
2283  * ----------
2284  */
2285 PgStat_StatDBEntry *
2286 pgstat_fetch_stat_dbentry(Oid dbid)
2287 {
2288  /*
2289  * If not done for this transaction, read the statistics collector stats
2290  * file into some hash tables.
2291  */
2293 
2294  /*
2295  * Lookup the requested database; return NULL if not found
2296  */
2297  return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2298  (void *) &dbid,
2299  HASH_FIND, NULL);
2300 }
2301 
2302 
2303 /* ----------
2304  * pgstat_fetch_stat_tabentry() -
2305  *
2306  * Support function for the SQL-callable pgstat* functions. Returns
2307  * the collected statistics for one table or NULL. NULL doesn't mean
2308  * that the table doesn't exist; it is just not yet known to the
2309  * collector, so the caller is better off reporting zero instead.
2310  * ----------
2311  */
2312 PgStat_StatTabEntry *
2313 pgstat_fetch_stat_tabentry(Oid relid)
2314 {
2315  Oid dbid;
2316  PgStat_StatDBEntry *dbentry;
2317  PgStat_StatTabEntry *tabentry;
2318 
2319  /*
2320  * If not done for this transaction, read the statistics collector stats
2321  * file into some hash tables.
2322  */
2324 
2325  /*
2326  * Lookup our database, then look in its table hash table.
2327  */
2328  dbid = MyDatabaseId;
2329  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2330  (void *) &dbid,
2331  HASH_FIND, NULL);
2332  if (dbentry != NULL && dbentry->tables != NULL)
2333  {
2334  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2335  (void *) &relid,
2336  HASH_FIND, NULL);
2337  if (tabentry)
2338  return tabentry;
2339  }
2340 
2341  /*
2342  * If we didn't find it, maybe it's a shared table.
2343  */
2344  dbid = InvalidOid;
2345  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2346  (void *) &dbid,
2347  HASH_FIND, NULL);
2348  if (dbentry != NULL && dbentry->tables != NULL)
2349  {
2350  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2351  (void *) &relid,
2352  HASH_FIND, NULL);
2353  if (tabentry)
2354  return tabentry;
2355  }
2356 
2357  return NULL;
2358 }
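
As the header comment notes, a NULL result means "not yet known by the collector", not "does not exist". A hypothetical caller (the function name below is invented; it assumes the usual backend headers) would therefore report zero rather than raise an error:

static int64
example_live_tuples(Oid relid)
{
	PgStat_StatTabEntry *tabentry = pgstat_fetch_stat_tabentry(relid);

	/* not yet known by the collector: report zero, not an error */
	if (tabentry == NULL)
		return 0;

	return (int64) tabentry->n_live_tuples;
}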
2359 
2360 
2361 /* ----------
2362  * pgstat_fetch_stat_funcentry() -
2363  *
2364  * Support function for the SQL-callable pgstat* functions. Returns
2365  * the collected statistics for one function or NULL.
2366  * ----------
2367  */
2368 PgStat_StatFuncEntry *
2369 pgstat_fetch_stat_funcentry(Oid func_id)
2370 {
2371  PgStat_StatDBEntry *dbentry;
2372  PgStat_StatFuncEntry *funcentry = NULL;
2373 
2374  /* load the stats file if needed */
2376 
2377  /* Lookup our database, then find the requested function. */
2379  if (dbentry != NULL && dbentry->functions != NULL)
2380  {
2381  funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
2382  (void *) &func_id,
2383  HASH_FIND, NULL);
2384  }
2385 
2386  return funcentry;
2387 }
2388 
2389 
2390 /* ----------
2391  * pgstat_fetch_stat_beentry() -
2392  *
2393  * Support function for the SQL-callable pgstat* functions. Returns
2394  * our local copy of the current-activity entry for one backend.
2395  *
2396  * NB: the caller is responsible for checking whether the user is permitted
2397  * to see this info (especially the querystring).
2398  * ----------
2399  */
2400 PgBackendStatus *
2401 pgstat_fetch_stat_beentry(int beid)
2402 {
2404 
2405  if (beid < 1 || beid > localNumBackends)
2406  return NULL;
2407 
2408  return &localBackendStatusTable[beid - 1].backendStatus;
2409 }
2410 
2411 
2412 /* ----------
2413  * pgstat_fetch_stat_local_beentry() -
2414  *
2415  * Like pgstat_fetch_stat_beentry() but with locally computed additions (like
2416  * xid and xmin values of the backend)
2417  *
2418  * NB: the caller is responsible for checking whether the user is permitted
2419  * to see this info (especially the querystring).
2420  * ----------
2421  */
2422 LocalPgBackendStatus *
2423 pgstat_fetch_stat_local_beentry(int beid)
2424 {
2426 
2427  if (beid < 1 || beid > localNumBackends)
2428  return NULL;
2429 
2430  return &localBackendStatusTable[beid - 1];
2431 }
2432 
2433 
2434 /* ----------
2435  * pgstat_fetch_stat_numbackends() -
2436  *
2437  * Support function for the SQL-callable pgstat* functions. Returns
2438  * the maximum current backend id.
2439  * ----------
2440  */
2441 int
2442 pgstat_fetch_stat_numbackends(void)
2443 {
2445 
2446  return localNumBackends;
2447 }
2448 
2449 /*
2450  * ---------
2451  * pgstat_fetch_stat_archiver() -
2452  *
2453  * Support function for the SQL-callable pgstat* functions. Returns
2454  * a pointer to the archiver statistics struct.
2455  * ---------
2456  */
2457 PgStat_ArchiverStats *
2458 pgstat_fetch_stat_archiver(void)
2459 {
2461 
2462  return &archiverStats;
2463 }
2464 
2465 
2466 /*
2467  * ---------
2468  * pgstat_fetch_global() -
2469  *
2470  * Support function for the SQL-callable pgstat* functions. Returns
2471  * a pointer to the global statistics struct.
2472  * ---------
2473  */
2474 PgStat_GlobalStats *
2475 pgstat_fetch_global(void)
2476 {
2478 
2479  return &globalStats;
2480 }
2481 
2482 
2483 /* ------------------------------------------------------------
2484  * Functions for management of the shared-memory PgBackendStatus array
2485  * ------------------------------------------------------------
2486  */
2487 
2488 static PgBackendStatus *BackendStatusArray = NULL;
2489 static char *BackendAppnameBuffer = NULL;
2490 static char *BackendClientHostnameBuffer = NULL;
2491 static char *BackendActivityBuffer = NULL;
2492 static Size BackendActivityBufferSize = 0;
2493 
2494 #ifdef USE_SSL
2495 static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
2496 #endif
2497 
2498 
2499 /*
2500  * Report shared-memory space needed by CreateSharedBackendStatus.
2501  */
2502 Size
2503 BackendStatusShmemSize(void)
2504 {
2505  Size size;
2506 
2507  /* BackendStatusArray: */
2508  size = mul_size(sizeof(PgBackendStatus), MaxBackends);
2509  /* BackendAppnameBuffer: */
2510  size = add_size(size,
2511  mul_size(NAMEDATALEN, MaxBackends));
2512  /* BackendClientHostnameBuffer: */
2513  size = add_size(size,
2514  mul_size(NAMEDATALEN, MaxBackends));
2515  /* BackendActivityBuffer: */
2516  size = add_size(size,
2517  mul_size(pgstat_track_activity_query_size, MaxBackends));
2518 #ifdef USE_SSL
2519  /* BackendSslStatusBuffer: */
2520  size = add_size(size,
2521  mul_size(sizeof(PgBackendSSLStatus), MaxBackends));
2522 #endif
2523  return size;
2524 }
2525 
2526 /*
2527  * Initialize the shared status array and several string buffers
2528  * during postmaster startup.
2529  */
2530 void
2531 CreateSharedBackendStatus(void)
2532 {
2533  Size size;
2534  bool found;
2535  int i;
2536  char *buffer;
2537 
2538  /* Create or attach to the shared array */
2539  size = mul_size(sizeof(PgBackendStatus), MaxBackends);
2540  BackendStatusArray = (PgBackendStatus *)
2541  ShmemInitStruct("Backend Status Array", size, &found);
2542 
2543  if (!found)
2544  {
2545  /*
2546  * We're the first - initialize.
2547  */
2548  MemSet(BackendStatusArray, 0, size);
2549  }
2550 
2551  /* Create or attach to the shared appname buffer */
2552  size = mul_size(NAMEDATALEN, MaxBackends);
2553  BackendAppnameBuffer = (char *)
2554  ShmemInitStruct("Backend Application Name Buffer", size, &found);
2555 
2556  if (!found)
2557  {
2558  MemSet(BackendAppnameBuffer, 0, size);
2559 
2560  /* Initialize st_appname pointers. */
2561  buffer = BackendAppnameBuffer;
2562  for (i = 0; i < MaxBackends; i++)
2563  {
2564  BackendStatusArray[i].st_appname = buffer;
2565  buffer += NAMEDATALEN;
2566  }
2567  }
2568 
2569  /* Create or attach to the shared client hostname buffer */
2570  size = mul_size(NAMEDATALEN, MaxBackends);
2571  BackendClientHostnameBuffer = (char *)
2572  ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
2573 
2574  if (!found)
2575  {
2576  MemSet(BackendClientHostnameBuffer, 0, size);
2577 
2578  /* Initialize st_clienthostname pointers. */
2579  buffer = BackendClientHostnameBuffer;
2580  for (i = 0; i < MaxBackends; i++)
2581  {
2582  BackendStatusArray[i].st_clienthostname = buffer;
2583  buffer += NAMEDATALEN;
2584  }
2585  }
2586 
2587  /* Create or attach to the shared activity buffer */
2588  BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
2589  MaxBackends);
2590  BackendActivityBuffer = (char *)
2591  ShmemInitStruct("Backend Activity Buffer",
2592  BackendActivityBufferSize,
2593  &found);
2594 
2595  if (!found)
2596  {
2597  MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
2598 
2599  /* Initialize st_activity pointers. */
2600  buffer = BackendActivityBuffer;
2601  for (i = 0; i < MaxBackends; i++)
2602  {
2603  BackendStatusArray[i].st_activity = buffer;
2604  buffer += pgstat_track_activity_query_size;
2605  }
2606  }
2607 
2608 #ifdef USE_SSL
2609  /* Create or attach to the shared SSL status buffer */
2610  size = mul_size(sizeof(PgBackendSSLStatus), MaxBackends);
2611  BackendSslStatusBuffer = (PgBackendSSLStatus *)
2612  ShmemInitStruct("Backend SSL Status Buffer", size, &found);
2613 
2614  if (!found)
2615  {
2616  PgBackendSSLStatus *ptr;
2617 
2618  MemSet(BackendSslStatusBuffer, 0, size);
2619 
2620  /* Initialize st_sslstatus pointers. */
2621  ptr = BackendSslStatusBuffer;
2622  for (i = 0; i < MaxBackends; i++)
2623  {
2624  BackendStatusArray[i].st_sslstatus = ptr;
2625  ptr++;
2626  }
2627  }
2628 #endif
2629 }
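
The per-backend string areas above are carved out of one contiguous allocation, with each slot's start address stored in the corresponding array element. A stand-alone sketch of the same pattern (not part of pgstat.c; plain calloc stands in for ShmemInitStruct and the names are invented):

#include <stdlib.h>

#define SLOT_SIZE 64			/* stands in for NAMEDATALEN */

typedef struct ExampleEntry
{
	char	   *name;			/* points into the shared string buffer */
} ExampleEntry;

static ExampleEntry *
example_create_entries(int nentries)
{
	ExampleEntry *entries = calloc(nentries, sizeof(ExampleEntry));
	char	   *buffer = calloc(nentries, SLOT_SIZE);
	int			i;

	if (entries == NULL || buffer == NULL)
		return NULL;

	/* hand out one fixed-size slot per entry, exactly as done above */
	for (i = 0; i < nentries; i++)
	{
		entries[i].name = buffer;
		buffer += SLOT_SIZE;
	}
	return entries;
}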
2630 
2631 
2632 /* ----------
2633  * pgstat_initialize() -
2634  *
2635  * Initialize pgstats state, and set up our on-proc-exit hook.
2636  * Called from InitPostgres. MyBackendId must be set,
2637  * but we must not have started any transaction yet (since the
2638  * exit hook must run after the last transaction exit).
2639  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
2640  * ----------
2641  */
2642 void
2643 pgstat_initialize(void)
2644 {
2645  /* Initialize MyBEEntry */
2647  MyBEEntry = &BackendStatusArray[MyBackendId - 1];
2648 
2649  /* Set up a process-exit hook to clean up */
2650  on_shmem_exit(pgstat_beshutdown_hook, 0);
2651 }
2652 
2653 /* ----------
2654  * pgstat_bestart() -
2655  *
2656  * Initialize this backend's entry in the PgBackendStatus array.
2657  * Called from InitPostgres.
2658  * MyDatabaseId, session userid, and application_name must be set
2659  * (hence, this cannot be combined with pgstat_initialize).
2660  * ----------
2661  */
2662 void
2663 pgstat_bestart(void)
2664 {
2665  TimestampTz proc_start_timestamp;
2666  Oid userid;
2667  SockAddr clientaddr;
2668  volatile PgBackendStatus *beentry;
2669 
2670  /*
2671  * To minimize the time spent modifying the PgBackendStatus entry, fetch
2672  * all the needed data first.
2673  *
2674  * If we have a MyProcPort, use its session start time (for consistency,
2675  * and to save a kernel call).
2676  */
2677  if (MyProcPort)
2678  proc_start_timestamp = MyProcPort->SessionStartTime;
2679  else
2680  proc_start_timestamp = GetCurrentTimestamp();
2681  userid = GetSessionUserId();
2682 
2683  /*
2684  * We may not have a MyProcPort (eg, if this is the autovacuum process).
2685  * If so, use all-zeroes client address, which is dealt with specially in
2686  * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
2687  */
2688  if (MyProcPort)
2689  memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
2690  else
2691  MemSet(&clientaddr, 0, sizeof(clientaddr));
2692 
2693  /*
2694  * Initialize my status entry, following the protocol of bumping
2695  * st_changecount before and after; and make sure it's even afterwards. We
2696  * use a volatile pointer here to ensure the compiler doesn't try to get
2697  * cute.
2698  */
2699  beentry = MyBEEntry;
2700  do
2701  {
2702  pgstat_increment_changecount_before(beentry);
2703  } while ((beentry->st_changecount & 1) == 0);
2704 
2705  beentry->st_procpid = MyProcPid;
2706  beentry->st_proc_start_timestamp = proc_start_timestamp;
2707  beentry->st_activity_start_timestamp = 0;
2708  beentry->st_state_start_timestamp = 0;
2709  beentry->st_xact_start_timestamp = 0;
2710  beentry->st_databaseid = MyDatabaseId;
2711  beentry->st_userid = userid;
2712  beentry->st_clientaddr = clientaddr;
2713  if (MyProcPort && MyProcPort->remote_hostname)
2714  strlcpy(beentry->st_clienthostname, MyProcPort->remote_hostname,
2715  NAMEDATALEN);
2716  else
2717  beentry->st_clienthostname[0] = '\0';
2718 #ifdef USE_SSL
2719  if (MyProcPort && MyProcPort->ssl != NULL)
2720  {
2721  beentry->st_ssl = true;
2727  }
2728  else
2729  {
2730  beentry->st_ssl = false;
2731  }
2732 #else
2733  beentry->st_ssl = false;
2734 #endif
2735  beentry->st_state = STATE_UNDEFINED;
2736  beentry->st_appname[0] = '\0';
2737  beentry->st_activity[0] = '\0';
2738  /* Also make sure the last byte in each string area is always 0 */
2739  beentry->st_clienthostname[NAMEDATALEN - 1] = '\0';
2740  beentry->st_appname[NAMEDATALEN - 1] = '\0';
2741  beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
2742  beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
2743  beentry->st_progress_command_target = InvalidOid;
2744 
2745  /*
2746  * we don't zero st_progress_param here to save cycles; nobody should
2747  * examine it until st_progress_command has been set to something other
2748  * than PROGRESS_COMMAND_INVALID
2749  */
2750 
2751  pgstat_increment_changecount_after(beentry);
2752 
2753  /* Update app name to current GUC setting */
2754  if (application_name)
2755  pgstat_report_appname(application_name);
2756 }
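
The st_changecount protocol used above is a seqlock-style scheme: a writer makes the counter odd before modifying the entry and even again afterwards, and readers retry if the counter is odd or moves during the copy. A minimal sketch of the writer side (not part of pgstat.c; names are invented, and the real macros also issue memory barriers):

typedef struct ExampleStatus
{
	volatile int changecount;
	int			value;
} ExampleStatus;

static void
example_update(ExampleStatus *st, int newvalue)
{
	st->changecount++;			/* now odd: readers will retry */
	/* a write barrier would go here in the real protocol */
	st->value = newvalue;
	/* and another write barrier here */
	st->changecount++;			/* even again: entry is consistent */
}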
2757 
2758 /*
2759  * Shut down a single backend's statistics reporting at process exit.
2760  *
2761  * Flush any remaining statistics counts out to the collector.
2762  * Without this, operations triggered during backend exit (such as
2763  * temp table deletions) won't be counted.
2764  *
2765  * Lastly, clear out our entry in the PgBackendStatus array.
2766  */
2767 static void
2768 pgstat_beshutdown_hook(int code, Datum arg)
2769 {
2770  volatile PgBackendStatus *beentry = MyBEEntry;
2771 
2772  /*
2773  * If we got as far as discovering our own database ID, we can report what
2774  * we did to the collector. Otherwise, we'd be sending an invalid
2775  * database ID, so forget it. (This means that accesses to pg_database
2776  * during failed backend starts might never get counted.)
2777  */
2778  if (OidIsValid(MyDatabaseId))
2779  pgstat_report_stat(true);
2780 
2781  /*
2782  * Clear my status entry, following the protocol of bumping st_changecount
2783  * before and after. We use a volatile pointer here to ensure the
2784  * compiler doesn't try to get cute.
2785  */
2787 
2787  pgstat_increment_changecount_before(beentry);
2788  beentry->st_procpid = 0; /* mark invalid */
2789  pgstat_increment_changecount_after(beentry);
2790 
2791 }
2792 
2793 
2794 /* ----------
2795  * pgstat_report_activity() -
2796  *
2797  * Called from tcop/postgres.c to report what the backend is actually doing
2798  * (but note cmd_str can be NULL for certain cases).
2799  *
2800  * All updates of the status entry follow the protocol of bumping
2801  * st_changecount before and after. We use a volatile pointer here to
2802  * ensure the compiler doesn't try to get cute.
2803  * ----------
2804  */
2805 void
2806 pgstat_report_activity(BackendState state, const char *cmd_str)
2807 {
2808  volatile PgBackendStatus *beentry = MyBEEntry;
2809  TimestampTz start_timestamp;
2810  TimestampTz current_timestamp;
2811  int len = 0;
2812 
2813  TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
2814 
2815  if (!beentry)
2816  return;
2817 
2818  if (!pgstat_track_activities)
2819  {
2820  if (beentry->st_state != STATE_DISABLED)
2821  {
2822  volatile PGPROC *proc = MyProc;
2823 
2824  /*
2825  * track_activities is disabled, but we last reported a
2826  * non-disabled state. As our final update, change the state and
2827  * clear fields we will not be updating anymore.
2828  */
2830  beentry->st_state = STATE_DISABLED;
2831  beentry->st_state_start_timestamp = 0;
2832  beentry->st_activity[0] = '\0';
2833  beentry->st_activity_start_timestamp = 0;
2834  /* st_xact_start_timestamp and wait_event_info are also disabled */
2835  beentry->st_xact_start_timestamp = 0;
2836  proc->wait_event_info = 0;
2838  }
2839  return;
2840  }
2841 
2842  /*
2843  * To minimize the time spent modifying the entry, fetch all the needed
2844  * data first.
2845  */
2846  start_timestamp = GetCurrentStatementStartTimestamp();
2847  if (cmd_str != NULL)
2848  {
2849  len = pg_mbcliplen(cmd_str, strlen(cmd_str),
2850  pgstat_track_activity_query_size - 1);
2851  }
2852  current_timestamp = GetCurrentTimestamp();
2853 
2854  /*
2855  * Now update the status entry
2856  */
2858 
2859  beentry->st_state = state;
2860  beentry->st_state_start_timestamp = current_timestamp;
2861 
2862  if (cmd_str != NULL)
2863  {
2864  memcpy((char *) beentry->st_activity, cmd_str, len);
2865  beentry->st_activity[len] = '\0';
2866  beentry->st_activity_start_timestamp = start_timestamp;
2867  }
2868 
2870 }
2871 
2872 /*-----------
2873  * pgstat_progress_start_command() -
2874  *
2875  * Set st_progress_command (and st_progress_command_target) in own backend
2876  * entry. Also, zero-initialize st_progress_param array.
2877  *-----------
2878  */
2879 void
2880 pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
2881 {
2882  volatile PgBackendStatus *beentry = MyBEEntry;
2883 
2884  if (!beentry || !pgstat_track_activities)
2885  return;
2886 
2888  beentry->st_progress_command = cmdtype;
2889  beentry->st_progress_command_target = relid;
2890  MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
2892 }
2893 
2894 /*-----------
2895  * pgstat_progress_update_param() -
2896  *
2897  * Update index'th member in st_progress_param[] of own backend entry.
2898  *-----------
2899  */
2900 void
2901 pgstat_progress_update_param(int index, int64 val)
2902 {
2903  volatile PgBackendStatus *beentry = MyBEEntry;
2904 
2905  Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
2906 
2907  if (!beentry || !pgstat_track_activities)
2908  return;
2909 
2911  beentry->st_progress_param[index] = val;
2913 }
2914 
2915 /*-----------
2916  * pgstat_progress_update_multi_param() -
2917  *
2918  * Update multiple members in st_progress_param[] of own backend entry.
2919  * This is atomic; readers won't see intermediate states.
2920  *-----------
2921  */
2922 void
2923 pgstat_progress_update_multi_param(int nparam, const int *index,
2924  const int64 *val)
2925 {
2926  volatile PgBackendStatus *beentry = MyBEEntry;
2927  int i;
2928 
2929  if (!beentry || !pgstat_track_activities || nparam == 0)
2930  return;
2931 
2933 
2934  for (i = 0; i < nparam; ++i)
2935  {
2936  Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
2937 
2938  beentry->st_progress_param[index[i]] = val[i];
2939  }
2940 
2942 }
2943 
2944 /*-----------
2945  * pgstat_progress_end_command() -
2946  *
2947  * Reset st_progress_command (and st_progress_command_target) in own backend
2948  * entry. This signals the end of the command.
2949  *-----------
2950  */
2951 void
2952 pgstat_progress_end_command(void)
2953 {
2954  volatile PgBackendStatus *beentry = MyBEEntry;
2955 
2956  if (!beentry)
2957  return;
2958  if (!pgstat_track_activities &&
2959  beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
2960  return;
2961 
2966 }
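
For illustration, a sketch of how a command might drive the progress API above (not part of pgstat.c; it assumes the backend headers, PROGRESS_COMMAND_VACUUM is one real ProgressCommandType value, and the parameter slot numbers here are arbitrary examples whose meaning is defined by each command):

static void
example_report_progress(Oid relid, int64 blocks_total, int64 blocks_done)
{
	pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM, relid);

	/* each call fills one of the PGSTAT_NUM_PROGRESS_PARAM int64 slots */
	pgstat_progress_update_param(0, blocks_total);
	pgstat_progress_update_param(1, blocks_done);

	/* ... do the work, updating parameters as it proceeds ... */

	pgstat_progress_end_command();
}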
2967 
2968 /* ----------
2969  * pgstat_report_appname() -
2970  *
2971  * Called to update our application name.
2972  * ----------
2973  */
2974 void
2975 pgstat_report_appname(const char *appname)
2976 {
2977  volatile PgBackendStatus *beentry = MyBEEntry;
2978  int len;
2979 
2980  if (!beentry)
2981  return;
2982 
2983  /* This should be unnecessary if GUC did its job, but be safe */
2984  len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
2985 
2986  /*
2987  * Update my status entry, following the protocol of bumping
2988  * st_changecount before and after. We use a volatile pointer here to
2989  * ensure the compiler doesn't try to get cute.
2990  */
2991  pgstat_increment_changecount_before(beentry);
2992 
2993  memcpy((char *) beentry->st_appname, appname, len);
2994  beentry->st_appname[len] = '\0';
2995 
2996  pgstat_increment_changecount_after(beentry);
2997 }
2998 
2999 /*
3000  * Report current transaction start timestamp as the specified value.
3001  * Zero means there is no active transaction.
3002  */
3003 void
3004 pgstat_report_xact_timestamp(TimestampTz tstamp)
3005 {
3006  volatile PgBackendStatus *beentry = MyBEEntry;
3007 
3008  if (!pgstat_track_activities || !beentry)
3009  return;
3010 
3011  /*
3012  * Update my status entry, following the protocol of bumping
3013  * st_changecount before and after. We use a volatile pointer here to
3014  * ensure the compiler doesn't try to get cute.
3015  */
3016  pgstat_increment_changecount_before(beentry);
3017  beentry->st_xact_start_timestamp = tstamp;
3018  pgstat_increment_changecount_after(beentry);
3019 }
3020 
3021 /* ----------
3022  * pgstat_read_current_status() -
3023  *
3024  * Copy the current contents of the PgBackendStatus array to local memory,
3025  * if not already done in this transaction.
3026  * ----------
3027  */
3028 static void
3029 pgstat_read_current_status(void)
3030 {
3031  volatile PgBackendStatus *beentry;
3032  LocalPgBackendStatus *localtable;
3033  LocalPgBackendStatus *localentry;
3034  char *localappname,
3035  *localactivity;
3036 #ifdef USE_SSL
3037  PgBackendSSLStatus *localsslstatus;
3038 #endif
3039  int i;
3040 
3042  if (localBackendStatusTable)
3043  return; /* already done */
3044 
3046 
3047  localtable = (LocalPgBackendStatus *)
3048  MemoryContextAlloc(pgStatLocalContext,
3049  sizeof(LocalPgBackendStatus) * MaxBackends);
3050  localappname = (char *)
3051  MemoryContextAlloc(pgStatLocalContext,
3052  NAMEDATALEN * MaxBackends);
3053  localactivity = (char *)
3054  MemoryContextAlloc(pgStatLocalContext,
3055  pgstat_track_activity_query_size * MaxBackends);
3056 #ifdef USE_SSL
3057  localsslstatus = (PgBackendSSLStatus *)
3058  MemoryContextAlloc(pgStatLocalContext,
3059  sizeof(PgBackendSSLStatus) * MaxBackends);
3060 #endif
3061 
3062  localNumBackends = 0;
3063 
3064  beentry = BackendStatusArray;
3065  localentry = localtable;
3066  for (i = 1; i <= MaxBackends; i++)
3067  {
3068  /*
3069  * Follow the protocol of retrying if st_changecount changes while we
3070  * copy the entry, or if it's odd. (The check for odd is needed to
3071  * cover the case where we are able to completely copy the entry while
3072  * the source backend is between increment steps.) We use a volatile
3073  * pointer here to ensure the compiler doesn't try to get cute.
3074  */
3075  for (;;)
3076  {
3077  int before_changecount;
3078  int after_changecount;
3079 
3080  pgstat_save_changecount_before(beentry, before_changecount);
3081 
3082  localentry->backendStatus.st_procpid = beentry->st_procpid;
3083  if (localentry->backendStatus.st_procpid > 0)
3084  {
3085  memcpy(&localentry->backendStatus, (char *) beentry, sizeof(PgBackendStatus));
3086 
3087  /*
3088  * strcpy is safe even if the string is modified concurrently,
3089  * because there's always a \0 at the end of the buffer.
3090  */
3091  strcpy(localappname, (char *) beentry->st_appname);
3092  localentry->backendStatus.st_appname = localappname;
3093  strcpy(localactivity, (char *) beentry->st_activity);
3094  localentry->backendStatus.st_activity = localactivity;
3095  localentry->backendStatus.st_ssl = beentry->st_ssl;
3096 #ifdef USE_SSL
3097  if (beentry->st_ssl)
3098  {
3099  memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
3100  localentry->backendStatus.st_sslstatus = localsslstatus;
3101  }
3102 #endif
3103  }
3104 
3105  pgstat_save_changecount_after(beentry, after_changecount);
3106  if (before_changecount == after_changecount &&
3107  (before_changecount & 1) == 0)
3108  break;
3109 
3110  /* Make sure we can break out of loop if stuck... */
3111  CHECK_FOR_INTERRUPTS();
3112  }
3113 
3114  beentry++;
3115  /* Only valid entries get included into the local array */
3116  if (localentry->backendStatus.st_procpid > 0)
3117  {
3119  &localentry->backend_xid,
3120  &localentry->backend_xmin);
3121 
3122  localentry++;
3123  localappname += NAMEDATALEN;
3124  localactivity += pgstat_track_activity_query_size;
3125 #ifdef USE_SSL
3126  localsslstatus++;
3127 #endif
3128  localNumBackends++;
3129  }
3130  }
3131 
3132  /* Set the pointer only after completion of a valid table */
3133  localBackendStatusTable = localtable;
3134 }
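
The copy loop above is the reader side of the st_changecount protocol. Reduced to its essentials (not part of pgstat.c; it reuses the invented ExampleStatus struct from the writer sketch earlier, and the real code additionally uses read barriers):

static int
example_read(volatile ExampleStatus *st)
{
	int			value;
	int			before;
	int			after;

	for (;;)
	{
		before = st->changecount;
		value = st->value;		/* may be torn; validated below */
		after = st->changecount;

		/* accept only if the counter was even and did not move */
		if (before == after && (before & 1) == 0)
			return value;
		/* otherwise a writer was active: retry */
	}
}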
3135 
3136 /* ----------
3137  * pgstat_get_wait_event_type() -
3138  *
3139  * Return a string representing the type of the wait event the backend
3140  * is currently waiting on.
3141  */
3142 const char *
3143 pgstat_get_wait_event_type(uint32 wait_event_info)
3144 {
3145  uint32 classId;
3146  const char *event_type;
3147 
3148  /* report process as not waiting. */
3149  if (wait_event_info == 0)
3150  return NULL;
3151 
3152  classId = wait_event_info & 0xFF000000;
3153 
3154  switch (classId)
3155  {
3156  case PG_WAIT_LWLOCK:
3157  event_type = "LWLock";
3158  break;
3159  case PG_WAIT_LOCK:
3160  event_type = "Lock";
3161  break;
3162  case PG_WAIT_BUFFER_PIN:
3163  event_type = "BufferPin";
3164  break;
3165  case PG_WAIT_ACTIVITY:
3166  event_type = "Activity";
3167  break;
3168  case PG_WAIT_CLIENT:
3169  event_type = "Client";
3170  break;
3171  case PG_WAIT_EXTENSION:
3172  event_type = "Extension";
3173  break;
3174  case PG_WAIT_IPC:
3175  event_type = "IPC";
3176  break;
3177  case PG_WAIT_TIMEOUT:
3178  event_type = "Timeout";
3179  break;
3180  case PG_WAIT_IO:
3181  event_type = "IO";
3182  break;
3183  default:
3184  event_type = "???";
3185  break;
3186  }
3187 
3188  return event_type;
3189 }
3190 
3191 /* ----------
3192  * pgstat_get_wait_event() -
3193  *
3194  * Return a string representing the wait event the backend is currently
3195  * waiting on.
3196  */
3197 const char *
3198 pgstat_get_wait_event(uint32 wait_event_info)
3199 {
3200  uint32 classId;
3201  uint16 eventId;
3202  const char *event_name;
3203 
3204  /* report process as not waiting. */
3205  if (wait_event_info == 0)
3206  return NULL;
3207 
3208  classId = wait_event_info & 0xFF000000;
3209  eventId = wait_event_info & 0x0000FFFF;
3210 
3211  switch (classId)
3212  {
3213  case PG_WAIT_LWLOCK:
3214  event_name = GetLWLockIdentifier(classId, eventId);
3215  break;
3216  case PG_WAIT_LOCK:
3217  event_name = GetLockNameFromTagType(eventId);
3218  break;
3219  case PG_WAIT_BUFFER_PIN:
3220  event_name = "BufferPin";
3221  break;
3222  case PG_WAIT_ACTIVITY:
3223  {
3224  WaitEventActivity w = (WaitEventActivity) wait_event_info;
3225 
3226  event_name = pgstat_get_wait_activity(w);
3227  break;
3228  }
3229  case PG_WAIT_CLIENT:
3230  {
3231  WaitEventClient w = (WaitEventClient) wait_event_info;
3232 
3233  event_name = pgstat_get_wait_client(w);
3234  break;
3235  }
3236  case PG_WAIT_EXTENSION:
3237  event_name = "Extension";
3238  break;
3239  case PG_WAIT_IPC:
3240  {
3241  WaitEventIPC w = (WaitEventIPC) wait_event_info;
3242 
3243  event_name = pgstat_get_wait_ipc(w);
3244  break;
3245  }
3246  case PG_WAIT_TIMEOUT:
3247  {
3248  WaitEventTimeout w = (WaitEventTimeout) wait_event_info;
3249 
3250  event_name = pgstat_get_wait_timeout(w);
3251  break;
3252  }
3253  case PG_WAIT_IO:
3254  {
3255  WaitEventIO w = (WaitEventIO) wait_event_info;
3256 
3257  event_name = pgstat_get_wait_io(w);
3258  break;
3259  }
3260  default:
3261  event_name = "unknown wait event";
3262  break;
3263  }
3264 
3265  return event_name;
3266 }
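
Both lookup functions above rely only on the packing of wait_event_info: the class lives in the top byte and the event id in the low 16 bits. A tiny sketch of composing and decomposing such a value (not part of pgstat.c; PG_WAIT_LOCK is a real class constant, the event id 7 is arbitrary):

static void
example_decode_wait_event_info(void)
{
	uint32		wait_event_info = PG_WAIT_LOCK | 7;
	uint32		classId = wait_event_info & 0xFF000000;
	uint16		eventId = wait_event_info & 0x0000FFFF;

	/* here classId == PG_WAIT_LOCK and eventId == 7 */
	(void) classId;
	(void) eventId;
}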
3267 
3268 /* ----------
3269  * pgstat_get_wait_activity() -
3270  *
3271  * Convert WaitEventActivity to string.
3272  * ----------
3273  */
3274 static const char *
3275 pgstat_get_wait_activity(WaitEventActivity w)
3276 {
3277  const char *event_name = "unknown wait event";
3278 
3279  switch (w)
3280  {
3282  event_name = "ArchiverMain";
3283  break;
3285  event_name = "AutoVacuumMain";
3286  break;
3288  event_name = "BgWriterHibernate";
3289  break;
3291  event_name = "BgWriterMain";
3292  break;
3294  event_name = "CheckpointerMain";
3295  break;
3297  event_name = "PgStatMain";
3298  break;
3300  event_name = "RecoveryWalAll";
3301  break;
3303  event_name = "RecoveryWalStream";
3304  break;
3306  event_name = "SysLoggerMain";
3307  break;
3309  event_name = "WalReceiverMain";
3310  break;
3312  event_name = "WalSenderMain";
3313  break;
3315  event_name = "WalWriterMain";
3316  break;
3318  event_name = "LogicalLauncherMain";
3319  break;
3321  event_name = "LogicalApplyMain";
3322  break;
3323  /* no default case, so that compiler will warn */
3324  }
3325 
3326  return event_name;
3327 }
3328 
3329 /* ----------
3330  * pgstat_get_wait_client() -
3331  *
3332  * Convert WaitEventClient to string.
3333  * ----------
3334  */
3335 static const char *
3336 pgstat_get_wait_client(WaitEventClient w)
3337 {
3338  const char *event_name = "unknown wait event";
3339 
3340  switch (w)
3341  {
3343  event_name = "ClientRead";
3344  break;
3346  event_name = "ClientWrite";
3347  break;
3349  event_name = "SSLOpenServer";
3350  break;
3352  event_name = "WalReceiverWaitStart";
3353  break;
3355  event_name = "LibPQWalReceiver";
3356  break;
3358  event_name = "WalSenderWaitForWAL";
3359  break;
3361  event_name = "WalSenderWriteData";
3362  break;
3363  /* no default case, so that compiler will warn */
3364  }
3365 
3366  return event_name;
3367 }
3368 
3369 /* ----------
3370  * pgstat_get_wait_ipc() -
3371  *
3372  * Convert WaitEventIPC to string.
3373  * ----------
3374  */
3375 static const char *
3376 pgstat_get_wait_ipc(WaitEventIPC w)
3377 {
3378  const char *event_name = "unknown wait event";
3379 
3380  switch (w)
3381  {
3383  event_name = "BgWorkerShutdown";
3384  break;
3386  event_name = "BgWorkerStartup";
3387  break;
3388  case WAIT_EVENT_BTREE_PAGE:
3389  event_name = "BtreePage";
3390  break;
3392  event_name = "ExecuteGather";
3393  break;
3395  event_name = "MessageQueueInternal";
3396  break;
3398  event_name = "MessageQueuePutMessage";
3399  break;
3400  case WAIT_EVENT_MQ_RECEIVE:
3401  event_name = "MessageQueueReceive";
3402  break;
3403  case WAIT_EVENT_MQ_SEND:
3404  event_name = "MessageQueueSend";
3405  break;
3407  event_name = "ParallelFinish";
3408  break;
3410  event_name = "ParallelBitmapScan";
3411  break;
3413  event_name = "SafeSnapshot";
3414  break;
3415  case WAIT_EVENT_SYNC_REP:
3416  event_name = "SyncRep";
3417  break;
3419  event_name = "LogicalSyncData";
3420  break;
3422  event_name = "LogicalSyncStateChange";
3423  break;
3424  /* no default case, so that compiler will warn */
3425  }
3426 
3427  return event_name;
3428 }
3429 
3430 /* ----------
3431  * pgstat_get_wait_timeout() -
3432  *
3433  * Convert WaitEventTimeout to string.
3434  * ----------
3435  */
3436 static const char *
3437 pgstat_get_wait_timeout(WaitEventTimeout w)
3438 {
3439  const char *event_name = "unknown wait event";
3440 
3441  switch (w)
3442  {
3444  event_name = "BaseBackupThrottle";
3445  break;
3446  case WAIT_EVENT_PG_SLEEP:
3447  event_name = "PgSleep";
3448  break;
3450  event_name = "RecoveryApplyDelay";
3451  break;
3452  /* no default case, so that compiler will warn */
3453  }
3454 
3455  return event_name;
3456 }
3457 
3458 /* ----------
3459  * pgstat_get_wait_io() -
3460  *
3461  * Convert WaitEventIO to string.
3462  * ----------
3463  */
3464 static const char *
3465 pgstat_get_wait_io(WaitEventIO w)
3466 {
3467  const char *event_name = "unknown wait event";
3468 
3469  switch (w)
3470  {
3472  event_name = "BufFileRead";
3473  break;
3475  event_name = "BufFileWrite";
3476  break;
3478  event_name = "ControlFileRead";
3479  break;
3481  event_name = "ControlFileSync";
3482  break;
3484  event_name = "ControlFileSyncUpdate";
3485  break;
3487  event_name = "ControlFileWrite";
3488  break;
3490  event_name = "ControlFileWriteUpdate";
3491  break;
3493  event_name = "CopyFileRead";
3494  break;
3496  event_name = "CopyFileWrite";
3497  break;
3499  event_name = "DataFileExtend";
3500  break;
3502  event_name = "DataFileFlush";
3503  break;
3505  event_name = "DataFileImmediateSync";
3506  break;
3508  event_name = "DataFilePrefetch";
3509  break;
3511  event_name = "DataFileRead";
3512  break;
3514  event_name = "DataFileSync";
3515  break;
3517  event_name = "DataFileTruncate";
3518  break;
3520  event_name = "DataFileWrite";
3521  break;
3523  event_name = "DSMFillZeroWrite";
3524  break;
3526  event_name = "LockFileAddToDataDirRead";
3527  break;
3529  event_name = "LockFileAddToDataDirSync";
3530  break;
3532  event_name = "LockFileAddToDataDirWrite";
3533  break;
3535  event_name = "LockFileCreateRead";
3536  break;
3538  event_name = "LockFileCreateSync";
3539  break;
3541  event_name = "LockFileCreateWrite";
3542  break;
3544  event_name = "LockFileReCheckDataDirRead";
3545  break;
3547  event_name = "LogicalRewriteCheckpointSync";
3548  break;
3550  event_name = "LogicalRewriteMappingSync";
3551  break;
3553  event_name = "LogicalRewriteMappingWrite";
3554  break;
3556  event_name = "LogicalRewriteSync";
3557  break;
3559  event_name = "LogicalRewriteTruncate";
3560  break;
3562  event_name = "LogicalRewriteWrite";
3563  break;
3565  event_name = "RelationMapRead";
3566  break;
3568  event_name = "RelationMapSync";
3569  break;
3571  event_name = "RelationMapWrite";
3572  break;
3574  event_name = "ReorderBufferRead";
3575  break;
3577  event_name = "ReorderBufferWrite";
3578  break;
3580  event_name = "ReorderLogicalMappingRead";
3581  break;
3583  event_name = "ReplicationSlotRead";
3584  break;
3586  event_name = "ReplicationSlotRestoreSync";
3587  break;
3589  event_name = "ReplicationSlotSync";
3590  break;
3592  event_name = "ReplicationSlotWrite";
3593  break;
3595  event_name = "SLRUFlushSync";
3596  break;
3597  case WAIT_EVENT_SLRU_READ:
3598  event_name = "SLRURead";
3599  break;
3600  case WAIT_EVENT_SLRU_SYNC:
3601  event_name = "SLRUSync";
3602  break;
3603  case WAIT_EVENT_SLRU_WRITE:
3604  event_name = "SLRUWrite";
3605  break;
3607  event_name = "SnapbuildRead";
3608  break;
3610  event_name = "SnapbuildSync";
3611  break;
3613  event_name = "SnapbuildWrite";
3614  break;
3616  event_name = "TimelineHistoryFileSync";
3617  break;
3619  event_name = "TimelineHistoryFileWrite";
3620  break;
3622  event_name = "TimelineHistoryRead";
3623  break;
3625  event_name = "TimelineHistorySync";
3626  break;
3628  event_name = "TimelineHistoryWrite";
3629  break;
3631  event_name = "TwophaseFileRead";
3632  break;
3634  event_name = "TwophaseFileSync";
3635  break;
3637  event_name = "TwophaseFileWrite";
3638  break;
3640  event_name = "WALSenderTimelineHistoryRead";
3641  break;
3643  event_name = "WALBootstrapSync";
3644  break;
3646  event_name = "WALBootstrapWrite";
3647  break;
3649  event_name = "WALCopyRead";
3650  break;
3652  event_name = "WALCopySync";
3653  break;
3655  event_name = "WALCopyWrite";
3656  break;
3658  event_name = "WALInitSync";
3659  break;
3661  event_name = "WALInitWrite";
3662  break;
3663  case WAIT_EVENT_WAL_READ:
3664  event_name = "WALRead";
3665  break;
3667  event_name = "WALSyncMethodAssign";
3668  break;
3669  case WAIT_EVENT_WAL_WRITE:
3670  event_name = "WALWrite";
3671  break;
3672 
3673  /* no default case, so that compiler will warn */
3674  }
3675 
3676  return event_name;
3677 }
3678 
3679 
3680 /* ----------
3681  * pgstat_get_backend_current_activity() -
3682  *
3683  * Return a string representing the current activity of the backend with
3684  * the specified PID. This looks directly at the BackendStatusArray,
3685  * and so will provide current information regardless of the age of our
3686  * transaction's snapshot of the status array.
3687  *
3688  * It is the caller's responsibility to invoke this only for backends whose
3689  * state is expected to remain stable while the result is in use. The
3690  * only current use is in deadlock reporting, where we can expect that
3691  * the target backend is blocked on a lock. (There are corner cases
3692  * where the target's wait could get aborted while we are looking at it,
3693  * but the very worst consequence is to return a pointer to a string
3694  * that's been changed, so we won't worry too much.)
3695  *
3696  * Note: return strings for special cases match pg_stat_get_backend_activity.
3697  * ----------
3698  */
3699 const char *
3700 pgstat_get_backend_current_activity(int pid, bool checkUser)
3701 {
3702  PgBackendStatus *beentry;
3703  int i;
3704 
3705  beentry = BackendStatusArray;
3706  for (i = 1; i <= MaxBackends; i++)
3707  {
3708  /*
3709  * Although we expect the target backend's entry to be stable, that
3710  * doesn't imply that anyone else's is. To avoid identifying the
3711  * wrong backend, while we check for a match to the desired PID we
3712  * must follow the protocol of retrying if st_changecount changes
3713  * while we examine the entry, or if it's odd. (This might be
3714  * unnecessary, since fetching or storing an int is almost certainly
3715  * atomic, but let's play it safe.) We use a volatile pointer here to
3716  * ensure the compiler doesn't try to get cute.
3717  */
3718  volatile PgBackendStatus *vbeentry = beentry;
3719  bool found;
3720 
3721  for (;;)
3722  {
3723  int before_changecount;
3724  int after_changecount;
3725 
3726  pgstat_save_changecount_before(vbeentry, before_changecount);
3727 
3728  found = (vbeentry->st_procpid == pid);
3729 
3730  pgstat_save_changecount_after(vbeentry, after_changecount);
3731 
3732  if (before_changecount == after_changecount &&
3733  (before_changecount & 1) == 0)
3734  break;
3735 
3736  /* Make sure we can break out of loop if stuck... */
3737  CHECK_FOR_INTERRUPTS();
3738  }
3739 
3740  if (found)
3741  {
3742  /* Now it is safe to use the non-volatile pointer */
3743  if (checkUser && !superuser() && beentry->st_userid != GetUserId())
3744  return "<insufficient privilege>";
3745  else if (*(beentry->st_activity) == '\0')
3746  return "<command string not enabled>";
3747  else
3748  return beentry->st_activity;
3749  }
3750 
3751  beentry++;
3752  }
3753 
3754  /* If we get here, caller is in error ... */
3755  return "<backend information not available>";
3756 }
3757 
3758 /* ----------
3759  * pgstat_get_crashed_backend_activity() -
3760  *
3761  * Return a string representing the current activity of the backend with
3762  * the specified PID. Like the function above, but reads shared memory with
3763  * the expectation that it may be corrupt. On success, copy the string
3764  * into the "buffer" argument and return that pointer. On failure,
3765  * return NULL.
3766  *
3767  * This function is only intended to be used by the postmaster to report the
3768  * query that crashed a backend. In particular, no attempt is made to
3769  * follow the correct concurrency protocol when accessing the
3770  * BackendStatusArray. But that's OK, in the worst case we'll return a
3771  * corrupted message. We also must take care not to trip on ereport(ERROR).
3772  * ----------
3773  */
3774 const char *
3775 pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
3776 {
3777  volatile PgBackendStatus *beentry;
3778  int i;
3779 
3780  beentry = BackendStatusArray;
3781 
3782  /*
3783  * We probably shouldn't get here before shared memory has been set up,
3784  * but be safe.
3785  */
3786  if (beentry == NULL || BackendActivityBuffer == NULL)
3787  return NULL;
3788 
3789  for (i = 1; i <= MaxBackends; i++)
3790  {
3791  if (beentry->st_procpid == pid)
3792  {
3793  /* Read pointer just once, so it can't change after validation */
3794  const char *activity = beentry->st_activity;
3795  const char *activity_last;
3796 
3797  /*
3798  * We mustn't access activity string before we verify that it
3799  * falls within the BackendActivityBuffer. To make sure that the
3800  * entire string including its ending is contained within the
3801  * buffer, subtract one activity length from the buffer size.
3802  */
3803  activity_last = BackendActivityBuffer + BackendActivityBufferSize
3804  - pgstat_track_activity_query_size;
3805 
3806  if (activity < BackendActivityBuffer ||
3807  activity > activity_last)
3808  return NULL;
3809 
3810  /* If no string available, no point in a report */
3811  if (activity[0] == '\0')
3812  return NULL;
3813 
3814  /*
3815  * Copy only ASCII-safe characters so we don't run into encoding
3816  * problems when reporting the message; and be sure not to run off
3817  * the end of memory.
3818  */
3819  ascii_safe_strlcpy(buffer, activity,
3820  Min(buflen, pgstat_track_activity_query_size));
3821 
3822  return buffer;
3823  }
3824 
3825  beentry++;
3826  }
3827 
3828  /* PID not found */
3829  return NULL;
3830 }
3831 
3832 
3833 /* ------------------------------------------------------------
3834  * Local support functions follow
3835  * ------------------------------------------------------------
3836  */
3837 
3838 
3839 /* ----------
3840  * pgstat_setheader() -
3841  *
3842  * Set common header fields in a statistics message
3843  * ----------
3844  */
3845 static void
3846 pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
3847 {
3848  hdr->m_type = mtype;
3849 }
3850 
3851 
3852 /* ----------
3853  * pgstat_send() -
3854  *
3855  * Send out one statistics message to the collector
3856  * ----------
3857  */
3858 static void
3859 pgstat_send(void *msg, int len)
3860 {
3861  int rc;
3862 
3863  if (pgStatSock == PGINVALID_SOCKET)
3864  return;
3865 
3866  ((PgStat_MsgHdr *) msg)->m_size = len;
3867 
3868  /* We'll retry after EINTR, but ignore all other failures */
3869  do
3870  {
3871  rc = send(pgStatSock, msg, len, 0);
3872  } while (rc < 0 && errno == EINTR);
3873 
3874 #ifdef USE_ASSERT_CHECKING
3875  /* In debug builds, log send failures ... */
3876  if (rc < 0)
3877  elog(LOG, "could not send to statistics collector: %m");
3878 #endif
3879 }
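
The send() loop above is the classic "retry on EINTR, ignore every other failure" pattern for a best-effort datagram. A stand-alone sketch (not part of pgstat.c; the wrapper name is invented):

#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>

static void
example_best_effort_send(int sock, const void *buf, size_t len)
{
	ssize_t		rc;

	do
	{
		rc = send(sock, buf, len, 0);
	} while (rc < 0 && errno == EINTR);

	/* any other failure is deliberately ignored: stats are lossy by design */
	(void) rc;
}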
3880 
3881 /* ----------
3882  * pgstat_send_archiver() -
3883  *
3884  * Tell the collector about the WAL file that we successfully
3885  * archived or failed to archive.
3886  * ----------
3887  */
3888 void
3889 pgstat_send_archiver(const char *xlog, bool failed)
3890 {
3891  PgStat_MsgArchiver msg;
3892 
3893  /*
3894  * Prepare and send the message
3895  */
3896  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER);
3897  msg.m_failed = failed;
3898  StrNCpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
3900  pgstat_send(&msg, sizeof(msg));
3901 }
3902 
3903 /* ----------
3904  * pgstat_send_bgwriter() -
3905  *
3906  * Send bgwriter statistics to the collector
3907  * ----------
3908  */
3909 void
3910 pgstat_send_bgwriter(void)
3911 {
3912  /* We assume this initializes to zeroes */
3913  static const PgStat_MsgBgWriter all_zeroes;
3914 
3915  /*
3916  * This function can be called even if nothing at all has happened. In
3917  * this case, avoid sending a completely empty message to the stats
3918  * collector.
3919  */
3920  if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
3921  return;
3922 
3923  /*
3924  * Prepare and send the message
3925  */
3926  pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
3927  pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
3928 
3929  /*
3930  * Clear out the statistics buffer, so it can be re-used.
3931  */
3932  MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
3933 }
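
The "compare against a static all-zeroes struct" trick above avoids sending an empty message without testing each counter individually. The same idea in isolation (not part of pgstat.c; ExampleCounters and the function name are invented, and the struct is chosen so it has no padding bytes):

#include <string.h>

typedef struct ExampleCounters
{
	long		writes;
	long		fsyncs;
} ExampleCounters;

static int
example_has_activity(const ExampleCounters *c)
{
	/* static storage guarantees this compare value is all zeroes */
	static const ExampleCounters all_zeroes;

	return memcmp(c, &all_zeroes, sizeof(ExampleCounters)) != 0;
}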
3934 
3935 
3936 /* ----------
3937  * PgstatCollectorMain() -
3938  *
3939  * Start up the statistics collector process. This is the body of the
3940  * postmaster child process.
3941  *
3942  * The argc/argv parameters are valid only in EXEC_BACKEND case.
3943  * ----------
3944  */
3945 NON_EXEC_STATIC void
3946 PgstatCollectorMain(int argc, char *argv[])
3947 {
3948  int len;
3949  PgStat_Msg msg;
3950  int wr;
3951 
3952  /*
3953  * Ignore all signals usually bound to some action in the postmaster,
3954  * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3955  * support latch operations, because we only use a local latch.
3956  */
3958  pqsignal(SIGINT, SIG_IGN);
3959  pqsignal(SIGTERM, SIG_IGN);
3971 
3972  /*
3973  * Identify myself via ps
3974  */
3975  init_ps_display("stats collector process", "", "", "");
3976 
3977  /*
3978  * Read in existing stats files or initialize the stats to zero.
3979  */
3980  pgStatRunningInCollector = true;
3981  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
3982 
3983  /*
3984  * Loop to process messages until we get SIGQUIT or detect ungraceful
3985  * death of our parent postmaster.
3986  *
3987  * For performance reasons, we don't want to do ResetLatch/WaitLatch after
3988  * every message; instead, do that only after a recv() fails to obtain a
3989  * message. (This effectively means that if backends are sending us stuff
3990  * like mad, we won't notice postmaster death until things slack off a
3991  * bit; which seems fine.) To do that, we have an inner loop that
3992  * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3993  * the inner loop, which means that such interrupts will get serviced but
3994  * the latch won't get cleared until next time there is a break in the
3995  * action.
3996  */
3997  for (;;)
3998  {
3999  /* Clear any already-pending wakeups */
4000  ResetLatch(MyLatch);
4001 
4002  /*
4003  * Quit if we get SIGQUIT from the postmaster.
4004  */
4005  if (need_exit)
4006  break;
4007 
4008  /*
4009  * Inner loop iterates as long as we keep getting messages, or until
4010  * need_exit becomes set.
4011  */
4012  while (!need_exit)
4013  {
4014  /*
4015  * Reload configuration if we got SIGHUP from the postmaster.
4016  */
4017  if (got_SIGHUP)
4018  {
4019  got_SIGHUP = false;
4020  ProcessConfigFile(PGC_SIGHUP);
4021  }
4022 
4023  /*
4024  * Write the stats file(s) if a new request has arrived that is
4025  * not satisfied by existing file(s).
4026  */
4027  if (pending_write_requests != NIL)
4028  pgstat_write_statsfiles(false, false);
4029 
4030  /*
4031  * Try to receive and process a message. This will not block,
4032  * since the socket is set to non-blocking mode.
4033  *
4034  * XXX On Windows, we have to force pgwin32_recv to cooperate,
4035  * despite the previous use of pg_set_noblock() on the socket.
4036  * This is extremely broken and should be fixed someday.
4037  */
4038 #ifdef WIN32
4039  pgwin32_noblock = 1;
4040 #endif
4041 
4042  len = recv(pgStatSock, (char *) &msg,
4043  sizeof(PgStat_Msg), 0);
4044 
4045 #ifdef WIN32
4046  pgwin32_noblock = 0;
4047 #endif
4048 
4049  if (len < 0)
4050  {
4051  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
4052  break; /* out of inner loop */
4053  ereport(ERROR,
4054  (errcode_for_socket_access(),
4055  errmsg("could not read statistics message: %m")));
4056  }
4057 
4058  /*
4059  * We ignore messages that are smaller than our common header
4060  */
4061  if (len < sizeof(PgStat_MsgHdr))
4062  continue;
4063 
4064  /*
4065  * The received length must match the length in the header
4066  */
4067  if (msg.msg_hdr.m_size != len)
4068  continue;
4069 
4070  /*
4071  * O.K. - we accept this message. Process it.
4072  */
4073  switch (msg.msg_hdr.m_type)
4074  {
4075  case PGSTAT_MTYPE_DUMMY:
4076  break;
4077 
4078  case PGSTAT_MTYPE_INQUIRY:
4079  pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
4080  break;
4081 
4082  case PGSTAT_MTYPE_TABSTAT:
4083  pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
4084  break;
4085 
4086  case PGSTAT_MTYPE_TABPURGE:
4088  break;
4089 
4090  case PGSTAT_MTYPE_DROPDB:
4091  pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len);
4092  break;
4093 
4096  len);
4097  break;
4098 
4102  len);
4103  break;
4104 
4108  len);
4109  break;
4110 
4113  break;
4114 
4115  case PGSTAT_MTYPE_VACUUM:
4116  pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len);
4117  break;
4118 
4119  case PGSTAT_MTYPE_ANALYZE:
4120  pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len);
4121  break;
4122 
4123  case PGSTAT_MTYPE_ARCHIVER:
4125  break;
4126 
4127  case PGSTAT_MTYPE_BGWRITER:
4129  break;
4130 
4131  case PGSTAT_MTYPE_FUNCSTAT:
4133  break;
4134 
4137  break;
4138 
4141  break;
4142 
4143  case PGSTAT_MTYPE_DEADLOCK:
4145  break;
4146 
4147  case PGSTAT_MTYPE_TEMPFILE:
4149  break;
4150 
4151  default:
4152  break;
4153  }
4154  } /* end of inner message-processing loop */
4155 
4156  /* Sleep until there's something to do */
4157 #ifndef WIN32
4160  pgStatSock, -1L,
4162 #else
4163 
4164  /*
4165  * Windows, at least in its Windows Server 2003 R2 incarnation,
4166  * sometimes loses FD_READ events. Waking up and retrying the recv()
4167  * fixes that, so don't sleep indefinitely. This is a crock of the
4168  * first water, but until somebody wants to debug exactly what's
4169  * happening there, this is the best we can do. The two-second
4170  * timeout matches our pre-9.2 behavior, and needs to be short enough
4171  * to not provoke "using stale statistics" complaints from
4172  * backend_read_statsfile.
4173  */
4176  pgStatSock,
4177  2 * 1000L /* msec */,
4179 #endif
4180 
4181  /*
4182  * Emergency bailout if postmaster has died. This is to avoid the
4183  * necessity for manual cleanup of all postmaster children.
4184  */
4185  if (wr & WL_POSTMASTER_DEATH)
4186  break;
4187  } /* end of outer loop */
4188 
4189  /*
4190  * Save the final stats to reuse at next startup.
4191  */
4192  pgstat_write_statsfiles(true, true);
4193 
4194  exit(0);
4195 }
4196 
4197 
4198 /* SIGQUIT signal handler for collector process */
4199 static void
4201 {
4202  int save_errno = errno;
4203 
4204  need_exit = true;
4205  SetLatch(MyLatch);
4206 
4207  errno = save_errno;
4208 }
4209 
4210 /* SIGHUP handler for collector process */
4211 static void
4213 {
4214  int save_errno = errno;
4215 
4216  got_SIGHUP = true;
4217  SetLatch(MyLatch);
4218 
4219  errno = save_errno;
4220 }
4221 
4222 /*
4223  * Subroutine to clear stats in a database entry
4224  *
4225  * Tables and functions hashes are initialized to empty.
4226  */
4227 static void
4228 reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
4229 {
4230  HASHCTL hash_ctl;
4231 
4232  dbentry->n_xact_commit = 0;
4233  dbentry->n_xact_rollback = 0;
4234  dbentry->n_blocks_fetched = 0;
4235  dbentry->n_blocks_hit = 0;
4236  dbentry->n_tuples_returned = 0;
4237  dbentry->n_tuples_fetched = 0;
4238  dbentry->n_tuples_inserted = 0;
4239  dbentry->n_tuples_updated = 0;
4240  dbentry->n_tuples_deleted = 0;
4241  dbentry->last_autovac_time = 0;
4242  dbentry->n_conflict_tablespace = 0;
4243  dbentry->n_conflict_lock = 0;
4244  dbentry->n_conflict_snapshot = 0;
4245  dbentry->n_conflict_bufferpin = 0;
4246  dbentry->n_conflict_startup_deadlock = 0;
4247  dbentry->n_temp_files = 0;
4248  dbentry->n_temp_bytes = 0;
4249  dbentry->n_deadlocks = 0;
4250  dbentry->n_block_read_time = 0;
4251  dbentry->n_block_write_time = 0;
4252 
4254  dbentry->stats_timestamp = 0;
4255 
4256  memset(&hash_ctl, 0, sizeof(hash_ctl));
4257  hash_ctl.keysize = sizeof(Oid);
4258  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4259  dbentry->tables = hash_create("Per-database table",
4260  PGSTAT_TAB_HASH_SIZE,
4261  &hash_ctl,
4262  HASH_ELEM | HASH_BLOBS);
4263 
4264  hash_ctl.keysize = sizeof(Oid);
4265  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4266  dbentry->functions = hash_create("Per-database function",
4267  PGSTAT_FUNCTION_HASH_SIZE,
4268  &hash_ctl,
4269  HASH_ELEM | HASH_BLOBS);
4270 }
4271 
4272 /*
4273  * Lookup the hash table entry for the specified database. If no hash
4274  * table entry exists, initialize it, if the create parameter is true.
4275  * Else, return NULL.
4276  */
4277 static PgStat_StatDBEntry *
4278 pgstat_get_db_entry(Oid databaseid, bool create)
4279 {
4280  PgStat_StatDBEntry *result;
4281  bool found;
4282  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4283 
4284  /* Lookup or create the hash table entry for this database */
4285  result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
4286  &databaseid,
4287  action, &found);
4288 
4289  if (!create && !found)
4290  return NULL;
4291 
4292  /*
4293  * If not found, initialize the new one. This creates empty hash tables
4294  * for tables and functions, too.
4295  */
4296  if (!found)
4297  reset_dbentry_counters(result);
4298 
4299  return result;
4300 }
4301 
4302 
4303 /*
4304  * Lookup the hash table entry for the specified table. If no hash
4305  * table entry exists, initialize it, if the create parameter is true.
4306  * Else, return NULL.
4307  */
4308 static PgStat_StatTabEntry *
4309 pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
4310 {
4311  PgStat_StatTabEntry *result;
4312  bool found;
4313  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4314 
4315  /* Lookup or create the hash table entry for this table */
4316  result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
4317  &tableoid,
4318  action, &found);
4319 
4320  if (!create && !found)
4321  return NULL;
4322 
4323  /* If not found, initialize the new one. */
4324  if (!found)
4325  {
4326  result->numscans = 0;
4327  result->tuples_returned = 0;
4328  result->tuples_fetched = 0;
4329  result->tuples_inserted = 0;
4330  result->tuples_updated = 0;
4331  result->tuples_deleted = 0;
4332  result->tuples_hot_updated = 0;
4333  result->n_live_tuples = 0;
4334  result->n_dead_tuples = 0;
4335  result->changes_since_analyze = 0;
4336  result->blocks_fetched = 0;
4337  result->blocks_hit = 0;
4338  result->vacuum_timestamp = 0;
4339  result->vacuum_count = 0;
4340  result->autovac_vacuum_timestamp = 0;
4341  result->autovac_vacuum_count = 0;
4342  result->analyze_timestamp = 0;
4343  result->analyze_count = 0;
4344  result->autovac_analyze_timestamp = 0;
4345  result->autovac_analyze_count = 0;
4346  }
4347 
4348  return result;
4349 }
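
Both lookup helpers above follow the same dynahash pattern: choose HASH_ENTER or HASH_FIND up front, and initialize the entry only when hash_search() reports that it was newly created. A condensed sketch of that pattern (not part of pgstat.c; it assumes the dynahash declarations from utils/hsearch.h, and the function name is invented):

static void *
example_lookup(HTAB *table, Oid key, bool create)
{
	bool		found;
	HASHACTION	action = create ? HASH_ENTER : HASH_FIND;
	void	   *entry;

	entry = hash_search(table, &key, action, &found);

	if (!create && !found)
		return NULL;			/* lookup-only and not present */

	if (!found)
	{
		/* newly created by HASH_ENTER: zero or initialize its fields here */
	}
	return entry;
}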
4350 
4351 
4352 /* ----------
4353  * pgstat_write_statsfiles() -
4354  * Write the global statistics file, as well as requested DB files.
4355  *
4356  * 'permanent' specifies writing to the permanent files not temporary ones.
4357  * When true (happens only when the collector is shutting down), also remove
4358  * the temporary files so that backends starting up under a new postmaster
4359  * can't read old data before the new collector is ready.
4360  *
4361  * When 'allDbs' is false, only the requested databases (listed in
4362  * pending_write_requests) will be written; otherwise, all databases
4363  * will be written.
4364  * ----------
4365  */
4366 static void
4367 pgstat_write_statsfiles(bool permanent, bool allDbs)
4368 {
4369  HASH_SEQ_STATUS hstat;
4370  PgStat_StatDBEntry *dbentry;
4371  FILE *fpout;
4372  int32 format_id;
4373  const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
4374  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4375  int rc;
4376 
4377  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4378 
4379  /*
4380  * Open the statistics temp file to write out the current values.
4381  */
4382  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4383  if (fpout == NULL)
4384  {
4385  ereport(LOG,
4386  (errcode_for_file_access(),
4387  errmsg("could not open temporary statistics file \"%s\": %m",
4388  tmpfile)));
4389  return;
4390  }
4391 
4392  /*
4393  * Set the timestamp of the stats file.
4394  */
4395  globalStats.stats_timestamp = GetCurrentTimestamp();
4396 
4397  /*
4398  * Write the file header --- currently just a format ID.
4399  */
4400  format_id = PGSTAT_FILE_FORMAT_ID;
4401  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4402  (void) rc; /* we'll check for error with ferror */
4403 
4404  /*
4405  * Write global stats struct
4406  */
4407  rc = fwrite(&globalStats, sizeof(globalStats), 1, fpout);
4408  (void) rc; /* we'll check for error with ferror */
4409 
4410  /*
4411  * Write archiver stats struct
4412  */
4413  rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout);
4414  (void) rc; /* we'll check for error with ferror */
4415 
4416  /*
4417  * Walk through the database table.
4418  */
4419  hash_seq_init(&hstat, pgStatDBHash);
4420  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
4421  {
4422  /*
4423  * Write out the table and function stats for this DB into the
4424  * appropriate per-DB stat file, if required.
4425  */
4426  if (allDbs || pgstat_db_requested(dbentry->databaseid))
4427  {
4428  /* Make DB's timestamp consistent with the global stats */
4429  dbentry->stats_timestamp = globalStats.stats_timestamp;
4430 
4431  pgstat_write_db_statsfile(dbentry, permanent);
4432  }
4433 
4434  /*
4435  * Write out the DB entry. We don't write the tables or functions
4436  * pointers, since they're of no use to any other process.
4437  */
4438  fputc('D', fpout);
4439  rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
4440  (void) rc; /* we'll check for error with ferror */
4441  }
4442 
4443  /*
4444  * No more output to be done. Close the temp file and replace the old
4445  * pgstat.stat with it. The ferror() check replaces testing for error
4446  * after each individual fputc or fwrite above.
4447  */
4448  fputc('E', fpout);
4449 
4450  if (ferror(fpout))
4451  {
4452  ereport(LOG,
4453  (errcode_for_file_access(),
4454  errmsg("could not write temporary statistics file \"%s\": %m",
4455  tmpfile)));
4456  FreeFile(fpout);
4457  unlink(tmpfile);
4458  }
4459  else if (FreeFile(fpout) < 0)
4460  {
4461  ereport(LOG,
4462  (errcode_for_file_access(),
4463  errmsg("could not close temporary statistics file \"%s\": %m",
4464  tmpfile)));
4465  unlink(tmpfile);
4466  }
4467  else if (rename(tmpfile, statfile) < 0)
4468  {
4469  ereport(LOG,
4470  (errcode_for_file_access(),
4471  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4472  tmpfile, statfile)));
4473  unlink(tmpfile);
4474  }
4475 
4476  if (permanent)
4477  unlink(pgstat_stat_filename);
4478 
4479  /*
4480  * Now throw away the list of requests. Note that requests sent after we
4481  * started the write are still waiting on the network socket.
4482  */
4483  list_free(pending_write_requests);
4484  pending_write_requests = NIL;
4485 }
4486 
4487 /*
4488  * return the filename for a DB stat file; filename is the output buffer,
4489  * of length len.
4490  */
4491 static void
4492 get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
4493  char *filename, int len)
4494 {
4495  int printed;
4496 
4497  /* NB -- pgstat_reset_remove_files knows about the pattern this uses */
4498  printed = snprintf(filename, len, "%s/db_%u.%s",
4499  permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY :
4500  pgstat_stat_directory,
4501  databaseid,
4502  tempname ? "tmp" : "stat");
4503  if (printed > len)
4504  elog(ERROR, "overlength pgstat path");
4505 }
4506 
4507 /* ----------
4508  * pgstat_write_db_statsfile() -
4509  * Write the stat file for a single database.
4510  *
4511  * If writing to the permanent file (happens when the collector is
4512  * shutting down only), remove the temporary file so that backends
4513  * starting up under a new postmaster can't read the old data before
4514  * the new collector is ready.
4515  * ----------
4516  */
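/*
 * For reference (derived from the fwrite calls below), each per-database
 * file is: the int32 format ID, one 'T' record per table (a whole
 * PgStat_StatTabEntry), one 'F' record per function (a whole
 * PgStat_StatFuncEntry), and a closing 'E' byte.
 */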
4517 static void
4518 pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent)
4519 {
4520  HASH_SEQ_STATUS tstat;
4521  HASH_SEQ_STATUS fstat;
4522  PgStat_StatTabEntry *tabentry;
4523  PgStat_StatFuncEntry *funcentry;
4524  FILE *fpout;
4525  int32 format_id;
4526  Oid dbid = dbentry->databaseid;
4527  int rc;
4528  char tmpfile[MAXPGPATH];
4529  char statfile[MAXPGPATH];
4530 
4531  get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
4532  get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
4533 
4534  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4535 
4536  /*
4537  * Open the statistics temp file to write out the current values.
4538  */
4539  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4540  if (fpout == NULL)
4541  {
4542  ereport(LOG,
4543  (errcode_for_file_access(),
4544  errmsg("could not open temporary statistics file \"%s\": %m",
4545  tmpfile)));
4546  return;
4547  }
4548 
4549  /*
4550  * Write the file header --- currently just a format ID.
4551  */
4552  format_id = PGSTAT_FILE_FORMAT_ID;
4553  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4554  (void) rc; /* we'll check for error with ferror */
4555 
4556  /*
4557  * Walk through the database's access stats per table.
4558  */
4559  hash_seq_init(&tstat, dbentry->tables);
4560  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
4561  {
4562  fputc('T', fpout);
4563  rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
4564  (void) rc; /* we'll check for error with ferror */
4565  }
4566 
4567  /*
4568  * Walk through the database's function stats table.
4569  */
4570  hash_seq_init(&fstat, dbentry->functions);
4571  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
4572  {
4573  fputc('F', fpout);
4574  rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
4575  (void) rc; /* we'll check for error with ferror */
4576  }
4577 
4578  /*
4579  * No more output to be done. Close the temp file and replace the old
4580  * pgstat.stat with it. The ferror() check replaces testing for error
4581  * after each individual fputc or fwrite above.
4582  */
4583  fputc('E', fpout);
4584 
4585  if (ferror(fpout))
4586  {
4587  ereport(LOG,
4588  (errcode_for_file_access(),
4589  errmsg("could not write temporary statistics file \"%s\": %m",
4590  tmpfile)));
4591  FreeFile(fpout);
4592  unlink(tmpfile);
4593  }
4594  else if (FreeFile(fpout) < 0)
4595  {
4596  ereport(LOG,
4597  (errcode_for_file_access(),
4598  errmsg("could not close temporary statistics file \"%s\": %m",
4599  tmpfile)));
4600  unlink(tmpfile);
4601  }
4602  else if (rename(tmpfile, statfile) < 0)
4603  {
4604  ereport(LOG,
4605  (errcode_for_file_access(),
4606  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4607  tmpfile, statfile)));
4608  unlink(tmpfile);
4609  }
4610 
4611  if (permanent)
4612  {
4613  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
4614 
4615  elog(DEBUG2, "removing temporary stats file \"%s\"", statfile);
4616  unlink(statfile);
4617  }
4618 }
4619 
4620 /* ----------
4621  * pgstat_read_statsfiles() -
4622  *
4623  * Reads in some existing statistics collector files and returns the
4624  * databases hash table that is the top level of the data.
4625  *
4626  * If 'onlydb' is not InvalidOid, it means we only want data for that DB
4627  * plus the shared catalogs ("DB 0"). We'll still populate the DB hash
4628  * table for all databases, but we don't bother even creating table/function
4629  * hash tables for other databases.
4630  *
4631  * 'permanent' specifies reading from the permanent files not temporary ones.
4632  * When true (happens only when the collector is starting up), remove the
4633  * files after reading; the in-memory status is now authoritative, and the
4634  * files would be out of date in case somebody else reads them.
4635  *
4636  * If a 'deep' read is requested, table/function stats are read, otherwise
4637  * the table/function hash tables remain empty.
4638  * ----------
4639  */
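/*
 * Usage note: the collector itself performs a deep, permanent read of all
 * databases at startup (elsewhere in this file), whereas
 * backend_read_statsfile() below asks for either a shallow read of every
 * database (autovacuum launcher) or a deep read of MyDatabaseId plus the
 * shared catalogs (regular backends).
 */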
4640 static HTAB *
4641 pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
4642 {
4643  PgStat_StatDBEntry *dbentry;
4644  PgStat_StatDBEntry dbbuf;
4645  HASHCTL hash_ctl;
4646  HTAB *dbhash;
4647  FILE *fpin;
4648  int32 format_id;
4649  bool found;
4650  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4651 
4652  /*
4653  * The tables will live in pgStatLocalContext.
4654  */
4655  pgstat_setup_memcxt();
4656 
4657  /*
4658  * Create the DB hashtable
4659  */
4660  memset(&hash_ctl, 0, sizeof(hash_ctl));
4661  hash_ctl.keysize = sizeof(Oid);
4662  hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
4663  hash_ctl.hcxt = pgStatLocalContext;
4664  dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
4665  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4666 
4667  /*
4668  * Clear out global and archiver statistics so they start from zero in
4669  * case we can't load an existing statsfile.
4670  */
4671  memset(&globalStats, 0, sizeof(globalStats));
4672  memset(&archiverStats, 0, sizeof(archiverStats));
4673 
4674  /*
4675  * Set the current timestamp (will be kept only in case we can't load an
4676  * existing statsfile).
4677  */
4678  globalStats.stat_reset_timestamp = GetCurrentTimestamp();
4679  archiverStats.stat_reset_timestamp = globalStats.stat_reset_timestamp;
4680 
4681  /*
4682  * Try to open the stats file. If it doesn't exist, the backends simply
4683  * return zero for anything and the collector simply starts from scratch
4684  * with empty counters.
4685  *
4686  * ENOENT is a possibility if the stats collector is not running or has
4687  * not yet written the stats file the first time. Any other failure
4688  * condition is suspicious.
4689  */
4690  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4691  {
4692  if (errno != ENOENT)
4693  ereport(pgStatRunningInCollector ? LOG : WARNING,
4694  (errcode_for_file_access(),
4695  errmsg("could not open statistics file \"%s\": %m",
4696  statfile)));
4697  return dbhash;
4698  }
4699 
4700  /*
4701  * Verify it's of the expected format.
4702  */
4703  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4704  format_id != PGSTAT_FILE_FORMAT_ID)
4705  {
4706  ereport(pgStatRunningInCollector ? LOG : WARNING,
4707  (errmsg("corrupted statistics file \"%s\"", statfile)));
4708  goto done;
4709  }
4710 
4711  /*
4712  * Read global stats struct
4713  */
4714  if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
4715  {
4716  ereport(pgStatRunningInCollector ? LOG : WARNING,
4717  (errmsg("corrupted statistics file \"%s\"", statfile)));
4718  goto done;
4719  }
4720 
4721  /*
4722  * Read archiver stats struct
4723  */
4724  if (fread(&archiverStats, 1, sizeof(archiverStats), fpin) != sizeof(archiverStats))
4725  {
4726  ereport(pgStatRunningInCollector ? LOG : WARNING,
4727  (errmsg("corrupted statistics file \"%s\"", statfile)));
4728  goto done;
4729  }
4730 
4731  /*
4732  * We found an existing collector stats file. Read it and put all the
4733  * hashtable entries into place.
4734  */
4735  for (;;)
4736  {
4737  switch (fgetc(fpin))
4738  {
4739  /*
4740  * 'D' A PgStat_StatDBEntry struct describing a database
4741  * follows.
4742  */
4743  case 'D':
4744  if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
4745  fpin) != offsetof(PgStat_StatDBEntry, tables))
4746  {
4747  ereport(pgStatRunningInCollector ? LOG : WARNING,
4748  (errmsg("corrupted statistics file \"%s\"",
4749  statfile)));
4750  goto done;
4751  }
4752 
4753  /*
4754  * Add to the DB hash
4755  */
4756  dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
4757  (void *) &dbbuf.databaseid,
4758  HASH_ENTER,
4759  &found);
4760  if (found)
4761  {
4762  ereport(pgStatRunningInCollector ? LOG : WARNING,
4763  (errmsg("corrupted statistics file \"%s\"",
4764  statfile)));
4765  goto done;
4766  }
4767 
4768  memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
4769  dbentry->tables = NULL;
4770  dbentry->functions = NULL;
4771 
4772  /*
4773  * Don't create tables/functions hashtables for uninteresting
4774  * databases.
4775  */
4776  if (onlydb != InvalidOid)
4777  {
4778  if (dbbuf.databaseid != onlydb &&
4779  dbbuf.databaseid != InvalidOid)
4780  break;
4781  }
4782 
4783  memset(&hash_ctl, 0, sizeof(hash_ctl));
4784  hash_ctl.keysize = sizeof(Oid);
4785  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4786  hash_ctl.hcxt = pgStatLocalContext;
4787  dbentry->tables = hash_create("Per-database table",
4788  PGSTAT_TAB_HASH_SIZE,
4789  &hash_ctl,
4790  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4791 
4792  hash_ctl.keysize = sizeof(Oid);
4793  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4794  hash_ctl.hcxt = pgStatLocalContext;
4795  dbentry->functions = hash_create("Per-database function",
4796  PGSTAT_FUNCTION_HASH_SIZE,
4797  &hash_ctl,
4798  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4799 
4800  /*
4801  * If requested, read the data from the database-specific
4802  * file. Otherwise we just leave the hashtables empty.
4803  */
4804  if (deep)
4805  pgstat_read_db_statsfile(dbbuf.databaseid,
4806  dbentry->tables,
4807  dbentry->functions,
4808  permanent);
4809 
4810  break;
4811 
4812  case 'E':
4813  goto done;
4814 
4815  default:
4816  ereport(pgStatRunningInCollector ? LOG : WARNING,
4817  (errmsg("corrupted statistics file \"%s\"",
4818  statfile)));
4819  goto done;
4820  }
4821  }
4822 
4823 done:
4824  FreeFile(fpin);
4825 
4826  /* If requested to read the permanent file, also get rid of it. */
4827  if (permanent)
4828  {
4829  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
4830  unlink(statfile);
4831  }
4832 
4833  return dbhash;
4834 }
4835 
4836 
4837 /* ----------
4838  * pgstat_read_db_statsfile() -
4839  *
4840  * Reads in the existing statistics collector file for the given database,
4841  * filling the passed-in tables and functions hash tables.
4842  *
4843  * As in pgstat_read_statsfiles, if the permanent file is requested, it is
4844  * removed after reading.
4845  *
4846  * Note: this code has the ability to skip storing per-table or per-function
4847  * data, if NULL is passed for the corresponding hashtable. That's not used
4848  * at the moment though.
4849  * ----------
4850  */
4851 static void
4852 pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
4853  bool permanent)
4854 {
4855  PgStat_StatTabEntry *tabentry;
4856  PgStat_StatTabEntry tabbuf;
4857  PgStat_StatFuncEntry funcbuf;
4858  PgStat_StatFuncEntry *funcentry;
4859  FILE *fpin;
4860  int32 format_id;
4861  bool found;
4862  char statfile[MAXPGPATH];
4863 
4864  get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
4865 
4866  /*
4867  * Try to open the stats file. If it doesn't exist, the backends simply
4868  * return zero for anything and the collector simply starts from scratch
4869  * with empty counters.
4870  *
4871  * ENOENT is a possibility if the stats collector is not running or has
4872  * not yet written the stats file the first time. Any other failure
4873  * condition is suspicious.
4874  */
4875  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4876  {
4877  if (errno != ENOENT)
4878  ereport(pgStatRunningInCollector ? LOG : WARNING,
4879  (errcode_for_file_access(),
4880  errmsg("could not open statistics file \"%s\": %m",
4881  statfile)));
4882  return;
4883  }
4884 
4885  /*
4886  * Verify it's of the expected format.
4887  */
4888  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4889  format_id != PGSTAT_FILE_FORMAT_ID)
4890  {
4891  ereport(pgStatRunningInCollector ? LOG : WARNING,
4892  (errmsg("corrupted statistics file \"%s\"", statfile)));
4893  goto done;
4894  }
4895 
4896  /*
4897  * We found an existing collector stats file. Read it and put all the
4898  * hashtable entries into place.
4899  */
4900  for (;;)
4901  {
4902  switch (fgetc(fpin))
4903  {
4904  /*
4905  * 'T' A PgStat_StatTabEntry follows.
4906  */
4907  case 'T':
4908  if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
4909  fpin) != sizeof(PgStat_StatTabEntry))
4910  {
4911  ereport(pgStatRunningInCollector ? LOG : WARNING,
4912  (errmsg("corrupted statistics file \"%s\"",
4913  statfile)));
4914  goto done;
4915  }
4916 
4917  /*
4918  * Skip if table data not wanted.
4919  */
4920  if (tabhash == NULL)
4921  break;
4922 
4923  tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
4924  (void *) &tabbuf.tableid,
4925  HASH_ENTER, &found);
4926 
4927  if (found)
4928  {
4929  ereport(pgStatRunningInCollector ? LOG : WARNING,
4930  (errmsg("corrupted statistics file \"%s\"",
4931  statfile)));
4932  goto done;
4933  }
4934 
4935  memcpy(tabentry, &tabbuf, sizeof(tabbuf));
4936  break;
4937 
4938  /*
4939  * 'F' A PgStat_StatFuncEntry follows.
4940  */
4941  case 'F':
4942  if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
4943  fpin) != sizeof(PgStat_StatFuncEntry))
4944  {
4945  ereport(pgStatRunningInCollector ? LOG : WARNING,
4946  (errmsg("corrupted statistics file \"%s\"",
4947  statfile)));
4948  goto done;
4949  }
4950 
4951  /*
4952  * Skip if function data not wanted.
4953  */
4954  if (funchash == NULL)
4955  break;
4956 
4957  funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
4958  (void *) &funcbuf.functionid,
4959  HASH_ENTER, &found);
4960 
4961  if (found)
4962  {
4963  ereport(pgStatRunningInCollector ? LOG : WARNING,
4964  (errmsg("corrupted statistics file \"%s\"",
4965  statfile)));
4966  goto done;
4967  }
4968 
4969  memcpy(funcentry, &funcbuf, sizeof(funcbuf));
4970  break;
4971 
4972  /*
4973  * 'E' The EOF marker of a complete stats file.
4974  */
4975  case 'E':
4976  goto done;
4977 
4978  default:
4979  ereport(pgStatRunningInCollector ? LOG : WARNING,
4980  (errmsg("corrupted statistics file \"%s\"",
4981  statfile)));
4982  goto done;
4983  }
4984  }
4985 
4986 done:
4987  FreeFile(fpin);
4988 
4989  if (permanent)
4990  {
4991  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
4992  unlink(statfile);
4993  }
4994 }
4995 
4996 /* ----------
4997  * pgstat_read_db_statsfile_timestamp() -
4998  *
4999  * Attempt to determine the timestamp of the last db statfile write.
5000  * Returns TRUE if successful; the timestamp is stored in *ts.
5001  *
5002  * This needs to be careful about handling databases for which no stats file
5003  * exists, such as databases without a stat entry or those not yet written:
5004  *
5005  * - if there's a database entry in the global file, return the corresponding
5006  * stats_timestamp value.
5007  *
5008  * - if there's no db stat entry (e.g. for a new or inactive database),
5009  * there's no stats_timestamp value, but also nothing to write so we return
5010  * the timestamp of the global statfile.
5011  * ----------
5012  */
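/*
 * Note that only the global stats file is opened here; the per-database
 * files are never touched, which keeps this cheap enough to be called
 * repeatedly from the polling loop in backend_read_statsfile() below.
 */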
5013 static bool
5014 pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent,
5015  TimestampTz *ts)
5016 {
5017  PgStat_StatDBEntry dbentry;
5018  PgStat_GlobalStats myGlobalStats;
5019  PgStat_ArchiverStats myArchiverStats;
5020  FILE *fpin;
5021  int32 format_id;
5022  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
5023 
5024  /*
5025  * Try to open the stats file. As above, anything but ENOENT is worthy of
5026  * complaining about.
5027  */
5028  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
5029  {
5030  if (errno != ENOENT)
5031  ereport(pgStatRunningInCollector ? LOG : WARNING,
5032  (errcode_for_file_access(),
5033  errmsg("could not open statistics file \"%s\": %m",
5034  statfile)));
5035  return false;
5036  }
5037 
5038  /*
5039  * Verify it's of the expected format.
5040  */
5041  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
5042  format_id != PGSTAT_FILE_FORMAT_ID)
5043  {
5044  ereport(pgStatRunningInCollector ? LOG : WARNING,
5045  (errmsg("corrupted statistics file \"%s\"", statfile)));
5046  FreeFile(fpin);
5047  return false;
5048  }
5049 
5050  /*
5051  * Read global stats struct
5052  */
5053  if (fread(&myGlobalStats, 1, sizeof(myGlobalStats),
5054  fpin) != sizeof(myGlobalStats))
5055  {
5056  ereport(pgStatRunningInCollector ? LOG : WARNING,
5057  (errmsg("corrupted statistics file \"%s\"", statfile)));
5058  FreeFile(fpin);
5059  return false;
5060  }
5061 
5062  /*
5063  * Read archiver stats struct
5064  */
5065  if (fread(&myArchiverStats, 1, sizeof(myArchiverStats),
5066  fpin) != sizeof(myArchiverStats))
5067  {
5068  ereport(pgStatRunningInCollector ? LOG : WARNING,
5069  (errmsg("corrupted statistics file \"%s\"", statfile)));
5070  FreeFile(fpin);
5071  return false;
5072  }
5073 
5074  /* By default, we're going to return the timestamp of the global file. */
5075  *ts = myGlobalStats.stats_timestamp;
5076 
5077  /*
5078  * We found an existing collector stats file. Read it and look for a
5079  * record for the requested database. If found, use its timestamp.
5080  */
5081  for (;;)
5082  {
5083  switch (fgetc(fpin))
5084  {
5085  /*
5086  * 'D' A PgStat_StatDBEntry struct describing a database
5087  * follows.
5088  */
5089  case 'D':
5090  if (fread(&dbentry, 1, offsetof(PgStat_StatDBEntry, tables),
5091  fpin) != offsetof(PgStat_StatDBEntry, tables))
5092  {
5093  ereport(pgStatRunningInCollector ? LOG : WARNING,
5094  (errmsg("corrupted statistics file \"%s\"",
5095  statfile)));
5096  goto done;
5097  }
5098 
5099  /*
5100  * If this is the DB we're looking for, save its timestamp and
5101  * we're done.
5102  */
5103  if (dbentry.databaseid == databaseid)
5104  {
5105  *ts = dbentry.stats_timestamp;
5106  goto done;
5107  }
5108 
5109  break;
5110 
5111  case 'E':
5112  goto done;
5113 
5114  default:
5115  ereport(pgStatRunningInCollector ? LOG : WARNING,
5116  (errmsg("corrupted statistics file \"%s\"",
5117  statfile)));
5118  goto done;
5119  }
5120  }
5121 
5122 done:
5123  FreeFile(fpin);
5124  return true;
5125 }
5126 
5127 /*
5128  * If not already done, read the statistics collector stats file into
5129  * some hash tables. The results will be kept until pgstat_clear_snapshot()
5130  * is called (typically, at end of transaction).
5131  */
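/*
 * In outline: poll up to PGSTAT_POLL_LOOP_COUNT times, sleeping
 * PGSTAT_RETRY_DELAY msec between attempts; re-send the inquiry message
 * every PGSTAT_INQ_LOOP_COUNT iterations; and if no sufficiently fresh
 * file ever appears, log a message and fall back to whatever (possibly
 * stale) data can be read.
 */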
5132 static void
5133 backend_read_statsfile(void)
5134 {
5135  TimestampTz min_ts = 0;
5136  TimestampTz ref_ts = 0;
5137  Oid inquiry_db;
5138  int count;
5139 
5140  /* already read it? */
5141  if (pgStatDBHash)
5142  return;
5143  Assert(!pgStatRunningInCollector);
5144 
5145  /*
5146  * In a normal backend, we check staleness of the data for our own DB, and
5147  * so we send MyDatabaseId in inquiry messages. In the autovac launcher,
5148  * check staleness of the shared-catalog data, and send InvalidOid in
5149  * inquiry messages so as not to force writing unnecessary data.
5150  */
5151  if (IsAutoVacuumLauncherProcess())
5152  inquiry_db = InvalidOid;
5153  else
5154  inquiry_db = MyDatabaseId;
5155 
5156  /*
5157  * Loop until fresh enough stats file is available or we ran out of time.
5158  * The stats inquiry message is sent repeatedly in case collector drops
5159  * it; but not every single time, as that just swamps the collector.
5160  */
5161  for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
5162  {
5163  bool ok;
5164  TimestampTz file_ts = 0;
5165  TimestampTz cur_ts;
5166 
5167  CHECK_FOR_INTERRUPTS();
5168 
5169  ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);
5170 
5171  cur_ts = GetCurrentTimestamp();
5172  /* Calculate min acceptable timestamp, if we didn't already */
5173  if (count == 0 || cur_ts < ref_ts)
5174  {
5175  /*
5176  * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
5177  * msec before now. This indirectly ensures that the collector
5178  * needn't write the file more often than PGSTAT_STAT_INTERVAL. In
5179  * an autovacuum worker, however, we want a lower delay to avoid
5180  * using stale data, so we use PGSTAT_RETRY_DELAY (since the
5181  * number of workers is low, this shouldn't be a problem).
5182  *
5183  * We don't recompute min_ts after sleeping, except in the
5184  * unlikely case that cur_ts went backwards. So we might end up
5185  * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In
5186  * practice that shouldn't happen, though, as long as the sleep
5187  * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
5188  * tell the collector that our cutoff time is less than what we'd
5189  * actually accept.
5190  */
5191  ref_ts = cur_ts;
5192  if (IsAutoVacuumWorkerProcess())
5193  min_ts = TimestampTzPlusMilliseconds(ref_ts,
5194  -PGSTAT_RETRY_DELAY);
5195  else
5196  min_ts = TimestampTzPlusMilliseconds(ref_ts,
5197  -PGSTAT_STAT_INTERVAL);
5198  }
5199 
5200  /*
5201  * If the file timestamp is actually newer than cur_ts, we must have
5202  * had a clock glitch (system time went backwards) or there is clock
5203  * skew between our processor and the stats collector's processor.
5204  * Accept the file, but send an inquiry message anyway to make
5205  * pgstat_recv_inquiry do a sanity check on the collector's time.
5206  */
5207  if (ok && file_ts > cur_ts)
5208  {
5209  /*
5210  * A small amount of clock skew between processors isn't terribly
5211  * surprising, but a large difference is worth logging. We
5212  * arbitrarily define "large" as 1000 msec.
5213  */
5214  if (file_ts >= TimestampTzPlusMilliseconds(cur_ts, 1000))
5215  {
5216  char *filetime;
5217  char *mytime;
5218 
5219  /* Copy because timestamptz_to_str returns a static buffer */
5220  filetime = pstrdup(timestamptz_to_str(file_ts));
5221  mytime = pstrdup(timestamptz_to_str(cur_ts));
5222  elog(LOG, "stats collector's time %s is later than backend local time %s",
5223  filetime, mytime);
5224  pfree(filetime);
5225  pfree(mytime);
5226  }
5227 
5228  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5229  break;
5230  }
5231 
5232  /* Normal acceptance case: file is not older than cutoff time */
5233  if (ok && file_ts >= min_ts)
5234  break;
5235 
5236  /* Not there or too old, so kick the collector and wait a bit */
5237  if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
5238  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5239 
5240  pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
5241  }
5242 
5243  if (count >= PGSTAT_POLL_LOOP_COUNT)
5244  ereport(LOG,
5245  (errmsg("using stale statistics instead of current ones "
5246  "because stats collector is not responding")));
5247 
5248  /*
5249  * Autovacuum launcher wants stats about all databases, but a shallow read
5250  * is sufficient. Regular backends want a deep read for just the tables
5251  * they can see (MyDatabaseId + shared catalogs).
5252  */
5253  if (IsAutoVacuumLauncherProcess())
5254  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
5255  else
5256  pgStatDBHash = pgstat_read_statsfiles(MyDatabaseId, false, true);
5257 }
5258 
5259 
5260 /* ----------
5261  * pgstat_setup_memcxt() -
5262  *
5263  * Create pgStatLocalContext, if not already done.
5264  * ----------
5265  */
5266 static void
5267 pgstat_setup_memcxt(void)
5268 {
5269  if (!pgStatLocalContext)
5270  pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
5271  "Statistics snapshot",
5272  ALLOCSET_SMALL_SIZES);
5273 }
5274 
5275 
5276 /* ----------
5277  * pgstat_clear_snapshot() -
5278  *
5279  * Discard any data collected in the current transaction. Any subsequent
5280  * request will cause new snapshots to be read.
5281  *
5282  * This is also invoked during transaction commit or abort to discard
5283  * the no-longer-wanted snapshot.
5284  * ----------
5285  */
5286 void
5287 pgstat_clear_snapshot(void)
5288 {
5289  /* Release memory, if any was allocated */
5290  if (pgStatLocalContext)
5291  MemoryContextDelete(pgStatLocalContext);
5292 
5293  /* Reset variables */
5294  pgStatLocalContext = NULL;
5295  pgStatDBHash = NULL;
5296  localBackendStatusTable = NULL;
5297  localNumBackends = 0;
5298 }
5299 
5300 
5301 /* ----------
5302  * pgstat_recv_inquiry() -
5303  *
5304  * Process stat inquiry requests.
5305  * ----------
5306  */
5307 static void
5308 pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
5309 {
5310  PgStat_StatDBEntry *dbentry;
5311 
5312  elog(DEBUG2, "received inquiry for database %u", msg->databaseid);
5313 
5314  /*
5315  * If there's already a write request for this DB, there's nothing to do.
5316  *
5317  * Note that if a request is found, we return early and skip the below
5318  * check for clock skew. This is okay, since the only way for a DB
5319  * request to be present in the list is that we have been here since the
5320  * last write round. It seems sufficient to check for clock skew once per
5321  * write round.
5322  */
5323  if (list_member_oid(pending_write_requests, msg->databaseid))
5324  return;
5325 
5326  /*
5327  * Check to see if we last wrote this database at a time >= the requested
5328  * cutoff time. If so, this is a stale request that was generated before
5329  * we updated the DB file, and we don't need to do so again.
5330  *
5331  * If the requestor's local clock time is older than stats_timestamp, we
5332  * should suspect a clock glitch, ie system time going backwards; though
5333  * the more likely explanation is just delayed message receipt. It is
5334  * worth expending a GetCurrentTimestamp call to be sure, since a large
5335  * retreat in the system clock reading could otherwise cause us to neglect
5336  * to update the stats file for a long time.
5337  */
5338  dbentry = pgstat_get_db_entry(msg->databaseid, false);
5339  if (dbentry == NULL)
5340  {
5341  /*
5342  * We have no data for this DB. Enter a write request anyway so that
5343  * the global stats will get updated. This is needed to prevent
5344  * backend_read_statsfile from waiting for data that we cannot supply,
5345  * in the case of a new DB that nobody has yet reported any stats for.
5346  * See the behavior of pgstat_read_db_statsfile_timestamp.
5347  */
5348  }
5349  else if (msg->clock_time < dbentry->stats_timestamp)
5350  {
5351  TimestampTz cur_ts = GetCurrentTimestamp();
5352 
5353  if (cur_ts < dbentry->stats_timestamp)
5354  {
5355  /*
5356  * Sure enough, time went backwards. Force a new stats file write
5357  * to get back in sync; but first, log a complaint.
5358  */
5359  char *writetime;
5360  char *mytime;
5361 
5362  /* Copy because timestamptz_to_str returns a static buffer */
5363  writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
5364  mytime = pstrdup(timestamptz_to_str(cur_ts));
5365  elog(LOG,
5366  "stats_timestamp %s is later than collector's time %s for database %u",
5367  writetime, mytime, dbentry->databaseid);
5368  pfree(writetime);
5369  pfree(mytime);
5370  }
5371  else
5372  {
5373  /*
5374  * Nope, it's just an old request. Assuming msg's clock_time is
5375  * >= its cutoff_time, it must be stale, so we can ignore it.
5376  */
5377  return;
5378  }
5379  }
5380  else if (msg->cutoff_time <= dbentry->stats_timestamp)
5381  {
5382  /* Stale request, ignore it */
5383  return;
5384  }
5385 
5386  /*
5387  * We need to write this DB, so create a request.
5388  */
5389  pending_write_requests = lappend_oid(pending_write_requests,
5390  msg->databaseid);
5391 }
5392 
5393 
5394 /* ----------
5395  * pgstat_recv_tabstat() -
5396  *
5397  * Count what the backend has done.
5398  * ----------
5399  */
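/*
 * Each tabstat message carries one database's transaction and block I/O
 * counters plus an array of m_nentries per-table deltas; the loop below
 * folds every delta into the collector's hash table, creating entries on
 * first sight and clamping live/dead tuple counts at zero.
 */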
5400 static void
5401 pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
5402 {
5403  PgStat_StatDBEntry *dbentry;
5404  PgStat_StatTabEntry *tabentry;
5405  int i;
5406  bool found;
5407 
5408  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5409 
5410  /*
5411  * Update database-wide stats.
5412  */
5413  dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
5414  dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
5415  dbentry->n_block_read_time += msg->m_block_read_time;
5416  dbentry->n_block_write_time += msg->m_block_write_time;
5417 
5418  /*
5419  * Process all table entries in the message.
5420  */
5421  for (i = 0; i < msg->m_nentries; i++)
5422  {
5423  PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);
5424 
5425  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
5426  (void *) &(tabmsg->t_id),
5427  HASH_ENTER, &found);
5428 
5429  if (!found)
5430  {
5431  /*
5432  * If it's a new table entry, initialize counters to the values we
5433  * just got.
5434  */
5435  tabentry->numscans = tabmsg->t_counts.t_numscans;
5436  tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
5437  tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
5438  tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
5439  tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
5440  tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
5441  tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
5442  tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
5443  tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
5444  tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
5445  tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
5446  tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
5447 
5448  tabentry->vacuum_timestamp = 0;
5449  tabentry->vacuum_count = 0;
5450  tabentry->autovac_vacuum_timestamp = 0;
5451  tabentry->autovac_vacuum_count = 0;
5452  tabentry->analyze_timestamp = 0;
5453  tabentry->analyze_count = 0;
5454  tabentry->autovac_analyze_timestamp = 0;
5455  tabentry->autovac_analyze_count = 0;
5456  }
5457  else
5458  {
5459  /*
5460  * Otherwise add the values to the existing entry.
5461  */
5462  tabentry->numscans += tabmsg->t_counts.t_numscans;
5463  tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
5464  tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5465  tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5466  tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
5467  tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5468  tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
5469  /* If table was truncated, first reset the live/dead counters */
5470  if (tabmsg->t_counts.t_truncated)
5471  {
5472  tabentry->n_live_tuples = 0;
5473  tabentry->n_dead_tuples = 0;
5474  }
5475  tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
5476  tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
5477  tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
5478  tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5479  tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
5480  }
5481 
5482  /* Clamp n_live_tuples in case of negative delta_live_tuples */
5483  tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
5484  /* Likewise for n_dead_tuples */
5485  tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
5486 
5487  /*
5488  * Add per-table stats to the per-database entry, too.
5489  */
5490  dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
5491  dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5492  dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5493  dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
5494  dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5495  dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5496  dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
5497  }
5498 }
5499 
5500 
5501 /* ----------
5502  * pgstat_recv_tabpurge() -
5503  *
5504  * Arrange for dead table removal.
5505  * ----------
5506  */
5507 static void
5508 pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
5509 {
5510  PgStat_StatDBEntry *dbentry;
5511  int i;
5512 
5513  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5514 
5515  /*
5516  * No need to purge if we don't even know the database.
5517  */
5518  if (!dbentry || !dbentry->tables)
5519  return;
5520 
5521  /*
5522  * Process all table entries in the message.
5523  */
5524  for (i = 0; i < msg->m_nentries; i++)
5525  {
5526  /* Remove from hashtable if present; we don't care if it's not. */
5527  (void) hash_search(dbentry->tables,
5528  (void *) &(msg->m_tableid[i]),
5529  HASH_REMOVE, NULL);
5530  }
5531 }
5532 
5533 
5534 /* ----------
5535  * pgstat_recv_dropdb() -
5536  *
5537  * Arrange for dead database removal
5538  * ----------
5539  */
5540 static void
5541 pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
5542 {
5543  Oid dbid = msg->m_databaseid;
5544  PgStat_StatDBEntry *dbentry;
5545 
5546  /*
5547  * Lookup the database in the hashtable.
5548  */
5549  dbentry = pgstat_get_db_entry(dbid, false);
5550 
5551  /*
5552  * If found, remove it (along with the db statfile).
5553  */
5554  if (dbentry)
5555  {
5556  char statfile[MAXPGPATH];
5557 
5558  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
5559 
5560  elog(DEBUG2, "removing stats file \"%s\"", statfile);
5561  unlink(statfile);
5562 
5563  if (dbentry->tables != NULL)
5564  hash_destroy(dbentry->tables);
5565  if (dbentry->functions != NULL)
5566  hash_destroy(dbentry->functions);
5567 
5568  if (hash_search(pgStatDBHash,
5569  (void *) &dbid,
5570  HASH_REMOVE, NULL) == NULL)
5571  ereport(ERROR,
5572  (errmsg("database hash table corrupted during cleanup --- abort")));
5573  }
5574 }
5575 
5576 
5577 /* ----------
5578  * pgstat_recv_resetcounter() -
5579  *
5580  * Reset the statistics for the specified database.
5581  * ----------
5582  */
5583 static void
5584 pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
5585 {
5586  PgStat_StatDBEntry *dbentry;
5587 
5588  /*
5589  * Lookup the database in the hashtable. Nothing to do if not there.
5590  */
5591  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5592 
5593  if (!dbentry)
5594  return;
5595 
5596  /*
5597  * We simply throw away all the database's table entries by recreating a
5598  * new hash table for them.
5599  */
5600  if (dbentry->tables != NULL)
5601  hash_destroy(dbentry->tables);
5602  if (dbentry->functions != NULL)
5603  hash_destroy(dbentry->functions);
5604 
5605  dbentry->tables = NULL;
5606  dbentry->functions = NULL;
5607 
5608  /*
5609  * Reset database-level stats, too. This creates empty hash tables for
5610  * tables and functions.
5611  */
5612  reset_dbentry_counters(dbentry);
5613 }
5614 
5615 /* ----------
5616  * pgstat_recv_resetshared() -
5617  *
5618  * Reset some shared statistics of the cluster.
5619  * ----------
5620  */
5621 static void
5622 pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
5623 {
5624  if (msg->m_resettarget == RESET_BGWRITER)
5625  {
5626  /* Reset the global background writer statistics for the cluster. */
5627  memset(&globalStats, 0, sizeof(globalStats));
5628  globalStats.stat_reset_timestamp = GetCurrentTimestamp();
5629  }
5630  else if (msg->m_resettarget == RESET_ARCHIVER)
5631  {
5632  /* Reset the archiver statistics for the cluster. */
5633  memset(&archiverStats, 0, sizeof(archiverStats));
5634  archiverStats.stat_reset_timestamp = GetCurrentTimestamp();
5635  }
5636 
5637  /*
5638  * Presumably the sender of this message validated the target, don't
5639  * complain here if it's not valid
5640  */
5641 }
5642 
5643 /* ----------
5644  * pgstat_recv_resetsinglecounter() -
5645  *
5646  * Reset statistics for a single object
5647  * ----------
5648  */
5649 static void
5650 pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len)
5651 {
5652  PgStat_StatDBEntry *dbentry;
5653 
5654  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5655 
5656  if (!dbentry)
5657  return;
5658 
5659  /* Set the reset timestamp for the whole database */
5660  dbentry->stat_reset_timestamp = GetCurrentTimestamp();
5661 
5662  /* Remove object if it exists, ignore it if not */
5663  if (msg->m_resettype == RESET_TABLE)
5664  (void) hash_search(dbentry->tables, (void *) &(msg->m_objectid),
5665  HASH_REMOVE, NULL);
5666  else if (msg->m_resettype == RESET_FUNCTION)
5667  (void) hash_search(dbentry->functions, (void *) &(msg->m_objectid),
5668  HASH_REMOVE, NULL);
5669 }
5670 
5671 /* ----------
5672  * pgstat_recv_autovac() -
5673  *
5674  * Process an autovacuum signalling message.
5675  * ----------
5676  */
5677 static void
5678 pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
5679 {
5680  PgStat_StatDBEntry *dbentry;
5681 
5682  /*
5683  * Store the last autovacuum time in the database's hashtable entry.
5684  */
5685  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5686 
5687  dbentry->last_autovac_time = msg->m_start_time;
5688 }
5689 
5690 /* ----------
5691  * pgstat_recv_vacuum() -
5692  *
5693  * Process a VACUUM message.
5694  * ----------
5695  */
5696 static void
5697 pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
5698 {
5699  PgStat_StatDBEntry *dbentry;
5700  PgStat_StatTabEntry *tabentry;
5701 
5702  /*
5703  * Store the data in the table's hashtable entry.
5704  */
5705  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5706 
5707  tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
5708 
5709  tabentry->n_live_tuples = msg->m_live_tuples;
5710  tabentry->n_dead_tuples = msg->m_dead_tuples;
5711 
5712  if (msg->m_autovacuum)
5713  {
5714  tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
5715  tabentry->autovac_vacuum_count++;
5716  }
5717  else
5718  {
5719  tabentry->vacuum_timestamp = msg->m_vacuumtime;
5720  tabentry->vacuum_count++;
5721  }
5722 }
5723 
5724 /* ----------
5725  * pgstat_recv_analyze() -
5726  *
5727  * Process an ANALYZE message.
5728  * ----------
5729  */
5730 static void
5731 pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
5732 {
5733  PgStat_StatDBEntry *dbentry;
5734  PgStat_StatTabEntry *tabentry;
5735 
5736  /*
5737  * Store the data in the table's hashtable entry.
5738  */
5739  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5740 
5741  tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
5742 
5743  tabentry->n_live_tuples = msg->m_live_tuples;
5744  tabentry->n_dead_tuples = msg->m_dead_tuples;
5745 
5746  /*
5747  * If commanded, reset changes_since_analyze to zero. This forgets any
5748  * changes that were committed while the ANALYZE was in progress, but we
5749  * have no good way to estimate how many of those there were.
5750  */
5751  if (msg->m_resetcounter)
5752  tabentry->changes_since_analyze = 0;
5753 
5754  if (msg->m_autovacuum)
5755  {
5756  tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
5757  tabentry->autovac_analyze_count++;
5758  }
5759  else
5760  {
5761  tabentry->analyze_timestamp = msg->m_analyzetime;
5762  tabentry->analyze_count++;
5763  }
5764 }
5765 
5766 
5767 /* ----------
5768  * pgstat_recv_archiver() -
5769  *
5770  * Process an ARCHIVER message.
5771  * ----------
5772  */
5773 static void
5774 pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len)
5775 {
5776  if (msg->m_failed)
5777  {
5778  /* Failed archival attempt */
5779  ++archiverStats.failed_count;
5780  memcpy(archiverStats.last_failed_wal, msg->m_xlog,
5781  sizeof(archiverStats.last_failed_wal));
5782  archiverStats.last_failed_timestamp = msg->m_timestamp;
5783  }
5784  else
5785  {
5786  /* Successful archival operation */
5787  ++archiverStats.archived_count;
5788  memcpy(archiverStats.last_archived_wal, msg->