PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pgstat.c
Go to the documentation of this file.
1 /* ----------
2  * pgstat.c
3  *
4  * All the statistics collector stuff hacked up in one big, ugly file.
5  *
6  * TODO: - Separate collector, postmaster and backend stuff
7  * into different files.
8  *
9  * - Add some automatic call for pgstat vacuuming.
10  *
11  * - Add a pgstat config column to pg_database, so this
12  * entire thing can be enabled/disabled on a per db basis.
13  *
14  * Copyright (c) 2001-2017, PostgreSQL Global Development Group
15  *
16  * src/backend/postmaster/pgstat.c
17  * ----------
18  */
19 #include "postgres.h"
20 
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/param.h>
24 #include <sys/time.h>
25 #include <sys/socket.h>
26 #include <netdb.h>
27 #include <netinet/in.h>
28 #include <arpa/inet.h>
29 #include <signal.h>
30 #include <time.h>
31 #ifdef HAVE_SYS_SELECT_H
32 #include <sys/select.h>
33 #endif
34 
35 #include "pgstat.h"
36 
37 #include "access/heapam.h"
38 #include "access/htup_details.h"
39 #include "access/transam.h"
40 #include "access/twophase_rmgr.h"
41 #include "access/xact.h"
42 #include "catalog/pg_database.h"
43 #include "catalog/pg_proc.h"
44 #include "common/ip.h"
45 #include "libpq/libpq.h"
46 #include "libpq/pqsignal.h"
47 #include "mb/pg_wchar.h"
48 #include "miscadmin.h"
49 #include "pg_trace.h"
50 #include "postmaster/autovacuum.h"
52 #include "postmaster/postmaster.h"
53 #include "replication/walsender.h"
54 #include "storage/backendid.h"
55 #include "storage/dsm.h"
56 #include "storage/fd.h"
57 #include "storage/ipc.h"
58 #include "storage/latch.h"
59 #include "storage/lmgr.h"
60 #include "storage/pg_shmem.h"
61 #include "storage/procsignal.h"
62 #include "storage/sinvaladt.h"
63 #include "utils/ascii.h"
64 #include "utils/guc.h"
65 #include "utils/memutils.h"
66 #include "utils/ps_status.h"
67 #include "utils/rel.h"
68 #include "utils/snapmgr.h"
69 #include "utils/timestamp.h"
70 #include "utils/tqual.h"
71 
72 
73 /* ----------
74  * Timer definitions.
75  * ----------
76  */
77 #define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
78  * updates; in milliseconds. */
79 
80 #define PGSTAT_RETRY_DELAY 10 /* How long to wait between checks for
81  * a new file; in milliseconds. */
82 
83 #define PGSTAT_MAX_WAIT_TIME 10000 /* Maximum time to wait for a stats
84  * file update; in milliseconds. */
85 
86 #define PGSTAT_INQ_INTERVAL 640 /* How often to ping the collector for
87  * a new file; in milliseconds. */
88 
89 #define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a
90  * failed statistics collector; in
91  * seconds. */
92 
93 #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
94 #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
95 
96 
97 /* ----------
98  * The initial size hints for the hash tables used in the collector.
99  * ----------
100  */
101 #define PGSTAT_DB_HASH_SIZE 16
102 #define PGSTAT_TAB_HASH_SIZE 512
103 #define PGSTAT_FUNCTION_HASH_SIZE 512
104 
105 
106 /* ----------
107  * Total number of backends including auxiliary
108  *
109  * We reserve a slot for each possible BackendId, plus one for each
110  * possible auxiliary process type. (This scheme assumes there is not
111  * more than one of any auxiliary process type at a time.) MaxBackends
112  * includes autovacuum workers and background workers as well.
113  * ----------
114  */
115 #define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
116 
117 
118 /* ----------
119  * GUC parameters
120  * ----------
121  */
123 bool pgstat_track_counts = false;
126 
127 /* ----------
128  * Built from GUC parameter
129  * ----------
130  */
134 
135 /*
136  * BgWriter global statistics counters (unused in other processes).
137  * Stored directly in a stats message structure so it can be sent
138  * without needing to copy things around. We assume this inits to zeroes.
139  */
141 
142 /* ----------
143  * Local data
144  * ----------
145  */
147 
149 
151 
152 static bool pgStatRunningInCollector = false;
153 
154 /*
155  * Structures in which backends store per-table info that's waiting to be
156  * sent to the collector.
157  *
158  * NOTE: once allocated, TabStatusArray structures are never moved or deleted
159  * for the life of the backend. Also, we zero out the t_id fields of the
160  * contained PgStat_TableStatus structs whenever they are not actively in use.
161  * This allows relcache pgstat_info pointers to be treated as long-lived data,
162  * avoiding repeated searches in pgstat_initstats() when a relation is
163  * repeatedly opened during a transaction.
164  */
165 #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
166 
167 typedef struct TabStatusArray
168 {
169  struct TabStatusArray *tsa_next; /* link to next array, if any */
170  int tsa_used; /* # entries currently used */
173 
175 
176 /*
177  * pgStatTabHash entry
178  */
179 typedef struct TabStatHashEntry
180 {
184 
185 /*
186  * Hash table for O(1) t_id -> tsa_entry lookup
187  */
189 
190 /*
191  * Backends store per-function info that's waiting to be sent to the collector
192  * in this hash table (indexed by function OID).
193  */
195 
196 /*
197  * Indicates if backend has some function stats that it hasn't yet
198  * sent to the collector.
199  */
200 static bool have_function_stats = false;
201 
202 /*
203  * Tuple insertion/deletion counts for an open transaction can't be propagated
204  * into PgStat_TableStatus counters until we know if it is going to commit
205  * or abort. Hence, we keep these counts in per-subxact structs that live
206  * in TopTransactionContext. This data structure is designed on the assumption
207  * that subxacts won't usually modify very many tables.
208  */
209 typedef struct PgStat_SubXactStatus
210 {
211  int nest_level; /* subtransaction nest level */
212  struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
213  PgStat_TableXactStatus *first; /* head of list for this subxact */
215 
217 
218 static int pgStatXactCommit = 0;
219 static int pgStatXactRollback = 0;
222 
223 /* Record that's written to 2PC state file when pgstat state is persisted */
224 typedef struct TwoPhasePgStatRecord
225 {
226  PgStat_Counter tuples_inserted; /* tuples inserted in xact */
227  PgStat_Counter tuples_updated; /* tuples updated in xact */
228  PgStat_Counter tuples_deleted; /* tuples deleted in xact */
229  PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
230  PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
231  PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
232  Oid t_id; /* table's OID */
233  bool t_shared; /* is it a shared catalog? */
234  bool t_truncated; /* was the relation truncated? */
236 
237 /*
238  * Info about current "snapshot" of stats file
239  */
242 
243 /* Status for backends including auxiliary */
245 
246 /* Total number of backends including auxiliary */
247 static int localNumBackends = 0;
248 
249 /*
250  * Cluster wide statistics, kept in the stats collector.
251  * Contains statistics that are not collected per database
252  * or per table.
253  */
256 
257 /*
258  * List of OIDs of databases we need to write out. If an entry is InvalidOid,
259  * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
260  * will write both that DB's data and the shared stats.
261  */
263 
264 /* Signal handler flags */
265 static volatile bool need_exit = false;
266 static volatile bool got_SIGHUP = false;
267 
268 /*
269  * Total time charged to functions so far in the current backend.
270  * We use this to help separate "self" and "other" time charges.
271  * (We assume this initializes to zero.)
272  */
274 
275 
276 /* ----------
277  * Local function forward declarations
278  * ----------
279  */
280 #ifdef EXEC_BACKEND
281 static pid_t pgstat_forkexec(void);
282 #endif
283 
284 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
285 static void pgstat_exit(SIGNAL_ARGS);
286 static void pgstat_beshutdown_hook(int code, Datum arg);
288 
289 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
291  Oid tableoid, bool create);
292 static void pgstat_write_statsfiles(bool permanent, bool allDbs);
293 static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent);
294 static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
295 static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
296 static void backend_read_statsfile(void);
297 static void pgstat_read_current_status(void);
298 
299 static bool pgstat_write_statsfile_needed(void);
300 static bool pgstat_db_requested(Oid databaseid);
301 
302 static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
303 static void pgstat_send_funcstats(void);
304 static HTAB *pgstat_collect_oids(Oid catalogid);
305 
306 static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
307 
308 static void pgstat_setup_memcxt(void);
309 
310 static const char *pgstat_get_wait_activity(WaitEventActivity w);
311 static const char *pgstat_get_wait_client(WaitEventClient w);
312 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
313 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
314 static const char *pgstat_get_wait_io(WaitEventIO w);
315 
316 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
317 static void pgstat_send(void *msg, int len);
318 
319 static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
320 static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
321 static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
322 static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
323 static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
326 static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
327 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
328 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
329 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
330 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
331 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
332 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
334 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
335 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
336 
337 /* ------------------------------------------------------------
338  * Public functions called from postmaster follow
339  * ------------------------------------------------------------
340  */
341 
342 /* ----------
343  * pgstat_init() -
344  *
345  * Called from postmaster at startup. Create the resources required
346  * by the statistics collector process. If unable to do so, do not
347  * fail --- better to let the postmaster start with stats collection
348  * disabled.
349  * ----------
350  */
351 void
353 {
354  ACCEPT_TYPE_ARG3 alen;
355  struct addrinfo *addrs = NULL,
356  *addr,
357  hints;
358  int ret;
359  fd_set rset;
360  struct timeval tv;
361  char test_byte;
362  int sel_res;
363  int tries = 0;
364 
365 #define TESTBYTEVAL ((char) 199)
366 
367  /*
368  * This static assertion verifies that we didn't mess up the calculations
369  * involved in selecting maximum payload sizes for our UDP messages.
370  * Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
371  * be silent performance loss from fragmentation, it seems worth having a
372  * compile-time cross-check that we didn't.
373  */
375  "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");
376 
377  /*
378  * Create the UDP socket for sending and receiving statistics messages
379  */
380  hints.ai_flags = AI_PASSIVE;
381  hints.ai_family = AF_UNSPEC;
382  hints.ai_socktype = SOCK_DGRAM;
383  hints.ai_protocol = 0;
384  hints.ai_addrlen = 0;
385  hints.ai_addr = NULL;
386  hints.ai_canonname = NULL;
387  hints.ai_next = NULL;
388  ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
389  if (ret || !addrs)
390  {
391  ereport(LOG,
392  (errmsg("could not resolve \"localhost\": %s",
393  gai_strerror(ret))));
394  goto startup_failed;
395  }
396 
397  /*
398  * On some platforms, pg_getaddrinfo_all() may return multiple addresses
399  * only one of which will actually work (eg, both IPv6 and IPv4 addresses
400  * when kernel will reject IPv6). Worse, the failure may occur at the
401  * bind() or perhaps even connect() stage. So we must loop through the
402  * results till we find a working combination. We will generate LOG
403  * messages, but no error, for bogus combinations.
404  */
405  for (addr = addrs; addr; addr = addr->ai_next)
406  {
407 #ifdef HAVE_UNIX_SOCKETS
408  /* Ignore AF_UNIX sockets, if any are returned. */
409  if (addr->ai_family == AF_UNIX)
410  continue;
411 #endif
412 
413  if (++tries > 1)
414  ereport(LOG,
415  (errmsg("trying another address for the statistics collector")));
416 
417  /*
418  * Create the socket.
419  */
420  if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) == PGINVALID_SOCKET)
421  {
422  ereport(LOG,
424  errmsg("could not create socket for statistics collector: %m")));
425  continue;
426  }
427 
428  /*
429  * Bind it to a kernel assigned port on localhost and get the assigned
430  * port via getsockname().
431  */
432  if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
433  {
434  ereport(LOG,
436  errmsg("could not bind socket for statistics collector: %m")));
439  continue;
440  }
441 
442  alen = sizeof(pgStatAddr);
443  if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
444  {
445  ereport(LOG,
447  errmsg("could not get address of socket for statistics collector: %m")));
450  continue;
451  }
452 
453  /*
454  * Connect the socket to its own address. This saves a few cycles by
455  * not having to respecify the target address on every send. This also
456  * provides a kernel-level check that only packets from this same
457  * address will be received.
458  */
459  if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
460  {
461  ereport(LOG,
463  errmsg("could not connect socket for statistics collector: %m")));
466  continue;
467  }
468 
469  /*
470  * Try to send and receive a one-byte test message on the socket. This
471  * is to catch situations where the socket can be created but will not
472  * actually pass data (for instance, because kernel packet filtering
473  * rules prevent it).
474  */
475  test_byte = TESTBYTEVAL;
476 
477 retry1:
478  if (send(pgStatSock, &test_byte, 1, 0) != 1)
479  {
480  if (errno == EINTR)
481  goto retry1; /* if interrupted, just retry */
482  ereport(LOG,
484  errmsg("could not send test message on socket for statistics collector: %m")));
487  continue;
488  }
489 
490  /*
491  * There could possibly be a little delay before the message can be
492  * received. We arbitrarily allow up to half a second before deciding
493  * it's broken.
494  */
495  for (;;) /* need a loop to handle EINTR */
496  {
497  FD_ZERO(&rset);
498  FD_SET(pgStatSock, &rset);
499 
500  tv.tv_sec = 0;
501  tv.tv_usec = 500000;
502  sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
503  if (sel_res >= 0 || errno != EINTR)
504  break;
505  }
506  if (sel_res < 0)
507  {
508  ereport(LOG,
510  errmsg("select() failed in statistics collector: %m")));
513  continue;
514  }
515  if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
516  {
517  /*
518  * This is the case we actually think is likely, so take pains to
519  * give a specific message for it.
520  *
521  * errno will not be set meaningfully here, so don't use it.
522  */
523  ereport(LOG,
524  (errcode(ERRCODE_CONNECTION_FAILURE),
525  errmsg("test message did not get through on socket for statistics collector")));
528  continue;
529  }
530 
531  test_byte++; /* just make sure variable is changed */
532 
533 retry2:
534  if (recv(pgStatSock, &test_byte, 1, 0) != 1)
535  {
536  if (errno == EINTR)
537  goto retry2; /* if interrupted, just retry */
538  ereport(LOG,
540  errmsg("could not receive test message on socket for statistics collector: %m")));
543  continue;
544  }
545 
546  if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
547  {
548  ereport(LOG,
549  (errcode(ERRCODE_INTERNAL_ERROR),
550  errmsg("incorrect test message transmission on socket for statistics collector")));
553  continue;
554  }
555 
556  /* If we get here, we have a working socket */
557  break;
558  }
559 
560  /* Did we find a working address? */
561  if (!addr || pgStatSock == PGINVALID_SOCKET)
562  goto startup_failed;
563 
564  /*
565  * Set the socket to non-blocking IO. This ensures that if the collector
566  * falls behind, statistics messages will be discarded; backends won't
567  * block waiting to send messages to the collector.
568  */
570  {
571  ereport(LOG,
573  errmsg("could not set statistics collector socket to nonblocking mode: %m")));
574  goto startup_failed;
575  }
576 
577  pg_freeaddrinfo_all(hints.ai_family, addrs);
578 
579  return;
580 
581 startup_failed:
582  ereport(LOG,
583  (errmsg("disabling statistics collector for lack of working socket")));
584 
585  if (addrs)
586  pg_freeaddrinfo_all(hints.ai_family, addrs);
587 
591 
592  /*
593  * Adjust GUC variables to suppress useless activity, and for debugging
594  * purposes (seeing track_counts off is a clue that we failed here). We
595  * use PGC_S_OVERRIDE because there is no point in trying to turn it back
596  * on from postgresql.conf without a restart.
597  */
598  SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
599 }
600 
601 /*
602  * subroutine for pgstat_reset_all
603  */
604 static void
606 {
607  DIR *dir;
608  struct dirent *entry;
609  char fname[MAXPGPATH * 2];
610 
611  dir = AllocateDir(directory);
612  while ((entry = ReadDir(dir, directory)) != NULL)
613  {
614  int nchars;
615  Oid tmp_oid;
616 
617  /*
618  * Skip directory entries that don't match the file names we write.
619  * See get_dbstat_filename for the database-specific pattern.
620  */
621  if (strncmp(entry->d_name, "global.", 7) == 0)
622  nchars = 7;
623  else
624  {
625  nchars = 0;
626  (void) sscanf(entry->d_name, "db_%u.%n",
627  &tmp_oid, &nchars);
628  if (nchars <= 0)
629  continue;
630  /* %u allows leading whitespace, so reject that */
631  if (strchr("0123456789", entry->d_name[3]) == NULL)
632  continue;
633  }
634 
635  if (strcmp(entry->d_name + nchars, "tmp") != 0 &&
636  strcmp(entry->d_name + nchars, "stat") != 0)
637  continue;
638 
639  snprintf(fname, sizeof(fname), "%s/%s", directory,
640  entry->d_name);
641  unlink(fname);
642  }
643  FreeDir(dir);
644 }
645 
646 /*
647  * pgstat_reset_all() -
648  *
649  * Remove the stats files. This is currently used only if WAL
650  * recovery is needed after a crash.
651  */
652 void
654 {
657 }
658 
659 #ifdef EXEC_BACKEND
660 
661 /*
662  * pgstat_forkexec() -
663  *
664  * Format up the arglist for, then fork and exec, the statistics collector process. Returns whatever postmaster_forkexec() returns.
665  */
666 static pid_t
667 pgstat_forkexec(void)
668 {
669  char *av[10]; /* argument vector handed to postmaster_forkexec */
670  int ac = 0; /* number of argument slots filled so far */
671 
672  av[ac++] = "postgres"; /* first argument: program name */
673  av[ac++] = "--forkcol"; /* NOTE(review): presumably the switch that selects the collector in the child -- confirm against the exec'd binary's argument parsing */
674  av[ac++] = NULL; /* filled in by postmaster_forkexec */
675 
676  av[ac] = NULL; /* terminate the vector; ac itself is not advanced */
677  Assert(ac < lengthof(av)); /* terminator slot must lie inside av[] */
678 
679  return postmaster_forkexec(ac, av); /* result is passed back unchanged to the caller */
680 }
681 #endif /* EXEC_BACKEND */
682 
683 
684 /*
685  * pgstat_start() -
686  *
687  * Called from postmaster at startup or after an existing collector
688  * died. Attempt to fire up a fresh statistics collector.
689  *
690  * Returns PID of child process, or 0 on failure.
691  *
692  * Note: on failure, we will be called again from the postmaster main loop.
693  */
694 int
696 {
697  time_t curtime;
698  pid_t pgStatPid;
699 
700  /*
701  * Check that the socket is there, else pgstat_init failed and we can do
702  * nothing useful.
703  */
705  return 0;
706 
707  /*
708  * Do nothing if too soon since last collector start. This is a safety
709  * valve to protect against continuous respawn attempts if the collector
710  * is dying immediately at launch. Note that since we will be re-called
711  * from the postmaster main loop, we will get another chance later.
712  */
713  curtime = time(NULL);
714  if ((unsigned int) (curtime - last_pgstat_start_time) <
715  (unsigned int) PGSTAT_RESTART_INTERVAL)
716  return 0;
717  last_pgstat_start_time = curtime;
718 
719  /*
720  * Okay, fork off the collector.
721  */
722 #ifdef EXEC_BACKEND
723  switch ((pgStatPid = pgstat_forkexec()))
724 #else
725  switch ((pgStatPid = fork_process()))
726 #endif
727  {
728  case -1:
729  ereport(LOG,
730  (errmsg("could not fork statistics collector: %m")));
731  return 0;
732 
733 #ifndef EXEC_BACKEND
734  case 0:
735  /* in postmaster child ... */
737 
738  /* Close the postmaster's sockets */
739  ClosePostmasterPorts(false);
740 
741  /* Drop our connection to postmaster's shared memory, as well */
742  dsm_detach_all();
744 
746  break;
747 #endif
748 
749  default:
750  return (int) pgStatPid;
751  }
752 
753  /* shouldn't get here */
754  return 0;
755 }
756 
757 void
759 {
761 }
762 
763 /* ------------------------------------------------------------
764  * Public functions used by backends follow
765  *------------------------------------------------------------
766  */
767 
768 
769 /* ----------
770  * pgstat_report_stat() -
771  *
772  * Must be called by processes that perform DML: tcop/postgres.c, logical
773  * receiver processes, SPI worker, etc. to send the so far collected
774  * per-table and function usage statistics to the collector. Note that this
775  * is called only when not within a transaction, so it is fair to use
776  * transaction stop time as an approximation of current time.
777  * ----------
778  */
779 void
781 {
782  /* we assume this inits to all zeroes: */
783  static const PgStat_TableCounts all_zeroes;
784  static TimestampTz last_report = 0;
785 
787  PgStat_MsgTabstat regular_msg;
788  PgStat_MsgTabstat shared_msg;
789  TabStatusArray *tsa;
790  int i;
791 
792  /* Don't expend a clock check if nothing to do */
793  if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
794  pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
796  return;
797 
798  /*
799  * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
800  * msec since we last sent one, or the caller wants to force stats out.
801  */
803  if (!force &&
805  return;
806  last_report = now;
807 
808  /*
809  * Scan through the TabStatusArray struct(s) to find tables that actually
810  * have counts, and build messages to send. We have to separate shared
811  * relations from regular ones because the databaseid field in the message
812  * header has to depend on that.
813  */
814  regular_msg.m_databaseid = MyDatabaseId;
815  shared_msg.m_databaseid = InvalidOid;
816  regular_msg.m_nentries = 0;
817  shared_msg.m_nentries = 0;
818 
819  for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
820  {
821  for (i = 0; i < tsa->tsa_used; i++)
822  {
823  PgStat_TableStatus *entry = &tsa->tsa_entries[i];
824  PgStat_MsgTabstat *this_msg;
825  PgStat_TableEntry *this_ent;
826 
827  /* Shouldn't have any pending transaction-dependent counts */
828  Assert(entry->trans == NULL);
829 
830  /*
831  * Ignore entries that didn't accumulate any actual counts, such
832  * as indexes that were opened by the planner but not used.
833  */
834  if (memcmp(&entry->t_counts, &all_zeroes,
835  sizeof(PgStat_TableCounts)) == 0)
836  continue;
837 
838  /*
839  * OK, insert data into the appropriate message, and send if full.
840  */
841  this_msg = entry->t_shared ? &shared_msg : &regular_msg;
842  this_ent = &this_msg->m_entry[this_msg->m_nentries];
843  this_ent->t_id = entry->t_id;
844  memcpy(&this_ent->t_counts, &entry->t_counts,
845  sizeof(PgStat_TableCounts));
846  if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
847  {
848  pgstat_send_tabstat(this_msg);
849  this_msg->m_nentries = 0;
850  }
851  }
852  /* zero out TableStatus structs after use */
853  MemSet(tsa->tsa_entries, 0,
854  tsa->tsa_used * sizeof(PgStat_TableStatus));
855  tsa->tsa_used = 0;
856  }
857 
858  /*
859  * pgStatTabHash is outdated at this point, so we have to clean it up.
860  * hash_destroy() will also remove the hash memory context allocated in
861  * make_sure_stat_tab_initialized().
862  */
863  hash_destroy(pgStatTabHash);
864  pgStatTabHash = NULL;
865 
866  /*
867  * Send partial messages. Make sure that any pending xact commit/abort
868  * gets counted, even if there are no table stats to send.
869  */
870  if (regular_msg.m_nentries > 0 ||
872  pgstat_send_tabstat(&regular_msg);
873  if (shared_msg.m_nentries > 0)
874  pgstat_send_tabstat(&shared_msg);
875 
876  /* Now, send function statistics */
878 }
879 
880 /*
881  * Subroutine for pgstat_report_stat: finish and send a tabstat message
882  */
883 static void
885 {
886  int n;
887  int len;
888 
889  /* It's unlikely we'd get here with no socket, but maybe not impossible */
891  return;
892 
893  /*
894  * Report and reset accumulated xact commit/rollback and I/O timings
895  * whenever we send a normal tabstat message
896  */
897  if (OidIsValid(tsmsg->m_databaseid))
898  {
903  pgStatXactCommit = 0;
904  pgStatXactRollback = 0;
907  }
908  else
909  {
910  tsmsg->m_xact_commit = 0;
911  tsmsg->m_xact_rollback = 0;
912  tsmsg->m_block_read_time = 0;
913  tsmsg->m_block_write_time = 0;
914  }
915 
916  n = tsmsg->m_nentries;
917  len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
918  n * sizeof(PgStat_TableEntry);
919 
921  pgstat_send(tsmsg, len);
922 }
923 
924 /*
925  * Subroutine for pgstat_report_stat: populate and send a function stat message
926  */
927 static void
929 {
930  /* we assume this inits to all zeroes: */
931  static const PgStat_FunctionCounts all_zeroes;
932 
933  PgStat_MsgFuncstat msg;
935  HASH_SEQ_STATUS fstat;
936 
937  if (pgStatFunctions == NULL)
938  return;
939 
942  msg.m_nentries = 0;
943 
944  hash_seq_init(&fstat, pgStatFunctions);
945  while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
946  {
947  PgStat_FunctionEntry *m_ent;
948 
949  /* Skip it if no counts accumulated since last time */
950  if (memcmp(&entry->f_counts, &all_zeroes,
951  sizeof(PgStat_FunctionCounts)) == 0)
952  continue;
953 
954  /* need to convert format of time accumulators */
955  m_ent = &msg.m_entry[msg.m_nentries];
956  m_ent->f_id = entry->f_id;
957  m_ent->f_numcalls = entry->f_counts.f_numcalls;
960 
961  if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
962  {
963  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
964  msg.m_nentries * sizeof(PgStat_FunctionEntry));
965  msg.m_nentries = 0;
966  }
967 
968  /* reset the entry's counts */
969  MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
970  }
971 
972  if (msg.m_nentries > 0)
973  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
974  msg.m_nentries * sizeof(PgStat_FunctionEntry));
975 
976  have_function_stats = false;
977 }
978 
979 
980 /* ----------
981  * pgstat_vacuum_stat() -
982  *
983  * Tell the collector about objects it can get rid of.
984  * ----------
985  */
986 void
988 {
989  HTAB *htab;
990  PgStat_MsgTabpurge msg;
991  PgStat_MsgFuncpurge f_msg;
992  HASH_SEQ_STATUS hstat;
993  PgStat_StatDBEntry *dbentry;
994  PgStat_StatTabEntry *tabentry;
995  PgStat_StatFuncEntry *funcentry;
996  int len;
997 
999  return;
1000 
1001  /*
1002  * If not done for this transaction, read the statistics collector stats
1003  * file into some hash tables.
1004  */
1006 
1007  /*
1008  * Read pg_database and make a list of OIDs of all existing databases
1009  */
1011 
1012  /*
1013  * Search the database hash table for dead databases and tell the
1014  * collector to drop them.
1015  */
1016  hash_seq_init(&hstat, pgStatDBHash);
1017  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
1018  {
1019  Oid dbid = dbentry->databaseid;
1020 
1022 
1023  /* the DB entry for shared tables (with InvalidOid) is never dropped */
1024  if (OidIsValid(dbid) &&
1025  hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
1026  pgstat_drop_database(dbid);
1027  }
1028 
1029  /* Clean up */
1030  hash_destroy(htab);
1031 
1032  /*
1033  * Lookup our own database entry; if not found, nothing more to do.
1034  */
1035  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1036  (void *) &MyDatabaseId,
1037  HASH_FIND, NULL);
1038  if (dbentry == NULL || dbentry->tables == NULL)
1039  return;
1040 
1041  /*
1042  * Similarly to above, make a list of all known relations in this DB.
1043  */
1045 
1046  /*
1047  * Initialize our messages table counter to zero
1048  */
1049  msg.m_nentries = 0;
1050 
1051  /*
1052  * Check for all tables listed in stats hashtable if they still exist.
1053  */
1054  hash_seq_init(&hstat, dbentry->tables);
1055  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
1056  {
1057  Oid tabid = tabentry->tableid;
1058 
1060 
1061  if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
1062  continue;
1063 
1064  /*
1065  * Not there, so add this table's Oid to the message
1066  */
1067  msg.m_tableid[msg.m_nentries++] = tabid;
1068 
1069  /*
1070  * If the message is full, send it out and reinitialize to empty
1071  */
1072  if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
1073  {
1074  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1075  +msg.m_nentries * sizeof(Oid);
1076 
1078  msg.m_databaseid = MyDatabaseId;
1079  pgstat_send(&msg, len);
1080 
1081  msg.m_nentries = 0;
1082  }
1083  }
1084 
1085  /*
1086  * Send the rest
1087  */
1088  if (msg.m_nentries > 0)
1089  {
1090  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1091  +msg.m_nentries * sizeof(Oid);
1092 
1094  msg.m_databaseid = MyDatabaseId;
1095  pgstat_send(&msg, len);
1096  }
1097 
1098  /* Clean up */
1099  hash_destroy(htab);
1100 
1101  /*
1102  * Now repeat the above steps for functions. However, we needn't bother
1103  * in the common case where no function stats are being collected.
1104  */
1105  if (dbentry->functions != NULL &&
1106  hash_get_num_entries(dbentry->functions) > 0)
1107  {
1109 
1111  f_msg.m_databaseid = MyDatabaseId;
1112  f_msg.m_nentries = 0;
1113 
1114  hash_seq_init(&hstat, dbentry->functions);
1115  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
1116  {
1117  Oid funcid = funcentry->functionid;
1118 
1120 
1121  if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
1122  continue;
1123 
1124  /*
1125  * Not there, so add this function's Oid to the message
1126  */
1127  f_msg.m_functionid[f_msg.m_nentries++] = funcid;
1128 
1129  /*
1130  * If the message is full, send it out and reinitialize to empty
1131  */
1132  if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
1133  {
1134  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1135  +f_msg.m_nentries * sizeof(Oid);
1136 
1137  pgstat_send(&f_msg, len);
1138 
1139  f_msg.m_nentries = 0;
1140  }
1141  }
1142 
1143  /*
1144  * Send the rest
1145  */
1146  if (f_msg.m_nentries > 0)
1147  {
1148  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1149  +f_msg.m_nentries * sizeof(Oid);
1150 
1151  pgstat_send(&f_msg, len);
1152  }
1153 
1154  hash_destroy(htab);
1155  }
1156 }
1157 
1158 
1159 /* ----------
1160  * pgstat_collect_oids() -
1161  *
1162  * Collect the OIDs of all objects listed in the specified system catalog
1163  * into a temporary hash table. Caller should hash_destroy the result
1164  * when done with it. (However, we make the table in CurrentMemoryContext
1165  * so that it will be freed properly in the event of an error.)
1166  * ----------
1167  */
1168 static HTAB *
1170 {
1171  HTAB *htab;
1172  HASHCTL hash_ctl;
1173  Relation rel;
1174  HeapScanDesc scan;
1175  HeapTuple tup;
1176  Snapshot snapshot;
1177 
1178  memset(&hash_ctl, 0, sizeof(hash_ctl));
1179  hash_ctl.keysize = sizeof(Oid);
1180  hash_ctl.entrysize = sizeof(Oid);
1181  hash_ctl.hcxt = CurrentMemoryContext;
1182  htab = hash_create("Temporary table of OIDs",
1184  &hash_ctl,
1186 
1187  rel = heap_open(catalogid, AccessShareLock);
1188  snapshot = RegisterSnapshot(GetLatestSnapshot());
1189  scan = heap_beginscan(rel, snapshot, 0, NULL);
1190  while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
1191  {
1192  Oid thisoid = HeapTupleGetOid(tup);
1193 
1195 
1196  (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
1197  }
1198  heap_endscan(scan);
1199  UnregisterSnapshot(snapshot);
1201 
1202  return htab;
1203 }
1204 
1205 
1206 /* ----------
1207  * pgstat_drop_database() -
1208  *
1209  * Tell the collector that we just dropped a database.
1210  * (If the message gets lost, we will still clean the dead DB eventually
1211  * via future invocations of pgstat_vacuum_stat().)
1212  * ----------
1213  */
1214 void
1216 {
1217  PgStat_MsgDropdb msg;
1218 
1220  return;
1221 
1223  msg.m_databaseid = databaseid;
1224  pgstat_send(&msg, sizeof(msg));
1225 }
1226 
1227 
1228 /* ----------
1229  * pgstat_drop_relation() -
1230  *
1231  * Tell the collector that we just dropped a relation.
1232  * (If the message gets lost, we will still clean the dead entry eventually
1233  * via future invocations of pgstat_vacuum_stat().)
1234  *
1235  * Currently not used for lack of any good place to call it; we rely
1236  * entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
1237  * ----------
1238  */
1239 #ifdef NOT_USED
1240 void
1241 pgstat_drop_relation(Oid relid)
1242 {
1243  PgStat_MsgTabpurge msg;
1244  int len;
1245 
1247  return;
1248 
1249  msg.m_tableid[0] = relid;
1250  msg.m_nentries = 1;
1251 
1252  len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +sizeof(Oid);
1253 
1255  msg.m_databaseid = MyDatabaseId;
1256  pgstat_send(&msg, len);
1257 }
1258 #endif /* NOT_USED */
1259 
1260 
1261 /* ----------
1262  * pgstat_reset_counters() -
1263  *
1264  * Tell the statistics collector to reset counters for our database.
1265  *
1266  * Permission checking for this function is managed through the normal
1267  * GRANT system.
1268  * ----------
1269  */
1270 void
1272 {
1274 
1276  return;
1277 
1279  msg.m_databaseid = MyDatabaseId;
1280  pgstat_send(&msg, sizeof(msg));
1281 }
1282 
1283 /* ----------
1284  * pgstat_reset_shared_counters() -
1285  *
1286  * Tell the statistics collector to reset cluster-wide shared counters.
1287  *
1288  * Permission checking for this function is managed through the normal
1289  * GRANT system.
1290  * ----------
1291  */
1292 void
1293 pgstat_reset_shared_counters(const char *target)
1294 {
1296 
1298  return;
1299 
1300  if (strcmp(target, "archiver") == 0)
1302  else if (strcmp(target, "bgwriter") == 0)
1304  else
1305  ereport(ERROR,
1306  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1307  errmsg("unrecognized reset target: \"%s\"", target),
1308  errhint("Target must be \"archiver\" or \"bgwriter\".")));
1309 
1311  pgstat_send(&msg, sizeof(msg));
1312 }
1313 
1314 /* ----------
1315  * pgstat_reset_single_counter() -
1316  *
1317  * Tell the statistics collector to reset a single counter.
1318  *
1319  * Permission checking for this function is managed through the normal
1320  * GRANT system.
1321  * ----------
1322  */
1323 void
1325 {
1327 
1329  return;
1330 
1332  msg.m_databaseid = MyDatabaseId;
1333  msg.m_resettype = type;
1334  msg.m_objectid = objoid;
1335 
1336  pgstat_send(&msg, sizeof(msg));
1337 }
1338 
1339 /* ----------
1340  * pgstat_report_autovac() -
1341  *
1342  * Called from autovacuum.c to report startup of an autovacuum process.
1343  * We are called before InitPostgres is done, so can't rely on MyDatabaseId;
1344  * the db OID must be passed in, instead.
1345  * ----------
1346  */
1347 void
1349 {
1351 
1353  return;
1354 
1356  msg.m_databaseid = dboid;
1358 
1359  pgstat_send(&msg, sizeof(msg));
1360 }
1361 
1362 
1363 /* ---------
1364  * pgstat_report_vacuum() -
1365  *
1366  * Tell the collector about the table we just vacuumed.
1367  * ---------
1368  */
1369 void
1370 pgstat_report_vacuum(Oid tableoid, bool shared,
1371  PgStat_Counter livetuples, PgStat_Counter deadtuples)
1372 {
1373  PgStat_MsgVacuum msg;
1374 
1376  return;
1377 
1379  msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
1380  msg.m_tableoid = tableoid;
1383  msg.m_live_tuples = livetuples;
1384  msg.m_dead_tuples = deadtuples;
1385  pgstat_send(&msg, sizeof(msg));
1386 }
1387 
1388 /* --------
1389  * pgstat_report_analyze() -
1390  *
1391  * Tell the collector about the table we just analyzed.
1392  *
1393  * Caller must provide new live- and dead-tuples estimates, as well as a
1394  * flag indicating whether to reset the changes_since_analyze counter.
1395  * --------
1396  */
1397 void
1399  PgStat_Counter livetuples, PgStat_Counter deadtuples,
1400  bool resetcounter)
1401 {
1402  PgStat_MsgAnalyze msg;
1403 
1405  return;
1406 
1407  /*
1408  * Unlike VACUUM, ANALYZE might be running inside a transaction that has
1409  * already inserted and/or deleted rows in the target table. ANALYZE will
1410  * have counted such rows as live or dead respectively. Because we will
1411  * report our counts of such rows at transaction end, we should subtract
1412  * off these counts from what we send to the collector now, else they'll
1413  * be double-counted after commit. (This approach also ensures that the
1414  * collector ends up with the right numbers if we abort instead of
1415  * committing.)
1416  */
1417  if (rel->pgstat_info != NULL)
1418  {
1420 
1421  for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
1422  {
1423  livetuples -= trans->tuples_inserted - trans->tuples_deleted;
1424  deadtuples -= trans->tuples_updated + trans->tuples_deleted;
1425  }
1426  /* count stuff inserted by already-aborted subxacts, too */
1427  deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
1428  /* Since ANALYZE's counts are estimates, we could have underflowed */
1429  livetuples = Max(livetuples, 0);
1430  deadtuples = Max(deadtuples, 0);
1431  }
1432 
1434  msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
1435  msg.m_tableoid = RelationGetRelid(rel);
1437  msg.m_resetcounter = resetcounter;
1439  msg.m_live_tuples = livetuples;
1440  msg.m_dead_tuples = deadtuples;
1441  pgstat_send(&msg, sizeof(msg));
1442 }
1443 
1444 /* --------
1445  * pgstat_report_recovery_conflict() -
1446  *
1447  * Tell the collector about a Hot Standby recovery conflict.
1448  * --------
1449  */
1450 void
1452 {
1454 
1456  return;
1457 
1459  msg.m_databaseid = MyDatabaseId;
1460  msg.m_reason = reason;
1461  pgstat_send(&msg, sizeof(msg));
1462 }
1463 
1464 /* --------
1465  * pgstat_report_deadlock() -
1466  *
1467  * Tell the collector that a deadlock was detected.
1468  * --------
1469  */
1470 void
1472 {
1473  PgStat_MsgDeadlock msg;
1474 
1476  return;
1477 
1479  msg.m_databaseid = MyDatabaseId;
1480  pgstat_send(&msg, sizeof(msg));
1481 }
1482 
1483 /* --------
1484  * pgstat_report_tempfile() -
1485  *
1486  * Tell the collector about a temporary file.
1487  * --------
1488  */
1489 void
1490 pgstat_report_tempfile(size_t filesize)
1491 {
1492  PgStat_MsgTempFile msg;
1493 
1495  return;
1496 
1498  msg.m_databaseid = MyDatabaseId;
1499  msg.m_filesize = filesize;
1500  pgstat_send(&msg, sizeof(msg));
1501 }
1502 
1503 
1504 /* ----------
1505  * pgstat_ping() -
1506  *
1507  * Send some junk data to the collector to increase traffic.
1508  * ----------
1509  */
1510 void
1512 {
1513  PgStat_MsgDummy msg;
1514 
1516  return;
1517 
1519  pgstat_send(&msg, sizeof(msg));
1520 }
1521 
1522 /* ----------
1523  * pgstat_send_inquiry() -
1524  *
1525  * Notify collector that we need fresh data.
1526  * ----------
1527  */
1528 static void
1529 pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
1530 {
1531  PgStat_MsgInquiry msg;
1532 
1534  msg.clock_time = clock_time;
1535  msg.cutoff_time = cutoff_time;
1536  msg.databaseid = databaseid;
1537  pgstat_send(&msg, sizeof(msg));
1538 }
1539 
1540 
1541 /*
1542  * Initialize function call usage data.
1543  * Called by the executor before invoking a function.
1544  */
1545 void
1548 {
1549  PgStat_BackendFunctionEntry *htabent;
1550  bool found;
1551 
1552  if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
1553  {
1554  /* stats not wanted */
1555  fcu->fs = NULL;
1556  return;
1557  }
1558 
1559  if (!pgStatFunctions)
1560  {
1561  /* First time through - initialize function stat table */
1562  HASHCTL hash_ctl;
1563 
1564  memset(&hash_ctl, 0, sizeof(hash_ctl));
1565  hash_ctl.keysize = sizeof(Oid);
1566  hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
1567  pgStatFunctions = hash_create("Function stat entries",
1569  &hash_ctl,
1570  HASH_ELEM | HASH_BLOBS);
1571  }
1572 
1573  /* Get the stats entry for this function, create if necessary */
1574  htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
1575  HASH_ENTER, &found);
1576  if (!found)
1577  MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));
1578 
1579  fcu->fs = &htabent->f_counts;
1580 
1581  /* save stats for this function, later used to compensate for recursion */
1582  fcu->save_f_total_time = htabent->f_counts.f_total_time;
1583 
1584  /* save current backend-wide total time */
1585  fcu->save_total = total_func_time;
1586 
1587  /* get clock time as of function start */
1589 }
1590 
1591 /*
1592  * find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
1593  * for specified function
1594  *
1595  * If no entry, return NULL, don't create a new one
1596  */
1599 {
1600  if (pgStatFunctions == NULL)
1601  return NULL;
1602 
1603  return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
1604  (void *) &func_id,
1605  HASH_FIND, NULL);
1606 }
1607 
1608 /*
1609  * Calculate function call usage and update stat counters.
1610  * Called by the executor after invoking a function.
1611  *
1612  * In the case of a set-returning function that runs in value-per-call mode,
1613  * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
1614  * calls for what the user considers a single call of the function. The
1615  * finalize flag should be TRUE on the last call.
1616  */
1617 void
1619 {
1620  PgStat_FunctionCounts *fs = fcu->fs;
1621  instr_time f_total;
1622  instr_time f_others;
1623  instr_time f_self;
1624 
1625  /* stats not wanted? */
1626  if (fs == NULL)
1627  return;
1628 
1629  /* total elapsed time in this function call */
1630  INSTR_TIME_SET_CURRENT(f_total);
1631  INSTR_TIME_SUBTRACT(f_total, fcu->f_start);
1632 
1633  /* self usage: elapsed minus anything already charged to other calls */
1634  f_others = total_func_time;
1635  INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
1636  f_self = f_total;
1637  INSTR_TIME_SUBTRACT(f_self, f_others);
1638 
1639  /* update backend-wide total time */
1641 
1642  /*
1643  * Compute the new f_total_time as the total elapsed time added to the
1644  * pre-call value of f_total_time. This is necessary to avoid
1645  * double-counting any time taken by recursive calls of myself. (We do
1646  * not need any similar kluge for self time, since that already excludes
1647  * any recursive calls.)
1648  */
1649  INSTR_TIME_ADD(f_total, fcu->save_f_total_time);
1650 
1651  /* update counters in function stats table */
1652  if (finalize)
1653  fs->f_numcalls++;
1654  fs->f_total_time = f_total;
1655  INSTR_TIME_ADD(fs->f_self_time, f_self);
1656 
1657  /* indicate that we have something to send */
1658  have_function_stats = true;
1659 }
1660 
1661 
1662 /* ----------
1663  * pgstat_initstats() -
1664  *
1665  * Initialize a relcache entry to count access statistics.
1666  * Called whenever a relation is opened.
1667  *
1668  * We assume that a relcache entry's pgstat_info field is zeroed by
1669  * relcache.c when the relcache entry is made; thereafter it is long-lived
1670  * data. We can avoid repeated searches of the TabStatus arrays when the
1671  * same relation is touched repeatedly within a transaction.
1672  * ----------
1673  */
1674 void
1676 {
1677  Oid rel_id = rel->rd_id;
1678  char relkind = rel->rd_rel->relkind;
1679 
1680  /* We only count stats for things that have storage */
1681  if (!(relkind == RELKIND_RELATION ||
1682  relkind == RELKIND_MATVIEW ||
1683  relkind == RELKIND_INDEX ||
1684  relkind == RELKIND_TOASTVALUE ||
1685  relkind == RELKIND_SEQUENCE))
1686  {
1687  rel->pgstat_info = NULL;
1688  return;
1689  }
1690 
1692  {
1693  /* We're not counting at all */
1694  rel->pgstat_info = NULL;
1695  return;
1696  }
1697 
1698  /*
1699  * If we already set up this relation in the current transaction, nothing
1700  * to do.
1701  */
1702  if (rel->pgstat_info != NULL &&
1703  rel->pgstat_info->t_id == rel_id)
1704  return;
1705 
1706  /* Else find or make the PgStat_TableStatus entry, and update link */
1707  rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
1708 }
1709 
1710 /*
1711  * Make sure pgStatTabList and pgStatTabHash are initialized.
1712  */
1713 static void
1715 {
1716  HASHCTL ctl;
1717  MemoryContext new_ctx;
1718 
1719  if(!pgStatTabList)
1720  {
1721  /* This is the first time this procedure is called */
1723  sizeof(TabStatusArray));
1724  }
1725 
1726  if(pgStatTabHash)
1727  return;
1728 
1729  /* Hash table was freed or never existed. */
1730 
1731  new_ctx = AllocSetContextCreate(
1733  "PGStatLookupHashTableContext",
1735 
1736  memset(&ctl, 0, sizeof(ctl));
1737  ctl.keysize = sizeof(Oid);
1738  ctl.entrysize = sizeof(TabStatHashEntry);
1739  ctl.hcxt = new_ctx;
1740 
1741  pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup hash table",
1743 }
1744 
1745 /*
1746  * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
1747  */
1748 static PgStat_TableStatus *
1749 get_tabstat_entry(Oid rel_id, bool isshared)
1750 {
1751  TabStatHashEntry* hash_entry;
1752  PgStat_TableStatus *entry;
1753  TabStatusArray *tsa;
1754  bool found;
1755 
1757 
1758  /*
1759  * Find an entry or create a new one.
1760  */
1761  hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
1762  if(found)
1763  return hash_entry->tsa_entry;
1764 
1765  /*
1766  * `hash_entry` was just created and now we have to fill it.
1767  * First make sure there is free space in the last element of pgStatTabList.
1768  */
1769  tsa = pgStatTabList;
1770  while(tsa->tsa_used == TABSTAT_QUANTUM)
1771  {
1772  if(tsa->tsa_next == NULL)
1773  {
1775  sizeof(TabStatusArray));
1776  }
1777 
1778  tsa = tsa->tsa_next;
1779  }
1780 
1781  /*
1782  * Add an entry.
1783  */
1784  entry = &tsa->tsa_entries[tsa->tsa_used++];
1785  entry->t_id = rel_id;
1786  entry->t_shared = isshared;
1787 
1788  /*
1789  * Add a corresponding entry to pgStatTabHash.
1790  */
1791  hash_entry->tsa_entry = entry;
1792  return entry;
1793 }
1794 
1795 /*
1796  * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
1797  *
1798  * If no entry, return NULL, don't create a new one
1799  */
1802 {
1803  TabStatHashEntry* hash_entry;
1804 
1805  /*
1806  * There are no entries at all.
1807  */
1808  if(!pgStatTabHash)
1809  return NULL;
1810 
1811  hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
1812  if(!hash_entry)
1813  return NULL;
1814 
1815  return hash_entry->tsa_entry;
1816 }
1817 
1818 /*
1819  * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
1820  */
1821 static PgStat_SubXactStatus *
1823 {
1824  PgStat_SubXactStatus *xact_state;
1825 
1826  xact_state = pgStatXactStack;
1827  if (xact_state == NULL || xact_state->nest_level != nest_level)
1828  {
1829  xact_state = (PgStat_SubXactStatus *)
1831  sizeof(PgStat_SubXactStatus));
1832  xact_state->nest_level = nest_level;
1833  xact_state->prev = pgStatXactStack;
1834  xact_state->first = NULL;
1835  pgStatXactStack = xact_state;
1836  }
1837  return xact_state;
1838 }
1839 
1840 /*
1841  * add_tabstat_xact_level - add a new (sub)transaction state record
1842  */
1843 static void
1844 add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
1845 {
1846  PgStat_SubXactStatus *xact_state;
1848 
1849  /*
1850  * If this is the first rel to be modified at the current nest level, we
1851  * first have to push a transaction stack entry.
1852  */
1853  xact_state = get_tabstat_stack_level(nest_level);
1854 
1855  /* Now make a per-table stack entry */
1856  trans = (PgStat_TableXactStatus *)
1858  sizeof(PgStat_TableXactStatus));
1859  trans->nest_level = nest_level;
1860  trans->upper = pgstat_info->trans;
1861  trans->parent = pgstat_info;
1862  trans->next = xact_state->first;
1863  xact_state->first = trans;
1864  pgstat_info->trans = trans;
1865 }
1866 
1867 /*
1868  * pgstat_count_heap_insert - count the insertion of n tuples
1869  */
1870 void
1872 {
1873  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1874 
1875  if (pgstat_info != NULL)
1876  {
1877  /* We have to log the effect at the proper transactional level */
1878  int nest_level = GetCurrentTransactionNestLevel();
1879 
1880  if (pgstat_info->trans == NULL ||
1881  pgstat_info->trans->nest_level != nest_level)
1882  add_tabstat_xact_level(pgstat_info, nest_level);
1883 
1884  pgstat_info->trans->tuples_inserted += n;
1885  }
1886 }
1887 
1888 /*
1889  * pgstat_count_heap_update - count a tuple update
1890  */
1891 void
1893 {
1894  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1895 
1896  if (pgstat_info != NULL)
1897  {
1898  /* We have to log the effect at the proper transactional level */
1899  int nest_level = GetCurrentTransactionNestLevel();
1900 
1901  if (pgstat_info->trans == NULL ||
1902  pgstat_info->trans->nest_level != nest_level)
1903  add_tabstat_xact_level(pgstat_info, nest_level);
1904 
1905  pgstat_info->trans->tuples_updated++;
1906 
1907  /* t_tuples_hot_updated is nontransactional, so just advance it */
1908  if (hot)
1909  pgstat_info->t_counts.t_tuples_hot_updated++;
1910  }
1911 }
1912 
1913 /*
1914  * pgstat_count_heap_delete - count a tuple deletion
1915  */
1916 void
1918 {
1919  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1920 
1921  if (pgstat_info != NULL)
1922  {
1923  /* We have to log the effect at the proper transactional level */
1924  int nest_level = GetCurrentTransactionNestLevel();
1925 
1926  if (pgstat_info->trans == NULL ||
1927  pgstat_info->trans->nest_level != nest_level)
1928  add_tabstat_xact_level(pgstat_info, nest_level);
1929 
1930  pgstat_info->trans->tuples_deleted++;
1931  }
1932 }
1933 
1934 /*
1935  * pgstat_truncate_save_counters
1936  *
1937  * Whenever a table is truncated, we save its insert/update/delete counters
1938  * so that they can be cleared; if the (sub)xact that executed the truncate
1939  * later aborts, the counters can be restored to the saved (pre-truncate)
1940  * values. Note we do this only on the first truncate in any subxact level.
1941  */
1942 static void
1944 {
1945  if (!trans->truncated)
1946  {
1947  trans->inserted_pre_trunc = trans->tuples_inserted;
1948  trans->updated_pre_trunc = trans->tuples_updated;
1949  trans->deleted_pre_trunc = trans->tuples_deleted;
1950  trans->truncated = true;
1951  }
1952 }
1953 
1954 /*
1955  * pgstat_truncate_restore_counters - restore counters when a truncate aborts
1956  */
1957 static void
1959 {
1960  if (trans->truncated)
1961  {
1962  trans->tuples_inserted = trans->inserted_pre_trunc;
1963  trans->tuples_updated = trans->updated_pre_trunc;
1964  trans->tuples_deleted = trans->deleted_pre_trunc;
1965  }
1966 }
1967 
1968 /*
1969  * pgstat_count_truncate - update tuple counters due to truncate
1970  */
1971 void
1973 {
1974  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1975 
1976  if (pgstat_info != NULL)
1977  {
1978  /* We have to log the effect at the proper transactional level */
1979  int nest_level = GetCurrentTransactionNestLevel();
1980 
1981  if (pgstat_info->trans == NULL ||
1982  pgstat_info->trans->nest_level != nest_level)
1983  add_tabstat_xact_level(pgstat_info, nest_level);
1984 
1985  pgstat_truncate_save_counters(pgstat_info->trans);
1986  pgstat_info->trans->tuples_inserted = 0;
1987  pgstat_info->trans->tuples_updated = 0;
1988  pgstat_info->trans->tuples_deleted = 0;
1989  }
1990 }
1991 
1992 /*
1993  * pgstat_update_heap_dead_tuples - update dead-tuples count
1994  *
1995  * The semantics of this are that we are reporting the nontransactional
1996  * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
1997  * rather than increasing, and the change goes straight into the per-table
1998  * counter, not into transactional state.
1999  */
2000 void
2002 {
2003  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
2004 
2005  if (pgstat_info != NULL)
2006  pgstat_info->t_counts.t_delta_dead_tuples -= delta;
2007 }
2008 
2009 
2010 /* ----------
2011  * AtEOXact_PgStat
2012  *
2013  * Called from access/transam/xact.c at top-level transaction commit/abort.
2014  * ----------
2015  */
2016 void
2017 AtEOXact_PgStat(bool isCommit)
2018 {
2019  PgStat_SubXactStatus *xact_state;
2020 
2021  /*
2022  * Count transaction commit or abort. (We use counters, not just bools,
2023  * in case the reporting message isn't sent right away.)
2024  */
2025  if (isCommit)
2026  pgStatXactCommit++;
2027  else
2029 
2030  /*
2031  * Transfer transactional insert/update counts into the base tabstat
2032  * entries. We don't bother to free any of the transactional state, since
2033  * it's all in TopTransactionContext and will go away anyway.
2034  */
2035  xact_state = pgStatXactStack;
2036  if (xact_state != NULL)
2037  {
2039 
2040  Assert(xact_state->nest_level == 1);
2041  Assert(xact_state->prev == NULL);
2042  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2043  {
2044  PgStat_TableStatus *tabstat;
2045 
2046  Assert(trans->nest_level == 1);
2047  Assert(trans->upper == NULL);
2048  tabstat = trans->parent;
2049  Assert(tabstat->trans == trans);
2050  /* restore pre-truncate stats (if any) in case of aborted xact */
2051  if (!isCommit)
2053  /* count attempted actions regardless of commit/abort */
2054  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2055  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2056  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2057  if (isCommit)
2058  {
2059  tabstat->t_counts.t_truncated = trans->truncated;
2060  if (trans->truncated)
2061  {
2062  /* forget live/dead stats seen by backend thus far */
2063  tabstat->t_counts.t_delta_live_tuples = 0;
2064  tabstat->t_counts.t_delta_dead_tuples = 0;
2065  }
2066  /* insert adds a live tuple, delete removes one */
2067  tabstat->t_counts.t_delta_live_tuples +=
2068  trans->tuples_inserted - trans->tuples_deleted;
2069  /* update and delete each create a dead tuple */
2070  tabstat->t_counts.t_delta_dead_tuples +=
2071  trans->tuples_updated + trans->tuples_deleted;
2072  /* insert, update, delete each count as one change event */
2073  tabstat->t_counts.t_changed_tuples +=
2074  trans->tuples_inserted + trans->tuples_updated +
2075  trans->tuples_deleted;
2076  }
2077  else
2078  {
2079  /* inserted tuples are dead, deleted tuples are unaffected */
2080  tabstat->t_counts.t_delta_dead_tuples +=
2081  trans->tuples_inserted + trans->tuples_updated;
2082  /* an aborted xact generates no changed_tuple events */
2083  }
2084  tabstat->trans = NULL;
2085  }
2086  }
2087  pgStatXactStack = NULL;
2088 
2089  /* Make sure any stats snapshot is thrown away */
2091 }
2092 
2093 /* ----------
2094  * AtEOSubXact_PgStat
2095  *
2096  * Called from access/transam/xact.c at subtransaction commit/abort.
2097  * ----------
2098  */
2099 void
2100 AtEOSubXact_PgStat(bool isCommit, int nestDepth)
2101 {
2102  PgStat_SubXactStatus *xact_state;
2103 
2104  /*
2105  * Transfer transactional insert/update counts into the next higher
2106  * subtransaction state.
2107  */
2108  xact_state = pgStatXactStack;
2109  if (xact_state != NULL &&
2110  xact_state->nest_level >= nestDepth)
2111  {
2113  PgStat_TableXactStatus *next_trans;
2114 
2115  /* delink xact_state from stack immediately to simplify reuse case */
2116  pgStatXactStack = xact_state->prev;
2117 
2118  for (trans = xact_state->first; trans != NULL; trans = next_trans)
2119  {
2120  PgStat_TableStatus *tabstat;
2121 
2122  next_trans = trans->next;
2123  Assert(trans->nest_level == nestDepth);
2124  tabstat = trans->parent;
2125  Assert(tabstat->trans == trans);
2126  if (isCommit)
2127  {
2128  if (trans->upper && trans->upper->nest_level == nestDepth - 1)
2129  {
2130  if (trans->truncated)
2131  {
2132  /* propagate the truncate status one level up */
2134  /* replace upper xact stats with ours */
2135  trans->upper->tuples_inserted = trans->tuples_inserted;
2136  trans->upper->tuples_updated = trans->tuples_updated;
2137  trans->upper->tuples_deleted = trans->tuples_deleted;
2138  }
2139  else
2140  {
2141  trans->upper->tuples_inserted += trans->tuples_inserted;
2142  trans->upper->tuples_updated += trans->tuples_updated;
2143  trans->upper->tuples_deleted += trans->tuples_deleted;
2144  }
2145  tabstat->trans = trans->upper;
2146  pfree(trans);
2147  }
2148  else
2149  {
2150  /*
2151  * When there isn't an immediate parent state, we can just
2152  * reuse the record instead of going through a
2153  * palloc/pfree pushup (this works since it's all in
2154  * TopTransactionContext anyway). We have to re-link it
2155  * into the parent level, though, and that might mean
2156  * pushing a new entry into the pgStatXactStack.
2157  */
2158  PgStat_SubXactStatus *upper_xact_state;
2159 
2160  upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
2161  trans->next = upper_xact_state->first;
2162  upper_xact_state->first = trans;
2163  trans->nest_level = nestDepth - 1;
2164  }
2165  }
2166  else
2167  {
2168  /*
2169  * On abort, update top-level tabstat counts, then forget the
2170  * subtransaction
2171  */
2172 
2173  /* first restore values obliterated by truncate */
2175  /* count attempted actions regardless of commit/abort */
2176  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2177  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2178  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2179  /* inserted tuples are dead, deleted tuples are unaffected */
2180  tabstat->t_counts.t_delta_dead_tuples +=
2181  trans->tuples_inserted + trans->tuples_updated;
2182  tabstat->trans = trans->upper;
2183  pfree(trans);
2184  }
2185  }
2186  pfree(xact_state);
2187  }
2188 }
2189 
2190 
2191 /*
2192  * AtPrepare_PgStat
2193  * Save the transactional stats state at 2PC transaction prepare.
2194  *
2195  * In this phase we just generate 2PC records for all the pending
2196  * transaction-dependent stats work.
2197  */
2198 void
2200 {
2201  PgStat_SubXactStatus *xact_state;
2202 
2203  xact_state = pgStatXactStack;
2204  if (xact_state != NULL)
2205  {
2207 
2208  Assert(xact_state->nest_level == 1);
2209  Assert(xact_state->prev == NULL);
2210  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2211  {
2212  PgStat_TableStatus *tabstat;
2213  TwoPhasePgStatRecord record;
2214 
2215  Assert(trans->nest_level == 1);
2216  Assert(trans->upper == NULL);
2217  tabstat = trans->parent;
2218  Assert(tabstat->trans == trans);
2219 
2220  record.tuples_inserted = trans->tuples_inserted;
2221  record.tuples_updated = trans->tuples_updated;
2222  record.tuples_deleted = trans->tuples_deleted;
2223  record.inserted_pre_trunc = trans->inserted_pre_trunc;
2224  record.updated_pre_trunc = trans->updated_pre_trunc;
2225  record.deleted_pre_trunc = trans->deleted_pre_trunc;
2226  record.t_id = tabstat->t_id;
2227  record.t_shared = tabstat->t_shared;
2228  record.t_truncated = trans->truncated;
2229 
2231  &record, sizeof(TwoPhasePgStatRecord));
2232  }
2233  }
2234 }
2235 
2236 /*
2237  * PostPrepare_PgStat
2238  * Clean up after successful PREPARE.
2239  *
2240  * All we need do here is unlink the transaction stats state from the
2241  * nontransactional state. The nontransactional action counts will be
2242  * reported to the stats collector immediately, while the effects on live
2243  * and dead tuple counts are preserved in the 2PC state file.
2244  *
2245  * Note: AtEOXact_PgStat is not called during PREPARE.
2246  */
2247 void
2249 {
2250  PgStat_SubXactStatus *xact_state;
2251 
2252  /*
2253  * We don't bother to free any of the transactional state, since it's all
2254  * in TopTransactionContext and will go away anyway.
2255  */
2256  xact_state = pgStatXactStack;
2257  if (xact_state != NULL)
2258  {
2260 
2261  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2262  {
2263  PgStat_TableStatus *tabstat;
2264 
2265  tabstat = trans->parent;
2266  tabstat->trans = NULL;
2267  }
2268  }
2269  pgStatXactStack = NULL;
2270 
2271  /* Make sure any stats snapshot is thrown away */
2273 }
2274 
2275 /*
2276  * 2PC processing routine for COMMIT PREPARED case.
2277  *
2278  * Load the saved counts into our local pgstats state.
2279  */
2280 void
2282  void *recdata, uint32 len)
2283 {
2284  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2285  PgStat_TableStatus *pgstat_info;
2286 
2287  /* Find or create a tabstat entry for the rel */
2288  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2289 
2290  /* Same math as in AtEOXact_PgStat, commit case */
2291  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2292  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2293  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2294  pgstat_info->t_counts.t_truncated = rec->t_truncated;
2295  if (rec->t_truncated)
2296  {
2297  /* forget live/dead stats seen by backend thus far */
2298  pgstat_info->t_counts.t_delta_live_tuples = 0;
2299  pgstat_info->t_counts.t_delta_dead_tuples = 0;
2300  }
2301  pgstat_info->t_counts.t_delta_live_tuples +=
2302  rec->tuples_inserted - rec->tuples_deleted;
2303  pgstat_info->t_counts.t_delta_dead_tuples +=
2304  rec->tuples_updated + rec->tuples_deleted;
2305  pgstat_info->t_counts.t_changed_tuples +=
2306  rec->tuples_inserted + rec->tuples_updated +
2307  rec->tuples_deleted;
2308 }
2309 
2310 /*
2311  * 2PC processing routine for ROLLBACK PREPARED case.
2312  *
2313  * Load the saved counts into our local pgstats state, but treat them
2314  * as aborted.
2315  */
2316 void
2318  void *recdata, uint32 len)
2319 {
2320  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2321  PgStat_TableStatus *pgstat_info;
2322 
2323  /* Find or create a tabstat entry for the rel */
2324  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2325 
2326  /* Same math as in AtEOXact_PgStat, abort case */
2327  if (rec->t_truncated)
2328  {
2329  rec->tuples_inserted = rec->inserted_pre_trunc;
2330  rec->tuples_updated = rec->updated_pre_trunc;
2331  rec->tuples_deleted = rec->deleted_pre_trunc;
2332  }
2333  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2334  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2335  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2336  pgstat_info->t_counts.t_delta_dead_tuples +=
2337  rec->tuples_inserted + rec->tuples_updated;
2338 }
2339 
2340 
2341 /* ----------
2342  * pgstat_fetch_stat_dbentry() -
2343  *
2344  * Support function for the SQL-callable pgstat* functions. Returns
2345  * the collected statistics for one database or NULL. NULL doesn't mean
2346  * that the database doesn't exist; it is just not yet known by the
2347  * collector, so the caller is better off reporting zero instead.
2348  * ----------
2349  */
2352 {
2353  /*
2354  * If not done for this transaction, read the statistics collector stats
2355  * file into some hash tables.
2356  */
2358 
2359  /*
2360  * Lookup the requested database; return NULL if not found
2361  */
2362  return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2363  (void *) &dbid,
2364  HASH_FIND, NULL);
2365 }
2366 
2367 
2368 /* ----------
2369  * pgstat_fetch_stat_tabentry() -
2370  *
2371  * Support function for the SQL-callable pgstat* functions. Returns
2372  * the collected statistics for one table or NULL. NULL doesn't mean
2373  * that the table doesn't exist, it is just not yet known by the
2374  * collector, so the caller is better off to report ZERO instead.
      *
      * Two database entries are consulted in order: the one keyed by
      * MyDatabaseId, then the one keyed by InvalidOid (used for shared
      * tables).  The first table entry found for relid wins.
2375  * ----------
2376  */
2379 {
2380  Oid dbid;
2381  PgStat_StatDBEntry *dbentry;
2382  PgStat_StatTabEntry *tabentry;
2383 
2384  /*
2385  * If not done for this transaction, read the statistics collector stats
2386  * file into some hash tables.
2387  */
2389 
2390  /*
2391  * Lookup our database, then look in its table hash table.
2392  */
2393  dbid = MyDatabaseId;
2394  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2395  (void *) &dbid,
2396  HASH_FIND, NULL);
      /* a database entry may exist without a table hash; guard against both */
2397  if (dbentry != NULL && dbentry->tables != NULL)
2398  {
2399  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2400  (void *) &relid,
2401  HASH_FIND, NULL);
2402  if (tabentry)
2403  return tabentry;
2404  }
2405 
2406  /*
2407  * If we didn't find it, maybe it's a shared table.
2408  */
      /* shared tables are filed under the InvalidOid database entry */
2409  dbid = InvalidOid;
2410  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2411  (void *) &dbid,
2412  HASH_FIND, NULL);
2413  if (dbentry != NULL && dbentry->tables != NULL)
2414  {
2415  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2416  (void *) &relid,
2417  HASH_FIND, NULL);
2418  if (tabentry)
2419  return tabentry;
2420  }
2421 
      /* not known under either database entry */
2422  return NULL;
2423 }
2424 
2425 
2426 /* ----------
2427  * pgstat_fetch_stat_funcentry() -
2428  *
2429  * Support function for the SQL-callable pgstat* functions. Returns
2430  * the collected statistics for one function or NULL.
      *
      * NULL is returned when the database entry is missing, when it has no
      * function hash table, or when func_id is not present in that table.
2431  * ----------
2432  */
2435 {
2436  PgStat_StatDBEntry *dbentry;
      /* stays NULL unless the lookup below succeeds */
2437  PgStat_StatFuncEntry *funcentry = NULL;
2438 
2439  /* load the stats file if needed */
2441 
2442  /* Lookup our database, then find the requested function. */
2444  if (dbentry != NULL && dbentry->functions != NULL)
2445  {
2446  funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
2447  (void *) &func_id,
2448  HASH_FIND, NULL);
2449  }
2450 
2451  return funcentry;
2452 }
2453 
2454 
2455 /* ----------
2456  * pgstat_fetch_stat_beentry() -
2457  *
2458  * Support function for the SQL-callable pgstat* functions. Returns
2459  * our local copy of the current-activity entry for one backend.
      *
      * beid is 1-based: values outside 1..localNumBackends yield NULL.
      * The result points at the backendStatus member of element beid - 1
      * of localBackendStatusTable.
2460  *
2461  * NB: caller is responsible for a check if the user is permitted to see
2462  * this info (especially the querystring).
2463  * ----------
2464  */
2467 {
2469 
      /* reject out-of-range ids instead of indexing outside the local table */
2470  if (beid < 1 || beid > localNumBackends)
2471  return NULL;
2472 
2473  return &localBackendStatusTable[beid - 1].backendStatus;
2474 }
2475 
2476 
2477 /* ----------
2478  * pgstat_fetch_stat_local_beentry() -
2479  *
2480  * Like pgstat_fetch_stat_beentry() but with locally computed additions (like
2481  * xid and xmin values of the backend)
      *
      * beid is 1-based: values outside 1..localNumBackends yield NULL.
      * The result is the whole element beid - 1 of localBackendStatusTable.
2482  *
2483  * NB: caller is responsible for a check if the user is permitted to see
2484  * this info (especially the querystring).
2485  * ----------
2486  */
2489 {
2491 
      /* reject out-of-range ids instead of indexing outside the local table */
2492  if (beid < 1 || beid > localNumBackends)
2493  return NULL;
2494 
2495  return &localBackendStatusTable[beid - 1];
2496 }
2497 
2498 
2499 /* ----------
2500  * pgstat_fetch_stat_numbackends() -
2501  *
2502  * Support function for the SQL-callable pgstat* functions. Returns
2503  * the maximum current backend id.
      *
      * Concretely this is localNumBackends, which is also the largest beid
      * accepted by pgstat_fetch_stat_beentry() and
      * pgstat_fetch_stat_local_beentry().
2504  * ----------
2505  */
2506 int
2508 {
2510 
2511  return localNumBackends;
2512 }
2513 
2514 /*
2515  * ---------
2516  * pgstat_fetch_stat_archiver() -
2517  *
2518  * Support function for the SQL-callable pgstat* functions. Returns
2519  * a pointer to the archiver statistics struct.
      *
      * The pointer refers to the file-level archiverStats variable, not to
      * a fresh copy, so callers must not free it.
2520  * ---------
2521  */
2524 {
2526 
2527  return &archiverStats;
2528 }
2529 
2530 
2531 /*
2532  * ---------
2533  * pgstat_fetch_global() -
2534  *
2535  * Support function for the SQL-callable pgstat* functions. Returns
2536  * a pointer to the global statistics struct.
      *
      * The pointer refers to the file-level globalStats variable, not to
      * a fresh copy, so callers must not free it.
2537  * ---------
2538  */
2541 {
2543 
2544  return &globalStats;
2545 }
2546 
2547 
2548 /* ------------------------------------------------------------
2549  * Functions for management of the shared-memory PgBackendStatus array
2550  * ------------------------------------------------------------
2551  */
2552 
2559 #ifdef USE_SSL
     /*
      * Array of per-slot SSL status structs; assigned in
      * CreateSharedBackendStatus from ShmemInitStruct, with one element
      * handed to each BackendStatusArray entry's st_sslstatus.
      */
2560 static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
2561 #endif
2562 
2563 
2564 /*
2565  * Report shared-memory space needed by CreateSharedBackendStatus.
      *
      * The total is accumulated with mul_size/add_size, one term per shared
      * area: the status array, the application-name buffer, the client
      * hostname buffer, the activity buffer and, when built with USE_SSL,
      * the SSL status buffer.
2566  */
2567 Size
2569 {
2570  Size size;
2571 
2572  /* BackendStatusArray: */
2573  size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
2574  /* BackendAppnameBuffer: */
2575  size = add_size(size,
2577  /* BackendClientHostnameBuffer: */
2578  size = add_size(size,
2580  /* BackendActivityBuffer: */
2581  size = add_size(size,
2583 #ifdef USE_SSL
2584  /* BackendSslStatusBuffer: */
2585  size = add_size(size,
2587 #endif
2588  return size;
2589 }
2590 
2591 /*
2592  * Initialize the shared status array and several string buffers
2593  * during postmaster startup.
      *
      * Each area is obtained with ShmemInitStruct; only the process that
      * finds it not yet existing (!found) zeroes it and wires the
      * per-slot pointers in BackendStatusArray to their slice of the buffer.
      *
      * Every per-slot buffer must hold NumBackendStatSlots entries, because
      * the pointer-initialization loops below walk that many slots
      * (pgstat_initialize indexes slots at MaxBackends + MyAuxProcType for
      * auxiliary processes).  Sizing the name buffers by MaxBackends left the
      * auxiliary-process slots pointing past the end of the allocation.
      *
      * NOTE(review): this listing has numbering gaps inside this function
      * (2596, 2654, 2669, 2675); those lines are not visible here and are
      * left untouched.
2594  */
2595 void
2597 {
2598  Size size;
2599  bool found;
2600  int i;
2601  char *buffer;
2602 
2603  /* Create or attach to the shared array */
2604  size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
2605  BackendStatusArray = (PgBackendStatus *)
2606  ShmemInitStruct("Backend Status Array", size, &found);
2607 
2608  if (!found)
2609  {
2610  /*
2611  * We're the first - initialize.
2612  */
2613  MemSet(BackendStatusArray, 0, size);
2614  }
2615 
2616  /* Create or attach to the shared appname buffer */
      /* one NAMEDATALEN slice per status slot, auxiliary processes included */
2617  size = mul_size(NAMEDATALEN, NumBackendStatSlots);
2618  BackendAppnameBuffer = (char *)
2619  ShmemInitStruct("Backend Application Name Buffer", size, &found);
2620 
2621  if (!found)
2622  {
2623  MemSet(BackendAppnameBuffer, 0, size);
2624 
2625  /* Initialize st_appname pointers. */
2626  buffer = BackendAppnameBuffer;
2627  for (i = 0; i < NumBackendStatSlots; i++)
2628  {
2629  BackendStatusArray[i].st_appname = buffer;
2630  buffer += NAMEDATALEN;
2631  }
2632  }
2633 
2634  /* Create or attach to the shared client hostname buffer */
      /* one NAMEDATALEN slice per status slot, auxiliary processes included */
2635  size = mul_size(NAMEDATALEN, NumBackendStatSlots);
2636  BackendClientHostnameBuffer = (char *)
2637  ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
2638 
2639  if (!found)
2640  {
2641  MemSet(BackendClientHostnameBuffer, 0, size);
2642 
2643  /* Initialize st_clienthostname pointers. */
2644  buffer = BackendClientHostnameBuffer;
2645  for (i = 0; i < NumBackendStatSlots; i++)
2646  {
2647  BackendStatusArray[i].st_clienthostname = buffer;
2648  buffer += NAMEDATALEN;
2649  }
2650  }
2651 
2652  /* Create or attach to the shared activity buffer */
2653  BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
2655  BackendActivityBuffer = (char *)
2656  ShmemInitStruct("Backend Activity Buffer",
2657  BackendActivityBufferSize,
2658  &found);
2659 
2660  if (!found)
2661  {
      /*
       * Clear exactly the activity buffer.  "size" still holds the client
       * hostname buffer's size at this point, so it must not be used here.
       */
2662  MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
2663 
2664  /* Initialize st_activity pointers. */
2665  buffer = BackendActivityBuffer;
2666  for (i = 0; i < NumBackendStatSlots; i++)
2667  {
2668  BackendStatusArray[i].st_activity = buffer;
      /*
       * NOTE(review): listing line 2669 is not shown; presumably it
       * advances buffer by pgstat_track_activity_query_size - confirm.
       */
2670  }
2671  }
2672 
2673 #ifdef USE_SSL
2674  /* Create or attach to the shared SSL status buffer */
2676  BackendSslStatusBuffer = (PgBackendSSLStatus *)
2677  ShmemInitStruct("Backend SSL Status Buffer", size, &found);
2678 
2679  if (!found)
2680  {
2681  PgBackendSSLStatus *ptr;
2682 
2683  MemSet(BackendSslStatusBuffer, 0, size);
2684 
2685  /* Initialize st_sslstatus pointers. */
2686  ptr = BackendSslStatusBuffer;
2687  for (i = 0; i < NumBackendStatSlots; i++)
2688  {
2689  BackendStatusArray[i].st_sslstatus = ptr;
2690  ptr++;
2691  }
2692  }
2693 #endif
2694 }
2695 
2696 
2697 /* ----------
2698  * pgstat_initialize() -
2699  *
2700  * Initialize pgstats state, and set up our on-proc-exit hook.
2701  * Called from InitPostgres and AuxiliaryProcessMain. For auxiliary process,
2702  * MyBackendId is invalid. Otherwise, MyBackendId must be set,
2703  * but we must not have started any transaction yet (since the
2704  * exit hook must run after the last transaction exit).
2705  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
2706  * ----------
2707  */
2708 void
2710 {
2711  /* Initialize MyBEEntry */
2713  {
      /* backend ids are 1-based, array elements 0-based */
2715  MyBEEntry = &BackendStatusArray[MyBackendId - 1];
2716  }
2717  else
2718  {
2719  /* Must be an auxiliary process */
2721 
2722  /*
2723  * Assign the MyBEEntry for an auxiliary process. Since it doesn't
2724  * have a BackendId, the slot is statically allocated based on the
2725  * auxiliary process type (MyAuxProcType). Backends use slots numbered
2726  * in the range from 1 to MaxBackends (inclusive), so we use
2727  * MaxBackends + MyAuxProcType + 1 as the 1-based slot number for an
2728  * auxiliary process, which is array element
       * MaxBackends + MyAuxProcType.
2729  */
2730  MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
2731  }
2732 
2733  /* Set up a process-exit hook to clean up */
2735 }
2736 
2737 /* ----------
2738  * pgstat_bestart() -
2739  *
2740  * Initialize this backend's entry in the PgBackendStatus array.
2741  * Called from InitPostgres.
2742  *
2743  * Apart from auxiliary processes, MyBackendId, MyDatabaseId,
2744  * session userid, and application_name must be set for a
2745  * backend (hence, this cannot be combined with pgstat_initialize).
      *
      * Works on MyBEEntry, which must already be non-NULL (asserted below).
      * The entry's backend type is classified first; then the remaining
      * fields are filled in, and finally the application name is refreshed
      * from the application_name setting when that is non-NULL.
2746  * ----------
2747  */
2748 void
2750 {
2751  TimestampTz proc_start_timestamp;
2752  SockAddr clientaddr;
2753  volatile PgBackendStatus *beentry;
2754 
2755  /*
2756  * To minimize the time spent modifying the PgBackendStatus entry, fetch
2757  * all the needed data first.
2758  *
2759  * If we have a MyProcPort, use its session start time (for consistency,
2760  * and to save a kernel call).
2761  */
2762  if (MyProcPort)
2763  proc_start_timestamp = MyProcPort->SessionStartTime;
2764  else
2765  proc_start_timestamp = GetCurrentTimestamp();
2766 
2767  /*
2768  * We may not have a MyProcPort (eg, if this is the autovacuum process).
2769  * If so, use all-zeroes client address, which is dealt with specially in
2770  * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
2771  */
2772  if (MyProcPort)
2773  memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
2774  else
2775  MemSet(&clientaddr, 0, sizeof(clientaddr));
2776 
2777  /*
2778  * Initialize my status entry, following the protocol of bumping
2779  * st_changecount before and after; and make sure it's even afterwards. We
2780  * use a volatile pointer here to ensure the compiler doesn't try to get
2781  * cute.
2782  */
2783  beentry = MyBEEntry;
2784 
2785  /* pgstats state must be initialized from pgstat_initialize() */
2786  Assert(beentry != NULL);
2787 
      /* Classify the process; the first matching test below decides the type. */
2789  {
2791  {
2792  /* Autovacuum Launcher */
2794  }
2795  else if (IsAutoVacuumWorkerProcess())
2796  {
2797  /* Autovacuum Worker */
2798  beentry->st_backendType = B_AUTOVAC_WORKER;
2799  }
2800  else if (am_walsender)
2801  {
2802  /* Wal sender */
2803  beentry->st_backendType = B_WAL_SENDER;
2804  }
2805  else if (IsBackgroundWorker)
2806  {
2807  /* bgworker */
2808  beentry->st_backendType = B_BG_WORKER;
2809  }
2810  else
2811  {
2812  /* client-backend */
2813  beentry->st_backendType = B_BACKEND;
2814  }
2815  }
2816  else
2817  {
2818  /* Must be an auxiliary process */
2820  switch (MyAuxProcType)
2821  {
2822  case StartupProcess:
2823  beentry->st_backendType = B_STARTUP;
2824  break;
2825  case BgWriterProcess:
2826  beentry->st_backendType = B_BG_WRITER;
2827  break;
2828  case CheckpointerProcess:
2829  beentry->st_backendType = B_CHECKPOINTER;
2830  break;
2831  case WalWriterProcess:
2832  beentry->st_backendType = B_WAL_WRITER;
2833  break;
2834  case WalReceiverProcess:
2835  beentry->st_backendType = B_WAL_RECEIVER;
2836  break;
2837  default:
      /* any other auxiliary process type is reported at FATAL level */
2838  elog(FATAL, "unrecognized process type: %d",
2839  (int) MyAuxProcType);
2840  proc_exit(1);
2841  }
2842  }
2843 
      /*
       * Repeat until st_changecount is odd.  pgstat_read_current_status
       * retries while the count it sampled is odd, so an odd value keeps
       * readers from accepting a half-written entry.
       */
2844  do
2845  {
2847  } while ((beentry->st_changecount & 1) == 0);
2848 
2849  beentry->st_procpid = MyProcPid;
2850  beentry->st_proc_start_timestamp = proc_start_timestamp;
      /* no activity, state change or transaction has been reported yet */
2851  beentry->st_activity_start_timestamp = 0;
2852  beentry->st_state_start_timestamp = 0;
2853  beentry->st_xact_start_timestamp = 0;
2854  beentry->st_databaseid = MyDatabaseId;
2855 
2856  /* We have userid for client-backends, wal-sender and bgworker processes */
2857  if (beentry->st_backendType == B_BACKEND
2858  || beentry->st_backendType == B_WAL_SENDER
2859  || beentry->st_backendType == B_BG_WORKER)
2860  beentry->st_userid = GetSessionUserId();
2861  else
2862  beentry->st_userid = InvalidOid;
2863 
2864  beentry->st_clientaddr = clientaddr;
2867  NAMEDATALEN);
2868  else
2869  beentry->st_clienthostname[0] = '\0';
2870 #ifdef USE_SSL
2871  if (MyProcPort && MyProcPort->ssl != NULL)
2872  {
2873  beentry->st_ssl = true;
2879  }
2880  else
2881  {
2882  beentry->st_ssl = false;
2883  }
2884 #else
      /* built without SSL support: never report an SSL connection */
2885  beentry->st_ssl = false;
2886 #endif
2887  beentry->st_state = STATE_UNDEFINED;
2888  beentry->st_appname[0] = '\0';
2889  beentry->st_activity[0] = '\0';
2890  /* Also make sure the last byte in each string area is always 0 */
2891  beentry->st_clienthostname[NAMEDATALEN - 1] = '\0';
2892  beentry->st_appname[NAMEDATALEN - 1] = '\0';
2893  beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
2896 
2897  /*
2898  * we don't zero st_progress_param here to save cycles; nobody should
2899  * examine it until st_progress_command has been set to something other
2900  * than PROGRESS_COMMAND_INVALID
2901  */
2902 
2904 
2905  /* Update app name to current GUC setting */
2906  if (application_name)
2908 }
2909 
2910 /*
2911  * Shut down a single backend's statistics reporting at process exit.
2912  *
2913  * Flush any remaining statistics counts out to the collector.
2914  * Without this, operations triggered during backend exit (such as
2915  * temp table deletions) won't be counted.
2916  *
2917  * Lastly, clear out our entry in the PgBackendStatus array.
      *
      * "Clearing" means setting st_procpid to 0; pgstat_read_current_status
      * only copies slots whose st_procpid is greater than zero.
2918  */
2919 static void
2921 {
2922  volatile PgBackendStatus *beentry = MyBEEntry;
2923 
2924  /*
2925  * If we got as far as discovering our own database ID, we can report what
2926  * we did to the collector. Otherwise, we'd be sending an invalid
2927  * database ID, so forget it. (This means that accesses to pg_database
2928  * during failed backend starts might never get counted.)
2929  */
2930  if (OidIsValid(MyDatabaseId))
2931  pgstat_report_stat(true);
2932 
2933  /*
2934  * Clear my status entry, following the protocol of bumping st_changecount
2935  * before and after. We use a volatile pointer here to ensure the
2936  * compiler doesn't try to get cute.
2937  */
2939 
2940  beentry->st_procpid = 0; /* mark invalid */
2941 
2943 }
2944 
2945 
2946 /* ----------
2947  * pgstat_report_activity() -
2948  *
2949  * Called from tcop/postgres.c to report what the backend is actually doing
2950  * (but note cmd_str can be NULL for certain cases).
      *
      * When cmd_str is NULL only the state and its timestamp are updated;
      * the stored activity string and its start timestamp are left alone.
      * A non-NULL cmd_str is clipped with pg_mbcliplen before being copied
      * into st_activity.  Nothing is done when MyBEEntry is NULL.
2951  *
2952  * All updates of the status entry follow the protocol of bumping
2953  * st_changecount before and after. We use a volatile pointer here to
2954  * ensure the compiler doesn't try to get cute.
2955  * ----------
2956  */
2957 void
2959 {
2960  volatile PgBackendStatus *beentry = MyBEEntry;
2961  TimestampTz start_timestamp;
2962  TimestampTz current_timestamp;
      /* stays 0 unless cmd_str is non-NULL; only used in that case */
2963  int len = 0;
2964 
2965  TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
2966 
2967  if (!beentry)
2968  return;
2969 
2971  {
      /* only the first call after disabling needs to scrub the entry */
2972  if (beentry->st_state != STATE_DISABLED)
2973  {
2974  volatile PGPROC *proc = MyProc;
2975 
2976  /*
2977  * track_activities is disabled, but we last reported a
2978  * non-disabled state. As our final update, change the state and
2979  * clear fields we will not be updating anymore.
2980  */
2982  beentry->st_state = STATE_DISABLED;
2983  beentry->st_state_start_timestamp = 0;
2984  beentry->st_activity[0] = '\0';
2985  beentry->st_activity_start_timestamp = 0;
2986  /* st_xact_start_timestamp and wait_event_info are also disabled */
2987  beentry->st_xact_start_timestamp = 0;
2988  proc->wait_event_info = 0;
2990  }
2991  return;
2992  }
2993 
2994  /*
2995  * To minimize the time spent modifying the entry, fetch all the needed
2996  * data first.
2997  */
2998  start_timestamp = GetCurrentStatementStartTimestamp();
2999  if (cmd_str != NULL)
3000  {
3001  len = pg_mbcliplen(cmd_str, strlen(cmd_str),
3003  }
3004  current_timestamp = GetCurrentTimestamp();
3005 
3006  /*
3007  * Now update the status entry
3008  */
3010 
3011  beentry->st_state = state;
3012  beentry->st_state_start_timestamp = current_timestamp;
3013 
3014  if (cmd_str != NULL)
3015  {
      /* copy the clipped text and terminate it explicitly */
3016  memcpy((char *) beentry->st_activity, cmd_str, len);
3017  beentry->st_activity[len] = '\0';
3018  beentry->st_activity_start_timestamp = start_timestamp;
3019  }
3020 
3022 }
3023 
3024 /*-----------
3025  * pgstat_progress_start_command() -
3026  *
3027  * Set st_progress_command (and st_progress_command_target) in own backend
3028  * entry. Also, zero-initialize st_progress_param array.
      *
      * cmdtype is stored as the command and relid as its target.  Does
      * nothing when MyBEEntry is NULL or pgstat_track_activities is off.
3029  *-----------
3030  */
3031 void
3033 {
3034  volatile PgBackendStatus *beentry = MyBEEntry;
3035 
3036  if (!beentry || !pgstat_track_activities)
3037  return;
3038 
3040  beentry->st_progress_command = cmdtype;
3041  beentry->st_progress_command_target = relid;
      /* start every command with all progress counters at zero */
3042  MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
3044 }
3045 
3046 /*-----------
3047  * pgstat_progress_update_param() -
3048  *
3049  * Update index'th member in st_progress_param[] of own backend entry.
      *
      * index must lie in 0 .. PGSTAT_NUM_PROGRESS_PARAM - 1 (checked only by
      * an Assert); val is the new value.  Does nothing when MyBEEntry is
      * NULL or pgstat_track_activities is off.
3050  *-----------
3051  */
3052 void
3054 {
3055  volatile PgBackendStatus *beentry = MyBEEntry;
3056 
      /* validated before the early return, so bad indexes are always caught */
3057  Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
3058 
3059  if (!beentry || !pgstat_track_activities)
3060  return;
3061 
3063  beentry->st_progress_param[index] = val;
3065 }
3066 
3067 /*-----------
3068  * pgstat_progress_update_multi_param() -
3069  *
3070  * Update multiple members in st_progress_param[] of own backend entry.
3071  * This is atomic; readers won't see intermediate states.
      *
      * index[] and val[] are parallel arrays of nparam elements: val[i] is
      * stored into st_progress_param[index[i]].  Does nothing when MyBEEntry
      * is NULL, pgstat_track_activities is off, or nparam is zero.
3072  *-----------
3073  */
3074 void
3076  const int64 *val)
3077 {
3078  volatile PgBackendStatus *beentry = MyBEEntry;
3079  int i;
3080 
3081  if (!beentry || !pgstat_track_activities || nparam == 0)
3082  return;
3083 
3085 
3086  for (i = 0; i < nparam; ++i)
3087  {
      /* each target slot is range-checked (assert-enabled builds only) */
3088  Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
3089 
3090  beentry->st_progress_param[index[i]] = val[i];
3091  }
3092 
3094 }
3095 
3096 /*-----------
3097  * pgstat_progress_end_command() -
3098  *
3099  * Reset st_progress_command (and st_progress_command_target) in own backend
3100  * entry. This signals the end of the command.
      *
      * Does nothing when MyBEEntry is NULL.
3101  *-----------
3102  */
3103 void
3105 {
3106  volatile PgBackendStatus *beentry = MyBEEntry;
3107 
      /* no status entry for this process: nothing to reset */
3108  if (!beentry)
3109  return;
3112  return;
3113 
3118 }
3119 
3120 /* ----------
3121  * pgstat_report_appname() -
3122  *
3123  * Called to update our application name.
      *
      * appname must be a non-NULL, NUL-terminated string (strlen is applied
      * to it unconditionally).  At most NAMEDATALEN - 1 bytes are stored,
      * clipped by pg_mbcliplen, and the copy is always NUL-terminated.
      * Does nothing when MyBEEntry is NULL.
3124  * ----------
3125  */
3126 void
3127 pgstat_report_appname(const char *appname)
3128 {
3129  volatile PgBackendStatus *beentry = MyBEEntry;
3130  int len;
3131 
3132  if (!beentry)
3133  return;
3134 
3135  /* This should be unnecessary if GUC did its job, but be safe */
3136  len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
3137 
3138  /*
3139  * Update my status entry, following the protocol of bumping
3140  * st_changecount before and after. We use a volatile pointer here to
3141  * ensure the compiler doesn't try to get cute.
3142  */
3144 
      /* copy the clipped name and terminate it explicitly */
3145  memcpy((char *) beentry->st_appname, appname, len);
3146  beentry->st_appname[len] = '\0';
3147 
3149 }
3150 
3151 /*
3152  * Report current transaction start timestamp as the specified value.
3153  * Zero means there is no active transaction.
      *
      * tstamp is stored into st_xact_start_timestamp of our own entry.
      * Does nothing when pgstat_track_activities is off or MyBEEntry is NULL.
3154  */
3155 void
3157 {
3158  volatile PgBackendStatus *beentry = MyBEEntry;
3159 
3160  if (!pgstat_track_activities || !beentry)
3161  return;
3162 
3163  /*
3164  * Update my status entry, following the protocol of bumping
3165  * st_changecount before and after. We use a volatile pointer here to
3166  * ensure the compiler doesn't try to get cute.
3167  */
3169  beentry->st_xact_start_timestamp = tstamp;
3171 }
3172 
3173 /* ----------
3174  * pgstat_read_current_status() -
3175  *
3176  * Copy the current contents of the PgBackendStatus array to local memory,
3177  * if not already done in this transaction.
      *
      * The local table and its string/SSL side buffers are allocated in
      * pgStatLocalContext.  Every shared slot is visited, but only slots
      * with st_procpid > 0 are kept, so the local table is densely packed
      * and localNumBackends counts just the kept entries.
      * localBackendStatusTable is set last, and doubles as the
      * "already done" marker tested at the top.
3178  * ----------
3179  */
3180 static void
3182 {
3183  volatile PgBackendStatus *beentry;
3184  LocalPgBackendStatus *localtable;
3185  LocalPgBackendStatus *localentry;
3186  char *localappname,
3187  *localactivity;
3188 #ifdef USE_SSL
3189  PgBackendSSLStatus *localsslstatus;
3190 #endif
3191  int i;
3192 
3194  if (localBackendStatusTable)
3195  return; /* already done */
3196 
3198 
3199  localtable = (LocalPgBackendStatus *)
3200  MemoryContextAlloc(pgStatLocalContext,
3202  localappname = (char *)
3203  MemoryContextAlloc(pgStatLocalContext,
3205  localactivity = (char *)
3206  MemoryContextAlloc(pgStatLocalContext,
3207  pgstat_track_activity_query_size * NumBackendStatSlots);
3208 #ifdef USE_SSL
3209  localsslstatus = (PgBackendSSLStatus *)
3210  MemoryContextAlloc(pgStatLocalContext,
3212 #endif
3213 
3214  localNumBackends = 0;
3215 
      /* beentry walks the shared array; localentry is the next free local slot */
3216  beentry = BackendStatusArray;
3217  localentry = localtable;
3218  for (i = 1; i <= NumBackendStatSlots; i++)
3219  {
3220  /*
3221  * Follow the protocol of retrying if st_changecount changes while we
3222  * copy the entry, or if it's odd. (The check for odd is needed to
3223  * cover the case where we are able to completely copy the entry while
3224  * the source backend is between increment steps.) We use a volatile
3225  * pointer here to ensure the compiler doesn't try to get cute.
3226  */
3227  for (;;)
3228  {
3229  int before_changecount;
3230  int after_changecount;
3231 
3232  pgstat_save_changecount_before(beentry, before_changecount);
3233 
      /* fetch the pid first: slots without a live process are not copied */
3234  localentry->backendStatus.st_procpid = beentry->st_procpid;
3235  if (localentry->backendStatus.st_procpid > 0)
3236  {
3237  memcpy(&localentry->backendStatus, (char *) beentry, sizeof(PgBackendStatus));
3238 
3239  /*
3240  * strcpy is safe even if the string is modified concurrently,
3241  * because there's always a \0 at the end of the buffer.
3242  */
      /* repoint the copied struct at local copies of the strings */
3243  strcpy(localappname, (char *) beentry->st_appname);
3244  localentry->backendStatus.st_appname = localappname;
3245  strcpy(localactivity, (char *) beentry->st_activity);
3246  localentry->backendStatus.st_activity = localactivity;
3247  localentry->backendStatus.st_ssl = beentry->st_ssl;
3248 #ifdef USE_SSL
      /* the SSL details are copied only for SSL connections */
3249  if (beentry->st_ssl)
3250  {
3251  memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
3252  localentry->backendStatus.st_sslstatus = localsslstatus;
3253  }
3254 #endif
3255  }
3256 
3257  pgstat_save_changecount_after(beentry, after_changecount);
      /* accept the copy only if the count is unchanged and even */
3258  if (before_changecount == after_changecount &&
3259  (before_changecount & 1) == 0)
3260  break;
3261 
3262  /* Make sure we can break out of loop if stuck... */
3264  }
3265 
3266  beentry++;
3267  /* Only valid entries get included into the local array */
3268  if (localentry->backendStatus.st_procpid > 0)
3269  {
3271  &localentry->backend_xid,
3272  &localentry->backend_xmin);
3273 
      /* keep this entry: advance all local cursors in step */
3274  localentry++;
3275  localappname += NAMEDATALEN;
3276  localactivity += pgstat_track_activity_query_size;
3277 #ifdef USE_SSL
3278  localsslstatus++;
3279 #endif
3280  localNumBackends++;
3281  }
3282  }
3283 
3284  /* Set the pointer only after completion of a valid table */
3285  localBackendStatusTable = localtable;
3286 }
3287 
3288 /* ----------
3289  * pgstat_get_wait_event_type() -
3290  *
3291  * Return a string naming the type (class) of the wait event that
3292  * wait_event_info describes.
      *
      * Returns NULL when wait_event_info is 0 (not waiting), and "???" when
      * the class bits match none of the known classes.  The class is taken
      * from the top byte of wait_event_info (mask 0xFF000000).
3293  */
3294 const char *
3296 {
3297  uint32 classId;
3298  const char *event_type;
3299 
3300  /* report process as not waiting. */
3301  if (wait_event_info == 0)
3302  return NULL;
3303 
      /* the wait class lives in the most significant byte */
3304  classId = wait_event_info & 0xFF000000;
3305 
3306  switch (classId)
3307  {
3308  case PG_WAIT_LWLOCK:
3309  event_type = "LWLock";
3310  break;
3311  case PG_WAIT_LOCK:
3312  event_type = "Lock";
3313  break;
3314  case PG_WAIT_BUFFER_PIN:
3315  event_type = "BufferPin";
3316  break;
3317  case PG_WAIT_ACTIVITY:
3318  event_type = "Activity";
3319  break;
3320  case PG_WAIT_CLIENT:
3321  event_type = "Client";
3322  break;
3323  case PG_WAIT_EXTENSION:
3324  event_type = "Extension";
3325  break;
3326  case PG_WAIT_IPC:
3327  event_type = "IPC";
3328  break;
3329  case PG_WAIT_TIMEOUT:
3330  event_type = "Timeout";
3331  break;
3332  case PG_WAIT_IO:
3333  event_type = "IO";
3334  break;
3335  default:
3336  event_type = "???";
3337  break;
3338  }
3339 
3340  return event_type;
3341 }
3342 
3343 /* ----------
3344  * pgstat_get_wait_event() -
3345  *
3346  * Return a string naming the specific wait event that wait_event_info
3347  * describes.
      *
      * Returns NULL when wait_event_info is 0 (not waiting), and
      * "unknown wait event" when the class bits match no known class.
      * The class is the top byte (mask 0xFF000000) and the event id the low
      * 16 bits (mask 0x0000FFFF).  LWLock and Lock names are resolved from
      * the event id; the Activity, Client, IPC, Timeout and IO classes hand
      * the complete wait_event_info value to their per-class converter.
3348  */
3349 const char *
3351 {
3352  uint32 classId;
3353  uint16 eventId;
3354  const char *event_name;
3355 
3356  /* report process as not waiting. */
3357  if (wait_event_info == 0)
3358  return NULL;
3359 
3360  classId = wait_event_info & 0xFF000000;
3361  eventId = wait_event_info & 0x0000FFFF;
3362 
3363  switch (classId)
3364  {
3365  case PG_WAIT_LWLOCK:
3366  event_name = GetLWLockIdentifier(classId, eventId);
3367  break;
3368  case PG_WAIT_LOCK:
3369  event_name = GetLockNameFromTagType(eventId);
3370  break;
3371  case PG_WAIT_BUFFER_PIN:
3372  event_name = "BufferPin";
3373  break;
3374  case PG_WAIT_ACTIVITY:
3375  {
      /*
       * The whole value, class bits included, is cast to the enum;
       * presumably the enum constants embed their class bits - confirm
       * against the enum definitions.
       */
3376  WaitEventActivity w = (WaitEventActivity) wait_event_info;
3377 
3378  event_name = pgstat_get_wait_activity(w);
3379  break;
3380  }
3381  case PG_WAIT_CLIENT:
3382  {
3383  WaitEventClient w = (WaitEventClient) wait_event_info;
3384 
3385  event_name = pgstat_get_wait_client(w);
3386  break;
3387  }
3388  case PG_WAIT_EXTENSION:
3389  event_name = "Extension";
3390  break;
3391  case PG_WAIT_IPC:
3392  {
3393  WaitEventIPC w = (WaitEventIPC) wait_event_info;
3394 
3395  event_name = pgstat_get_wait_ipc(w);
3396  break;
3397  }
3398  case PG_WAIT_TIMEOUT:
3399  {
3400  WaitEventTimeout w = (WaitEventTimeout) wait_event_info;
3401 
3402  event_name = pgstat_get_wait_timeout(w);
3403  break;
3404  }
3405  case PG_WAIT_IO:
3406  {
3407  WaitEventIO w = (WaitEventIO) wait_event_info;
3408 
3409  event_name = pgstat_get_wait_io(w);
3410  break;
3411  }
3412  default:
3413  event_name = "unknown wait event";
3414  break;
3415  }
3416 
3417  return event_name;
3418 }
3419 
3420 /* ----------
3421  * pgstat_get_wait_activity() -
3422  *
3423  * Convert WaitEventActivity to string.
      *
      * Returns a string literal; "unknown wait event" is the result when w
      * matches none of the cases in the switch.
3424  * ----------
3425  */
3426 static const char *
3428 {
      /* fallback used when no case below overrides it */
3429  const char *event_name = "unknown wait event";
3430 
3431  switch (w)
3432  {
3434  event_name = "ArchiverMain";
3435  break;
3437  event_name = "AutoVacuumMain";
3438  break;
3440  event_name = "BgWriterHibernate";
3441  break;
3443  event_name = "BgWriterMain";
3444  break;
3446  event_name = "CheckpointerMain";
3447  break;
3449  event_name = "PgStatMain";
3450  break;
3452  event_name = "RecoveryWalAll";
3453  break;
3455  event_name = "RecoveryWalStream";
3456  break;
3458  event_name = "SysLoggerMain";
3459  break;
3461  event_name = "WalReceiverMain";
3462  break;
3464  event_name = "WalSenderMain";
3465  break;
3467  event_name = "WalWriterMain";
3468  break;
3470  event_name = "LogicalLauncherMain";
3471  break;
3473  event_name = "LogicalApplyMain";
3474  break;
3475  /* no default case, so that compiler will warn */
3476  }
3477 
3478  return event_name;
3479 }
3480 
3481 /* ----------
3482  * pgstat_get_wait_client() -
3483  *
3484  * Convert WaitEventClient to string.
      *
      * Returns a string literal; "unknown wait event" is the result when w
      * matches none of the cases in the switch.
3485  * ----------
3486  */
3487 static const char *
3489 {
      /* fallback used when no case below overrides it */
3490  const char *event_name = "unknown wait event";
3491 
3492  switch (w)
3493  {
3495  event_name = "ClientRead";
3496  break;
3498  event_name = "ClientWrite";
3499  break;
3501  event_name = "SSLOpenServer";
3502  break;
3504  event_name = "WalReceiverWaitStart";
3505  break;
3507  event_name = "LibPQWalReceiver";
3508  break;
3510  event_name = "WalSenderWaitForWAL";
3511  break;
3513  event_name = "WalSenderWriteData";
3514  break;
3515  /* no default case, so that compiler will warn */
3516  }
3517 
3518  return event_name;
3519 }
3520 
3521 /* ----------
3522  * pgstat_get_wait_ipc() -
3523  *
3524  * Convert WaitEventIPC to string.
      *
      * Returns a string literal; "unknown wait event" is the result when w
      * matches none of the cases in the switch.
3525  * ----------
3526  */
3527 static const char *
3529 {
      /* fallback used when no case below overrides it */
3530  const char *event_name = "unknown wait event";
3531 
3532  switch (w)
3533  {
3535  event_name = "BgWorkerShutdown";
3536  break;
3538  event_name = "BgWorkerStartup";
3539  break;
3540  case WAIT_EVENT_BTREE_PAGE:
3541  event_name = "BtreePage";
3542  break;
3544  event_name = "ExecuteGather";
3545  break;
3547  event_name = "MessageQueueInternal";
3548  break;
3550  event_name = "MessageQueuePutMessage";
3551  break;
3552  case WAIT_EVENT_MQ_RECEIVE:
3553  event_name = "MessageQueueReceive";
3554  break;
3555  case WAIT_EVENT_MQ_SEND:
3556  event_name = "MessageQueueSend";
3557  break;
3559  event_name = "ParallelFinish";
3560  break;
3562  event_name = "ParallelBitmapScan";
3563  break;
3565  event_name = "ProcArrayGroupUpdate";
3566  break;
3568  event_name = "SafeSnapshot";
3569  break;
3570  case WAIT_EVENT_SYNC_REP:
3571  event_name = "SyncRep";
3572  break;
3574  event_name = "LogicalSyncData";
3575  break;
3577  event_name = "LogicalSyncStateChange";
3578  break;
3579  /* no default case, so that compiler will warn */
3580  }
3581 
3582  return event_name;
3583 }
3584 
3585 /* ----------
3586  * pgstat_get_wait_timeout() -
3587  *
3588  * Convert WaitEventTimeout to string.
      *
      * Returns a string literal; "unknown wait event" is the result when w
      * matches none of the cases in the switch.
3589  * ----------
3590  */
3591 static const char *
3593 {
      /* fallback used when no case below overrides it */
3594  const char *event_name = "unknown wait event";
3595 
3596  switch (w)
3597  {
3599  event_name = "BaseBackupThrottle";
3600  break;
3601  case WAIT_EVENT_PG_SLEEP:
3602  event_name = "PgSleep";
3603  break;
3605  event_name = "RecoveryApplyDelay";
3606  break;
3607  /* no default case, so that compiler will warn */
3608  }
3609 
3610  return event_name;
3611 }
3612 
3613 /* ----------
3614  * pgstat_get_wait_io() -
3615  *
3616  * Convert WaitEventIO to string.
3617  * ----------
3618  */
3619 static const char *
3621 {
      /*
       * Fallback text: event_name is returned unchanged when none of the
       * cases below assigns a more specific name for w.
       */
3622  const char *event_name = "unknown wait event";
3623 
3624  switch (w)
3625  {
3627  event_name = "BufFileRead";
3628  break;
3630  event_name = "BufFileWrite";
3631  break;
3633  event_name = "ControlFileRead";
3634  break;
3636  event_name = "ControlFileSync";
3637  break;
3639  event_name = "ControlFileSyncUpdate";
3640  break;
3642  event_name = "ControlFileWrite";
3643  break;
3645  event_name = "ControlFileWriteUpdate";
3646  break;
3648  event_name = "CopyFileRead";
3649  break;
3651  event_name = "CopyFileWrite";
3652  break;
3654  event_name = "DataFileExtend";
3655  break;
3657  event_name = "DataFileFlush";
3658  break;
3660  event_name = "DataFileImmediateSync";
3661  break;
3663  event_name = "DataFilePrefetch";
3664  break;
3666  event_name = "DataFileRead";
3667  break;
3669  event_name = "DataFileSync";
3670  break;
3672  event_name = "DataFileTruncate";
3673  break;
3675  event_name = "DataFileWrite";
3676  break;
3678  event_name = "DSMFillZeroWrite";
3679  break;
3681  event_name = "LockFileAddToDataDirRead";
3682  break;
3684  event_name = "LockFileAddToDataDirSync";
3685  break;
3687  event_name = "LockFileAddToDataDirWrite";
3688  break;
3690  event_name = "LockFileCreateRead";
3691  break;
3693  event_name = "LockFileCreateSync";
3694  break;
      /*
       * NOTE(review): "WRITE" is spelled in all capitals here, while every
       * other name in this switch uses mixed case ("...Write").  Looks like
       * a typo for "LockFileCreateWrite" -- confirm, since this string is
       * what the function hands back to its callers.
       */
3696  event_name = "LockFileCreateWRITE";
3697  break;
3699  event_name = "LockFileReCheckDataDirRead";
3700  break;
3702  event_name = "LogicalRewriteCheckpointSync";
3703  break;
3705  event_name = "LogicalRewriteMappingSync";
3706  break;
3708  event_name = "LogicalRewriteMappingWrite";
3709  break;
3711  event_name = "LogicalRewriteSync";
3712  break;
3714  event_name = "LogicalRewriteTruncate";
3715  break;
3717  event_name = "LogicalRewriteWrite";
3718  break;
3720  event_name = "RelationMapRead";
3721  break;
3723  event_name = "RelationMapSync";
3724  break;
3726  event_name = "RelationMapWrite";
3727  break;
3729  event_name = "ReorderBufferRead";
3730  break;
3732  event_name = "ReorderBufferWrite";
3733  break;
3735  event_name = "ReorderLogicalMappingRead";
3736  break;
3738  event_name = "ReplicationSlotRead";
3739  break;
3741  event_name = "ReplicationSlotRestoreSync";
3742  break;
3744  event_name = "ReplicationSlotSync";
3745  break;
3747  event_name = "ReplicationSlotWrite";
3748  break;
3750  event_name = "SLRUFlushSync";
3751  break;
3752  case WAIT_EVENT_SLRU_READ:
3753  event_name = "SLRURead";
3754  break;
3755  case WAIT_EVENT_SLRU_SYNC:
3756  event_name = "SLRUSync";
3757  break;
3758  case WAIT_EVENT_SLRU_WRITE:
3759  event_name = "SLRUWrite";
3760  break;
3762  event_name = "SnapbuildRead";
3763  break;
3765  event_name = "SnapbuildSync";
3766  break;
3768  event_name = "SnapbuildWrite";
3769  break;
3771  event_name = "TimelineHistoryFileSync";
3772  break;
3774  event_name = "TimelineHistoryFileWrite";
3775  break;
3777  event_name = "TimelineHistoryRead";
3778  break;
3780  event_name = "TimelineHistorySync";
3781  break;
3783  event_name = "TimelineHistoryWrite";
3784  break;
3786  event_name = "TwophaseFileRead";
3787  break;
3789  event_name = "TwophaseFileSync";
3790  break;
3792  event_name = "TwophaseFileWrite";
3793  break;
3795  event_name = "WALSenderTimelineHistoryRead";
3796  break;
3798  event_name = "WALBootstrapSync";
3799  break;
3801  event_name = "WALBootstrapWrite";
3802  break;
3804  event_name = "WALCopyRead";
3805  break;
3807  event_name = "WALCopySync";
3808  break;
3810  event_name = "WALCopyWrite";
3811  break;
3813  event_name = "WALInitSync";
3814  break;
3816  event_name = "WALInitWrite";
3817  break;
3818  case WAIT_EVENT_WAL_READ:
3819  event_name = "WALRead";
3820  break;
3822  event_name = "WALSyncMethodAssign";
3823  break;
3824  case WAIT_EVENT_WAL_WRITE:
3825  event_name = "WALWrite";
3826  break;
3827 
3828  /* no default case, so that compiler will warn */
3829  }
3830 
      /* Always a pointer to a string literal, never NULL. */
3831  return event_name;
3832 }
3833 
3834 
3835 /* ----------
3836  * pgstat_get_backend_current_activity() -
3837  *
3838  * Return a string representing the current activity of the backend with
3839  * the specified PID. This looks directly at the BackendStatusArray,
3840  * and so will provide current information regardless of the age of our
3841  * transaction's snapshot of the status array.
3842  *
3843  * It is the caller's responsibility to invoke this only for backends whose
3844  * state is expected to remain stable while the result is in use. The
3845  * only current use is in deadlock reporting, where we can expect that
3846  * the target backend is blocked on a lock. (There are corner cases
3847  * where the target's wait could get aborted while we are looking at it,
3848  * but the very worst consequence is to return a pointer to a string
3849  * that's been changed, so we won't worry too much.)
3850  *
3851  * Note: return strings for special cases match pg_stat_get_backend_activity.
3852  * ----------
3853  */
3854 const char *
3855 pgstat_get_backend_current_activity(int pid, bool checkUser)
3856 {
      /*
       * pid:       process ID of the backend to look up.
       * checkUser: when true, a caller that is not a superuser only gets the
       *            activity of entries whose st_userid equals GetUserId().
       *
       * Every return path yields either a string literal or the entry's
       * st_activity pointer; nothing is copied.
       */
3857  PgBackendStatus *beentry;
3858  int i;
3859 
      /*
       * i merely counts MaxBackends slots; beentry is the cursor that is
       * actually dereferenced and is advanced at the bottom of the loop.
       * NOTE(review): unlike pgstat_get_crashed_backend_activity, there is
       * no NULL check on BackendStatusArray here -- presumably callers run
       * only once shared memory is set up; confirm.
       */
3860  beentry = BackendStatusArray;
3861  for (i = 1; i <= MaxBackends; i++)
3862  {
3863  /*
3864  * Although we expect the target backend's entry to be stable, that
3865  * doesn't imply that anyone else's is. To avoid identifying the
3866  * wrong backend, while we check for a match to the desired PID we
3867  * must follow the protocol of retrying if st_changecount changes
3868  * while we examine the entry, or if it's odd. (This might be
3869  * unnecessary, since fetching or storing an int is almost certainly
3870  * atomic, but let's play it safe.) We use a volatile pointer here to
3871  * ensure the compiler doesn't try to get cute.
3872  */
3873  volatile PgBackendStatus *vbeentry = beentry;
3874  bool found;
3875 
3876  for (;;)
3877  {
3878  int before_changecount;
3879  int after_changecount;
3880 
3881  pgstat_save_changecount_before(vbeentry, before_changecount);
3882 
3883  found = (vbeentry->st_procpid == pid);
3884 
3885  pgstat_save_changecount_after(vbeentry, after_changecount);
3886 
      /* Accept the read only if the counter was unchanged and even. */
3887  if (before_changecount == after_changecount &&
3888  (before_changecount & 1) == 0)
3889  break;
3890 
3891  /* Make sure we can break out of loop if stuck... */
3893  }
3894 
3895  if (found)
3896  {
3897  /* Now it is safe to use the non-volatile pointer */
      /*
       * Three outcomes, tested in this order: privilege refusal, empty
       * activity string, or the activity string itself.
       */
3898  if (checkUser && !superuser() && beentry->st_userid != GetUserId())
3899  return "<insufficient privilege>";
3900  else if (*(beentry->st_activity) == '\0')
3901  return "<command string not enabled>";
3902  else
3903  return beentry->st_activity;
3904  }
3905 
3906  beentry++;
3907  }
3908 
3909  /* If we get here, caller is in error ... */
3910  return "<backend information not available>";
3911 }
3912 
3913 /* ----------
3914  * pgstat_get_crashed_backend_activity() -
3915  *
3916  * Return a string representing the current activity of the backend with
3917  * the specified PID. Like the function above, but reads shared memory with
3918  * the expectation that it may be corrupt. On success, copy the string
3919  * into the "buffer" argument and return that pointer. On failure,
3920  * return NULL.
3921  *
3922  * This function is only intended to be used by the postmaster to report the
3923  * query that crashed a backend. In particular, no attempt is made to
3924  * follow the correct concurrency protocol when accessing the
3925  * BackendStatusArray. But that's OK, in the worst case we'll return a
3926  * corrupted message. We also must take care not to trip on ereport(ERROR).
3927  * ----------
3928  */
3929 const char *
3930 pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
3931 {
      /*
       * pid:    process ID of the backend to look up.
       * buffer: caller-supplied storage that receives the sanitized string.
       * buflen: size of buffer in bytes.
       *
       * Returns buffer on success.  Returns NULL when shared memory is not
       * set up, the PID is not found, the activity pointer lies outside the
       * accepted range, or the activity string is empty.
       */
3932  volatile PgBackendStatus *beentry;
3933  int i;
3934 
3935  beentry = BackendStatusArray;
3936 
3937  /*
3938  * We probably shouldn't get here before shared memory has been set up,
3939  * but be safe.
3940  */
3941  if (beentry == NULL || BackendActivityBuffer == NULL)
3942  return NULL;
3943 
3944  for (i = 1; i <= MaxBackends; i++)
3945  {
3946  if (beentry->st_procpid == pid)
3947  {
3948  /* Read pointer just once, so it can't change after validation */
3949  const char *activity = beentry->st_activity;
3950  const char *activity_last;
3951 
3952  /*
3953  * We mustn't access activity string before we verify that it
3954  * falls within the BackendActivityBuffer. To make sure that the
3955  * entire string including its ending is contained within the
3956  * buffer, subtract one activity length from the buffer size.
3957  */
3958  activity_last = BackendActivityBuffer + BackendActivityBufferSize
3960 
      /* Reject a pointer that falls outside [buffer start, activity_last]. */
3961  if (activity < BackendActivityBuffer ||
3962  activity > activity_last)
3963  return NULL;
3964 
3965  /* If no string available, no point in a report */
3966  if (activity[0] == '\0')
3967  return NULL;
3968 
3969  /*
3970  * Copy only ASCII-safe characters so we don't run into encoding
3971  * problems when reporting the message; and be sure not to run off
3972  * the end of memory.
3973  */
      /* The copy limit is the smaller of buflen and the per-entry size. */
3974  ascii_safe_strlcpy(buffer, activity,
3975  Min(buflen, pgstat_track_activity_query_size));
3976 
3977  return buffer;
3978  }
3979 
3980  beentry++;
3981  }
3982 
3983  /* PID not found */
3984  return NULL;
3985 }
3986 
3987 const char *
3989 {
      /*
       * Translate the backendType code into a short human-readable
       * description.  The initial value below is what the caller gets when
       * backendType matches none of the cases; the result is always a
       * string literal, never NULL.
       */
3990  const char *backendDesc = "unknown process type";
3991 
3992  switch (backendType)
3993  {
3994  case B_AUTOVAC_LAUNCHER:
3995  backendDesc = "autovacuum launcher";
3996  break;
3997  case B_AUTOVAC_WORKER:
3998  backendDesc = "autovacuum worker";
3999  break;
4000  case B_BACKEND:
4001  backendDesc = "client backend";
4002  break;
4003  case B_BG_WORKER:
4004  backendDesc = "background worker";
4005  break;
4006  case B_BG_WRITER:
4007  backendDesc = "background writer";
4008  break;
4009  case B_CHECKPOINTER:
4010  backendDesc = "checkpointer";
4011  break;
4012  case B_STARTUP:
4013  backendDesc = "startup";
4014  break;
4015  case B_WAL_RECEIVER:
4016  backendDesc = "walreceiver";
4017  break;
4018  case B_WAL_SENDER:
4019  backendDesc = "walsender";
4020  break;
4021  case B_WAL_WRITER:
4022  backendDesc = "walwriter";
4023  break;
      /* No default case: unmatched values keep the fallback text above. */
4024  }
4025 
4026  return backendDesc;
4027 }
4028 /* ------------------------------------------------------------
4029  * Local support functions follow
4030  * ------------------------------------------------------------
4031  */
4032 
4033 
4034 /* ----------
4035  * pgstat_setheader() -
4036  *
4037  * Set common header fields in a statistics message
4038  * ----------
4039  */
4040 static void
4042 {
      /*
       * Only the message type is filled in here; the size field of the
       * header is stamped later by pgstat_send().
       */
4043  hdr->m_type = mtype;
4044 }
4045 
4046 
4047 /* ----------
4048  * pgstat_send() -
4049  *
4050  * Send out one statistics message to the collector
4051  * ----------
4052  */
4053 static void
4054 pgstat_send(void *msg, int len)
4055 {
      /*
       * msg: message buffer; it is accessed through a PgStat_MsgHdr pointer,
       *      so it must begin with that header.
       * len: total size of the message in bytes.
       *
       * Nothing is returned: a failed send is not reported to the caller.
       */
4056  int rc;
4057 
4059  return;
4060 
      /*
       * Record the length in the header.  The collector's receive loop
       * skips any message whose m_size differs from the byte count it got.
       */
4061  ((PgStat_MsgHdr *) msg)->m_size = len;
4062 
4063  /* We'll retry after EINTR, but ignore all other failures */
4064  do
4065  {
4066  rc = send(pgStatSock, msg, len, 0);
4067  } while (rc < 0 && errno == EINTR);
4068 
4069 #ifdef USE_ASSERT_CHECKING
4070  /* In debug builds, log send failures ... */
4071  if (rc < 0)
4072  elog(LOG, "could not send to statistics collector: %m");
4073 #endif
4074 }
4075 
4076 /* ----------
4077  * pgstat_send_archiver() -
4078  *
4079  * Tell the collector about the WAL file that we successfully
4080  * archived or failed to archive.
4081  * ----------
4082  */
4083 void
4084 pgstat_send_archiver(const char *xlog, bool failed)
4085 {
      /*
       * xlog:   name of the WAL file being reported.
       * failed: true when the archive attempt failed, false on success.
       */
4086  PgStat_MsgArchiver msg;
4087 
4088  /*
4089  * Prepare and send the message
4090  */
4092  msg.m_failed = failed;
      /* The copy is bounded by the size of the destination field. */
4093  StrNCpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
4095  pgstat_send(&msg, sizeof(msg));
4096 }
4097 
4098 /* ----------
4099  * pgstat_send_bgwriter() -
4100  *
4101  * Send bgwriter statistics to the collector
4102  * ----------
4103  */
4104 void
4106 {
4107  /* We assume this initializes to zeroes */
4108  static const PgStat_MsgBgWriter all_zeroes;
4109 
4110  /*
4111  * This function can be called even if nothing at all has happened. In
4112  * this case, avoid sending a completely empty message to the stats
4113  * collector.
4114  */
      /*
       * The comparison covers the whole struct, header included.  That works
       * because the header is only filled in just before sending and the
       * buffer is cleared again right afterwards (see the end of this
       * function).  NOTE(review): this presumes BgWriterStats also starts
       * out zeroed -- confirm at its definition.
       */
4115  if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
4116  return;
4117 
4118  /*
4119  * Prepare and send the message
4120  */
4121  pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
4122  pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
4123 
4124  /*
4125  * Clear out the statistics buffer, so it can be re-used.
4126  */
4127  MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
4128 }
4129 
4130 
4131 /* ----------
4132  * PgstatCollectorMain() -
4133  *
4134  * Start up the statistics collector process. This is the body of the
4135  * postmaster child process.
4136  *
4137  * The argc/argv parameters are valid only in EXEC_BACKEND case.
4138  * ----------
4139  */
4140 NON_EXEC_STATIC void
4141 PgstatCollectorMain(int argc, char *argv[])
4142 {
      /*
       * len: byte count returned by recv() for the current message.
       * msg: receive buffer, reinterpreted per message type in the switch.
       * wr:  wait result, tested for WL_POSTMASTER_DEATH after each sleep.
       */
4143  int len;
4144  PgStat_Msg msg;
4145  int wr;
4146 
4147  /*
4148  * Ignore all signals usually bound to some action in the postmaster,
4149  * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
4150  * support latch operations, because we only use a local latch.
4151  */
4153  pqsignal(SIGINT, SIG_IGN);
4154  pqsignal(SIGTERM, SIG_IGN);
4166 
4167  /*
4168  * Identify myself via ps
4169  */
4170  init_ps_display("stats collector process", "", "", "");
4171 
4172  /*
4173  * Read in existing stats files or initialize the stats to zero.
4174  */
4175  pgStatRunningInCollector = true;
      /* Arguments: no single-DB filter, permanent files, deep read. */
4176  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
4177 
4178  /*
4179  * Loop to process messages until we get SIGQUIT or detect ungraceful
4180  * death of our parent postmaster.
4181  *
4182  * For performance reasons, we don't want to do ResetLatch/WaitLatch after
4183  * every message; instead, do that only after a recv() fails to obtain a
4184  * message. (This effectively means that if backends are sending us stuff
4185  * like mad, we won't notice postmaster death until things slack off a
4186  * bit; which seems fine.) To do that, we have an inner loop that
4187  * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
4188  * the inner loop, which means that such interrupts will get serviced but
4189  * the latch won't get cleared until next time there is a break in the
4190  * action.
4191  */
4192  for (;;)
4193  {
4194  /* Clear any already-pending wakeups */
4196 
4197  /*
4198  * Quit if we get SIGQUIT from the postmaster.
4199  */
4200  if (need_exit)
4201  break;
4202 
4203  /*
4204  * Inner loop iterates as long as we keep getting messages, or until
4205  * need_exit becomes set.
4206  */
4207  while (!need_exit)
4208  {
4209  /*
4210  * Reload configuration if we got SIGHUP from the postmaster.
4211  */
4212  if (got_SIGHUP)
4213  {
4214  got_SIGHUP = false;
4216  }
4217 
4218  /*
4219  * Write the stats file(s) if a new request has arrived that is
4220  * not satisfied by existing file(s).
4221  */
      /* Arguments: temporary files, only the requested databases. */
4223  pgstat_write_statsfiles(false, false);
4224 
4225  /*
4226  * Try to receive and process a message. This will not block,
4227  * since the socket is set to non-blocking mode.
4228  *
4229  * XXX On Windows, we have to force pgwin32_recv to cooperate,
4230  * despite the previous use of pg_set_noblock() on the socket.
4231  * This is extremely broken and should be fixed someday.
4232  */
4233 #ifdef WIN32
4234  pgwin32_noblock = 1;
4235 #endif
4236 
4237  len = recv(pgStatSock, (char *) &msg,
4238  sizeof(PgStat_Msg), 0);
4239 
4240 #ifdef WIN32
4241  pgwin32_noblock = 0;
4242 #endif
4243 
4244  if (len < 0)
4245  {
      /* "Nothing to read right now" and interruption are not errors. */
4246  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
4247  break; /* out of inner loop */
4248  ereport(ERROR,
4250  errmsg("could not read statistics message: %m")));
4251  }
4252 
4253  /*
4254  * We ignore messages that are smaller than our common header
4255  */
      /*
       * len is compared against an unsigned sizeof here; the negative case
       * was dealt with above (presumably ereport(ERROR) does not return --
       * confirm), so the comparison sees only non-negative values.
       */
4256  if (len < sizeof(PgStat_MsgHdr))
4257  continue;
4258 
4259  /*
4260  * The received length must match the length in the header
4261  */
4262  if (msg.msg_hdr.m_size != len)
4263  continue;
4264 
4265  /*
4266  * O.K. - we accept this message. Process it.
4267  */
4268  switch (msg.msg_hdr.m_type)
4269  {
4270  case PGSTAT_MTYPE_DUMMY:
4271  break;
4272 
4273  case PGSTAT_MTYPE_INQUIRY:
4274  pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
4275  break;
4276 
4277  case PGSTAT_MTYPE_TABSTAT:
4278  pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
4279  break;
4280 
4281  case PGSTAT_MTYPE_TABPURGE:
4283  break;
4284 
4285  case PGSTAT_MTYPE_DROPDB:
4286  pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len);
4287  break;
4288 
4291  len);
4292  break;
4293 
4297  len);
4298  break;
4299 
4303  len);
4304  break;
4305 
4308  break;
4309 
4310  case PGSTAT_MTYPE_VACUUM:
4311  pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len);
4312  break;
4313 
4314  case PGSTAT_MTYPE_ANALYZE:
4315  pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len);
4316  break;
4317 
4318  case PGSTAT_MTYPE_ARCHIVER:
4320  break;
4321 
4322  case PGSTAT_MTYPE_BGWRITER:
4324  break;
4325 
4326  case PGSTAT_MTYPE_FUNCSTAT:
4328  break;
4329 
4332  break;
4333 
4336  break;
4337 
4338  case PGSTAT_MTYPE_DEADLOCK:
4340  break;
4341 
4342  case PGSTAT_MTYPE_TEMPFILE:
4344  break;
4345 
      /* Message types not listed above are dropped without any report. */
4346  default:
4347  break;
4348  }
4349  } /* end of inner message-processing loop */
4350 
4351  /* Sleep until there's something to do */
4352 #ifndef WIN32
4355  pgStatSock, -1L,
4357 #else
4358 
4359  /*
4360  * Windows, at least in its Windows Server 2003 R2 incarnation,
4361  * sometimes loses FD_READ events. Waking up and retrying the recv()
4362  * fixes that, so don't sleep indefinitely. This is a crock of the
4363  * first water, but until somebody wants to debug exactly what's
4364  * happening there, this is the best we can do. The two-second
4365  * timeout matches our pre-9.2 behavior, and needs to be short enough
4366  * to not provoke "using stale statistics" complaints from
4367  * backend_read_statsfile.
4368  */
4371  pgStatSock,
4372  2 * 1000L /* msec */,
4374 #endif
4375 
4376  /*
4377  * Emergency bailout if postmaster has died. This is to avoid the
4378  * necessity for manual cleanup of all postmaster children.
4379  */
4380  if (wr & WL_POSTMASTER_DEATH)
4381  break;
4382  } /* end of outer loop */
4383 
4384  /*
4385  * Save the final stats to reuse at next startup.
4386  */
      /* Arguments: permanent files, all databases. */
4387  pgstat_write_statsfiles(true, true);
4388 
4389  exit(0);
4390 }
4391 
4392 
4393 /* SIGQUIT signal handler for collector process */
4394 static void
4396 {
      /* Preserve errno so the interrupted code does not see it change. */
4397  int save_errno = errno;
4398 
      /* Flag polled by the collector's main loops; the latch wakes them. */
4399  need_exit = true;
4400  SetLatch(MyLatch);
4401 
4402  errno = save_errno;
4403 }
4404 
4405 /* SIGHUP handler for collector process */
4406 static void
4408 {
      /* Preserve errno so the interrupted code does not see it change. */
4409  int save_errno = errno;
4410 
      /* Flag is consumed (and reset) inside the collector's inner loop. */
4411  got_SIGHUP = true;
4412  SetLatch(MyLatch);
4413 
4414  errno = save_errno;
4415 }
4416 
4417 /*
4418  * Subroutine to clear stats in a database entry
4419  *
4420  * Tables and functions hashes are initialized to empty.
4421  */
4422 static void
4424 {
4425  HASHCTL hash_ctl;
4426 
      /* Zero every per-database counter. */
4427  dbentry->n_xact_commit = 0;
4428  dbentry->n_xact_rollback = 0;
4429  dbentry->n_blocks_fetched = 0;
4430  dbentry->n_blocks_hit = 0;
4431  dbentry->n_tuples_returned = 0;
4432  dbentry->n_tuples_fetched = 0;
4433  dbentry->n_tuples_inserted = 0;
4434  dbentry->n_tuples_updated = 0;
4435  dbentry->n_tuples_deleted = 0;
4436  dbentry->last_autovac_time = 0;
4437  dbentry->n_conflict_tablespace = 0;
4438  dbentry->n_conflict_lock = 0;
4439  dbentry->n_conflict_snapshot = 0;
4440  dbentry->n_conflict_bufferpin = 0;
4441  dbentry->n_conflict_startup_deadlock = 0;
4442  dbentry->n_temp_files = 0;
4443  dbentry->n_temp_bytes = 0;
4444  dbentry->n_deadlocks = 0;
4445  dbentry->n_block_read_time = 0;
4446  dbentry->n_block_write_time = 0;
4447 
4449  dbentry->stats_timestamp = 0;
4450 
      /*
       * Build fresh, empty hash tables keyed by Oid.  The tables and
       * functions pointers are simply overwritten.
       * NOTE(review): no hash_destroy of a previous table is visible in
       * this function -- presumably callers dispose of old tables before
       * calling; confirm.
       */
4451  memset(&hash_ctl, 0, sizeof(hash_ctl));
4452  hash_ctl.keysize = sizeof(Oid);
4453  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4454  dbentry->tables = hash_create("Per-database table",
4456  &hash_ctl,
4457  HASH_ELEM | HASH_BLOBS);
4458 
      /* Same control struct reused; only the entry size really changes. */
4459  hash_ctl.keysize = sizeof(Oid);
4460  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4461  dbentry->functions = hash_create("Per-database function",
4463  &hash_ctl,
4464  HASH_ELEM | HASH_BLOBS);
4465 }
4466 
4467 /*
4468  * Lookup the hash table entry for the specified database. If no hash
4469  * table entry exists, initialize it, if the create parameter is true.
4470  * Else, return NULL.
4471  */
4472 static PgStat_StatDBEntry *
4473 pgstat_get_db_entry(Oid databaseid, bool create)
4474 {
4476  bool found;
      /* HASH_ENTER inserts on a miss; HASH_FIND only looks up. */
4477  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4478 
4479  /* Lookup or create the hash table entry for this database */
4480  result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
4481  &databaseid,
4482  action, &found);
4483 
      /* Pure lookup that missed: report absence to the caller. */
4484  if (!create && !found)
4485  return NULL;
4486 
4487  /*
4488  * If not found, initialize the new one. This creates empty hash tables
4489  * for tables and functions, too.
4490  */
      /* Reaching here with !found implies create was true. */
4491  if (!found)
4492  reset_dbentry_counters(result);
4493 
4494  return result;
4495 }
4496 
4497 
4498 /*
4499  * Lookup the hash table entry for the specified table. If no hash
4500  * table entry exists, initialize it, if the create parameter is true.
4501  * Else, return NULL.
4502  */
4503 static PgStat_StatTabEntry *
4504 pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
4505 {
4507  bool found;
      /* HASH_ENTER inserts on a miss; HASH_FIND only looks up. */
4508  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4509 
4510  /* Lookup or create the hash table entry for this table */
4511  result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
4512  &tableoid,
4513  action, &found);
4514 
      /* Pure lookup that missed: report absence to the caller. */
4515  if (!create && !found)
4516  return NULL;
4517 
4518  /* If not found, initialize the new one. */
      /*
       * Reaching here with !found implies create was true.  Each counter
       * and timestamp of the new entry is set to zero individually.
       */
4519  if (!found)
4520  {
4521  result->numscans = 0;
4522  result->tuples_returned = 0;
4523  result->tuples_fetched = 0;
4524  result->tuples_inserted = 0;
4525  result->tuples_updated = 0;
4526  result->tuples_deleted = 0;
4527  result->tuples_hot_updated = 0;
4528  result->n_live_tuples = 0;
4529  result->n_dead_tuples = 0;
4530  result->changes_since_analyze = 0;
4531  result->blocks_fetched = 0;
4532  result->blocks_hit = 0;
4533  result->vacuum_timestamp = 0;
4534  result->vacuum_count = 0;
4535  result->autovac_vacuum_timestamp = 0;
4536  result->autovac_vacuum_count = 0;
4537  result->analyze_timestamp = 0;
4538  result->analyze_count = 0;
4539  result->autovac_analyze_timestamp = 0;
4540  result->autovac_analyze_count = 0;
4541  }
4542 
4543  return result;
4544 }
4545 
4546 
4547 /* ----------
4548  * pgstat_write_statsfiles() -
4549  * Write the global statistics file, as well as requested DB files.
4550  *
4551  * 'permanent' specifies writing to the permanent files not temporary ones.
4552  * When true (happens only when the collector is shutting down), also remove
4553  * the temporary files so that backends starting up under a new postmaster
4554  * can't read old data before the new collector is ready.
4555  *
4556  * When 'allDbs' is false, only the requested databases (listed in
4557  * pending_write_requests) will be written; otherwise, all databases
4558  * will be written.
4559  * ----------
4560  */
4561 static void
4562 pgstat_write_statsfiles(bool permanent, bool allDbs)
4563 {
4564  HASH_SEQ_STATUS hstat;
4565  PgStat_StatDBEntry *dbentry;
4566  FILE *fpout;
4567  int32 format_id;
      /*
       * Data is written to tmpfile first and renamed to statfile only after
       * a clean write and close; 'permanent' selects which pair is used.
       */
4568  const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
4569  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4570  int rc;
4571 
4572  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4573 
4574  /*
4575  * Open the statistics temp file to write out the current values.
4576  */
4577  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4578  if (fpout == NULL)
4579  {
4580  ereport(LOG,
4582  errmsg("could not open temporary statistics file \"%s\": %m",
4583  tmpfile)));
      /* Early exit: the pending request list is left untouched here. */
4584  return;
4585  }
4586 
4587  /*
4588  * Set the timestamp of the stats file.
4589  */
4590  globalStats.stats_timestamp = GetCurrentTimestamp();
4591 
4592  /*
4593  * Write the file header --- currently just a format ID.
4594  */
4595  format_id = PGSTAT_FILE_FORMAT_ID;
4596  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4597  (void) rc; /* we'll check for error with ferror */
4598 
4599  /*
4600  * Write global stats struct
4601  */
4602  rc = fwrite(&globalStats, sizeof(globalStats), 1, fpout);
4603  (void) rc; /* we'll check for error with ferror */
4604 
4605  /*
4606  * Write archiver stats struct
4607  */
4608  rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout);
4609  (void) rc; /* we'll check for error with ferror */
4610 
4611  /*
4612  * Walk through the database table.
4613  */
4614  hash_seq_init(&hstat, pgStatDBHash);
4615  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
4616  {
4617  /*
4618  * Write out the table and function stats for this DB into the
4619  * appropriate per-DB stat file, if required.
4620  */
4621  if (allDbs || pgstat_db_requested(dbentry->databaseid))
4622  {
4623  /* Make DB's timestamp consistent with the global stats */
4624  dbentry->stats_timestamp = globalStats.stats_timestamp;
4625 
4626  pgstat_write_db_statsfile(dbentry, permanent);
4627  }
4628 
4629  /*
4630  * Write out the DB entry. We don't write the tables or functions
4631  * pointers, since they're of no use to any other process.
4632  */
      /*
       * Record layout: a 'D' tag byte, then the leading part of the struct
       * up to (not including) the tables member.
       */
4633  fputc('D', fpout);
4634  rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
4635  (void) rc; /* we'll check for error with ferror */
4636  }
4637 
4638  /*
4639  * No more output to be done. Close the temp file and replace the old
4640  * pgstat.stat with it. The ferror() check replaces testing for error
4641  * after each individual fputc or fwrite above.
4642  */
      /* 'E' is the end-of-file marker. */
4643  fputc('E', fpout);
4644 
      /*
       * Three failure points (write, close, rename).  Each one logs and
       * removes the temp file; statfile is replaced only if all succeed.
       */
4645  if (ferror(fpout))
4646  {
4647  ereport(LOG,
4649  errmsg("could not write temporary statistics file \"%s\": %m",
4650  tmpfile)));
4651  FreeFile(fpout);
4652  unlink(tmpfile);
4653  }
4654  else if (FreeFile(fpout) < 0)
4655  {
4656  ereport(LOG,
4658  errmsg("could not close temporary statistics file \"%s\": %m",
4659  tmpfile)));
4660  unlink(tmpfile);
4661  }
4662  else if (rename(tmpfile, statfile) < 0)
4663  {
4664  ereport(LOG,
4666  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4667  tmpfile, statfile)));
4668  unlink(tmpfile);
4669  }
4670 
4671  if (permanent)
4673 
4674  /*
4675  * Now throw away the list of requests. Note that requests sent after we
4676  * started the write are still waiting on the network socket.
4677  */
      /* This runs whether or not the write above succeeded. */
4678  list_free(pending_write_requests);
4679  pending_write_requests = NIL;
4680 }
4681 
4682 /*
4683  * return the filename for a DB stat file; filename is the output buffer,
4684  * of length len.
4685  */
4686 static void
4687 get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
4688  char *filename, int len)
4689 {
4690  int printed;
4691 
4692  /* NB -- pgstat_reset_remove_files knows about the pattern this uses */
4693  printed = snprintf(filename, len, "%s/db_%u.%s",
4694  permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY :
4696  databaseid,
4697  tempname ? "tmp" : "stat");
4698  if (printed > len)
4699  elog(ERROR, "overlength pgstat path");
4700 }
4701 
4702 /* ----------
4703  * pgstat_write_db_statsfile() -
4704  * Write the stat file for a single database.
4705  *
4706  * If writing to the permanent file (happens when the collector is
4707  * shutting down only), remove the temporary file so that backends
4708  * starting up under a new postmaster can't read the old data before
4709  * the new collector is ready.
4710  * ----------
4711  */
4712 static void
4714 {
4715  HASH_SEQ_STATUS tstat;
      /*
       * NOTE(review): this local shares its name with the POSIX fstat()
       * function; harmless as long as this function never needs to call it.
       */
4716  HASH_SEQ_STATUS fstat;
4717  PgStat_StatTabEntry *tabentry;
4718  PgStat_StatFuncEntry *funcentry;
4719  FILE *fpout;
4720  int32 format_id;
4721  Oid dbid = dbentry->databaseid;
4722  int rc;
4723  char tmpfile[MAXPGPATH];
4724  char statfile[MAXPGPATH];
4725 
      /* Same directory for both names; they differ only in the suffix. */
4726  get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
4727  get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
4728 
4729  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4730 
4731  /*
4732  * Open the statistics temp file to write out the current values.
4733  */
4734  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4735  if (fpout == NULL)
4736  {
4737  ereport(LOG,
4739  errmsg("could not open temporary statistics file \"%s\": %m",
4740  tmpfile)));
4741  return;
4742  }
4743 
4744  /*
4745  * Write the file header --- currently just a format ID.
4746  */
4747  format_id = PGSTAT_FILE_FORMAT_ID;
4748  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4749  (void) rc; /* we'll check for error with ferror */
4750 
4751  /*
4752  * Walk through the database's access stats per table.
4753  */
4754  hash_seq_init(&tstat, dbentry->tables);
4755  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
4756  {
      /* Record layout: a 'T' tag byte followed by the whole entry. */
4757  fputc('T', fpout);
4758  rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
4759  (void) rc; /* we'll check for error with ferror */
4760  }
4761 
4762  /*
4763  * Walk through the database's function stats table.
4764  */
4765  hash_seq_init(&fstat, dbentry->functions);
4766  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
4767  {
      /* Record layout: an 'F' tag byte followed by the whole entry. */
4768  fputc('F', fpout);
4769  rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
4770  (void) rc; /* we'll check for error with ferror */
4771  }
4772 
4773  /*
4774  * No more output to be done. Close the temp file and replace the old
4775  * pgstat.stat with it. The ferror() check replaces testing for error
4776  * after each individual fputc or fwrite above.
4777  */
      /* 'E' is the end-of-file marker. */
4778  fputc('E', fpout);
4779 
      /*
       * Three failure points (write, close, rename).  Each one logs and
       * removes the temp file; statfile is replaced only if all succeed.
       */
4780  if (ferror(fpout))
4781  {
4782  ereport(LOG,
4784  errmsg("could not write temporary statistics file \"%s\": %m",
4785  tmpfile)));
4786  FreeFile(fpout);
4787  unlink(tmpfile);
4788  }
4789  else if (FreeFile(fpout) < 0)
4790  {
4791  ereport(LOG,
4793  errmsg("could not close temporary statistics file \"%s\": %m",
4794  tmpfile)));
4795  unlink(tmpfile);
4796  }
4797  else if (rename(tmpfile, statfile) < 0)
4798  {
4799  ereport(LOG,
4801  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4802  tmpfile, statfile)));
4803  unlink(tmpfile);
4804  }
4805 
4806  if (permanent)
4807  {
      /*
       * Rebuild the name with permanent = false to address the temporary
       * directory's copy of this database's stat file, then remove it.
       * The statfile buffer is reused for that name.
       */
4808  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
4809 
4810  elog(DEBUG2, "removing temporary stats file \"%s\"", statfile);
4811  unlink(statfile);
4812  }
4813 }
4814 
4815 /* ----------
4816  * pgstat_read_statsfiles() -
4817  *
4818  * Reads in some existing statistics collector files and returns the
4819  * databases hash table that is the top level of the data.
4820  *
4821  * If 'onlydb' is not InvalidOid, it means we only want data for that DB
4822  * plus the shared catalogs ("DB 0"). We'll still populate the DB hash
4823  * table for all databases, but we don't bother even creating table/function
4824  * hash tables for other databases.
4825  *
4826  * 'permanent' specifies reading from the permanent files not temporary ones.
4827  * When true (happens only when the collector is starting up), remove the
4828  * files after reading; the in-memory status is now authoritative, and the
4829  * files would be out of date in case somebody else reads them.
4830  *
4831  * If a 'deep' read is requested, table/function stats are read, otherwise
4832  * the table/function hash tables remain empty.
4833  * ----------
4834  */
/*
 * Review notes for pgstat_read_statsfiles (visible behavior only):
 *
 * - Always returns the freshly created "Databases hash" table. It is empty
 *   when the stats file cannot be opened, and may be only partially filled
 *   when the file turns out to be corrupted part-way through.
 * - globalStats and archiverStats are zeroed, given a fresh
 *   stat_reset_timestamp, and then overwritten from the file if it is there.
 * - File layout as consumed here: int32 format id, globalStats,
 *   archiverStats, then records tagged 'D' (one database entry each) until
 *   the 'E' end marker.
 * - With 'permanent', the file is unlinked at 'done'; the early return taken
 *   when the file cannot be opened bypasses that.
 *
 * NOTE(review): this listing carries its own line numbers and skips some of
 * them inside this function (4850, 4860, 4888-4889, 4901, 4911, 4921, 4942,
 * 4957, 4983, 4985, 4991, 4993, 5000, 5011). Source lines were presumably
 * dropped at those points, which is why several statements below look
 * truncated -- restore them from the real file before compiling.
 */
4835 static HTAB *
4836 pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
4837 {
4838  PgStat_StatDBEntry *dbentry;
4839  PgStat_StatDBEntry dbbuf;
4840  HASHCTL hash_ctl;
4841  HTAB *dbhash;
4842  FILE *fpin;
4843  int32 format_id;
4844  bool found;
4845  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4846 
4847  /*
4848  * The tables will live in pgStatLocalContext.
 * NOTE(review): listing line 4850, the statement this comment introduces,
 * is missing here.
4849  */
4851 
4852  /*
4853  * Create the DB hashtable
 * NOTE(review): the final argument line of this hash_create call (listing
 * line 4860) is missing.
4854  */
4855  memset(&hash_ctl, 0, sizeof(hash_ctl));
4856  hash_ctl.keysize = sizeof(Oid);
4857  hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
4858  hash_ctl.hcxt = pgStatLocalContext;
4859  dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
4861 
4862  /*
4863  * Clear out global and archiver statistics so they start from zero in
4864  * case we can't load an existing statsfile.
4865  */
4866  memset(&globalStats, 0, sizeof(globalStats));
4867  memset(&archiverStats, 0, sizeof(archiverStats));
4868 
4869  /*
4870  * Set the current timestamp (will be kept only in case we can't load an
4871  * existing statsfile).
4872  */
4873  globalStats.stat_reset_timestamp = GetCurrentTimestamp();
4874  archiverStats.stat_reset_timestamp = globalStats.stat_reset_timestamp;
4875 
4876  /*
4877  * Try to open the stats file. If it doesn't exist, the backends simply
4878  * return zero for anything and the collector simply starts from scratch
4879  * with empty counters.
4880  *
4881  * ENOENT is a possibility if the stats collector is not running or has
4882  * not yet written the stats file the first time. Any other failure
4883  * condition is suspicious.
 *
 * NOTE(review): the opening of the error report (listing lines 4888-4889)
 * is missing below; only its errmsg argument survives.
4884  */
4885  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4886  {
4887  if (errno != ENOENT)
4890  errmsg("could not open statistics file \"%s\": %m",
4891  statfile)));
4892  return dbhash; /* empty table; the cleanup at 'done' is skipped */
4893  }
4894 
4895  /*
4896  * Verify it's of the expected format.
 * A short read and a mismatching id are treated alike: as corruption.
4897  */
4898  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4899  format_id != PGSTAT_FILE_FORMAT_ID)
4900  {
4902  (errmsg("corrupted statistics file \"%s\"", statfile)));
4903  goto done;
4904  }
4905 
4906  /*
4907  * Read global stats struct
 *
 * NOTE(review): the read goes straight into globalStats, so a short read
 * may leave it partly overwritten; nothing on the 'done' path re-zeroes it.
4908  */
4909  if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
4910  {
4912  (errmsg("corrupted statistics file \"%s\"", statfile)));
4913  goto done;
4914  }
4915 
4916  /*
4917  * Read archiver stats struct
 * (same direct-into-the-global read as for globalStats above)
4918  */
4919  if (fread(&archiverStats, 1, sizeof(archiverStats), fpin) != sizeof(archiverStats))
4920  {
4922  (errmsg("corrupted statistics file \"%s\"", statfile)));
4923  goto done;
4924  }
4925 
4926  /*
4927  * We found an existing collector stats file. Read it and put all the
4928  * hashtable entries into place.
 *
 * Each iteration consumes one tag byte. End of file (fgetc returning EOF)
 * lands in the default case and is reported as corruption.
4929  */
4930  for (;;)
4931  {
4932  switch (fgetc(fpin))
4933  {
4934  /*
4935  * 'D' A PgStat_StatDBEntry struct describing a database
4936  * follows.
 * Only the leading part of the struct, up to the 'tables'
 * member, is stored in the file.
4937  */
4938  case 'D':
4939  if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
4940  fpin) != offsetof(PgStat_StatDBEntry, tables))
4941  {
4943  (errmsg("corrupted statistics file \"%s\"",
4944  statfile)));
4945  goto done;
4946  }
4947 
4948  /*
4949  * Add to the DB hash
 * An entry that already exists means the same database id
 * appeared twice in the file, which is treated as corruption.
4950  */
4951  dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
4952  (void *) &dbbuf.databaseid,
4953  HASH_ENTER,
4954  &found);
4955  if (found)
4956  {
4958  (errmsg("corrupted statistics file \"%s\"",
4959  statfile)));
4960  goto done;
4961  }
4962 
 /*
 * The whole struct is copied although only the part before 'tables'
 * came from the file; the two hash pointers are reset right below.
 */
4963  memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
4964  dbentry->tables = NULL;
4965  dbentry->functions = NULL;
4966 
4967  /*
4968  * Don't create tables/functions hashtables for uninteresting
4969  * databases.
4970  */
4971  if (onlydb != InvalidOid)
4972  {
4973  if (dbbuf.databaseid != onlydb &&
4974  dbbuf.databaseid != InvalidOid)
4975  break; /* leaves the switch only; the for loop moves on to the next record */
4976  }
4977 
 /*
 * NOTE(review): both hash_create calls below have lost argument lines
 * (listing lines 4983, 4985, 4991, 4993). The second call reuses
 * hash_ctl without clearing it again and re-assigns the same fields.
 */
4978  memset(&hash_ctl, 0, sizeof(hash_ctl));
4979  hash_ctl.keysize = sizeof(Oid);
4980  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4981  hash_ctl.hcxt = pgStatLocalContext;
4982  dbentry->tables = hash_create("Per-database table",
4984  &hash_ctl,
4986 
4987  hash_ctl.keysize = sizeof(Oid);
4988  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4989  hash_ctl.hcxt = pgStatLocalContext;
4990  dbentry->functions = hash_create("Per-database function",
4992  &hash_ctl,
4994 
4995  /*
4996  * If requested, read the data from the database-specific
4997  * file. Otherwise we just leave the hashtables empty.
 *
 * NOTE(review): the first line of the call (listing line 5000)
 * is missing; only its trailing arguments remain.
4998  */
4999  if (deep)
5001  dbentry->tables,
5002  dbentry->functions,
5003  permanent);
5004 
5005  break;
5006 
 /* 'E' is the end-of-file marker: stop reading. */
5007  case 'E':
5008  goto done;
5009 
 /* Any other tag byte is reported as corruption. */
5010  default:
5012  (errmsg("corrupted statistics file \"%s\"",
5013  statfile)));
5014  goto done;
5015  }
5016  }
5017 
 /* Common exit for every path that managed to open the file. */
5018 done:
5019  FreeFile(fpin);
5020 
5021  /* If requested to read the permanent file, also get rid of it. */
5022  if (permanent)
5023  {
5024  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
5025  unlink(statfile); /* result is not checked */
5026  }
5027 
5028  return dbhash;
5029 }
5030 
5031 
5032 /* ----------
5033  * pgstat_read_db_statsfile() -
5034  *
5035  * Reads in the existing statistics collector file for the given database,
5036  * filling the passed-in tables and functions hash tables.
5037  *
5038  * As in pgstat_read_statsfiles, if the permanent file is requested, it is
5039  * removed after reading.
5040  *
5041  * Note: this code has the ability to skip storing per-table or per-function
5042  * data, if NULL is passed for the corresponding hashtable. That's not used
5043  * at the moment though.
 *
 * Parameters: 'databaseid' and 'permanent' select the file name via
 * get_dbstat_filename(); 'tabhash' and 'funchash' receive the 'T' and 'F'
 * records respectively. Nothing is returned: failing to open the file, or
 * hitting a corrupted record, simply ends the read and keeps whatever was
 * loaded up to that point.
 *
 * NOTE(review): the listing's own numbering skips 5073-5074, 5086, 5106,
 * 5124, 5140, 5158 and 5174 inside this function. Source lines were
 * presumably dropped there, which is why the error reports look truncated.
5044  * ----------
5045  */
5046 static void
5047 pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
5048  bool permanent)
5049 {
5050  PgStat_StatTabEntry *tabentry;
5051  PgStat_StatTabEntry tabbuf;
5052  PgStat_StatFuncEntry funcbuf;
5053  PgStat_StatFuncEntry *funcentry;
5054  FILE *fpin;
5055  int32 format_id;
5056  bool found;
5057  char statfile[MAXPGPATH];
5058 
5059  get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
5060 
5061  /*
5062  * Try to open the stats file. If it doesn't exist, the backends simply
5063  * return zero for anything and the collector simply starts from scratch
5064  * with empty counters.
5065  *
5066  * ENOENT is a possibility if the stats collector is not running or has
5067  * not yet written the stats file the first time. Any other failure
5068  * condition is suspicious.
5069  */
5070  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
5071  {
5072  if (errno != ENOENT)
5075  errmsg("could not open statistics file \"%s\": %m",
5076  statfile)));
5077  return; /* nothing was opened, so the cleanup at 'done' is skipped */
5078  }
5079 
5080  /*
5081  * Verify it's of the expected format.
5082  */
5083  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
5084  format_id != PGSTAT_FILE_FORMAT_ID)
5085  {
5087  (errmsg("corrupted statistics file \"%s\"", statfile)));
5088  goto done;
5089  }
5090 
5091  /*
5092  * We found an existing collector stats file. Read it and put all the
5093  * hashtable entries into place.
 *
 * Each iteration consumes one tag byte; end of file (fgetc returning EOF)
 * falls into the default case and is reported as corruption.
5094  */
5095  for (;;)
5096  {
5097  switch (fgetc(fpin))
5098  {
5099  /*
5100  * 'T' A PgStat_StatTabEntry follows.
5101  */
5102  case 'T':
5103  if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
5104  fpin) != sizeof(PgStat_StatTabEntry))
5105  {
5107  (errmsg("corrupted statistics file \"%s\"",
5108  statfile)));
5109  goto done;
5110  }
5111 
5112  /*
5113  * Skip if table data not wanted.
 * The record has already been consumed above, so the file
 * position is right for the next tag.
5114  */
5115  if (tabhash == NULL)
5116  break;
5117 
5118  tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
5119  (void *) &tabbuf.tableid,
5120  HASH_ENTER, &found);
5121 
 /* A table id seen twice in one file is treated as corruption. */
5122  if (found)
5123  {
5125  (errmsg("corrupted statistics file \"%s\"",
5126  statfile)));
5127  goto done;
5128  }
5129 
5130  memcpy(tabentry, &tabbuf, sizeof(tabbuf));
5131  break;
5132 
5133  /*
5134  * 'F' A PgStat_StatFuncEntry follows.
5135  */
5136  case 'F':
5137  if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
5138  fpin) != sizeof(PgStat_StatFuncEntry))
5139  {
5141  (errmsg("corrupted statistics file \"%s\"",
5142  statfile)));
5143  goto done;
5144  }
5145 
5146  /*
5147  * Skip if function data not wanted.
 * As with tables, the record was read before this check.
5148  */
5149  if (funchash == NULL)
5150  break;
5151 
5152  funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
5153  (void *) &funcbuf.functionid,
5154  HASH_ENTER, &found);
5155 
 /* A function id seen twice in one file is treated as corruption. */
5156  if (found)
5157  {
5159  (errmsg("corrupted statistics file \"%s\"",
5160  statfile)));
5161  goto done;
5162  }
5163 
5164  memcpy(funcentry, &funcbuf, sizeof(funcbuf));
5165  break;
5166 
5167  /*
5168  * 'E' The EOF marker of a complete stats file.
5169  */
5170  case 'E':
5171  goto done;
5172 
 /* Any other tag byte is reported as corruption. */
5173  default:
5175  (errmsg("corrupted statistics file \"%s\"",
5176  statfile)));
5177  goto done;
5178  }
5179  }
5180 
 /* Common exit for every path that managed to open the file. */
5181 done:
5182  FreeFile(fpin);
5183 
 /* The permanent file is removed whether or not it parsed cleanly. */
5184  if (permanent)
5185  {
5186  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
5187  unlink(statfile); /* result is not checked */
5188  }
5189 }
5190 
5191 /* ----------
5192  * pgstat_read_db_statsfile_timestamp() -
5193  *
5194  * Attempt to determine the timestamp of the last db statfile write.
5195  * Returns TRUE if successful; the timestamp is stored in *ts.
5196  *
5197  * This needs to be careful about handling databases for which no stats file
5198  * exists, such as databases without a stat entry or those not yet written:
5199  *
5200  * - if there's a database entry in the global file, return the corresponding
5201  * stats_timestamp value.
5202  *
5203  * - if there's no db stat entry (e.g. for a new or inactive database),
5204  * there's no stats_timestamp value, but also nothing to write so we return
5205  * the timestamp of the global statfile.
 *
 * Exact return behavior of the code below: false (with *ts untouched) when
 * the file cannot be opened or when the format id, global stats or archiver
 * stats cannot be read. Once those have been read the function returns true
 * on every path, including a short 'D' record or an unknown tag byte; in
 * those cases *ts still holds the global file's timestamp.
 *
 * Only the global stats file is opened here. The statistics are read into
 * local variables, so the globals globalStats/archiverStats are not touched,
 * and 'permanent' merely selects which global file name is used.
 *
 * NOTE(review): the listing's own numbering skips 5226-5227, 5239, 5251,
 * 5263, 5288 and 5310 inside this function. Source lines were presumably
 * dropped there, which is why the error reports look truncated.
5206  * ----------
5207  */
5208 static bool
5209 pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent,
5210  TimestampTz *ts)
5211 {
5212  PgStat_StatDBEntry dbentry;
5213  PgStat_GlobalStats myGlobalStats;
5214  PgStat_ArchiverStats myArchiverStats;
5215  FILE *fpin;
5216  int32 format_id;
5217  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
5218 
5219  /*
5220  * Try to open the stats file. As above, anything but ENOENT is worthy of
5221  * complaining about.
5222  */
5223  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
5224  {
5225  if (errno != ENOENT)
5228  errmsg("could not open statistics file \"%s\": %m",
5229  statfile)));
5230  return false;
5231  }
5232 
5233  /*
5234  * Verify it's of the expected format.
5235  */
5236  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
5237  format_id != PGSTAT_FILE_FORMAT_ID)
5238  {
5240  (errmsg("corrupted statistics file \"%s\"", statfile)));
5241  FreeFile(fpin);
5242  return false;
5243  }
5244 
5245  /*
5246  * Read global stats struct
5247  */
5248  if (fread(&myGlobalStats, 1, sizeof(myGlobalStats),
5249  fpin) != sizeof(myGlobalStats))
5250  {
5252  (errmsg("corrupted statistics file \"%s\"", statfile)));
5253  FreeFile(fpin);
5254  return false;
5255  }
5256 
5257  /*
5258  * Read archiver stats struct
 * The contents are not used; the read only advances past the struct.
5259  */
5260  if (fread(&myArchiverStats, 1, sizeof(myArchiverStats),
5261  fpin) != sizeof(myArchiverStats))
5262  {
5264  (errmsg("corrupted statistics file \"%s\"", statfile)));
5265  FreeFile(fpin);
5266  return false;
5267  }
5268 
5269  /* By default, we're going to return the timestamp of the global file. */
5270  *ts = myGlobalStats.stats_timestamp;
5271 
5272  /*
5273  * We found an existing collector stats file. Read it and look for a
5274  * record for the requested database. If found, use its timestamp.
5275  */
5276  for (;;)
5277  {
5278  switch (fgetc(fpin))
5279  {
5280  /*
5281  * 'D' A PgStat_StatDBEntry struct describing a database
5282  * follows.
5283  */
5284  case 'D':
5285  if (fread(&dbentry, 1, offsetof(PgStat_StatDBEntry, tables),
5286  fpin) != offsetof(PgStat_StatDBEntry, tables))
5287  {
5289  (errmsg("corrupted statistics file \"%s\"",
5290  statfile)));
5291  goto done; /* still returns true, with the global timestamp in *ts */
5292  }
5293 
5294  /*
5295  * If this is the DB we're looking for, save its timestamp and
5296  * we're done.
5297  */
5298  if (dbentry.databaseid == databaseid)
5299  {
5300  *ts = dbentry.stats_timestamp;
5301  goto done;
5302  }
5303 
5304  break;
5305 
 /* End marker reached without a match: keep the global timestamp. */
5306  case 'E':
5307  goto done;
5308 
 /* Unknown tag or end of file: reported, but the result is still true. */
5309  default:
5311  (errmsg("corrupted statistics file \"%s\"",
5312  statfile)));
5313  goto done;
5314  }
5315  }
5316 
5317 done:
5318  FreeFile(fpin);
5319  return true;
5320 }
5321 
5322 /*
5323  * If not already done, read the statistics collector stats file into
5324  * some hash tables. The results will be kept until pgstat_clear_snapshot()
5325  * is called (typically, at end of transaction).
 *
 * Outline of the visible body: return at once if pgStatDBHash is already
 * set; otherwise poll the stats file's timestamp up to
 * PGSTAT_POLL_LOOP_COUNT times, sleeping PGSTAT_RETRY_DELAY msec between
 * attempts and sending an inquiry every PGSTAT_INQ_LOOP_COUNT attempts;
 * log a message if no fresh-enough file showed up; then load whatever file
 * is there into pgStatDBHash.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5328) is missing. A later comment in this file refers to
 * backend_read_statsfile, which is presumably this function -- confirm.
 * The listing also skips 5338, 5346, 5362, 5387, 5389, 5392 and 5448, so
 * two if-conditions and two macro argument lines are missing below.
5326  */
5327 static void
5329 {
5330  TimestampTz min_ts = 0;
5331  TimestampTz ref_ts = 0;
5332  Oid inquiry_db;
5333  int count;
5334 
5335  /* already read it? */
5336  if (pgStatDBHash)
5337  return;
5339 
5340  /*
5341  * In a normal backend, we check staleness of the data for our own DB, and
5342  * so we send MyDatabaseId in inquiry messages. In the autovac launcher,
5343  * check staleness of the shared-catalog data, and send InvalidOid in
5344  * inquiry messages so as not to force writing unnecessary data.
 *
 * NOTE(review): the 'if' that pairs with the 'else' below (listing line
 * 5346) is missing.
5345  */
5347  inquiry_db = InvalidOid;
5348  else
5349  inquiry_db = MyDatabaseId;
5350 
5351  /*
5352  * Loop until fresh enough stats file is available or we ran out of time.
5353  * The stats inquiry message is sent repeatedly in case collector drops
5354  * it; but not every single time, as that just swamps the collector.
5355  */
5356  for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
5357  {
5358  bool ok;
5359  TimestampTz file_ts = 0;
5360  TimestampTz cur_ts;
5361 
5363 
 /* ok is false when the file could not be opened or its header is bad. */
5364  ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);
5365 
5366  cur_ts = GetCurrentTimestamp();
5367  /* Calculate min acceptable timestamp, if we didn't already */
5368  if (count == 0 || cur_ts < ref_ts)
5369  {
5370  /*
5371  * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
5372  * msec before now. This indirectly ensures that the collector
5373  * needn't write the file more often than PGSTAT_STAT_INTERVAL. In
5374  * an autovacuum worker, however, we want a lower delay to avoid
5375  * using stale data, so we use PGSTAT_RETRY_DELAY (since the
5376  * number of workers is low, this shouldn't be a problem).
5377  *
5378  * We don't recompute min_ts after sleeping, except in the
5379  * unlikely case that cur_ts went backwards. So we might end up
5380  * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In
5381  * practice that shouldn't happen, though, as long as the sleep
5382  * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
5383  * tell the collector that our cutoff time is less than what we'd
5384  * actually accept.
 *
 * NOTE(review): the 'if' condition (listing line 5387) and the
 * second argument of each TimestampTzPlusMilliseconds call
 * (listing lines 5389 and 5392) are missing below.
5385  */
5386  ref_ts = cur_ts;
5388  min_ts = TimestampTzPlusMilliseconds(ref_ts,
5390  else
5391  min_ts = TimestampTzPlusMilliseconds(ref_ts,
5393  }
5394 
5395  /*
5396  * If the file timestamp is actually newer than cur_ts, we must have
5397  * had a clock glitch (system time went backwards) or there is clock
5398  * skew between our processor and the stats collector's processor.
5399  * Accept the file, but send an inquiry message anyway to make
5400  * pgstat_recv_inquiry do a sanity check on the collector's time.
5401  */
5402  if (ok && file_ts > cur_ts)
5403  {
5404  /*
5405  * A small amount of clock skew between processors isn't terribly
5406  * surprising, but a large difference is worth logging. We
5407  * arbitrarily define "large" as 1000 msec.
5408  */
5409  if (file_ts >= TimestampTzPlusMilliseconds(cur_ts, 1000))
5410  {
5411  char *filetime;
5412  char *mytime;
5413 
5414  /* Copy because timestamptz_to_str returns a static buffer */
5415  filetime = pstrdup(timestamptz_to_str(file_ts));
5416  mytime = pstrdup(timestamptz_to_str(cur_ts));
5417  elog(LOG, "stats collector's time %s is later than backend local time %s",
5418  filetime, mytime);
5419  pfree(filetime);
5420  pfree(mytime);
5421  }
5422 
5423  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5424  break; /* count stays below the limit, so no "stale" message follows */
5425  }
5426 
5427  /* Normal acceptance case: file is not older than cutoff time */
5428  if (ok && file_ts >= min_ts)
5429  break;
5430 
5431  /* Not there or too old, so kick the collector and wait a bit */
5432  if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
5433  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5434 
5435  pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
5436  }
5437 
 /* Only true when the loop ran to completion without a break. */
5438  if (count >= PGSTAT_POLL_LOOP_COUNT)
5439  ereport(LOG,
5440  (errmsg("using stale statistics instead of current ones "
5441  "because stats collector is not responding")));
5442 
5443  /*
5444  * Autovacuum launcher wants stats about all databases, but a shallow read
5445  * is sufficient. Regular backends want a deep read for just the tables
5446  * they can see (MyDatabaseId + shared catalogs).
 *
 * NOTE(review): the 'if' that pairs with the 'else' below (listing line
 * 5448) is missing.
5447  */
5449  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
5450  else
5451  pgStatDBHash = pgstat_read_statsfiles(MyDatabaseId, false, true);
5452 }
5453 
5454 
5455 /* ----------
5456  * pgstat_setup_memcxt() -
5457  *
5458  * Create pgStatLocalContext, if not already done.
 *
 * The context is named "Statistics snapshot" and is created as a child of
 * TopMemoryContext. Calling this again while pgStatLocalContext is set
 * does nothing.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5462) and the last argument line of AllocSetContextCreate
 * (listing line 5467) are missing from this listing.
5459  * ----------
5460  */
5461 static void
5463 {
5464  if (!pgStatLocalContext)
5465  pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
5466  "Statistics snapshot",
5468 }
5469 
5470 
5471 /* ----------
5472  * pgstat_clear_snapshot() -
5473  *
5474  * Discard any data collected in the current transaction. Any subsequent
5475  * request will cause new snapshots to be read.
5476  *
5477  * This is also invoked during transaction commit or abort to discard
5478  * the no-longer-wanted snapshot.
 *
 * Concretely: pgStatLocalContext is deleted if it exists, and the four
 * snapshot variables are reset. A second call in a row finds the context
 * already NULL and only repeats the resets.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5482) is missing from this listing.
5479  * ----------
5480  */
5481 void
5483 {
5484  /* Release memory, if any was allocated */
5485  if (pgStatLocalContext)
5486  MemoryContextDelete(pgStatLocalContext);
5487 
5488  /* Reset variables */
5489  pgStatLocalContext = NULL;
5490  pgStatDBHash = NULL;
5491  localBackendStatusTable = NULL;
5492  localNumBackends = 0;
5493 }
5494 
5495 
5496 /* ----------
5497  * pgstat_recv_inquiry() -
5498  *
5499  * Process stat inquiry requests.
 *
 * Effect: msg->databaseid is appended to pending_write_requests unless it
 * is already listed there or the request is judged stale. Besides
 * databaseid, the fields msg->clock_time and msg->cutoff_time are read.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5503) is missing, so the declared type of 'msg' cannot be
 * seen here.
5500  * ----------
5501  */
5502 static void
5504 {
5505  PgStat_StatDBEntry *dbentry;
5506 
5507  elog(DEBUG2, "received inquiry for database %u", msg->databaseid);
5508 
5509  /*
5510  * If there's already a write request for this DB, there's nothing to do.
5511  *
5512  * Note that if a request is found, we return early and skip the below
5513  * check for clock skew. This is okay, since the only way for a DB
5514  * request to be present in the list is that we have been here since the
5515  * last write round. It seems sufficient to check for clock skew once per
5516  * write round.
5517  */
5518  if (list_member_oid(pending_write_requests, msg->databaseid))
5519  return;
5520 
5521  /*
5522  * Check to see if we last wrote this database at a time >= the requested
5523  * cutoff time. If so, this is a stale request that was generated before
5524  * we updated the DB file, and we don't need to do so again.
5525  *
5526  * If the requestor's local clock time is older than stats_timestamp, we
5527  * should suspect a clock glitch, ie system time going backwards; though
5528  * the more likely explanation is just delayed message receipt. It is
5529  * worth expending a GetCurrentTimestamp call to be sure, since a large
5530  * retreat in the system clock reading could otherwise cause us to neglect
5531  * to update the stats file for a long time.
5532  */
 /* Second argument false: NULL comes back when there is no entry (checked next). */
5533  dbentry = pgstat_get_db_entry(msg->databaseid, false);
5534  if (dbentry == NULL)
5535  {
5536  /*
5537  * We have no data for this DB. Enter a write request anyway so that
5538  * the global stats will get updated. This is needed to prevent
5539  * backend_read_statsfile from waiting for data that we cannot supply,
5540  * in the case of a new DB that nobody has yet reported any stats for.
5541  * See the behavior of pgstat_read_db_statsfile_timestamp.
 *
 * (This branch has no statements: control falls through to the
 * lappend_oid call at the bottom.)
5542  */
5543  }
5544  else if (msg->clock_time < dbentry->stats_timestamp)
5545  {
5546  TimestampTz cur_ts = GetCurrentTimestamp();
5547 
5548  if (cur_ts < dbentry->stats_timestamp)
5549  {
5550  /*
5551  * Sure enough, time went backwards. Force a new stats file write
5552  * to get back in sync; but first, log a complaint.
5553  */
5554  char *writetime;
5555  char *mytime;
5556 
5557  /* Copy because timestamptz_to_str returns a static buffer */
5558  writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
5559  mytime = pstrdup(timestamptz_to_str(cur_ts));
5560  elog(LOG,
5561  "stats_timestamp %s is later than collector's time %s for database %u",
5562  writetime, mytime, dbentry->databaseid);
5563  pfree(writetime);
5564  pfree(mytime);
5565  }
5566  else
5567  {
5568  /*
5569  * Nope, it's just an old request. Assuming msg's clock_time is
5570  * >= its cutoff_time, it must be stale, so we can ignore it.
5571  */
5572  return;
5573  }
5574  }
5575  else if (msg->cutoff_time <= dbentry->stats_timestamp)
5576  {
5577  /* Stale request, ignore it */
5578  return;
5579  }
5580 
5581  /*
5582  * We need to write this DB, so create a request.
5583  */
5584  pending_write_requests = lappend_oid(pending_write_requests,
5585  msg->databaseid);
5586 }
5587 
5588 
5589 /* ----------
5590  * pgstat_recv_tabstat() -
5591  *
5592  * Count what the backend has done.
 *
 * Adds the transaction and block-timing counters carried by 'msg' to the
 * database entry for msg->m_databaseid, then merges each of the
 * msg->m_nentries per-table entries into that database's table hash and
 * into the database-wide tuple/block totals.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5596) is missing, so the declared type of 'msg' cannot be
 * seen here.
5593  * ----------
5594  */
5595 static void
5597 {
5598  PgStat_StatDBEntry *dbentry;
5599  PgStat_StatTabEntry *tabentry;
5600  int i;
5601  bool found;
5602 
 /* Result is used without a NULL check; presumably 'true' means create-if-missing -- confirm. */
5603  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5604 
5605  /*
5606  * Update database-wide stats.
5607  */
5608  dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
5609  dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
5610  dbentry->n_block_read_time += msg->m_block_read_time;
5611  dbentry->n_block_write_time += msg->m_block_write_time;
5612 
5613  /*
5614  * Process all table entries in the message.
5615  */
5616  for (i = 0; i < msg->m_nentries; i++)
5617  {
5618  PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);
5619 
5620  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
5621  (void *) &(tabmsg->t_id),
5622  HASH_ENTER, &found);
5623 
5624  if (!found)
5625  {
5626  /*
5627  * If it's a new table entry, initialize counters to the values we
5628  * just got.
 * The vacuum/analyze timestamps and counts start at zero.
5629  */
5630  tabentry->numscans = tabmsg->t_counts.t_numscans;
5631  tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
5632  tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
5633  tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
5634  tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
5635  tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
5636  tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
5637  tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
5638  tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
5639  tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
5640  tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
5641  tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
5642 
5643  tabentry->vacuum_timestamp = 0;
5644  tabentry->vacuum_count = 0;
5645  tabentry->autovac_vacuum_timestamp = 0;
5646  tabentry->autovac_vacuum_count = 0;
5647  tabentry->analyze_timestamp = 0;
5648  tabentry->analyze_count = 0;
5649  tabentry->autovac_analyze_timestamp = 0;
5650  tabentry->autovac_analyze_count = 0;
5651  }
5652  else
5653  {
5654  /*
5655  * Otherwise add the values to the existing entry.
5656  */
5657  tabentry->numscans += tabmsg->t_counts.t_numscans;
5658  tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
5659  tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5660  tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5661  tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
5662  tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5663  tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
5664  /* If table was truncated, first reset the live/dead counters */
5665  if (tabmsg->t_counts.t_truncated)
5666  {
5667  tabentry->n_live_tuples = 0;
5668  tabentry->n_dead_tuples = 0;
5669  }
5670  tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
5671  tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
5672  tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
5673  tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5674  tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
5675  }
5676 
5677  /* Clamp n_live_tuples in case of negative delta_live_tuples */
5678  tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
5679  /* Likewise for n_dead_tuples */
5680  tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
5681 
5682  /*
5683  * Add per-table stats to the per-database entry, too.
 * This happens for new and existing table entries alike.
5684  */
5685  dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
5686  dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5687  dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5688  dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
5689  dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5690  dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5691  dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
5692  }
5693 }
5694 
5695 
5696 /* ----------
5697  * pgstat_recv_tabpurge() -
5698  *
5699  * Arrange for dead table removal.
 *
 * Removes each of the msg->m_nentries table ids in msg->m_tableid from the
 * table hash of database msg->m_databaseid. Does nothing when that
 * database has no entry or its entry has no table hash.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5703) is missing, so the declared type of 'msg' cannot be
 * seen here.
5700  * ----------
5701  */
5702 static void
5704 {
5705  PgStat_StatDBEntry *dbentry;
5706  int i;
5707 
 /* Second argument false: the result may be NULL (checked just below). */
5708  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5709 
5710  /*
5711  * No need to purge if we don't even know the database.
5712  */
5713  if (!dbentry || !dbentry->tables)
5714  return;
5715 
5716  /*
5717  * Process all table entries in the message.
5718  */
5719  for (i = 0; i < msg->m_nentries; i++)
5720  {
5721  /* Remove from hashtable if present; we don't care if it's not. */
5722  (void) hash_search(dbentry->tables,
5723  (void *) &(msg->m_tableid[i]),
5724  HASH_REMOVE, NULL);
5725  }
5726 }
5727 
5728 
5729 /* ----------
5730  * pgstat_recv_dropdb() -
5731  *
5732  * Arrange for dead database removal
 *
 * If an entry exists for msg->m_databaseid: unlink that database's stats
 * file, destroy its table and function hashes (when present), and remove
 * the entry from pgStatDBHash. Raises ERROR if that final removal finds
 * nothing to remove. Does nothing when no entry exists.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5736) is missing, so the declared type of 'msg' cannot be
 * seen here.
5733  * ----------
5734  */
5735 static void
5737 {
5738  Oid dbid = msg->m_databaseid;
5739  PgStat_StatDBEntry *dbentry;
5740 
5741  /*
5742  * Lookup the database in the hashtable.
5743  */
5744  dbentry = pgstat_get_db_entry(dbid, false);
5745 
5746  /*
5747  * If found, remove it (along with the db statfile).
5748  */
5749  if (dbentry)
5750  {
5751  char statfile[MAXPGPATH];
5752 
5753  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
5754 
5755  elog(DEBUG2, "removing stats file \"%s\"", statfile);
5756  unlink(statfile); /* result is not checked */
5757 
 /* Free the per-database hashes before dropping the entry that points at them. */
5758  if (dbentry->tables != NULL)
5759  hash_destroy(dbentry->tables);
5760  if (dbentry->functions != NULL)
5761  hash_destroy(dbentry->functions);
5762 
 /* The entry was found a moment ago, so a failed removal is reported as an error. */
5763  if (hash_search(pgStatDBHash,
5764  (void *) &dbid,
5765  HASH_REMOVE, NULL) == NULL)
5766  ereport(ERROR,
5767  (errmsg("database hash table corrupted during cleanup --- abort")));
5768  }
5769 }
5770 
5771 
5772 /* ----------
5773  * pgstat_recv_resetcounter() -
5774  *
5775  * Reset the statistics for the specified database.
5776  * ----------
5777  */
5778 static void
5780 {