pgstat.c
1 /* ----------
2  * pgstat.c
3  *
4  * All the statistics collector stuff hacked up in one big, ugly file.
5  *
6  * TODO: - Separate collector, postmaster and backend stuff
7  * into different files.
8  *
9  * - Add some automatic call for pgstat vacuuming.
10  *
11  * - Add a pgstat config column to pg_database, so this
12  * entire thing can be enabled/disabled on a per db basis.
13  *
14  * Copyright (c) 2001-2017, PostgreSQL Global Development Group
15  *
16  * src/backend/postmaster/pgstat.c
17  * ----------
18  */
19 #include "postgres.h"
20 
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/param.h>
24 #include <sys/time.h>
25 #include <sys/socket.h>
26 #include <netdb.h>
27 #include <netinet/in.h>
28 #include <arpa/inet.h>
29 #include <signal.h>
30 #include <time.h>
31 #ifdef HAVE_SYS_SELECT_H
32 #include <sys/select.h>
33 #endif
34 
35 #include "pgstat.h"
36 
37 #include "access/heapam.h"
38 #include "access/htup_details.h"
39 #include "access/transam.h"
40 #include "access/twophase_rmgr.h"
41 #include "access/xact.h"
42 #include "catalog/pg_database.h"
43 #include "catalog/pg_proc.h"
44 #include "common/ip.h"
45 #include "libpq/libpq.h"
46 #include "libpq/pqsignal.h"
47 #include "mb/pg_wchar.h"
48 #include "miscadmin.h"
49 #include "pg_trace.h"
50 #include "postmaster/autovacuum.h"
52 #include "postmaster/postmaster.h"
53 #include "storage/backendid.h"
54 #include "storage/dsm.h"
55 #include "storage/fd.h"
56 #include "storage/ipc.h"
57 #include "storage/latch.h"
58 #include "storage/lmgr.h"
59 #include "storage/pg_shmem.h"
60 #include "storage/procsignal.h"
61 #include "storage/sinvaladt.h"
62 #include "utils/ascii.h"
63 #include "utils/guc.h"
64 #include "utils/memutils.h"
65 #include "utils/ps_status.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 #include "utils/timestamp.h"
69 #include "utils/tqual.h"
70 
71 
72 /* ----------
73  * Timer definitions.
74  * ----------
75  */
76 #define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
77  * updates; in milliseconds. */
78 
79 #define PGSTAT_RETRY_DELAY 10 /* How long to wait between checks for
80  * a new file; in milliseconds. */
81 
82 #define PGSTAT_MAX_WAIT_TIME 10000 /* Maximum time to wait for a stats
83  * file update; in milliseconds. */
84 
85 #define PGSTAT_INQ_INTERVAL 640 /* How often to ping the collector for
86  * a new file; in milliseconds. */
87 
88 #define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a
89  * failed statistics collector; in
90  * seconds. */
91 
92 #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
93 #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
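/*
 * With the default values above, these work out to
 * PGSTAT_POLL_LOOP_COUNT = 10000 / 10 = 1000 and
 * PGSTAT_INQ_LOOP_COUNT = 640 / 10 = 64; that is, a backend waiting for a
 * fresh stats file sleeps PGSTAT_RETRY_DELAY msec up to 1000 times, and is
 * expected to re-send its inquiry message about every 64th iteration.
 */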
94 
95 
96 /* ----------
97  * The initial size hints for the hash tables used in the collector.
98  * ----------
99  */
100 #define PGSTAT_DB_HASH_SIZE 16
101 #define PGSTAT_TAB_HASH_SIZE 512
102 #define PGSTAT_FUNCTION_HASH_SIZE 512
103 
104 
105 /* ----------
106  * GUC parameters
107  * ----------
108  */
110 bool pgstat_track_counts = false;
113 
114 /* ----------
115  * Built from GUC parameter
116  * ----------
117  */
121 
122 /*
123  * BgWriter global statistics counters (unused in other processes).
124  * Stored directly in a stats message structure so it can be sent
125  * without needing to copy things around. We assume this inits to zeroes.
126  */
128 
129 /* ----------
130  * Local data
131  * ----------
132  */
134 
136 
138 
139 static bool pgStatRunningInCollector = false;
140 
141 /*
142  * Structures in which backends store per-table info that's waiting to be
143  * sent to the collector.
144  *
145  * NOTE: once allocated, TabStatusArray structures are never moved or deleted
146  * for the life of the backend. Also, we zero out the t_id fields of the
147  * contained PgStat_TableStatus structs whenever they are not actively in use.
148  * This allows relcache pgstat_info pointers to be treated as long-lived data,
149  * avoiding repeated searches in pgstat_initstats() when a relation is
150  * repeatedly opened during a transaction.
151  */
152 #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
153 
154 typedef struct TabStatusArray
155 {
156  struct TabStatusArray *tsa_next; /* link to next array, if any */
157  int tsa_used; /* # entries currently used */
160 
162 
163 /*
164  * Backends store per-function info that's waiting to be sent to the collector
165  * in this hash table (indexed by function OID).
166  */
168 
169 /*
170  * Indicates if backend has some function stats that it hasn't yet
171  * sent to the collector.
172  */
173 static bool have_function_stats = false;
174 
175 /*
176  * Tuple insertion/deletion counts for an open transaction can't be propagated
177  * into PgStat_TableStatus counters until we know if it is going to commit
178  * or abort. Hence, we keep these counts in per-subxact structs that live
179  * in TopTransactionContext. This data structure is designed on the assumption
180  * that subxacts won't usually modify very many tables.
181  */
182 typedef struct PgStat_SubXactStatus
183 {
184  int nest_level; /* subtransaction nest level */
185  struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
186  PgStat_TableXactStatus *first; /* head of list for this subxact */
188 
190 
191 static int pgStatXactCommit = 0;
192 static int pgStatXactRollback = 0;
195 
196 /* Record that's written to 2PC state file when pgstat state is persisted */
197 typedef struct TwoPhasePgStatRecord
198 {
199  PgStat_Counter tuples_inserted; /* tuples inserted in xact */
200  PgStat_Counter tuples_updated; /* tuples updated in xact */
201  PgStat_Counter tuples_deleted; /* tuples deleted in xact */
202  PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
203  PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
204  PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
205  Oid t_id; /* table's OID */
206  bool t_shared; /* is it a shared catalog? */
207  bool t_truncated; /* was the relation truncated? */
209 
210 /*
211  * Info about current "snapshot" of stats file
212  */
216 static int localNumBackends = 0;
217 
218 /*
219  * Cluster wide statistics, kept in the stats collector.
220  * Contains statistics that are not collected per database
221  * or per table.
222  */
225 
226 /*
227  * List of OIDs of databases we need to write out. If an entry is InvalidOid,
228  * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
229  * will write both that DB's data and the shared stats.
230  */
232 
233 /* Signal handler flags */
234 static volatile bool need_exit = false;
235 static volatile bool got_SIGHUP = false;
236 
237 /*
238  * Total time charged to functions so far in the current backend.
239  * We use this to help separate "self" and "other" time charges.
240  * (We assume this initializes to zero.)
241  */
243 
244 
245 /* ----------
246  * Local function forward declarations
247  * ----------
248  */
249 #ifdef EXEC_BACKEND
250 static pid_t pgstat_forkexec(void);
251 #endif
252 
253 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
254 static void pgstat_exit(SIGNAL_ARGS);
255 static void pgstat_beshutdown_hook(int code, Datum arg);
257 
258 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
260  Oid tableoid, bool create);
261 static void pgstat_write_statsfiles(bool permanent, bool allDbs);
262 static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent);
263 static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
264 static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
265 static void backend_read_statsfile(void);
266 static void pgstat_read_current_status(void);
267 
268 static bool pgstat_write_statsfile_needed(void);
269 static bool pgstat_db_requested(Oid databaseid);
270 
271 static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
272 static void pgstat_send_funcstats(void);
273 static HTAB *pgstat_collect_oids(Oid catalogid);
274 
275 static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
276 
277 static void pgstat_setup_memcxt(void);
278 
279 static const char *pgstat_get_wait_activity(WaitEventActivity w);
280 static const char *pgstat_get_wait_client(WaitEventClient w);
281 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
282 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
283 
284 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
285 static void pgstat_send(void *msg, int len);
286 
287 static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
288 static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
289 static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
290 static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
291 static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
294 static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
295 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
296 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
297 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
298 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
299 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
300 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
302 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
303 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
304 
305 /* ------------------------------------------------------------
306  * Public functions called from postmaster follow
307  * ------------------------------------------------------------
308  */
309 
310 /* ----------
311  * pgstat_init() -
312  *
313  * Called from postmaster at startup. Create the resources required
314  * by the statistics collector process. If unable to do so, do not
315  * fail --- better to let the postmaster start with stats collection
316  * disabled.
317  * ----------
318  */
319 void
321 {
322  ACCEPT_TYPE_ARG3 alen;
323  struct addrinfo *addrs = NULL,
324  *addr,
325  hints;
326  int ret;
327  fd_set rset;
328  struct timeval tv;
329  char test_byte;
330  int sel_res;
331  int tries = 0;
332 
333 #define TESTBYTEVAL ((char) 199)
334 
335  /*
336  * This static assertion verifies that we didn't mess up the calculations
337  * involved in selecting maximum payload sizes for our UDP messages.
338  * Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
339  * be silent performance loss from fragmentation, it seems worth having a
340  * compile-time cross-check that we didn't.
341  */
343  "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");
344 
345  /*
346  * Create the UDP socket for sending and receiving statistic messages
347  */
348  hints.ai_flags = AI_PASSIVE;
349  hints.ai_family = AF_UNSPEC;
350  hints.ai_socktype = SOCK_DGRAM;
351  hints.ai_protocol = 0;
352  hints.ai_addrlen = 0;
353  hints.ai_addr = NULL;
354  hints.ai_canonname = NULL;
355  hints.ai_next = NULL;
356  ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
357  if (ret || !addrs)
358  {
359  ereport(LOG,
360  (errmsg("could not resolve \"localhost\": %s",
361  gai_strerror(ret))));
362  goto startup_failed;
363  }
364 
365  /*
366  * On some platforms, pg_getaddrinfo_all() may return multiple addresses,
367  * only one of which will actually work (e.g., both IPv6 and IPv4 addresses
368  * when the kernel will reject IPv6). Worse, the failure may occur at the
369  * bind() or perhaps even connect() stage. So we must loop through the
370  * results till we find a working combination. We will generate LOG
371  * messages, but no error, for bogus combinations.
372  */
373  for (addr = addrs; addr; addr = addr->ai_next)
374  {
375 #ifdef HAVE_UNIX_SOCKETS
376  /* Ignore AF_UNIX sockets, if any are returned. */
377  if (addr->ai_family == AF_UNIX)
378  continue;
379 #endif
380 
381  if (++tries > 1)
382  ereport(LOG,
383  (errmsg("trying another address for the statistics collector")));
384 
385  /*
386  * Create the socket.
387  */
388  if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) == PGINVALID_SOCKET)
389  {
390  ereport(LOG,
392  errmsg("could not create socket for statistics collector: %m")));
393  continue;
394  }
395 
396  /*
397  * Bind it to a kernel assigned port on localhost and get the assigned
398  * port via getsockname().
399  */
400  if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
401  {
402  ereport(LOG,
404  errmsg("could not bind socket for statistics collector: %m")));
407  continue;
408  }
409 
410  alen = sizeof(pgStatAddr);
411  if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
412  {
413  ereport(LOG,
415  errmsg("could not get address of socket for statistics collector: %m")));
418  continue;
419  }
420 
421  /*
422  * Connect the socket to its own address. This saves a few cycles by
423  * not having to respecify the target address on every send. This also
424  * provides a kernel-level check that only packets from this same
425  * address will be received.
426  */
427  if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
428  {
429  ereport(LOG,
431  errmsg("could not connect socket for statistics collector: %m")));
434  continue;
435  }
436 
437  /*
438  * Try to send and receive a one-byte test message on the socket. This
439  * is to catch situations where the socket can be created but will not
440  * actually pass data (for instance, because kernel packet filtering
441  * rules prevent it).
442  */
443  test_byte = TESTBYTEVAL;
444 
445 retry1:
446  if (send(pgStatSock, &test_byte, 1, 0) != 1)
447  {
448  if (errno == EINTR)
449  goto retry1; /* if interrupted, just retry */
450  ereport(LOG,
452  errmsg("could not send test message on socket for statistics collector: %m")));
455  continue;
456  }
457 
458  /*
459  * There could possibly be a little delay before the message can be
460  * received. We arbitrarily allow up to half a second before deciding
461  * it's broken.
462  */
463  for (;;) /* need a loop to handle EINTR */
464  {
465  FD_ZERO(&rset);
466  FD_SET(pgStatSock, &rset);
467 
468  tv.tv_sec = 0;
469  tv.tv_usec = 500000;
470  sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
471  if (sel_res >= 0 || errno != EINTR)
472  break;
473  }
474  if (sel_res < 0)
475  {
476  ereport(LOG,
478  errmsg("select() failed in statistics collector: %m")));
481  continue;
482  }
483  if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
484  {
485  /*
486  * This is the case we actually think is likely, so take pains to
487  * give a specific message for it.
488  *
489  * errno will not be set meaningfully here, so don't use it.
490  */
491  ereport(LOG,
492  (errcode(ERRCODE_CONNECTION_FAILURE),
493  errmsg("test message did not get through on socket for statistics collector")));
496  continue;
497  }
498 
499  test_byte++; /* just make sure variable is changed */
500 
501 retry2:
502  if (recv(pgStatSock, &test_byte, 1, 0) != 1)
503  {
504  if (errno == EINTR)
505  goto retry2; /* if interrupted, just retry */
506  ereport(LOG,
508  errmsg("could not receive test message on socket for statistics collector: %m")));
511  continue;
512  }
513 
514  if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
515  {
516  ereport(LOG,
517  (errcode(ERRCODE_INTERNAL_ERROR),
518  errmsg("incorrect test message transmission on socket for statistics collector")));
521  continue;
522  }
523 
524  /* If we get here, we have a working socket */
525  break;
526  }
527 
528  /* Did we find a working address? */
529  if (!addr || pgStatSock == PGINVALID_SOCKET)
530  goto startup_failed;
531 
532  /*
533  * Set the socket to non-blocking IO. This ensures that if the collector
534  * falls behind, statistics messages will be discarded; backends won't
535  * block waiting to send messages to the collector.
536  */
538  {
539  ereport(LOG,
541  errmsg("could not set statistics collector socket to nonblocking mode: %m")));
542  goto startup_failed;
543  }
544 
545  pg_freeaddrinfo_all(hints.ai_family, addrs);
546 
547  return;
548 
549 startup_failed:
550  ereport(LOG,
551  (errmsg("disabling statistics collector for lack of working socket")));
552 
553  if (addrs)
554  pg_freeaddrinfo_all(hints.ai_family, addrs);
555 
559 
560  /*
561  * Adjust GUC variables to suppress useless activity, and for debugging
562  * purposes (seeing track_counts off is a clue that we failed here). We
563  * use PGC_S_OVERRIDE because there is no point in trying to turn it back
564  * on from postgresql.conf without a restart.
565  */
566  SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
567 }
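/*
 * Design note implicit in the code above: statistics traffic flows over a
 * loopback UDP socket that is put into non-blocking mode, so when the
 * collector falls behind the kernel simply drops messages. That is the
 * intended trade-off: losing some statistics under load is preferred to
 * ever making a backend block on the collector.
 */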
568 
569 /*
570  * subroutine for pgstat_reset_all
571  */
572 static void
574 {
575  DIR *dir;
576  struct dirent *entry;
577  char fname[MAXPGPATH];
578 
579  dir = AllocateDir(directory);
580  while ((entry = ReadDir(dir, directory)) != NULL)
581  {
582  int nchars;
583  Oid tmp_oid;
584 
585  /*
586  * Skip directory entries that don't match the file names we write.
587  * See get_dbstat_filename for the database-specific pattern.
588  */
589  if (strncmp(entry->d_name, "global.", 7) == 0)
590  nchars = 7;
591  else
592  {
593  nchars = 0;
594  (void) sscanf(entry->d_name, "db_%u.%n",
595  &tmp_oid, &nchars);
596  if (nchars <= 0)
597  continue;
598  /* %u allows leading whitespace, so reject that */
599  if (strchr("0123456789", entry->d_name[3]) == NULL)
600  continue;
601  }
602 
603  if (strcmp(entry->d_name + nchars, "tmp") != 0 &&
604  strcmp(entry->d_name + nchars, "stat") != 0)
605  continue;
606 
607  snprintf(fname, MAXPGPATH, "%s/%s", directory,
608  entry->d_name);
609  unlink(fname);
610  }
611  FreeDir(dir);
612 }
613 
614 /*
615  * pgstat_reset_all() -
616  *
617  * Remove the stats files. This is currently used only if WAL
618  * recovery is needed after a crash.
619  */
620 void
622 {
625 }
626 
627 #ifdef EXEC_BACKEND
628 
629 /*
630  * pgstat_forkexec() -
631  *
632  * Format up the arglist for, then fork and exec, statistics collector process
633  */
634 static pid_t
635 pgstat_forkexec(void)
636 {
637  char *av[10];
638  int ac = 0;
639 
640  av[ac++] = "postgres";
641  av[ac++] = "--forkcol";
642  av[ac++] = NULL; /* filled in by postmaster_forkexec */
643 
644  av[ac] = NULL;
645  Assert(ac < lengthof(av));
646 
647  return postmaster_forkexec(ac, av);
648 }
649 #endif /* EXEC_BACKEND */
650 
651 
652 /*
653  * pgstat_start() -
654  *
655  * Called from postmaster at startup or after an existing collector
656  * died. Attempt to fire up a fresh statistics collector.
657  *
658  * Returns PID of child process, or 0 on failure.
659  *
660  * Note: on failure, we will be called again from the postmaster main loop.
661  */
662 int
664 {
665  time_t curtime;
666  pid_t pgStatPid;
667 
668  /*
669  * Check that the socket is there, else pgstat_init failed and we can do
670  * nothing useful.
671  */
673  return 0;
674 
675  /*
676  * Do nothing if too soon since last collector start. This is a safety
677  * valve to protect against continuous respawn attempts if the collector
678  * is dying immediately at launch. Note that since we will be re-called
679  * from the postmaster main loop, we will get another chance later.
680  */
681  curtime = time(NULL);
682  if ((unsigned int) (curtime - last_pgstat_start_time) <
683  (unsigned int) PGSTAT_RESTART_INTERVAL)
684  return 0;
685  last_pgstat_start_time = curtime;
686 
687  /*
688  * Okay, fork off the collector.
689  */
690 #ifdef EXEC_BACKEND
691  switch ((pgStatPid = pgstat_forkexec()))
692 #else
693  switch ((pgStatPid = fork_process()))
694 #endif
695  {
696  case -1:
697  ereport(LOG,
698  (errmsg("could not fork statistics collector: %m")));
699  return 0;
700 
701 #ifndef EXEC_BACKEND
702  case 0:
703  /* in postmaster child ... */
705 
706  /* Close the postmaster's sockets */
707  ClosePostmasterPorts(false);
708 
709  /* Drop our connection to postmaster's shared memory, as well */
710  dsm_detach_all();
712 
714  break;
715 #endif
716 
717  default:
718  return (int) pgStatPid;
719  }
720 
721  /* shouldn't get here */
722  return 0;
723 }
724 
725 void
727 {
729 }
730 
731 /* ------------------------------------------------------------
732  * Public functions used by backends follow
733  *------------------------------------------------------------
734  */
735 
736 
737 /* ----------
738  * pgstat_report_stat() -
739  *
740  * Called from tcop/postgres.c to send the so far collected per-table
741  * and function usage statistics to the collector. Note that this is
742  * called only when not within a transaction, so it is fair to use
743  * transaction stop time as an approximation of current time.
744  * ----------
745  */
746 void
748 {
749  /* we assume this inits to all zeroes: */
750  static const PgStat_TableCounts all_zeroes;
751  static TimestampTz last_report = 0;
752 
754  PgStat_MsgTabstat regular_msg;
755  PgStat_MsgTabstat shared_msg;
756  TabStatusArray *tsa;
757  int i;
758 
759  /* Don't expend a clock check if nothing to do */
760  if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
761  pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
763  return;
764 
765  /*
766  * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
767  * msec since we last sent one, or the caller wants to force stats out.
768  */
770  if (!force &&
772  return;
773  last_report = now;
774 
775  /*
776  * Scan through the TabStatusArray struct(s) to find tables that actually
777  * have counts, and build messages to send. We have to separate shared
778  * relations from regular ones because the databaseid field in the message
779  * header has to depend on that.
780  */
781  regular_msg.m_databaseid = MyDatabaseId;
782  shared_msg.m_databaseid = InvalidOid;
783  regular_msg.m_nentries = 0;
784  shared_msg.m_nentries = 0;
785 
786  for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
787  {
788  for (i = 0; i < tsa->tsa_used; i++)
789  {
790  PgStat_TableStatus *entry = &tsa->tsa_entries[i];
791  PgStat_MsgTabstat *this_msg;
792  PgStat_TableEntry *this_ent;
793 
794  /* Shouldn't have any pending transaction-dependent counts */
795  Assert(entry->trans == NULL);
796 
797  /*
798  * Ignore entries that didn't accumulate any actual counts, such
799  * as indexes that were opened by the planner but not used.
800  */
801  if (memcmp(&entry->t_counts, &all_zeroes,
802  sizeof(PgStat_TableCounts)) == 0)
803  continue;
804 
805  /*
806  * OK, insert data into the appropriate message, and send if full.
807  */
808  this_msg = entry->t_shared ? &shared_msg : &regular_msg;
809  this_ent = &this_msg->m_entry[this_msg->m_nentries];
810  this_ent->t_id = entry->t_id;
811  memcpy(&this_ent->t_counts, &entry->t_counts,
812  sizeof(PgStat_TableCounts));
813  if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
814  {
815  pgstat_send_tabstat(this_msg);
816  this_msg->m_nentries = 0;
817  }
818  }
819  /* zero out TableStatus structs after use */
820  MemSet(tsa->tsa_entries, 0,
821  tsa->tsa_used * sizeof(PgStat_TableStatus));
822  tsa->tsa_used = 0;
823  }
824 
825  /*
826  * Send partial messages. Make sure that any pending xact commit/abort
827  * gets counted, even if there are no table stats to send.
828  */
829  if (regular_msg.m_nentries > 0 ||
831  pgstat_send_tabstat(&regular_msg);
832  if (shared_msg.m_nentries > 0)
833  pgstat_send_tabstat(&shared_msg);
834 
835  /* Now, send function statistics */
837 }
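/*
 * A minimal sketch of the intended call pattern (the real call sites are in
 * tcop/postgres.c and in the backend shutdown hook, not shown here):
 *
 *     // after finishing a transaction, before waiting for the next command
 *     pgstat_report_stat(false);   // throttled by PGSTAT_STAT_INTERVAL
 *
 *     // during backend exit, flush whatever is pending regardless of timing
 *     pgstat_report_stat(true);
 */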
838 
839 /*
840  * Subroutine for pgstat_report_stat: finish and send a tabstat message
841  */
842 static void
844 {
845  int n;
846  int len;
847 
848  /* It's unlikely we'd get here with no socket, but maybe not impossible */
850  return;
851 
852  /*
853  * Report and reset accumulated xact commit/rollback and I/O timings
854  * whenever we send a normal tabstat message
855  */
856  if (OidIsValid(tsmsg->m_databaseid))
857  {
862  pgStatXactCommit = 0;
863  pgStatXactRollback = 0;
866  }
867  else
868  {
869  tsmsg->m_xact_commit = 0;
870  tsmsg->m_xact_rollback = 0;
871  tsmsg->m_block_read_time = 0;
872  tsmsg->m_block_write_time = 0;
873  }
874 
875  n = tsmsg->m_nentries;
876  len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
877  n * sizeof(PgStat_TableEntry);
878 
880  pgstat_send(tsmsg, len);
881 }
882 
883 /*
884  * Subroutine for pgstat_report_stat: populate and send a function stat message
885  */
886 static void
888 {
889  /* we assume this inits to all zeroes: */
890  static const PgStat_FunctionCounts all_zeroes;
891 
892  PgStat_MsgFuncstat msg;
894  HASH_SEQ_STATUS fstat;
895 
896  if (pgStatFunctions == NULL)
897  return;
898 
901  msg.m_nentries = 0;
902 
903  hash_seq_init(&fstat, pgStatFunctions);
904  while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
905  {
906  PgStat_FunctionEntry *m_ent;
907 
908  /* Skip it if no counts accumulated since last time */
909  if (memcmp(&entry->f_counts, &all_zeroes,
910  sizeof(PgStat_FunctionCounts)) == 0)
911  continue;
912 
913  /* need to convert format of time accumulators */
914  m_ent = &msg.m_entry[msg.m_nentries];
915  m_ent->f_id = entry->f_id;
916  m_ent->f_numcalls = entry->f_counts.f_numcalls;
919 
920  if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
921  {
922  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
923  msg.m_nentries * sizeof(PgStat_FunctionEntry));
924  msg.m_nentries = 0;
925  }
926 
927  /* reset the entry's counts */
928  MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
929  }
930 
931  if (msg.m_nentries > 0)
932  pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
933  msg.m_nentries * sizeof(PgStat_FunctionEntry));
934 
935  have_function_stats = false;
936 }
937 
938 
939 /* ----------
940  * pgstat_vacuum_stat() -
941  *
942  * Will tell the collector about objects it can get rid of.
943  * ----------
944  */
945 void
947 {
948  HTAB *htab;
949  PgStat_MsgTabpurge msg;
950  PgStat_MsgFuncpurge f_msg;
951  HASH_SEQ_STATUS hstat;
952  PgStat_StatDBEntry *dbentry;
953  PgStat_StatTabEntry *tabentry;
954  PgStat_StatFuncEntry *funcentry;
955  int len;
956 
958  return;
959 
960  /*
961  * If not done for this transaction, read the statistics collector stats
962  * file into some hash tables.
963  */
965 
966  /*
967  * Read pg_database and make a list of OIDs of all existing databases
968  */
970 
971  /*
972  * Search the database hash table for dead databases and tell the
973  * collector to drop them.
974  */
975  hash_seq_init(&hstat, pgStatDBHash);
976  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
977  {
978  Oid dbid = dbentry->databaseid;
979 
981 
982  /* the DB entry for shared tables (with InvalidOid) is never dropped */
983  if (OidIsValid(dbid) &&
984  hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
985  pgstat_drop_database(dbid);
986  }
987 
988  /* Clean up */
989  hash_destroy(htab);
990 
991  /*
992  * Lookup our own database entry; if not found, nothing more to do.
993  */
994  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
995  (void *) &MyDatabaseId,
996  HASH_FIND, NULL);
997  if (dbentry == NULL || dbentry->tables == NULL)
998  return;
999 
1000  /*
1001  * Similarly to above, make a list of all known relations in this DB.
1002  */
1004 
1005  /*
1006  * Initialize our messages table counter to zero
1007  */
1008  msg.m_nentries = 0;
1009 
1010  /*
1011  * Check for all tables listed in stats hashtable if they still exist.
1012  */
1013  hash_seq_init(&hstat, dbentry->tables);
1014  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
1015  {
1016  Oid tabid = tabentry->tableid;
1017 
1019 
1020  if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
1021  continue;
1022 
1023  /*
1024  * Not there, so add this table's Oid to the message
1025  */
1026  msg.m_tableid[msg.m_nentries++] = tabid;
1027 
1028  /*
1029  * If the message is full, send it out and reinitialize to empty
1030  */
1031  if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
1032  {
1033  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1034  +msg.m_nentries * sizeof(Oid);
1035 
1037  msg.m_databaseid = MyDatabaseId;
1038  pgstat_send(&msg, len);
1039 
1040  msg.m_nentries = 0;
1041  }
1042  }
1043 
1044  /*
1045  * Send the rest
1046  */
1047  if (msg.m_nentries > 0)
1048  {
1049  len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1050  +msg.m_nentries * sizeof(Oid);
1051 
1053  msg.m_databaseid = MyDatabaseId;
1054  pgstat_send(&msg, len);
1055  }
1056 
1057  /* Clean up */
1058  hash_destroy(htab);
1059 
1060  /*
1061  * Now repeat the above steps for functions. However, we needn't bother
1062  * in the common case where no function stats are being collected.
1063  */
1064  if (dbentry->functions != NULL &&
1065  hash_get_num_entries(dbentry->functions) > 0)
1066  {
1068 
1070  f_msg.m_databaseid = MyDatabaseId;
1071  f_msg.m_nentries = 0;
1072 
1073  hash_seq_init(&hstat, dbentry->functions);
1074  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
1075  {
1076  Oid funcid = funcentry->functionid;
1077 
1079 
1080  if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
1081  continue;
1082 
1083  /*
1084  * Not there, so add this function's Oid to the message
1085  */
1086  f_msg.m_functionid[f_msg.m_nentries++] = funcid;
1087 
1088  /*
1089  * If the message is full, send it out and reinitialize to empty
1090  */
1091  if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
1092  {
1093  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1094  +f_msg.m_nentries * sizeof(Oid);
1095 
1096  pgstat_send(&f_msg, len);
1097 
1098  f_msg.m_nentries = 0;
1099  }
1100  }
1101 
1102  /*
1103  * Send the rest
1104  */
1105  if (f_msg.m_nentries > 0)
1106  {
1107  len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1108  +f_msg.m_nentries * sizeof(Oid);
1109 
1110  pgstat_send(&f_msg, len);
1111  }
1112 
1113  hash_destroy(htab);
1114  }
1115 }
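/*
 * Note for callers: nothing in this file invokes pgstat_vacuum_stat()
 * automatically; in practice it is run periodically by autovacuum
 * (do_autovacuum() in autovacuum.c), which is how entries for dropped
 * databases, tables and functions eventually get purged from the stats.
 */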
1116 
1117 
1118 /* ----------
1119  * pgstat_collect_oids() -
1120  *
1121  * Collect the OIDs of all objects listed in the specified system catalog
1122  * into a temporary hash table. Caller should hash_destroy the result
1123  * when done with it. (However, we make the table in CurrentMemoryContext
1124  * so that it will be freed properly in the event of an error.)
1125  * ----------
1126  */
1127 static HTAB *
1129 {
1130  HTAB *htab;
1131  HASHCTL hash_ctl;
1132  Relation rel;
1133  HeapScanDesc scan;
1134  HeapTuple tup;
1135  Snapshot snapshot;
1136 
1137  memset(&hash_ctl, 0, sizeof(hash_ctl));
1138  hash_ctl.keysize = sizeof(Oid);
1139  hash_ctl.entrysize = sizeof(Oid);
1140  hash_ctl.hcxt = CurrentMemoryContext;
1141  htab = hash_create("Temporary table of OIDs",
1143  &hash_ctl,
1145 
1146  rel = heap_open(catalogid, AccessShareLock);
1147  snapshot = RegisterSnapshot(GetLatestSnapshot());
1148  scan = heap_beginscan(rel, snapshot, 0, NULL);
1149  while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
1150  {
1151  Oid thisoid = HeapTupleGetOid(tup);
1152 
1154 
1155  (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
1156  }
1157  heap_endscan(scan);
1158  UnregisterSnapshot(snapshot);
1160 
1161  return htab;
1162 }
1163 
1164 
1165 /* ----------
1166  * pgstat_drop_database() -
1167  *
1168  * Tell the collector that we just dropped a database.
1169  * (If the message gets lost, we will still clean the dead DB eventually
1170  * via future invocations of pgstat_vacuum_stat().)
1171  * ----------
1172  */
1173 void
1175 {
1176  PgStat_MsgDropdb msg;
1177 
1179  return;
1180 
1182  msg.m_databaseid = databaseid;
1183  pgstat_send(&msg, sizeof(msg));
1184 }
1185 
1186 
1187 /* ----------
1188  * pgstat_drop_relation() -
1189  *
1190  * Tell the collector that we just dropped a relation.
1191  * (If the message gets lost, we will still clean the dead entry eventually
1192  * via future invocations of pgstat_vacuum_stat().)
1193  *
1194  * Currently not used for lack of any good place to call it; we rely
1195  * entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
1196  * ----------
1197  */
1198 #ifdef NOT_USED
1199 void
1200 pgstat_drop_relation(Oid relid)
1201 {
1202  PgStat_MsgTabpurge msg;
1203  int len;
1204 
1206  return;
1207 
1208  msg.m_tableid[0] = relid;
1209  msg.m_nentries = 1;
1210 
1211  len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +sizeof(Oid);
1212 
1214  msg.m_databaseid = MyDatabaseId;
1215  pgstat_send(&msg, len);
1216 }
1217 #endif /* NOT_USED */
1218 
1219 
1220 /* ----------
1221  * pgstat_reset_counters() -
1222  *
1223  * Tell the statistics collector to reset counters for our database.
1224  *
1225  * Permission checking for this function is managed through the normal
1226  * GRANT system.
1227  * ----------
1228  */
1229 void
1231 {
1233 
1235  return;
1236 
1238  msg.m_databaseid = MyDatabaseId;
1239  pgstat_send(&msg, sizeof(msg));
1240 }
1241 
1242 /* ----------
1243  * pgstat_reset_shared_counters() -
1244  *
1245  * Tell the statistics collector to reset cluster-wide shared counters.
1246  *
1247  * Permission checking for this function is managed through the normal
1248  * GRANT system.
1249  * ----------
1250  */
1251 void
1252 pgstat_reset_shared_counters(const char *target)
1253 {
1255 
1257  return;
1258 
1259  if (strcmp(target, "archiver") == 0)
1261  else if (strcmp(target, "bgwriter") == 0)
1263  else
1264  ereport(ERROR,
1265  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1266  errmsg("unrecognized reset target: \"%s\"", target),
1267  errhint("Target must be \"archiver\" or \"bgwriter\".")));
1268 
1270  pgstat_send(&msg, sizeof(msg));
1271 }
1272 
1273 /* ----------
1274  * pgstat_reset_single_counter() -
1275  *
1276  * Tell the statistics collector to reset a single counter.
1277  *
1278  * Permission checking for this function is managed through the normal
1279  * GRANT system.
1280  * ----------
1281  */
1282 void
1284 {
1286 
1288  return;
1289 
1291  msg.m_databaseid = MyDatabaseId;
1292  msg.m_resettype = type;
1293  msg.m_objectid = objoid;
1294 
1295  pgstat_send(&msg, sizeof(msg));
1296 }
1297 
1298 /* ----------
1299  * pgstat_report_autovac() -
1300  *
1301  * Called from autovacuum.c to report startup of an autovacuum process.
1302  * We are called before InitPostgres is done, so can't rely on MyDatabaseId;
1303  * the db OID must be passed in, instead.
1304  * ----------
1305  */
1306 void
1308 {
1310 
1312  return;
1313 
1315  msg.m_databaseid = dboid;
1317 
1318  pgstat_send(&msg, sizeof(msg));
1319 }
1320 
1321 
1322 /* ---------
1323  * pgstat_report_vacuum() -
1324  *
1325  * Tell the collector about the table we just vacuumed.
1326  * ---------
1327  */
1328 void
1329 pgstat_report_vacuum(Oid tableoid, bool shared,
1330  PgStat_Counter livetuples, PgStat_Counter deadtuples)
1331 {
1332  PgStat_MsgVacuum msg;
1333 
1335  return;
1336 
1338  msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
1339  msg.m_tableoid = tableoid;
1342  msg.m_live_tuples = livetuples;
1343  msg.m_dead_tuples = deadtuples;
1344  pgstat_send(&msg, sizeof(msg));
1345 }
1346 
1347 /* --------
1348  * pgstat_report_analyze() -
1349  *
1350  * Tell the collector about the table we just analyzed.
1351  *
1352  * Caller must provide new live- and dead-tuples estimates, as well as a
1353  * flag indicating whether to reset the changes_since_analyze counter.
1354  * --------
1355  */
1356 void
1358  PgStat_Counter livetuples, PgStat_Counter deadtuples,
1359  bool resetcounter)
1360 {
1361  PgStat_MsgAnalyze msg;
1362 
1364  return;
1365 
1366  /*
1367  * Unlike VACUUM, ANALYZE might be running inside a transaction that has
1368  * already inserted and/or deleted rows in the target table. ANALYZE will
1369  * have counted such rows as live or dead respectively. Because we will
1370  * report our counts of such rows at transaction end, we should subtract
1371  * off these counts from what we send to the collector now, else they'll
1372  * be double-counted after commit. (This approach also ensures that the
1373  * collector ends up with the right numbers if we abort instead of
1374  * committing.)
1375  */
1376  if (rel->pgstat_info != NULL)
1377  {
1379 
1380  for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
1381  {
1382  livetuples -= trans->tuples_inserted - trans->tuples_deleted;
1383  deadtuples -= trans->tuples_updated + trans->tuples_deleted;
1384  }
1385  /* count stuff inserted by already-aborted subxacts, too */
1386  deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
1387  /* Since ANALYZE's counts are estimates, we could have underflowed */
1388  livetuples = Max(livetuples, 0);
1389  deadtuples = Max(deadtuples, 0);
1390  }
1391 
1393  msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
1394  msg.m_tableoid = RelationGetRelid(rel);
1396  msg.m_resetcounter = resetcounter;
1398  msg.m_live_tuples = livetuples;
1399  msg.m_dead_tuples = deadtuples;
1400  pgstat_send(&msg, sizeof(msg));
1401 }
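/*
 * Worked example of the adjustment above: suppose ANALYZE counted
 * livetuples = 1000 and deadtuples = 50 while the surrounding, still-open
 * transaction has inserted 10 rows, updated 4 and deleted 2 in this table
 * (and no subxacts have aborted). We report 1000 - (10 - 2) = 992 live and
 * 50 - (4 + 2) = 44 dead tuples now, because the transaction's own deltas
 * (+8 live, +6 dead) will be reported separately when it commits.
 */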
1402 
1403 /* --------
1404  * pgstat_report_recovery_conflict() -
1405  *
1406  * Tell the collector about a Hot Standby recovery conflict.
1407  * --------
1408  */
1409 void
1411 {
1413 
1415  return;
1416 
1418  msg.m_databaseid = MyDatabaseId;
1419  msg.m_reason = reason;
1420  pgstat_send(&msg, sizeof(msg));
1421 }
1422 
1423 /* --------
1424  * pgstat_report_deadlock() -
1425  *
1426  * Tell the collector about a deadlock detected.
1427  * --------
1428  */
1429 void
1431 {
1432  PgStat_MsgDeadlock msg;
1433 
1435  return;
1436 
1438  msg.m_databaseid = MyDatabaseId;
1439  pgstat_send(&msg, sizeof(msg));
1440 }
1441 
1442 /* --------
1443  * pgstat_report_tempfile() -
1444  *
1445  * Tell the collector about a temporary file.
1446  * --------
1447  */
1448 void
1449 pgstat_report_tempfile(size_t filesize)
1450 {
1451  PgStat_MsgTempFile msg;
1452 
1454  return;
1455 
1457  msg.m_databaseid = MyDatabaseId;
1458  msg.m_filesize = filesize;
1459  pgstat_send(&msg, sizeof(msg));
1460 }
1461 
1462 
1463 /* ----------
1464  * pgstat_ping() -
1465  *
1466  * Send some junk data to the collector to increase traffic.
1467  * ----------
1468  */
1469 void
1471 {
1472  PgStat_MsgDummy msg;
1473 
1475  return;
1476 
1478  pgstat_send(&msg, sizeof(msg));
1479 }
1480 
1481 /* ----------
1482  * pgstat_send_inquiry() -
1483  *
1484  * Notify collector that we need fresh data.
1485  * ----------
1486  */
1487 static void
1488 pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
1489 {
1490  PgStat_MsgInquiry msg;
1491 
1493  msg.clock_time = clock_time;
1494  msg.cutoff_time = cutoff_time;
1495  msg.databaseid = databaseid;
1496  pgstat_send(&msg, sizeof(msg));
1497 }
1498 
1499 
1500 /*
1501  * Initialize function call usage data.
1502  * Called by the executor before invoking a function.
1503  */
1504 void
1507 {
1508  PgStat_BackendFunctionEntry *htabent;
1509  bool found;
1510 
1511  if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
1512  {
1513  /* stats not wanted */
1514  fcu->fs = NULL;
1515  return;
1516  }
1517 
1518  if (!pgStatFunctions)
1519  {
1520  /* First time through - initialize function stat table */
1521  HASHCTL hash_ctl;
1522 
1523  memset(&hash_ctl, 0, sizeof(hash_ctl));
1524  hash_ctl.keysize = sizeof(Oid);
1525  hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
1526  pgStatFunctions = hash_create("Function stat entries",
1528  &hash_ctl,
1529  HASH_ELEM | HASH_BLOBS);
1530  }
1531 
1532  /* Get the stats entry for this function, create if necessary */
1533  htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
1534  HASH_ENTER, &found);
1535  if (!found)
1536  MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));
1537 
1538  fcu->fs = &htabent->f_counts;
1539 
1540  /* save stats for this function, later used to compensate for recursion */
1541  fcu->save_f_total_time = htabent->f_counts.f_total_time;
1542 
1543  /* save current backend-wide total time */
1544  fcu->save_total = total_func_time;
1545 
1546  /* get clock time as of function start */
1548 }
1549 
1550 /*
1551  * find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
1552  * for specified function
1553  *
1554  * If no entry, return NULL, don't create a new one
1555  */
1558 {
1559  if (pgStatFunctions == NULL)
1560  return NULL;
1561 
1562  return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
1563  (void *) &func_id,
1564  HASH_FIND, NULL);
1565 }
1566 
1567 /*
1568  * Calculate function call usage and update stat counters.
1569  * Called by the executor after invoking a function.
1570  *
1571  * In the case of a set-returning function that runs in value-per-call mode,
1572  * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
1573  * calls for what the user considers a single call of the function. The
1574  * finalize flag should be TRUE on the last call.
1575  */
1576 void
1578 {
1579  PgStat_FunctionCounts *fs = fcu->fs;
1580  instr_time f_total;
1581  instr_time f_others;
1582  instr_time f_self;
1583 
1584  /* stats not wanted? */
1585  if (fs == NULL)
1586  return;
1587 
1588  /* total elapsed time in this function call */
1589  INSTR_TIME_SET_CURRENT(f_total);
1590  INSTR_TIME_SUBTRACT(f_total, fcu->f_start);
1591 
1592  /* self usage: elapsed minus anything already charged to other calls */
1593  f_others = total_func_time;
1594  INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
1595  f_self = f_total;
1596  INSTR_TIME_SUBTRACT(f_self, f_others);
1597 
1598  /* update backend-wide total time */
1600 
1601  /*
1602  * Compute the new f_total_time as the total elapsed time added to the
1603  * pre-call value of f_total_time. This is necessary to avoid
1604  * double-counting any time taken by recursive calls of myself. (We do
1605  * not need any similar kluge for self time, since that already excludes
1606  * any recursive calls.)
1607  */
1608  INSTR_TIME_ADD(f_total, fcu->save_f_total_time);
1609 
1610  /* update counters in function stats table */
1611  if (finalize)
1612  fs->f_numcalls++;
1613  fs->f_total_time = f_total;
1614  INSTR_TIME_ADD(fs->f_self_time, f_self);
1615 
1616  /* indicate that we have something to send */
1617  have_function_stats = true;
1618 }
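/*
 * Worked example of the self-time bookkeeping: function A runs for 10 ms of
 * wall-clock time and during that time calls function B, whose own
 * pgstat_end_function_usage() call added 4 ms of self time to
 * total_func_time. When A finishes, f_total = 10 ms,
 * f_others = total_func_time - save_total = 4 ms, so f_self = 6 ms is added
 * to A's f_self_time (and to total_func_time), while A's f_total_time
 * becomes its pre-call value plus the full 10 ms.
 */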
1619 
1620 
1621 /* ----------
1622  * pgstat_initstats() -
1623  *
1624  * Initialize a relcache entry to count access statistics.
1625  * Called whenever a relation is opened.
1626  *
1627  * We assume that a relcache entry's pgstat_info field is zeroed by
1628  * relcache.c when the relcache entry is made; thereafter it is long-lived
1629  * data. We can avoid repeated searches of the TabStatus arrays when the
1630  * same relation is touched repeatedly within a transaction.
1631  * ----------
1632  */
1633 void
1635 {
1636  Oid rel_id = rel->rd_id;
1637  char relkind = rel->rd_rel->relkind;
1638 
1639  /* We only count stats for things that have storage */
1640  if (!(relkind == RELKIND_RELATION ||
1641  relkind == RELKIND_MATVIEW ||
1642  relkind == RELKIND_INDEX ||
1643  relkind == RELKIND_TOASTVALUE ||
1644  relkind == RELKIND_SEQUENCE))
1645  {
1646  rel->pgstat_info = NULL;
1647  return;
1648  }
1649 
1651  {
1652  /* We're not counting at all */
1653  rel->pgstat_info = NULL;
1654  return;
1655  }
1656 
1657  /*
1658  * If we already set up this relation in the current transaction, nothing
1659  * to do.
1660  */
1661  if (rel->pgstat_info != NULL &&
1662  rel->pgstat_info->t_id == rel_id)
1663  return;
1664 
1665  /* Else find or make the PgStat_TableStatus entry, and update link */
1666  rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
1667 }
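/*
 * Because TabStatusArray entries are never moved and t_id is zeroed when an
 * entry is not in use, the fast-path test above
 * (pgstat_info->t_id == rel_id) is sufficient: reopening the same relation
 * repeatedly within a transaction skips the get_tabstat_entry() search
 * entirely.
 */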
1668 
1669 /*
1670  * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
1671  */
1672 static PgStat_TableStatus *
1673 get_tabstat_entry(Oid rel_id, bool isshared)
1674 {
1675  PgStat_TableStatus *entry;
1676  TabStatusArray *tsa;
1677  TabStatusArray *prev_tsa;
1678  int i;
1679 
1680  /*
1681  * Search the already-used tabstat slots for this relation.
1682  */
1683  prev_tsa = NULL;
1684  for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
1685  {
1686  for (i = 0; i < tsa->tsa_used; i++)
1687  {
1688  entry = &tsa->tsa_entries[i];
1689  if (entry->t_id == rel_id)
1690  return entry;
1691  }
1692 
1693  if (tsa->tsa_used < TABSTAT_QUANTUM)
1694  {
1695  /*
1696  * It must not be present, but we found a free slot instead. Fine,
1697  * let's use this one. We assume the entry was already zeroed,
1698  * either at creation or after last use.
1699  */
1700  entry = &tsa->tsa_entries[tsa->tsa_used++];
1701  entry->t_id = rel_id;
1702  entry->t_shared = isshared;
1703  return entry;
1704  }
1705  }
1706 
1707  /*
1708  * We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
1709  */
1711  sizeof(TabStatusArray));
1712  if (prev_tsa)
1713  prev_tsa->tsa_next = tsa;
1714  else
1715  pgStatTabList = tsa;
1716 
1717  /*
1718  * Use the first entry of the new TabStatusArray.
1719  */
1720  entry = &tsa->tsa_entries[tsa->tsa_used++];
1721  entry->t_id = rel_id;
1722  entry->t_shared = isshared;
1723  return entry;
1724 }
1725 
1726 /*
1727  * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
1728  *
1729  * If no entry, return NULL, don't create a new one
1730  */
1733 {
1734  PgStat_TableStatus *entry;
1735  TabStatusArray *tsa;
1736  int i;
1737 
1738  for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
1739  {
1740  for (i = 0; i < tsa->tsa_used; i++)
1741  {
1742  entry = &tsa->tsa_entries[i];
1743  if (entry->t_id == rel_id)
1744  return entry;
1745  }
1746  }
1747 
1748  /* Not present */
1749  return NULL;
1750 }
1751 
1752 /*
1753  * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
1754  */
1755 static PgStat_SubXactStatus *
1757 {
1758  PgStat_SubXactStatus *xact_state;
1759 
1760  xact_state = pgStatXactStack;
1761  if (xact_state == NULL || xact_state->nest_level != nest_level)
1762  {
1763  xact_state = (PgStat_SubXactStatus *)
1765  sizeof(PgStat_SubXactStatus));
1766  xact_state->nest_level = nest_level;
1767  xact_state->prev = pgStatXactStack;
1768  xact_state->first = NULL;
1769  pgStatXactStack = xact_state;
1770  }
1771  return xact_state;
1772 }
1773 
1774 /*
1775  * add_tabstat_xact_level - add a new (sub)transaction state record
1776  */
1777 static void
1778 add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
1779 {
1780  PgStat_SubXactStatus *xact_state;
1782 
1783  /*
1784  * If this is the first rel to be modified at the current nest level, we
1785  * first have to push a transaction stack entry.
1786  */
1787  xact_state = get_tabstat_stack_level(nest_level);
1788 
1789  /* Now make a per-table stack entry */
1790  trans = (PgStat_TableXactStatus *)
1792  sizeof(PgStat_TableXactStatus));
1793  trans->nest_level = nest_level;
1794  trans->upper = pgstat_info->trans;
1795  trans->parent = pgstat_info;
1796  trans->next = xact_state->first;
1797  xact_state->first = trans;
1798  pgstat_info->trans = trans;
1799 }
1800 
1801 /*
1802  * pgstat_count_heap_insert - count a tuple insertion of n tuples
1803  */
1804 void
1806 {
1807  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1808 
1809  if (pgstat_info != NULL)
1810  {
1811  /* We have to log the effect at the proper transactional level */
1812  int nest_level = GetCurrentTransactionNestLevel();
1813 
1814  if (pgstat_info->trans == NULL ||
1815  pgstat_info->trans->nest_level != nest_level)
1816  add_tabstat_xact_level(pgstat_info, nest_level);
1817 
1818  pgstat_info->trans->tuples_inserted += n;
1819  }
1820 }
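/*
 * Typical callers live in access/heap/heapam.c (not shown here):
 * heap_insert() reports a single row with pgstat_count_heap_insert(rel, 1),
 * while heap_multi_insert() passes the number of tuples written in one
 * call, which is why this counter takes an explicit n instead of always
 * incrementing by one.
 */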
1821 
1822 /*
1823  * pgstat_count_heap_update - count a tuple update
1824  */
1825 void
1827 {
1828  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1829 
1830  if (pgstat_info != NULL)
1831  {
1832  /* We have to log the effect at the proper transactional level */
1833  int nest_level = GetCurrentTransactionNestLevel();
1834 
1835  if (pgstat_info->trans == NULL ||
1836  pgstat_info->trans->nest_level != nest_level)
1837  add_tabstat_xact_level(pgstat_info, nest_level);
1838 
1839  pgstat_info->trans->tuples_updated++;
1840 
1841  /* t_tuples_hot_updated is nontransactional, so just advance it */
1842  if (hot)
1843  pgstat_info->t_counts.t_tuples_hot_updated++;
1844  }
1845 }
1846 
1847 /*
1848  * pgstat_count_heap_delete - count a tuple deletion
1849  */
1850 void
1852 {
1853  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1854 
1855  if (pgstat_info != NULL)
1856  {
1857  /* We have to log the effect at the proper transactional level */
1858  int nest_level = GetCurrentTransactionNestLevel();
1859 
1860  if (pgstat_info->trans == NULL ||
1861  pgstat_info->trans->nest_level != nest_level)
1862  add_tabstat_xact_level(pgstat_info, nest_level);
1863 
1864  pgstat_info->trans->tuples_deleted++;
1865  }
1866 }
1867 
1868 /*
1869  * pgstat_truncate_save_counters
1870  *
1871  * Whenever a table is truncated, we save its i/u/d counters so that they can
1872  * be cleared, and if the (sub)xact that executed the truncate later aborts,
1873  * the counters can be restored to the saved (pre-truncate) values. Note we do
1874  * this on the first truncate in any particular subxact level only.
1875  */
1876 static void
1878 {
1879  if (!trans->truncated)
1880  {
1881  trans->inserted_pre_trunc = trans->tuples_inserted;
1882  trans->updated_pre_trunc = trans->tuples_updated;
1883  trans->deleted_pre_trunc = trans->tuples_deleted;
1884  trans->truncated = true;
1885  }
1886 }
1887 
1888 /*
1889  * pgstat_truncate_restore_counters - restore counters when a truncate aborts
1890  */
1891 static void
1893 {
1894  if (trans->truncated)
1895  {
1896  trans->tuples_inserted = trans->inserted_pre_trunc;
1897  trans->tuples_updated = trans->updated_pre_trunc;
1898  trans->tuples_deleted = trans->deleted_pre_trunc;
1899  }
1900 }
1901 
1902 /*
1903  * pgstat_count_truncate - update tuple counters due to truncate
1904  */
1905 void
1907 {
1908  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1909 
1910  if (pgstat_info != NULL)
1911  {
1912  /* We have to log the effect at the proper transactional level */
1913  int nest_level = GetCurrentTransactionNestLevel();
1914 
1915  if (pgstat_info->trans == NULL ||
1916  pgstat_info->trans->nest_level != nest_level)
1917  add_tabstat_xact_level(pgstat_info, nest_level);
1918 
1919  pgstat_truncate_save_counters(pgstat_info->trans);
1920  pgstat_info->trans->tuples_inserted = 0;
1921  pgstat_info->trans->tuples_updated = 0;
1922  pgstat_info->trans->tuples_deleted = 0;
1923  }
1924 }
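/*
 * Example of the save/restore sequence: a subtransaction inserts 5 rows
 * (tuples_inserted = 5) and then truncates the table. The first truncate at
 * this nest level saves (5, 0, 0) into the *_pre_trunc fields and zeroes the
 * live counters, so rows inserted after the truncate are counted from zero.
 * If the subtransaction later aborts, pgstat_truncate_restore_counters()
 * puts the saved values back, and the 5 attempted inserts still end up in
 * t_tuples_inserted.
 */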
1925 
1926 /*
1927  * pgstat_update_heap_dead_tuples - update dead-tuples count
1928  *
1929  * The semantics of this are that we are reporting the nontransactional
1930  * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
1931  * rather than increasing, and the change goes straight into the per-table
1932  * counter, not into transactional state.
1933  */
1934 void
1936 {
1937  PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1938 
1939  if (pgstat_info != NULL)
1940  pgstat_info->t_counts.t_delta_dead_tuples -= delta;
1941 }
1942 
1943 
1944 /* ----------
1945  * AtEOXact_PgStat
1946  *
1947  * Called from access/transam/xact.c at top-level transaction commit/abort.
1948  * ----------
1949  */
1950 void
1951 AtEOXact_PgStat(bool isCommit)
1952 {
1953  PgStat_SubXactStatus *xact_state;
1954 
1955  /*
1956  * Count transaction commit or abort. (We use counters, not just bools,
1957  * in case the reporting message isn't sent right away.)
1958  */
1959  if (isCommit)
1960  pgStatXactCommit++;
1961  else
1963 
1964  /*
1965  * Transfer transactional insert/update counts into the base tabstat
1966  * entries. We don't bother to free any of the transactional state, since
1967  * it's all in TopTransactionContext and will go away anyway.
1968  */
1969  xact_state = pgStatXactStack;
1970  if (xact_state != NULL)
1971  {
1973 
1974  Assert(xact_state->nest_level == 1);
1975  Assert(xact_state->prev == NULL);
1976  for (trans = xact_state->first; trans != NULL; trans = trans->next)
1977  {
1978  PgStat_TableStatus *tabstat;
1979 
1980  Assert(trans->nest_level == 1);
1981  Assert(trans->upper == NULL);
1982  tabstat = trans->parent;
1983  Assert(tabstat->trans == trans);
1984  /* restore pre-truncate stats (if any) in case of aborted xact */
1985  if (!isCommit)
1987  /* count attempted actions regardless of commit/abort */
1988  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
1989  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
1990  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
1991  if (isCommit)
1992  {
1993  tabstat->t_counts.t_truncated = trans->truncated;
1994  if (trans->truncated)
1995  {
1996  /* forget live/dead stats seen by backend thus far */
1997  tabstat->t_counts.t_delta_live_tuples = 0;
1998  tabstat->t_counts.t_delta_dead_tuples = 0;
1999  }
2000  /* insert adds a live tuple, delete removes one */
2001  tabstat->t_counts.t_delta_live_tuples +=
2002  trans->tuples_inserted - trans->tuples_deleted;
2003  /* update and delete each create a dead tuple */
2004  tabstat->t_counts.t_delta_dead_tuples +=
2005  trans->tuples_updated + trans->tuples_deleted;
2006  /* insert, update, delete each count as one change event */
2007  tabstat->t_counts.t_changed_tuples +=
2008  trans->tuples_inserted + trans->tuples_updated +
2009  trans->tuples_deleted;
2010  }
2011  else
2012  {
2013  /* inserted tuples are dead, deleted tuples are unaffected */
2014  tabstat->t_counts.t_delta_dead_tuples +=
2015  trans->tuples_inserted + trans->tuples_updated;
2016  /* an aborted xact generates no changed_tuple events */
2017  }
2018  tabstat->trans = NULL;
2019  }
2020  }
2021  pgStatXactStack = NULL;
2022 
2023  /* Make sure any stats snapshot is thrown away */
2025 }
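/*
 * Worked example of the commit arithmetic above: a transaction that
 * inserted 3, updated 1 and deleted 2 tuples in a table (with no truncate)
 * adds, at commit, +3/+1/+2 to the attempted-action counters,
 * +1 (= 3 - 2) to t_delta_live_tuples, +3 (= 1 + 2) to t_delta_dead_tuples
 * and +6 to t_changed_tuples. Had it aborted instead, only the
 * attempted-action counters and t_delta_dead_tuples (+4, the inserted plus
 * updated tuples) would be affected.
 */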
2026 
2027 /* ----------
2028  * AtEOSubXact_PgStat
2029  *
2030  * Called from access/transam/xact.c at subtransaction commit/abort.
2031  * ----------
2032  */
2033 void
2034 AtEOSubXact_PgStat(bool isCommit, int nestDepth)
2035 {
2036  PgStat_SubXactStatus *xact_state;
2037 
2038  /*
2039  * Transfer transactional insert/update counts into the next higher
2040  * subtransaction state.
2041  */
2042  xact_state = pgStatXactStack;
2043  if (xact_state != NULL &&
2044  xact_state->nest_level >= nestDepth)
2045  {
2047  PgStat_TableXactStatus *next_trans;
2048 
2049  /* delink xact_state from stack immediately to simplify reuse case */
2050  pgStatXactStack = xact_state->prev;
2051 
2052  for (trans = xact_state->first; trans != NULL; trans = next_trans)
2053  {
2054  PgStat_TableStatus *tabstat;
2055 
2056  next_trans = trans->next;
2057  Assert(trans->nest_level == nestDepth);
2058  tabstat = trans->parent;
2059  Assert(tabstat->trans == trans);
2060  if (isCommit)
2061  {
2062  if (trans->upper && trans->upper->nest_level == nestDepth - 1)
2063  {
2064  if (trans->truncated)
2065  {
2066  /* propagate the truncate status one level up */
2068  /* replace upper xact stats with ours */
2069  trans->upper->tuples_inserted = trans->tuples_inserted;
2070  trans->upper->tuples_updated = trans->tuples_updated;
2071  trans->upper->tuples_deleted = trans->tuples_deleted;
2072  }
2073  else
2074  {
2075  trans->upper->tuples_inserted += trans->tuples_inserted;
2076  trans->upper->tuples_updated += trans->tuples_updated;
2077  trans->upper->tuples_deleted += trans->tuples_deleted;
2078  }
2079  tabstat->trans = trans->upper;
2080  pfree(trans);
2081  }
2082  else
2083  {
2084  /*
2085  * When there isn't an immediate parent state, we can just
2086  * reuse the record instead of going through a
2087  * palloc/pfree pushup (this works since it's all in
2088  * TopTransactionContext anyway). We have to re-link it
2089  * into the parent level, though, and that might mean
2090  * pushing a new entry into the pgStatXactStack.
2091  */
2092  PgStat_SubXactStatus *upper_xact_state;
2093 
2094  upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
2095  trans->next = upper_xact_state->first;
2096  upper_xact_state->first = trans;
2097  trans->nest_level = nestDepth - 1;
2098  }
2099  }
2100  else
2101  {
2102  /*
2103  * On abort, update top-level tabstat counts, then forget the
2104  * subtransaction
2105  */
2106 
2107  /* first restore values obliterated by truncate */
2109  /* count attempted actions regardless of commit/abort */
2110  tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2111  tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2112  tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2113  /* inserted tuples are dead, deleted tuples are unaffected */
2114  tabstat->t_counts.t_delta_dead_tuples +=
2115  trans->tuples_inserted + trans->tuples_updated;
2116  tabstat->trans = trans->upper;
2117  pfree(trans);
2118  }
2119  }
2120  pfree(xact_state);
2121  }
2122 }
2123 
2124 
2125 /*
2126  * AtPrepare_PgStat
2127  * Save the transactional stats state at 2PC transaction prepare.
2128  *
2129  * In this phase we just generate 2PC records for all the pending
2130  * transaction-dependent stats work.
2131  */
2132 void
2133 AtPrepare_PgStat(void)
2134 {
2135  PgStat_SubXactStatus *xact_state;
2136 
2137  xact_state = pgStatXactStack;
2138  if (xact_state != NULL)
2139  {
2140  PgStat_TableXactStatus *trans;
2141 
2142  Assert(xact_state->nest_level == 1);
2143  Assert(xact_state->prev == NULL);
2144  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2145  {
2146  PgStat_TableStatus *tabstat;
2147  TwoPhasePgStatRecord record;
2148 
2149  Assert(trans->nest_level == 1);
2150  Assert(trans->upper == NULL);
2151  tabstat = trans->parent;
2152  Assert(tabstat->trans == trans);
2153 
2154  record.tuples_inserted = trans->tuples_inserted;
2155  record.tuples_updated = trans->tuples_updated;
2156  record.tuples_deleted = trans->tuples_deleted;
2157  record.inserted_pre_trunc = trans->inserted_pre_trunc;
2158  record.updated_pre_trunc = trans->updated_pre_trunc;
2159  record.deleted_pre_trunc = trans->deleted_pre_trunc;
2160  record.t_id = tabstat->t_id;
2161  record.t_shared = tabstat->t_shared;
2162  record.t_truncated = trans->truncated;
2163 
2164  RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
2165  &record, sizeof(TwoPhasePgStatRecord));
2166  }
2167  }
2168 }
2169 
2170 /*
2171  * PostPrepare_PgStat
2172  * Clean up after successful PREPARE.
2173  *
2174  * All we need do here is unlink the transaction stats state from the
2175  * nontransactional state. The nontransactional action counts will be
2176  * reported to the stats collector immediately, while the effects on live
2177  * and dead tuple counts are preserved in the 2PC state file.
2178  *
2179  * Note: AtEOXact_PgStat is not called during PREPARE.
2180  */
2181 void
2182 PostPrepare_PgStat(void)
2183 {
2184  PgStat_SubXactStatus *xact_state;
2185 
2186  /*
2187  * We don't bother to free any of the transactional state, since it's all
2188  * in TopTransactionContext and will go away anyway.
2189  */
2190  xact_state = pgStatXactStack;
2191  if (xact_state != NULL)
2192  {
2193  PgStat_TableXactStatus *trans;
2194 
2195  for (trans = xact_state->first; trans != NULL; trans = trans->next)
2196  {
2197  PgStat_TableStatus *tabstat;
2198 
2199  tabstat = trans->parent;
2200  tabstat->trans = NULL;
2201  }
2202  }
2203  pgStatXactStack = NULL;
2204 
2205  /* Make sure any stats snapshot is thrown away */
2206  pgstat_clear_snapshot();
2207 }
2208 
2209 /*
2210  * 2PC processing routine for COMMIT PREPARED case.
2211  *
2212  * Load the saved counts into our local pgstats state.
2213  */
2214 void
2215 pgstat_twophase_postcommit(TransactionId xid, uint16 info,
2216  void *recdata, uint32 len)
2217 {
2218  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2219  PgStat_TableStatus *pgstat_info;
2220 
2221  /* Find or create a tabstat entry for the rel */
2222  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2223 
2224  /* Same math as in AtEOXact_PgStat, commit case */
2225  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2226  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2227  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2228  pgstat_info->t_counts.t_truncated = rec->t_truncated;
2229  if (rec->t_truncated)
2230  {
2231  /* forget live/dead stats seen by backend thus far */
2232  pgstat_info->t_counts.t_delta_live_tuples = 0;
2233  pgstat_info->t_counts.t_delta_dead_tuples = 0;
2234  }
2235  pgstat_info->t_counts.t_delta_live_tuples +=
2236  rec->tuples_inserted - rec->tuples_deleted;
2237  pgstat_info->t_counts.t_delta_dead_tuples +=
2238  rec->tuples_updated + rec->tuples_deleted;
2239  pgstat_info->t_counts.t_changed_tuples +=
2240  rec->tuples_inserted + rec->tuples_updated +
2241  rec->tuples_deleted;
2242 }
2243 
2244 /*
2245  * 2PC processing routine for ROLLBACK PREPARED case.
2246  *
2247  * Load the saved counts into our local pgstats state, but treat them
2248  * as aborted.
2249  */
2250 void
2251 pgstat_twophase_postabort(TransactionId xid, uint16 info,
2252  void *recdata, uint32 len)
2253 {
2254  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2255  PgStat_TableStatus *pgstat_info;
2256 
2257  /* Find or create a tabstat entry for the rel */
2258  pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2259 
2260  /* Same math as in AtEOXact_PgStat, abort case */
2261  if (rec->t_truncated)
2262  {
2263  rec->tuples_inserted = rec->inserted_pre_trunc;
2264  rec->tuples_updated = rec->updated_pre_trunc;
2265  rec->tuples_deleted = rec->deleted_pre_trunc;
2266  }
2267  pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2268  pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2269  pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2270  pgstat_info->t_counts.t_delta_dead_tuples +=
2271  rec->tuples_inserted + rec->tuples_updated;
2272 }
2273 
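Editor's aside (not part of pgstat.c): the commit-versus-abort arithmetic used above and in AtEOXact_PgStat can be checked with a tiny standalone C sketch. The struct and function names here are illustrative only.

#include <stdio.h>

/* Illustrative only: mirrors the per-table delta math used above. */
typedef struct { long live; long dead; long changed; } counts;

static void apply(counts *c, long ins, long upd, long del, int committed)
{
    if (committed)
    {
        c->live += ins - del;          /* insert adds a live tuple, delete removes one */
        c->dead += upd + del;          /* update and delete each leave a dead tuple */
        c->changed += ins + upd + del; /* every attempted change counts once */
    }
    else
        c->dead += ins + upd;          /* aborted inserts/updates become dead tuples */
}

int main(void)
{
    counts c = {0, 0, 0};
    apply(&c, 10, 3, 2, 1);            /* committed xact: 10 inserts, 3 updates, 2 deletes */
    apply(&c, 5, 1, 0, 0);             /* aborted xact: its 5 inserts and 1 update are dead */
    printf("live=%ld dead=%ld changed=%ld\n", c.live, c.dead, c.changed);
    return 0;                          /* prints live=8 dead=11 changed=15 */
}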
2274 
2275 /* ----------
2276  * pgstat_fetch_stat_dbentry() -
2277  *
2278  * Support function for the SQL-callable pgstat* functions. Returns
2279  * the collected statistics for one database or NULL. NULL doesn't mean
2280  * that the database doesn't exist, it is just not yet known by the
2281  * collector, so the caller is better off to report ZERO instead.
2282  * ----------
2283  */
2284 PgStat_StatDBEntry *
2285 pgstat_fetch_stat_dbentry(Oid dbid)
2286 {
2287  /*
2288  * If not done for this transaction, read the statistics collector stats
2289  * file into some hash tables.
2290  */
2291  backend_read_statsfile();
2292 
2293  /*
2294  * Lookup the requested database; return NULL if not found
2295  */
2296  return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2297  (void *) &dbid,
2298  HASH_FIND, NULL);
2299 }
2300 
2301 
2302 /* ----------
2303  * pgstat_fetch_stat_tabentry() -
2304  *
2305  * Support function for the SQL-callable pgstat* functions. Returns
2306  * the collected statistics for one table or NULL. NULL doesn't mean
2307  * that the table doesn't exist, it is just not yet known by the
2308  * collector, so the caller is better off to report ZERO instead.
2309  * ----------
2310  */
2311 PgStat_StatTabEntry *
2312 pgstat_fetch_stat_tabentry(Oid relid)
2313 {
2314  Oid dbid;
2315  PgStat_StatDBEntry *dbentry;
2316  PgStat_StatTabEntry *tabentry;
2317 
2318  /*
2319  * If not done for this transaction, read the statistics collector stats
2320  * file into some hash tables.
2321  */
2322  backend_read_statsfile();
2323 
2324  /*
2325  * Lookup our database, then look in its table hash table.
2326  */
2327  dbid = MyDatabaseId;
2328  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2329  (void *) &dbid,
2330  HASH_FIND, NULL);
2331  if (dbentry != NULL && dbentry->tables != NULL)
2332  {
2333  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2334  (void *) &relid,
2335  HASH_FIND, NULL);
2336  if (tabentry)
2337  return tabentry;
2338  }
2339 
2340  /*
2341  * If we didn't find it, maybe it's a shared table.
2342  */
2343  dbid = InvalidOid;
2344  dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2345  (void *) &dbid,
2346  HASH_FIND, NULL);
2347  if (dbentry != NULL && dbentry->tables != NULL)
2348  {
2349  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2350  (void *) &relid,
2351  HASH_FIND, NULL);
2352  if (tabentry)
2353  return tabentry;
2354  }
2355 
2356  return NULL;
2357 }
2358 
2359 
2360 /* ----------
2361  * pgstat_fetch_stat_funcentry() -
2362  *
2363  * Support function for the SQL-callable pgstat* functions. Returns
2364  * the collected statistics for one function or NULL.
2365  * ----------
2366  */
2367 PgStat_StatFuncEntry *
2368 pgstat_fetch_stat_funcentry(Oid func_id)
2369 {
2370  PgStat_StatDBEntry *dbentry;
2371  PgStat_StatFuncEntry *funcentry = NULL;
2372 
2373  /* load the stats file if needed */
2374  backend_read_statsfile();
2375 
2376  /* Lookup our database, then find the requested function. */
2377  dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
2378  if (dbentry != NULL && dbentry->functions != NULL)
2379  {
2380  funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
2381  (void *) &func_id,
2382  HASH_FIND, NULL);
2383  }
2384 
2385  return funcentry;
2386 }
2387 
2388 
2389 /* ----------
2390  * pgstat_fetch_stat_beentry() -
2391  *
2392  * Support function for the SQL-callable pgstat* functions. Returns
2393  * our local copy of the current-activity entry for one backend.
2394  *
2395  * NB: caller is responsible for a check if the user is permitted to see
2396  * this info (especially the querystring).
2397  * ----------
2398  */
2399 PgBackendStatus *
2400 pgstat_fetch_stat_beentry(int beid)
2401 {
2402  pgstat_read_current_status();
2403 
2404  if (beid < 1 || beid > localNumBackends)
2405  return NULL;
2406 
2407  return &localBackendStatusTable[beid - 1].backendStatus;
2408 }
2409 
2410 
2411 /* ----------
2412  * pgstat_fetch_stat_local_beentry() -
2413  *
2414  * Like pgstat_fetch_stat_beentry() but with locally computed additions (like
2415  * xid and xmin values of the backend)
2416  *
2417  * NB: caller is responsible for a check if the user is permitted to see
2418  * this info (especially the querystring).
2419  * ----------
2420  */
2421 LocalPgBackendStatus *
2422 pgstat_fetch_stat_local_beentry(int beid)
2423 {
2424  pgstat_read_current_status();
2425 
2426  if (beid < 1 || beid > localNumBackends)
2427  return NULL;
2428 
2429  return &localBackendStatusTable[beid - 1];
2430 }
2431 
2432 
2433 /* ----------
2434  * pgstat_fetch_stat_numbackends() -
2435  *
2436  * Support function for the SQL-callable pgstat* functions. Returns
2437  * the maximum current backend id.
2438  * ----------
2439  */
2440 int
2441 pgstat_fetch_stat_numbackends(void)
2442 {
2443  pgstat_read_current_status();
2444 
2445  return localNumBackends;
2446 }
2447 
2448 /*
2449  * ---------
2450  * pgstat_fetch_stat_archiver() -
2451  *
2452  * Support function for the SQL-callable pgstat* functions. Returns
2453  * a pointer to the archiver statistics struct.
2454  * ---------
2455  */
2456 PgStat_ArchiverStats *
2457 pgstat_fetch_stat_archiver(void)
2458 {
2459  backend_read_statsfile();
2460 
2461  return &archiverStats;
2462 }
2463 
2464 
2465 /*
2466  * ---------
2467  * pgstat_fetch_global() -
2468  *
2469  * Support function for the SQL-callable pgstat* functions. Returns
2470  * a pointer to the global statistics struct.
2471  * ---------
2472  */
2473 PgStat_GlobalStats *
2474 pgstat_fetch_global(void)
2475 {
2476  backend_read_statsfile();
2477 
2478  return &globalStats;
2479 }
2480 
2481 
2482 /* ------------------------------------------------------------
2483  * Functions for management of the shared-memory PgBackendStatus array
2484  * ------------------------------------------------------------
2485  */
2486 
2487 static PgBackendStatus *BackendStatusArray = NULL;
2488 static PgBackendStatus *MyBEEntry = NULL;
2489 static char *BackendAppnameBuffer = NULL;
2490 static char *BackendClientHostnameBuffer = NULL;
2491 static char *BackendActivityBuffer = NULL;
2492 static Size BackendActivityBufferSize = 0;
2493 #ifdef USE_SSL
2494 static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
2495 #endif
2496 
2497 
2498 /*
2499  * Report shared-memory space needed by CreateSharedBackendStatus.
2500  */
2501 Size
2502 BackendStatusShmemSize(void)
2503 {
2504  Size size;
2505 
2506  /* BackendStatusArray: */
2507  size = mul_size(sizeof(PgBackendStatus), MaxBackends);
2508  /* BackendAppnameBuffer: */
2509  size = add_size(size,
2510  mul_size(NAMEDATALEN, MaxBackends));
2511  /* BackendClientHostnameBuffer: */
2512  size = add_size(size,
2513  mul_size(NAMEDATALEN, MaxBackends));
2514  /* BackendActivityBuffer: */
2515  size = add_size(size,
2516  mul_size(pgstat_track_activity_query_size, MaxBackends));
2517 #ifdef USE_SSL
2518  /* BackendSslStatusBuffer: */
2519  size = add_size(size,
2520  mul_size(sizeof(PgBackendSSLStatus), MaxBackends));
2521 #endif
2522  return size;
2523 }
2524 
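Editor's aside (not part of pgstat.c): add_size() and mul_size() are the backend's overflow-checked size arithmetic. A minimal standalone equivalent, with illustrative names and example sizes, might look like this.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Illustrative stand-ins for the backend's add_size()/mul_size(). */
static size_t checked_add(size_t a, size_t b)
{
    size_t r = a + b;
    if (r < a) { fprintf(stderr, "size overflow\n"); exit(1); }
    return r;
}

static size_t checked_mul(size_t a, size_t b)
{
    if (b != 0 && a > SIZE_MAX / b) { fprintf(stderr, "size overflow\n"); exit(1); }
    return a * b;
}

int main(void)
{
    /* Example numbers only; the real values come from GUCs and constants. */
    size_t max_backends = 100, namedatalen = 64, query_size = 1024;
    size_t size = checked_mul(128, max_backends);                 /* stand-in for the status array */
    size = checked_add(size, checked_mul(namedatalen, max_backends));
    size = checked_add(size, checked_mul(query_size, max_backends));
    printf("shared size: %zu bytes\n", size);
    return 0;
}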
2525 /*
2526  * Initialize the shared status array and several string buffers
2527  * during postmaster startup.
2528  */
2529 void
2530 CreateSharedBackendStatus(void)
2531 {
2532  Size size;
2533  bool found;
2534  int i;
2535  char *buffer;
2536 
2537  /* Create or attach to the shared array */
2538  size = mul_size(sizeof(PgBackendStatus), MaxBackends);
2539  BackendStatusArray = (PgBackendStatus *)
2540  ShmemInitStruct("Backend Status Array", size, &found);
2541 
2542  if (!found)
2543  {
2544  /*
2545  * We're the first - initialize.
2546  */
2547  MemSet(BackendStatusArray, 0, size);
2548  }
2549 
2550  /* Create or attach to the shared appname buffer */
2551  size = mul_size(NAMEDATALEN, MaxBackends);
2552  BackendAppnameBuffer = (char *)
2553  ShmemInitStruct("Backend Application Name Buffer", size, &found);
2554 
2555  if (!found)
2556  {
2557  MemSet(BackendAppnameBuffer, 0, size);
2558 
2559  /* Initialize st_appname pointers. */
2560  buffer = BackendAppnameBuffer;
2561  for (i = 0; i < MaxBackends; i++)
2562  {
2563  BackendStatusArray[i].st_appname = buffer;
2564  buffer += NAMEDATALEN;
2565  }
2566  }
2567 
2568  /* Create or attach to the shared client hostname buffer */
2569  size = mul_size(NAMEDATALEN, MaxBackends);
2570  BackendClientHostnameBuffer = (char *)
2571  ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
2572 
2573  if (!found)
2574  {
2575  MemSet(BackendClientHostnameBuffer, 0, size);
2576 
2577  /* Initialize st_clienthostname pointers. */
2578  buffer = BackendClientHostnameBuffer;
2579  for (i = 0; i < MaxBackends; i++)
2580  {
2581  BackendStatusArray[i].st_clienthostname = buffer;
2582  buffer += NAMEDATALEN;
2583  }
2584  }
2585 
2586  /* Create or attach to the shared activity buffer */
2587  BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
2588  MaxBackends);
2589  BackendActivityBuffer = (char *)
2590  ShmemInitStruct("Backend Activity Buffer",
2591  BackendActivityBufferSize,
2592  &found);
2593 
2594  if (!found)
2595  {
2596  MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
2597 
2598  /* Initialize st_activity pointers. */
2599  buffer = BackendActivityBuffer;
2600  for (i = 0; i < MaxBackends; i++)
2601  {
2602  BackendStatusArray[i].st_activity = buffer;
2603  buffer += pgstat_track_activity_query_size;
2604  }
2605  }
2606 
2607 #ifdef USE_SSL
2608  /* Create or attach to the shared SSL status buffer */
2609  size = mul_size(sizeof(PgBackendSSLStatus), MaxBackends);
2610  BackendSslStatusBuffer = (PgBackendSSLStatus *)
2611  ShmemInitStruct("Backend SSL Status Buffer", size, &found);
2612 
2613  if (!found)
2614  {
2615  PgBackendSSLStatus *ptr;
2616 
2617  MemSet(BackendSslStatusBuffer, 0, size);
2618 
2619  /* Initialize st_sslstatus pointers. */
2620  ptr = BackendSslStatusBuffer;
2621  for (i = 0; i < MaxBackends; i++)
2622  {
2623  BackendStatusArray[i].st_sslstatus = ptr;
2624  ptr++;
2625  }
2626  }
2627 #endif
2628 }
2629 
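Editor's aside (not part of pgstat.c): the function above carves one contiguous shared allocation into fixed-size per-backend string areas by advancing a cursor pointer. A standalone sketch of the same slicing pattern (malloc stands in for shared memory; names are illustrative):

#include <stdio.h>
#include <stdlib.h>

#define SLOTS 4
#define SLOT_LEN 64

struct slot { char *appname; };

int main(void)
{
    struct slot slots[SLOTS];
    char *base = calloc(SLOTS, SLOT_LEN);   /* one big zeroed buffer */
    char *buffer = base;

    if (base == NULL)
        return 1;
    for (int i = 0; i < SLOTS; i++)
    {
        slots[i].appname = buffer;          /* each slot points into the big buffer */
        buffer += SLOT_LEN;
    }
    snprintf(slots[2].appname, SLOT_LEN, "psql");
    printf("slot 2: %s\n", slots[2].appname);
    free(base);                             /* frees all slices at once */
    return 0;
}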
2630 
2631 /* ----------
2632  * pgstat_initialize() -
2633  *
2634  * Initialize pgstats state, and set up our on-proc-exit hook.
2635  * Called from InitPostgres. MyBackendId must be set,
2636  * but we must not have started any transaction yet (since the
2637  * exit hook must run after the last transaction exit).
2638  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
2639  * ----------
2640  */
2641 void
2642 pgstat_initialize(void)
2643 {
2644  /* Initialize MyBEEntry */
2645  Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
2646  MyBEEntry = &BackendStatusArray[MyBackendId - 1];
2647 
2648  /* Set up a process-exit hook to clean up */
2649  on_shmem_exit(pgstat_beshutdown_hook, 0);
2650 }
2651 
2652 /* ----------
2653  * pgstat_bestart() -
2654  *
2655  * Initialize this backend's entry in the PgBackendStatus array.
2656  * Called from InitPostgres.
2657  * MyDatabaseId, session userid, and application_name must be set
2658  * (hence, this cannot be combined with pgstat_initialize).
2659  * ----------
2660  */
2661 void
2662 pgstat_bestart(void)
2663 {
2664  TimestampTz proc_start_timestamp;
2665  Oid userid;
2666  SockAddr clientaddr;
2667  volatile PgBackendStatus *beentry;
2668 
2669  /*
2670  * To minimize the time spent modifying the PgBackendStatus entry, fetch
2671  * all the needed data first.
2672  *
2673  * If we have a MyProcPort, use its session start time (for consistency,
2674  * and to save a kernel call).
2675  */
2676  if (MyProcPort)
2677  proc_start_timestamp = MyProcPort->SessionStartTime;
2678  else
2679  proc_start_timestamp = GetCurrentTimestamp();
2680  userid = GetSessionUserId();
2681 
2682  /*
2683  * We may not have a MyProcPort (eg, if this is the autovacuum process).
2684  * If so, use all-zeroes client address, which is dealt with specially in
2685  * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
2686  */
2687  if (MyProcPort)
2688  memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
2689  else
2690  MemSet(&clientaddr, 0, sizeof(clientaddr));
2691 
2692  /*
2693  * Initialize my status entry, following the protocol of bumping
2694  * st_changecount before and after; and make sure it's even afterwards. We
2695  * use a volatile pointer here to ensure the compiler doesn't try to get
2696  * cute.
2697  */
2698  beentry = MyBEEntry;
2699  do
2700  {
2701  beentry->st_changecount++;
2702  } while ((beentry->st_changecount & 1) == 0);
2703 
2704  beentry->st_procpid = MyProcPid;
2705  beentry->st_proc_start_timestamp = proc_start_timestamp;
2706  beentry->st_activity_start_timestamp = 0;
2707  beentry->st_state_start_timestamp = 0;
2708  beentry->st_xact_start_timestamp = 0;
2709  beentry->st_databaseid = MyDatabaseId;
2710  beentry->st_userid = userid;
2711  beentry->st_clientaddr = clientaddr;
2712  if (MyProcPort && MyProcPort->remote_hostname)
2713  strlcpy(beentry->st_clienthostname, MyProcPort->remote_hostname,
2714  NAMEDATALEN);
2715  else
2716  beentry->st_clienthostname[0] = '\0';
2717 #ifdef USE_SSL
2718  if (MyProcPort && MyProcPort->ssl != NULL)
2719  {
2720  beentry->st_ssl = true;
2726  }
2727  else
2728  {
2729  beentry->st_ssl = false;
2730  }
2731 #else
2732  beentry->st_ssl = false;
2733 #endif
2734  beentry->st_state = STATE_UNDEFINED;
2735  beentry->st_appname[0] = '\0';
2736  beentry->st_activity[0] = '\0';
2737  /* Also make sure the last byte in each string area is always 0 */
2738  beentry->st_clienthostname[NAMEDATALEN - 1] = '\0';
2739  beentry->st_appname[NAMEDATALEN - 1] = '\0';
2740  beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
2741  beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
2742  beentry->st_progress_command_target = InvalidOid;
2743 
2744  /*
2745  * we don't zero st_progress_param here to save cycles; nobody should
2746  * examine it until st_progress_command has been set to something other
2747  * than PROGRESS_COMMAND_INVALID
2748  */
2749 
2750  pgstat_increment_changecount_after(beentry);
2751 
2752  /* Update app name to current GUC setting */
2753  if (application_name)
2754  pgstat_report_appname(application_name);
2755 }
2756 
2757 /*
2758  * Shut down a single backend's statistics reporting at process exit.
2759  *
2760  * Flush any remaining statistics counts out to the collector.
2761  * Without this, operations triggered during backend exit (such as
2762  * temp table deletions) won't be counted.
2763  *
2764  * Lastly, clear out our entry in the PgBackendStatus array.
2765  */
2766 static void
2767 pgstat_beshutdown_hook(int code, Datum arg)
2768 {
2769  volatile PgBackendStatus *beentry = MyBEEntry;
2770 
2771  /*
2772  * If we got as far as discovering our own database ID, we can report what
2773  * we did to the collector. Otherwise, we'd be sending an invalid
2774  * database ID, so forget it. (This means that accesses to pg_database
2775  * during failed backend starts might never get counted.)
2776  */
2777  if (OidIsValid(MyDatabaseId))
2778  pgstat_report_stat(true);
2779 
2780  /*
2781  * Clear my status entry, following the protocol of bumping st_changecount
2782  * before and after. We use a volatile pointer here to ensure the
2783  * compiler doesn't try to get cute.
2784  */
2785  pgstat_increment_changecount_before(beentry);
2786 
2787  beentry->st_procpid = 0; /* mark invalid */
2788 
2790 }
2791 
2792 
2793 /* ----------
2794  * pgstat_report_activity() -
2795  *
2796  * Called from tcop/postgres.c to report what the backend is actually doing
2797  * (but note cmd_str can be NULL for certain cases).
2798  *
2799  * All updates of the status entry follow the protocol of bumping
2800  * st_changecount before and after. We use a volatile pointer here to
2801  * ensure the compiler doesn't try to get cute.
2802  * ----------
2803  */
2804 void
2805 pgstat_report_activity(BackendState state, const char *cmd_str)
2806 {
2807  volatile PgBackendStatus *beentry = MyBEEntry;
2808  TimestampTz start_timestamp;
2809  TimestampTz current_timestamp;
2810  int len = 0;
2811 
2812  TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
2813 
2814  if (!beentry)
2815  return;
2816 
2817  if (!pgstat_track_activities)
2818  {
2819  if (beentry->st_state != STATE_DISABLED)
2820  {
2821  volatile PGPROC *proc = MyProc;
2822 
2823  /*
2824  * track_activities is disabled, but we last reported a
2825  * non-disabled state. As our final update, change the state and
2826  * clear fields we will not be updating anymore.
2827  */
2828  pgstat_increment_changecount_before(beentry);
2829  beentry->st_state = STATE_DISABLED;
2830  beentry->st_state_start_timestamp = 0;
2831  beentry->st_activity[0] = '\0';
2832  beentry->st_activity_start_timestamp = 0;
2833  /* st_xact_start_timestamp and wait_event_info are also disabled */
2834  beentry->st_xact_start_timestamp = 0;
2835  proc->wait_event_info = 0;
2836  pgstat_increment_changecount_after(beentry);
2837  }
2838  return;
2839  }
2840 
2841  /*
2842  * To minimize the time spent modifying the entry, fetch all the needed
2843  * data first.
2844  */
2845  start_timestamp = GetCurrentStatementStartTimestamp();
2846  if (cmd_str != NULL)
2847  {
2848  len = pg_mbcliplen(cmd_str, strlen(cmd_str),
2849  pgstat_track_activity_query_size - 1);
2850  }
2851  current_timestamp = GetCurrentTimestamp();
2852 
2853  /*
2854  * Now update the status entry
2855  */
2856  pgstat_increment_changecount_before(beentry);
2857 
2858  beentry->st_state = state;
2859  beentry->st_state_start_timestamp = current_timestamp;
2860 
2861  if (cmd_str != NULL)
2862  {
2863  memcpy((char *) beentry->st_activity, cmd_str, len);
2864  beentry->st_activity[len] = '\0';
2865  beentry->st_activity_start_timestamp = start_timestamp;
2866  }
2867 
2868  pgstat_increment_changecount_after(beentry);
2869 }
2870 
2871 /*-----------
2872  * pgstat_progress_start_command() -
2873  *
2874  * Set st_progress_command (and st_progress_command_target) in own backend
2875  * entry. Also, zero-initialize st_progress_param array.
2876  *-----------
2877  */
2878 void
2879 pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
2880 {
2881  volatile PgBackendStatus *beentry = MyBEEntry;
2882 
2883  if (!beentry || !pgstat_track_activities)
2884  return;
2885 
2886  pgstat_increment_changecount_before(beentry);
2887  beentry->st_progress_command = cmdtype;
2888  beentry->st_progress_command_target = relid;
2889  MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
2890  pgstat_increment_changecount_after(beentry);
2891 }
2892 
2893 /*-----------
2894  * pgstat_progress_update_param() -
2895  *
2896  * Update index'th member in st_progress_param[] of own backend entry.
2897  *-----------
2898  */
2899 void
2900 pgstat_progress_update_param(int index, int64 val)
2901 {
2902  volatile PgBackendStatus *beentry = MyBEEntry;
2903 
2904  Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
2905 
2906  if (!beentry || !pgstat_track_activities)
2907  return;
2908 
2909  pgstat_increment_changecount_before(beentry);
2910  beentry->st_progress_param[index] = val;
2911  pgstat_increment_changecount_after(beentry);
2912 }
2913 
2914 /*-----------
2915  * pgstat_progress_update_multi_param() -
2916  *
2917  * Update multiple members in st_progress_param[] of own backend entry.
2918  * This is atomic; readers won't see intermediate states.
2919  *-----------
2920  */
2921 void
2922 pgstat_progress_update_multi_param(int nparam, const int *index,
2923  const int64 *val)
2924 {
2925  volatile PgBackendStatus *beentry = MyBEEntry;
2926  int i;
2927 
2928  if (!beentry || !pgstat_track_activities || nparam == 0)
2929  return;
2930 
2931  pgstat_increment_changecount_before(beentry);
2932 
2933  for (i = 0; i < nparam; ++i)
2934  {
2935  Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
2936 
2937  beentry->st_progress_param[index[i]] = val[i];
2938  }
2939 
2940  pgstat_increment_changecount_after(beentry);
2941 }
2942 
2943 /*-----------
2944  * pgstat_progress_end_command() -
2945  *
2946  * Reset st_progress_command (and st_progress_command_target) in own backend
2947  * entry. This signals the end of the command.
2948  *-----------
2949  */
2950 void
2951 pgstat_progress_end_command(void)
2952 {
2953  volatile PgBackendStatus *beentry = MyBEEntry;
2954 
2955  if (!beentry)
2956  return;
2957  if (!pgstat_track_activities &&
2958  beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
2959  return;
2960 
2961  pgstat_increment_changecount_before(beentry);
2962  beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
2963  beentry->st_progress_command_target = InvalidOid;
2964  pgstat_increment_changecount_after(beentry);
2965 }
2966 
2967 /* ----------
2968  * pgstat_report_appname() -
2969  *
2970  * Called to update our application name.
2971  * ----------
2972  */
2973 void
2974 pgstat_report_appname(const char *appname)
2975 {
2976  volatile PgBackendStatus *beentry = MyBEEntry;
2977  int len;
2978 
2979  if (!beentry)
2980  return;
2981 
2982  /* This should be unnecessary if GUC did its job, but be safe */
2983  len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
2984 
2985  /*
2986  * Update my status entry, following the protocol of bumping
2987  * st_changecount before and after. We use a volatile pointer here to
2988  * ensure the compiler doesn't try to get cute.
2989  */
2990  pgstat_increment_changecount_before(beentry);
2991 
2992  memcpy((char *) beentry->st_appname, appname, len);
2993  beentry->st_appname[len] = '\0';
2994 
2995  pgstat_increment_changecount_after(beentry);
2996 }
2997 
2998 /*
2999  * Report current transaction start timestamp as the specified value.
3000  * Zero means there is no active transaction.
3001  */
3002 void
3003 pgstat_report_xact_timestamp(TimestampTz tstamp)
3004 {
3005  volatile PgBackendStatus *beentry = MyBEEntry;
3006 
3007  if (!pgstat_track_activities || !beentry)
3008  return;
3009 
3010  /*
3011  * Update my status entry, following the protocol of bumping
3012  * st_changecount before and after. We use a volatile pointer here to
3013  * ensure the compiler doesn't try to get cute.
3014  */
3015  pgstat_increment_changecount_before(beentry);
3016  beentry->st_xact_start_timestamp = tstamp;
3017  pgstat_increment_changecount_after(beentry);
3018 }
3019 
3020 /* ----------
3021  * pgstat_read_current_status() -
3022  *
3023  * Copy the current contents of the PgBackendStatus array to local memory,
3024  * if not already done in this transaction.
3025  * ----------
3026  */
3027 static void
3028 pgstat_read_current_status(void)
3029 {
3030  volatile PgBackendStatus *beentry;
3031  LocalPgBackendStatus *localtable;
3032  LocalPgBackendStatus *localentry;
3033  char *localappname,
3034  *localactivity;
3035 #ifdef USE_SSL
3036  PgBackendSSLStatus *localsslstatus;
3037 #endif
3038  int i;
3039 
3040  Assert(!pgStatRunningInCollector);
3041  if (localBackendStatusTable)
3042  return; /* already done */
3043 
3044  pgstat_setup_memcxt();
3045 
3046  localtable = (LocalPgBackendStatus *)
3047  MemoryContextAlloc(pgStatLocalContext,
3048  sizeof(LocalPgBackendStatus) * MaxBackends);
3049  localappname = (char *)
3050  MemoryContextAlloc(pgStatLocalContext,
3051  NAMEDATALEN * MaxBackends);
3052  localactivity = (char *)
3053  MemoryContextAlloc(pgStatLocalContext,
3054  pgstat_track_activity_query_size * MaxBackends);
3055 #ifdef USE_SSL
3056  localsslstatus = (PgBackendSSLStatus *)
3057  MemoryContextAlloc(pgStatLocalContext,
3058  sizeof(PgBackendSSLStatus) * MaxBackends);
3059 #endif
3060 
3061  localNumBackends = 0;
3062 
3063  beentry = BackendStatusArray;
3064  localentry = localtable;
3065  for (i = 1; i <= MaxBackends; i++)
3066  {
3067  /*
3068  * Follow the protocol of retrying if st_changecount changes while we
3069  * copy the entry, or if it's odd. (The check for odd is needed to
3070  * cover the case where we are able to completely copy the entry while
3071  * the source backend is between increment steps.) We use a volatile
3072  * pointer here to ensure the compiler doesn't try to get cute.
3073  */
3074  for (;;)
3075  {
3076  int before_changecount;
3077  int after_changecount;
3078 
3079  pgstat_save_changecount_before(beentry, before_changecount);
3080 
3081  localentry->backendStatus.st_procpid = beentry->st_procpid;
3082  if (localentry->backendStatus.st_procpid > 0)
3083  {
3084  memcpy(&localentry->backendStatus, (char *) beentry, sizeof(PgBackendStatus));
3085 
3086  /*
3087  * strcpy is safe even if the string is modified concurrently,
3088  * because there's always a \0 at the end of the buffer.
3089  */
3090  strcpy(localappname, (char *) beentry->st_appname);
3091  localentry->backendStatus.st_appname = localappname;
3092  strcpy(localactivity, (char *) beentry->st_activity);
3093  localentry->backendStatus.st_activity = localactivity;
3094  localentry->backendStatus.st_ssl = beentry->st_ssl;
3095 #ifdef USE_SSL
3096  if (beentry->st_ssl)
3097  {
3098  memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
3099  localentry->backendStatus.st_sslstatus = localsslstatus;
3100  }
3101 #endif
3102  }
3103 
3104  pgstat_save_changecount_after(beentry, after_changecount);
3105  if (before_changecount == after_changecount &&
3106  (before_changecount & 1) == 0)
3107  break;
3108 
3109  /* Make sure we can break out of loop if stuck... */
3110  CHECK_FOR_INTERRUPTS();
3111  }
3112 
3113  beentry++;
3114  /* Only valid entries get included into the local array */
3115  if (localentry->backendStatus.st_procpid > 0)
3116  {
3117  BackendIdGetTransactionIds(i,
3118  &localentry->backend_xid,
3119  &localentry->backend_xmin);
3120 
3121  localentry++;
3122  localappname += NAMEDATALEN;
3123  localactivity += pgstat_track_activity_query_size;
3124 #ifdef USE_SSL
3125  localsslstatus++;
3126 #endif
3127  localNumBackends++;
3128  }
3129  }
3130 
3131  /* Set the pointer only after completion of a valid table */
3132  localBackendStatusTable = localtable;
3133 }
3134 
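Editor's aside (not part of pgstat.c): the even/odd st_changecount handshake used by pgstat_bestart() and the reader above is essentially a single-writer seqlock. A minimal standalone sketch using C11 atomics instead of the backend's st_changecount macros and barriers; all names are illustrative.

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>

/* Illustrative entry, loosely modeled on PgBackendStatus. */
typedef struct
{
    atomic_int changecount;          /* odd while the single writer is mid-update */
    int        procpid;
    char       activity[64];
} status_entry;

/* Writer: bump to odd, modify the fields, bump back to even. */
static void write_entry(status_entry *e, int pid, const char *act)
{
    atomic_fetch_add_explicit(&e->changecount, 1, memory_order_acq_rel);
    e->procpid = pid;
    strncpy(e->activity, act, sizeof(e->activity) - 1);
    e->activity[sizeof(e->activity) - 1] = '\0';
    atomic_fetch_add_explicit(&e->changecount, 1, memory_order_acq_rel);
}

/* Reader: copy the entry, then retry if the count moved or was odd. */
static bool read_entry(status_entry *e, status_entry *out)
{
    for (int tries = 0; tries < 1000; tries++)
    {
        int before = atomic_load_explicit(&e->changecount, memory_order_acquire);

        memcpy(out, e, sizeof(*out));

        int after = atomic_load_explicit(&e->changecount, memory_order_acquire);

        if (before == after && (before & 1) == 0)
            return true;             /* consistent snapshot obtained */
    }
    return false;                    /* writer kept interfering; give up */
}

int main(void)
{
    status_entry shared = {0};
    status_entry snap;

    write_entry(&shared, 4242, "SELECT 1");
    return read_entry(&shared, &snap) ? 0 : 1;
}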
3135 /* ----------
3136  * pgstat_get_wait_event_type() -
3137  *
3138  * Return a string representing the type of the wait event on which the
3139  * backend is currently waiting.
3140  */
3141 const char *
3142 pgstat_get_wait_event_type(uint32 wait_event_info)
3143 {
3144  uint32 classId;
3145  const char *event_type;
3146 
3147  /* report process as not waiting. */
3148  if (wait_event_info == 0)
3149  return NULL;
3150 
3151  classId = wait_event_info & 0xFF000000;
3152 
3153  switch (classId)
3154  {
3155  case PG_WAIT_LWLOCK:
3156  event_type = "LWLock";
3157  break;
3158  case PG_WAIT_LOCK:
3159  event_type = "Lock";
3160  break;
3161  case PG_WAIT_BUFFER_PIN:
3162  event_type = "BufferPin";
3163  break;
3164  case PG_WAIT_ACTIVITY:
3165  event_type = "Activity";
3166  break;
3167  case PG_WAIT_CLIENT:
3168  event_type = "Client";
3169  break;
3170  case PG_WAIT_EXTENSION:
3171  event_type = "Extension";
3172  break;
3173  case PG_WAIT_IPC:
3174  event_type = "IPC";
3175  break;
3176  case PG_WAIT_TIMEOUT:
3177  event_type = "Timeout";
3178  break;
3179  default:
3180  event_type = "???";
3181  break;
3182  }
3183 
3184  return event_type;
3185 }
3186 
3187 /* ----------
3188  * pgstat_get_wait_event() -
3189  *
3190  * Return a string representing the wait event on which the backend is
3191  * currently waiting.
3192  */
3193 const char *
3194 pgstat_get_wait_event(uint32 wait_event_info)
3195 {
3196  uint32 classId;
3197  uint16 eventId;
3198  const char *event_name;
3199 
3200  /* report process as not waiting. */
3201  if (wait_event_info == 0)
3202  return NULL;
3203 
3204  classId = wait_event_info & 0xFF000000;
3205  eventId = wait_event_info & 0x0000FFFF;
3206 
3207  switch (classId)
3208  {
3209  case PG_WAIT_LWLOCK:
3210  event_name = GetLWLockIdentifier(classId, eventId);
3211  break;
3212  case PG_WAIT_LOCK:
3213  event_name = GetLockNameFromTagType(eventId);
3214  break;
3215  case PG_WAIT_BUFFER_PIN:
3216  event_name = "BufferPin";
3217  break;
3218  case PG_WAIT_ACTIVITY:
3219  {
3220  WaitEventActivity w = (WaitEventActivity) wait_event_info;
3221 
3222  event_name = pgstat_get_wait_activity(w);
3223  break;
3224  }
3225  case PG_WAIT_CLIENT:
3226  {
3227  WaitEventClient w = (WaitEventClient) wait_event_info;
3228 
3229  event_name = pgstat_get_wait_client(w);
3230  break;
3231  }
3232  case PG_WAIT_EXTENSION:
3233  event_name = "Extension";
3234  break;
3235  case PG_WAIT_IPC:
3236  {
3237  WaitEventIPC w = (WaitEventIPC) wait_event_info;
3238 
3239  event_name = pgstat_get_wait_ipc(w);
3240  break;
3241  }
3242  case PG_WAIT_TIMEOUT:
3243  {
3244  WaitEventTimeout w = (WaitEventTimeout) wait_event_info;
3245 
3246  event_name = pgstat_get_wait_timeout(w);
3247  break;
3248  }
3249  default:
3250  event_name = "unknown wait event";
3251  break;
3252  }
3253 
3254  return event_name;
3255 }
3256 
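Editor's aside (not part of pgstat.c): wait_event_info packs an 8-bit class into the top byte and a 16-bit event id into the low bits, which is what the masks above extract. A tiny standalone sketch of that packing; the class constant here is illustrative, not the real PG_WAIT_* value.

#include <stdio.h>
#include <stdint.h>

#define MY_WAIT_CLASS_TIMEOUT 0x09000000U   /* illustrative class value */

static uint32_t make_wait_event(uint32_t classId, uint16_t eventId)
{
    return classId | eventId;               /* class in the high byte, event in the low 16 bits */
}

int main(void)
{
    uint32_t info = make_wait_event(MY_WAIT_CLASS_TIMEOUT, 2);
    uint32_t classId = info & 0xFF000000U;  /* same masks as pgstat_get_wait_event() */
    uint16_t eventId = info & 0x0000FFFFU;

    printf("class=0x%08X event=%u\n", classId, (unsigned) eventId);
    return 0;
}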
3257 /* ----------
3258  * pgstat_get_wait_activity() -
3259  *
3260  * Convert WaitEventActivity to string.
3261  * ----------
3262  */
3263 static const char *
3264 pgstat_get_wait_activity(WaitEventActivity w)
3265 {
3266  const char *event_name = "unknown wait event";
3267 
3268  switch (w)
3269  {
3271  event_name = "ArchiverMain";
3272  break;
3274  event_name = "AutoVacuumMain";
3275  break;
3277  event_name = "BgWriterHibernate";
3278  break;
3280  event_name = "BgWriterMain";
3281  break;
3283  event_name = "CheckpointerMain";
3284  break;
3286  event_name = "PgStatMain";
3287  break;
3289  event_name = "RecoveryWalAll";
3290  break;
3292  event_name = "RecoveryWalStream";
3293  break;
3295  event_name = "SysLoggerMain";
3296  break;
3298  event_name = "WalReceiverMain";
3299  break;
3301  event_name = "WalSenderMain";
3302  break;
3304  event_name = "WalWriterMain";
3305  break;
3307  event_name = "LogicalLauncherMain";
3308  break;
3310  event_name = "LogicalApplyMain";
3311  break;
3312  /* no default case, so that compiler will warn */
3313  }
3314 
3315  return event_name;
3316 }
3317 
3318 /* ----------
3319  * pgstat_get_wait_client() -
3320  *
3321  * Convert WaitEventClient to string.
3322  * ----------
3323  */
3324 static const char *
3325 pgstat_get_wait_client(WaitEventClient w)
3326 {
3327  const char *event_name = "unknown wait event";
3328 
3329  switch (w)
3330  {
3332  event_name = "ClientRead";
3333  break;
3335  event_name = "ClientWrite";
3336  break;
3338  event_name = "SSLOpenServer";
3339  break;
3341  event_name = "WalReceiverWaitStart";
3342  break;
3344  event_name = "LibPQWalReceiverRead";
3345  break;
3347  event_name = "WalSenderWaitForWAL";
3348  break;
3350  event_name = "WalSenderWriteData";
3351  break;
3352  /* no default case, so that compiler will warn */
3353  }
3354 
3355  return event_name;
3356 }
3357 
3358 /* ----------
3359  * pgstat_get_wait_ipc() -
3360  *
3361  * Convert WaitEventIPC to string.
3362  * ----------
3363  */
3364 static const char *
3365 pgstat_get_wait_ipc(WaitEventIPC w)
3366 {
3367  const char *event_name = "unknown wait event";
3368 
3369  switch (w)
3370  {
3372  event_name = "BgWorkerShutdown";
3373  break;
3375  event_name = "BgWorkerStartup";
3376  break;
3377  case WAIT_EVENT_BTREE_PAGE:
3378  event_name = "BtreePage";
3379  break;
3381  event_name = "ExecuteGather";
3382  break;
3384  event_name = "MessageQueueInternal";
3385  break;
3387  event_name = "MessageQueuePutMessage";
3388  break;
3389  case WAIT_EVENT_MQ_RECEIVE:
3390  event_name = "MessageQueueReceive";
3391  break;
3392  case WAIT_EVENT_MQ_SEND:
3393  event_name = "MessageQueueSend";
3394  break;
3396  event_name = "ParallelFinish";
3397  break;
3399  event_name = "SafeSnapshot";
3400  break;
3401  case WAIT_EVENT_SYNC_REP:
3402  event_name = "SyncRep";
3403  break;
3404  /* no default case, so that compiler will warn */
3405  }
3406 
3407  return event_name;
3408 }
3409 
3410 /* ----------
3411  * pgstat_get_wait_timeout() -
3412  *
3413  * Convert WaitEventTimeout to string.
3414  * ----------
3415  */
3416 static const char *
3417 pgstat_get_wait_timeout(WaitEventTimeout w)
3418 {
3419  const char *event_name = "unknown wait event";
3420 
3421  switch (w)
3422  {
3424  event_name = "BaseBackupThrottle";
3425  break;
3426  case WAIT_EVENT_PG_SLEEP:
3427  event_name = "PgSleep";
3428  break;
3430  event_name = "RecoveryApplyDelay";
3431  break;
3432  /* no default case, so that compiler will warn */
3433  }
3434 
3435  return event_name;
3436 }
3437 
3438 /* ----------
3439  * pgstat_get_backend_current_activity() -
3440  *
3441  * Return a string representing the current activity of the backend with
3442  * the specified PID. This looks directly at the BackendStatusArray,
3443  * and so will provide current information regardless of the age of our
3444  * transaction's snapshot of the status array.
3445  *
3446  * It is the caller's responsibility to invoke this only for backends whose
3447  * state is expected to remain stable while the result is in use. The
3448  * only current use is in deadlock reporting, where we can expect that
3449  * the target backend is blocked on a lock. (There are corner cases
3450  * where the target's wait could get aborted while we are looking at it,
3451  * but the very worst consequence is to return a pointer to a string
3452  * that's been changed, so we won't worry too much.)
3453  *
3454  * Note: return strings for special cases match pg_stat_get_backend_activity.
3455  * ----------
3456  */
3457 const char *
3458 pgstat_get_backend_current_activity(int pid, bool checkUser)
3459 {
3460  PgBackendStatus *beentry;
3461  int i;
3462 
3463  beentry = BackendStatusArray;
3464  for (i = 1; i <= MaxBackends; i++)
3465  {
3466  /*
3467  * Although we expect the target backend's entry to be stable, that
3468  * doesn't imply that anyone else's is. To avoid identifying the
3469  * wrong backend, while we check for a match to the desired PID we
3470  * must follow the protocol of retrying if st_changecount changes
3471  * while we examine the entry, or if it's odd. (This might be
3472  * unnecessary, since fetching or storing an int is almost certainly
3473  * atomic, but let's play it safe.) We use a volatile pointer here to
3474  * ensure the compiler doesn't try to get cute.
3475  */
3476  volatile PgBackendStatus *vbeentry = beentry;
3477  bool found;
3478 
3479  for (;;)
3480  {
3481  int before_changecount;
3482  int after_changecount;
3483 
3484  pgstat_save_changecount_before(vbeentry, before_changecount);
3485 
3486  found = (vbeentry->st_procpid == pid);
3487 
3488  pgstat_save_changecount_after(vbeentry, after_changecount);
3489 
3490  if (before_changecount == after_changecount &&
3491  (before_changecount & 1) == 0)
3492  break;
3493 
3494  /* Make sure we can break out of loop if stuck... */
3495  CHECK_FOR_INTERRUPTS();
3496  }
3497 
3498  if (found)
3499  {
3500  /* Now it is safe to use the non-volatile pointer */
3501  if (checkUser && !superuser() && beentry->st_userid != GetUserId())
3502  return "<insufficient privilege>";
3503  else if (*(beentry->st_activity) == '\0')
3504  return "<command string not enabled>";
3505  else
3506  return beentry->st_activity;
3507  }
3508 
3509  beentry++;
3510  }
3511 
3512  /* If we get here, caller is in error ... */
3513  return "<backend information not available>";
3514 }
3515 
3516 /* ----------
3517  * pgstat_get_crashed_backend_activity() -
3518  *
3519  * Return a string representing the current activity of the backend with
3520  * the specified PID. Like the function above, but reads shared memory with
3521  * the expectation that it may be corrupt. On success, copy the string
3522  * into the "buffer" argument and return that pointer. On failure,
3523  * return NULL.
3524  *
3525  * This function is only intended to be used by the postmaster to report the
3526  * query that crashed a backend. In particular, no attempt is made to
3527  * follow the correct concurrency protocol when accessing the
3528  * BackendStatusArray. But that's OK, in the worst case we'll return a
3529  * corrupted message. We also must take care not to trip on ereport(ERROR).
3530  * ----------
3531  */
3532 const char *
3533 pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
3534 {
3535  volatile PgBackendStatus *beentry;
3536  int i;
3537 
3538  beentry = BackendStatusArray;
3539 
3540  /*
3541  * We probably shouldn't get here before shared memory has been set up,
3542  * but be safe.
3543  */
3544  if (beentry == NULL || BackendActivityBuffer == NULL)
3545  return NULL;
3546 
3547  for (i = 1; i <= MaxBackends; i++)
3548  {
3549  if (beentry->st_procpid == pid)
3550  {
3551  /* Read pointer just once, so it can't change after validation */
3552  const char *activity = beentry->st_activity;
3553  const char *activity_last;
3554 
3555  /*
3556  * We mustn't access activity string before we verify that it
3557  * falls within the BackendActivityBuffer. To make sure that the
3558  * entire string including its ending is contained within the
3559  * buffer, subtract one activity length from the buffer size.
3560  */
3561  activity_last = BackendActivityBuffer + BackendActivityBufferSize
3562  - pgstat_track_activity_query_size;
3563 
3564  if (activity < BackendActivityBuffer ||
3565  activity > activity_last)
3566  return NULL;
3567 
3568  /* If no string available, no point in a report */
3569  if (activity[0] == '\0')
3570  return NULL;
3571 
3572  /*
3573  * Copy only ASCII-safe characters so we don't run into encoding
3574  * problems when reporting the message; and be sure not to run off
3575  * the end of memory.
3576  */
3577  ascii_safe_strlcpy(buffer, activity,
3578  Min(buflen, pgstat_track_activity_query_size));
3579 
3580  return buffer;
3581  }
3582 
3583  beentry++;
3584  }
3585 
3586  /* PID not found */
3587  return NULL;
3588 }
3589 
3590 
3591 /* ------------------------------------------------------------
3592  * Local support functions follow
3593  * ------------------------------------------------------------
3594  */
3595 
3596 
3597 /* ----------
3598  * pgstat_setheader() -
3599  *
3600  * Set common header fields in a statistics message
3601  * ----------
3602  */
3603 static void
3604 pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
3605 {
3606  hdr->m_type = mtype;
3607 }
3608 
3609 
3610 /* ----------
3611  * pgstat_send() -
3612  *
3613  * Send out one statistics message to the collector
3614  * ----------
3615  */
3616 static void
3617 pgstat_send(void *msg, int len)
3618 {
3619  int rc;
3620 
3621  if (pgStatSock == PGINVALID_SOCKET)
3622  return;
3623 
3624  ((PgStat_MsgHdr *) msg)->m_size = len;
3625 
3626  /* We'll retry after EINTR, but ignore all other failures */
3627  do
3628  {
3629  rc = send(pgStatSock, msg, len, 0);
3630  } while (rc < 0 && errno == EINTR);
3631 
3632 #ifdef USE_ASSERT_CHECKING
3633  /* In debug builds, log send failures ... */
3634  if (rc < 0)
3635  elog(LOG, "could not send to statistics collector: %m");
3636 #endif
3637 }
3638 
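Editor's aside (not part of pgstat.c): pgstat_send() retries only on EINTR and otherwise ignores failures, because the statistics traffic is deliberately lossy. A generic standalone sketch of that retry idiom; the helper name is illustrative.

#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>

/* Illustrative: retry a datagram send only when interrupted by a signal;
 * any other failure is simply returned to the caller. */
static ssize_t send_retry_eintr(int sock, const void *buf, size_t len)
{
    ssize_t rc;

    do
    {
        rc = send(sock, buf, len, 0);
    } while (rc < 0 && errno == EINTR);

    return rc;                  /* < 0 means a non-EINTR error occurred */
}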
3639 /* ----------
3640  * pgstat_send_archiver() -
3641  *
3642  * Tell the collector about the WAL file that we successfully
3643  * archived or failed to archive.
3644  * ----------
3645  */
3646 void
3647 pgstat_send_archiver(const char *xlog, bool failed)
3648 {
3649  PgStat_MsgArchiver msg;
3650 
3651  /*
3652  * Prepare and send the message
3653  */
3654  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER);
3655  msg.m_failed = failed;
3656  StrNCpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
3657  msg.m_timestamp = GetCurrentTimestamp();
3658  pgstat_send(&msg, sizeof(msg));
3659 }
3660 
3661 /* ----------
3662  * pgstat_send_bgwriter() -
3663  *
3664  * Send bgwriter statistics to the collector
3665  * ----------
3666  */
3667 void
3668 pgstat_send_bgwriter(void)
3669 {
3670  /* We assume this initializes to zeroes */
3671  static const PgStat_MsgBgWriter all_zeroes;
3672 
3673  /*
3674  * This function can be called even if nothing at all has happened. In
3675  * this case, avoid sending a completely empty message to the stats
3676  * collector.
3677  */
3678  if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
3679  return;
3680 
3681  /*
3682  * Prepare and send the message
3683  */
3684  pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
3685  pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
3686 
3687  /*
3688  * Clear out the statistics buffer, so it can be re-used.
3689  */
3690  MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
3691 }
3692 
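Editor's aside (not part of pgstat.c): comparing the counter struct against a static all-zeroes instance, as above, is a cheap "anything to report?" test. A standalone sketch with an illustrative counter struct:

#include <stdio.h>
#include <string.h>

/* Illustrative counter block; static storage is initialized to all zeroes. */
typedef struct { long writes; long fsyncs; long maxwritten; } writer_counts;

static writer_counts pending;                 /* accumulated since the last send */

static int have_pending_stats(void)
{
    static const writer_counts all_zeroes;    /* stays all zero bytes */

    return memcmp(&pending, &all_zeroes, sizeof(pending)) != 0;
}

int main(void)
{
    printf("pending? %d\n", have_pending_stats());   /* prints 0 */
    pending.writes++;
    printf("pending? %d\n", have_pending_stats());   /* prints 1 */
    memset(&pending, 0, sizeof(pending));            /* reset after sending */
    return 0;
}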
3693 
3694 /* ----------
3695  * PgstatCollectorMain() -
3696  *
3697  * Start up the statistics collector process. This is the body of the
3698  * postmaster child process.
3699  *
3700  * The argc/argv parameters are valid only in EXEC_BACKEND case.
3701  * ----------
3702  */
3703 NON_EXEC_STATIC void
3704 PgstatCollectorMain(int argc, char *argv[])
3705 {
3706  int len;
3707  PgStat_Msg msg;
3708  int wr;
3709 
3710  /*
3711  * Ignore all signals usually bound to some action in the postmaster,
3712  * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3713  * support latch operations, because we only use a local latch.
3714  */
3715  pqsignal(SIGHUP, pgstat_sighup_handler);
3716  pqsignal(SIGINT, SIG_IGN);
3717  pqsignal(SIGTERM, SIG_IGN);
3729 
3730  /*
3731  * Identify myself via ps
3732  */
3733  init_ps_display("stats collector process", "", "", "");
3734 
3735  /*
3736  * Read in existing stats files or initialize the stats to zero.
3737  */
3738  pgStatRunningInCollector = true;
3739  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
3740 
3741  /*
3742  * Loop to process messages until we get SIGQUIT or detect ungraceful
3743  * death of our parent postmaster.
3744  *
3745  * For performance reasons, we don't want to do ResetLatch/WaitLatch after
3746  * every message; instead, do that only after a recv() fails to obtain a
3747  * message. (This effectively means that if backends are sending us stuff
3748  * like mad, we won't notice postmaster death until things slack off a
3749  * bit; which seems fine.) To do that, we have an inner loop that
3750  * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3751  * the inner loop, which means that such interrupts will get serviced but
3752  * the latch won't get cleared until next time there is a break in the
3753  * action.
3754  */
3755  for (;;)
3756  {
3757  /* Clear any already-pending wakeups */
3758  ResetLatch(MyLatch);
3759 
3760  /*
3761  * Quit if we get SIGQUIT from the postmaster.
3762  */
3763  if (need_exit)
3764  break;
3765 
3766  /*
3767  * Inner loop iterates as long as we keep getting messages, or until
3768  * need_exit becomes set.
3769  */
3770  while (!need_exit)
3771  {
3772  /*
3773  * Reload configuration if we got SIGHUP from the postmaster.
3774  */
3775  if (got_SIGHUP)
3776  {
3777  got_SIGHUP = false;
3778  ProcessConfigFile(PGC_SIGHUP);
3779  }
3780 
3781  /*
3782  * Write the stats file(s) if a new request has arrived that is
3783  * not satisfied by existing file(s).
3784  */
3785  if (pgstat_write_statsfile_needed())
3786  pgstat_write_statsfiles(false, false);
3787 
3788  /*
3789  * Try to receive and process a message. This will not block,
3790  * since the socket is set to non-blocking mode.
3791  *
3792  * XXX On Windows, we have to force pgwin32_recv to cooperate,
3793  * despite the previous use of pg_set_noblock() on the socket.
3794  * This is extremely broken and should be fixed someday.
3795  */
3796 #ifdef WIN32
3797  pgwin32_noblock = 1;
3798 #endif
3799 
3800  len = recv(pgStatSock, (char *) &msg,
3801  sizeof(PgStat_Msg), 0);
3802 
3803 #ifdef WIN32
3804  pgwin32_noblock = 0;
3805 #endif
3806 
3807  if (len < 0)
3808  {
3809  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
3810  break; /* out of inner loop */
3811  ereport(ERROR,
3812  (errcode_for_socket_access(),
3813  errmsg("could not read statistics message: %m")));
3814  }
3815 
3816  /*
3817  * We ignore messages that are smaller than our common header
3818  */
3819  if (len < sizeof(PgStat_MsgHdr))
3820  continue;
3821 
3822  /*
3823  * The received length must match the length in the header
3824  */
3825  if (msg.msg_hdr.m_size != len)
3826  continue;
3827 
3828  /*
3829  * O.K. - we accept this message. Process it.
3830  */
3831  switch (msg.msg_hdr.m_type)
3832  {
3833  case PGSTAT_MTYPE_DUMMY:
3834  break;
3835 
3836  case PGSTAT_MTYPE_INQUIRY:
3837  pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
3838  break;
3839 
3840  case PGSTAT_MTYPE_TABSTAT:
3841  pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
3842  break;
3843 
3844  case PGSTAT_MTYPE_TABPURGE:
3845  pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, len);
3846  break;
3847 
3848  case PGSTAT_MTYPE_DROPDB:
3849  pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len);
3850  break;
3851 
3854  len);
3855  break;
3856 
3860  len);
3861  break;
3862 
3866  len);
3867  break;
3868 
3869  case PGSTAT_MTYPE_AUTOVAC_START:
3870  pgstat_recv_autovac((PgStat_MsgAutovacStart *) &msg, len);
3871  break;
3872 
3873  case PGSTAT_MTYPE_VACUUM:
3874  pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len);
3875  break;
3876 
3877  case PGSTAT_MTYPE_ANALYZE:
3878  pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len);
3879  break;
3880 
3881  case PGSTAT_MTYPE_ARCHIVER:
3882  pgstat_recv_archiver((PgStat_MsgArchiver *) &msg, len);
3883  break;
3884 
3885  case PGSTAT_MTYPE_BGWRITER:
3886  pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
3887  break;
3888 
3889  case PGSTAT_MTYPE_FUNCSTAT:
3890  pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
3891  break;
3892 
3893  case PGSTAT_MTYPE_FUNCPURGE:
3894  pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
3895  break;
3896 
3897  case PGSTAT_MTYPE_RECOVERYCONFLICT:
3898  pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
3899  break;
3900 
3901  case PGSTAT_MTYPE_DEADLOCK:
3902  pgstat_recv_deadlock((PgStat_MsgDeadlock *) &msg, len);
3903  break;
3904 
3905  case PGSTAT_MTYPE_TEMPFILE:
3906  pgstat_recv_tempfile((PgStat_MsgTempFile *) &msg, len);
3907  break;
3908 
3909  default:
3910  break;
3911  }
3912  } /* end of inner message-processing loop */
3913 
3914  /* Sleep until there's something to do */
3915 #ifndef WIN32
3918  pgStatSock, -1L,
3920 #else
3921 
3922  /*
3923  * Windows, at least in its Windows Server 2003 R2 incarnation,
3924  * sometimes loses FD_READ events. Waking up and retrying the recv()
3925  * fixes that, so don't sleep indefinitely. This is a crock of the
3926  * first water, but until somebody wants to debug exactly what's
3927  * happening there, this is the best we can do. The two-second
3928  * timeout matches our pre-9.2 behavior, and needs to be short enough
3929  * to not provoke "using stale statistics" complaints from
3930  * backend_read_statsfile.
3931  */
3934  pgStatSock,
3935  2 * 1000L /* msec */,
3937 #endif
3938 
3939  /*
3940  * Emergency bailout if postmaster has died. This is to avoid the
3941  * necessity for manual cleanup of all postmaster children.
3942  */
3943  if (wr & WL_POSTMASTER_DEATH)
3944  break;
3945  } /* end of outer loop */
3946 
3947  /*
3948  * Save the final stats to reuse at next startup.
3949  */
3950  pgstat_write_statsfiles(true, true);
3951 
3952  exit(0);
3953 }
3954 
3955 
3956 /* SIGQUIT signal handler for collector process */
3957 static void
3958 pgstat_exit(SIGNAL_ARGS)
3959 {
3960  int save_errno = errno;
3961 
3962  need_exit = true;
3963  SetLatch(MyLatch);
3964 
3965  errno = save_errno;
3966 }
3967 
3968 /* SIGHUP handler for collector process */
3969 static void
3970 pgstat_sighup_handler(SIGNAL_ARGS)
3971 {
3972  int save_errno = errno;
3973 
3974  got_SIGHUP = true;
3975  SetLatch(MyLatch);
3976 
3977  errno = save_errno;
3978 }
3979 
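Editor's aside (not part of pgstat.c): both handlers above only set a flag, preserve errno, and wake the main loop with SetLatch(). SetLatch() is PostgreSQL-specific, so this standalone sketch shows only the flag-plus-errno part of the pattern; names are illustrative.

#include <signal.h>
#include <errno.h>

/* Illustrative: async-signal-safe handler that records the request and
 * preserves errno; the main loop polls the flag (PostgreSQL additionally
 * wakes its WaitLatch-based loop via SetLatch()). */
static volatile sig_atomic_t reload_requested = 0;

static void sighup_handler(int signo)
{
    int save_errno = errno;     /* don't clobber errno seen by interrupted code */

    (void) signo;
    reload_requested = 1;
    errno = save_errno;
}

static void install_handler(void)
{
    struct sigaction sa;

    sa.sa_handler = sighup_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART;
    sigaction(SIGHUP, &sa, NULL);
}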
3980 /*
3981  * Subroutine to clear stats in a database entry
3982  *
3983  * Tables and functions hashes are initialized to empty.
3984  */
3985 static void
3986 reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
3987 {
3988  HASHCTL hash_ctl;
3989 
3990  dbentry->n_xact_commit = 0;
3991  dbentry->n_xact_rollback = 0;
3992  dbentry->n_blocks_fetched = 0;
3993  dbentry->n_blocks_hit = 0;
3994  dbentry->n_tuples_returned = 0;
3995  dbentry->n_tuples_fetched = 0;
3996  dbentry->n_tuples_inserted = 0;
3997  dbentry->n_tuples_updated = 0;
3998  dbentry->n_tuples_deleted = 0;
3999  dbentry->last_autovac_time = 0;
4000  dbentry->n_conflict_tablespace = 0;
4001  dbentry->n_conflict_lock = 0;
4002  dbentry->n_conflict_snapshot = 0;
4003  dbentry->n_conflict_bufferpin = 0;
4004  dbentry->n_conflict_startup_deadlock = 0;
4005  dbentry->n_temp_files = 0;
4006  dbentry->n_temp_bytes = 0;
4007  dbentry->n_deadlocks = 0;
4008  dbentry->n_block_read_time = 0;
4009  dbentry->n_block_write_time = 0;
4010 
4011  dbentry->stat_reset_timestamp = GetCurrentTimestamp();
4012  dbentry->stats_timestamp = 0;
4013 
4014  memset(&hash_ctl, 0, sizeof(hash_ctl));
4015  hash_ctl.keysize = sizeof(Oid);
4016  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4017  dbentry->tables = hash_create("Per-database table",
4018  PGSTAT_TAB_HASH_SIZE,
4019  &hash_ctl,
4020  HASH_ELEM | HASH_BLOBS);
4021 
4022  hash_ctl.keysize = sizeof(Oid);
4023  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4024  dbentry->functions = hash_create("Per-database function",
4025  PGSTAT_FUNCTION_HASH_SIZE,
4026  &hash_ctl,
4027  HASH_ELEM | HASH_BLOBS);
4028 }
4029 
4030 /*
4031  * Lookup the hash table entry for the specified database. If no hash
4032  * table entry exists, initialize it, if the create parameter is true.
4033  * Else, return NULL.
4034  */
4035 static PgStat_StatDBEntry *
4036 pgstat_get_db_entry(Oid databaseid, bool create)
4037 {
4038  PgStat_StatDBEntry *result;
4039  bool found;
4040  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4041 
4042  /* Lookup or create the hash table entry for this database */
4043  result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
4044  &databaseid,
4045  action, &found);
4046 
4047  if (!create && !found)
4048  return NULL;
4049 
4050  /*
4051  * If not found, initialize the new one. This creates empty hash tables
4052  * for tables and functions, too.
4053  */
4054  if (!found)
4055  reset_dbentry_counters(result);
4056 
4057  return result;
4058 }
4059 
4060 
4061 /*
4062  * Lookup the hash table entry for the specified table. If no hash
4063  * table entry exists, initialize it, if the create parameter is true.
4064  * Else, return NULL.
4065  */
4066 static PgStat_StatTabEntry *
4067 pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
4068 {
4069  PgStat_StatTabEntry *result;
4070  bool found;
4071  HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4072 
4073  /* Lookup or create the hash table entry for this table */
4074  result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
4075  &tableoid,
4076  action, &found);
4077 
4078  if (!create && !found)
4079  return NULL;
4080 
4081  /* If not found, initialize the new one. */
4082  if (!found)
4083  {
4084  result->numscans = 0;
4085  result->tuples_returned = 0;
4086  result->tuples_fetched = 0;
4087  result->tuples_inserted = 0;
4088  result->tuples_updated = 0;
4089  result->tuples_deleted = 0;
4090  result->tuples_hot_updated = 0;
4091  result->n_live_tuples = 0;
4092  result->n_dead_tuples = 0;
4093  result->changes_since_analyze = 0;
4094  result->blocks_fetched = 0;
4095  result->blocks_hit = 0;
4096  result->vacuum_timestamp = 0;
4097  result->vacuum_count = 0;
4098  result->autovac_vacuum_timestamp = 0;
4099  result->autovac_vacuum_count = 0;
4100  result->analyze_timestamp = 0;
4101  result->analyze_count = 0;
4102  result->autovac_analyze_timestamp = 0;
4103  result->autovac_analyze_count = 0;
4104  }
4105 
4106  return result;
4107 }
4108 
4109 
4110 /* ----------
4111  * pgstat_write_statsfiles() -
4112  * Write the global statistics file, as well as requested DB files.
4113  *
4114  * 'permanent' specifies writing to the permanent files not temporary ones.
4115  * When true (happens only when the collector is shutting down), also remove
4116  * the temporary files so that backends starting up under a new postmaster
4117  * can't read old data before the new collector is ready.
4118  *
4119  * When 'allDbs' is false, only the requested databases (listed in
4120  * pending_write_requests) will be written; otherwise, all databases
4121  * will be written.
4122  * ----------
4123  */
4124 static void
4125 pgstat_write_statsfiles(bool permanent, bool allDbs)
4126 {
4127  HASH_SEQ_STATUS hstat;
4128  PgStat_StatDBEntry *dbentry;
4129  FILE *fpout;
4130  int32 format_id;
4131  const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
4132  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4133  int rc;
4134 
4135  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4136 
4137  /*
4138  * Open the statistics temp file to write out the current values.
4139  */
4140  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4141  if (fpout == NULL)
4142  {
4143  ereport(LOG,
4144  (errcode_for_file_access(),
4145  errmsg("could not open temporary statistics file \"%s\": %m",
4146  tmpfile)));
4147  return;
4148  }
4149 
4150  /*
4151  * Set the timestamp of the stats file.
4152  */
4153  globalStats.stats_timestamp = GetCurrentTimestamp();
4154 
4155  /*
4156  * Write the file header --- currently just a format ID.
4157  */
4158  format_id = PGSTAT_FILE_FORMAT_ID;
4159  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4160  (void) rc; /* we'll check for error with ferror */
4161 
4162  /*
4163  * Write global stats struct
4164  */
4165  rc = fwrite(&globalStats, sizeof(globalStats), 1, fpout);
4166  (void) rc; /* we'll check for error with ferror */
4167 
4168  /*
4169  * Write archiver stats struct
4170  */
4171  rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout);
4172  (void) rc; /* we'll check for error with ferror */
4173 
4174  /*
4175  * Walk through the database table.
4176  */
4177  hash_seq_init(&hstat, pgStatDBHash);
4178  while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
4179  {
4180  /*
4181  * Write out the table and function stats for this DB into the
4182  * appropriate per-DB stat file, if required.
4183  */
4184  if (allDbs || pgstat_db_requested(dbentry->databaseid))
4185  {
4186  /* Make DB's timestamp consistent with the global stats */
4187  dbentry->stats_timestamp = globalStats.stats_timestamp;
4188 
4189  pgstat_write_db_statsfile(dbentry, permanent);
4190  }
4191 
4192  /*
4193  * Write out the DB entry. We don't write the tables or functions
4194  * pointers, since they're of no use to any other process.
4195  */
4196  fputc('D', fpout);
4197  rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
4198  (void) rc; /* we'll check for error with ferror */
4199  }
4200 
4201  /*
4202  * No more output to be done. Close the temp file and replace the old
4203  * pgstat.stat with it. The ferror() check replaces testing for error
4204  * after each individual fputc or fwrite above.
4205  */
4206  fputc('E', fpout);
4207 
4208  if (ferror(fpout))
4209  {
4210  ereport(LOG,
4211  (errcode_for_file_access(),
4212  errmsg("could not write temporary statistics file \"%s\": %m",
4213  tmpfile)));
4214  FreeFile(fpout);
4215  unlink(tmpfile);
4216  }
4217  else if (FreeFile(fpout) < 0)
4218  {
4219  ereport(LOG,
4220  (errcode_for_file_access(),
4221  errmsg("could not close temporary statistics file \"%s\": %m",
4222  tmpfile)));
4223  unlink(tmpfile);
4224  }
4225  else if (rename(tmpfile, statfile) < 0)
4226  {
4227  ereport(LOG,
4228  (errcode_for_file_access(),
4229  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4230  tmpfile, statfile)));
4231  unlink(tmpfile);
4232  }
4233 
4234  if (permanent)
4235  unlink(pgstat_stat_filename);
4236 
4237  /*
4238  * Now throw away the list of requests. Note that requests sent after we
4239  * started the write are still waiting on the network socket.
4240  */
4241  list_free(pending_write_requests);
4242  pending_write_requests = NIL;
4243 }
4244 
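Editor's aside (not part of pgstat.c): the write-to-temp-file, ferror() check, close, then rename() dance above guarantees readers see either the complete old file or the complete new one. A standalone sketch of that pattern with illustrative names:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative: write a whole file under a temp name, verify no buffered
 * I/O error occurred, then atomically replace the target with rename(). */
static int write_file_atomically(const char *tmppath, const char *finalpath,
                                 const void *data, size_t len)
{
    FILE *fp = fopen(tmppath, "wb");

    if (fp == NULL)
        return -1;

    fwrite(data, 1, len, fp);           /* errors are caught by ferror() below */

    if (ferror(fp))
    {
        fclose(fp);
        remove(tmppath);
        return -1;
    }
    if (fclose(fp) != 0 || rename(tmppath, finalpath) != 0)
    {
        remove(tmppath);
        return -1;
    }
    return 0;                           /* target now holds the complete new contents */
}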
4245 /*
4246  * return the filename for a DB stat file; filename is the output buffer,
4247  * of length len.
4248  */
4249 static void
4250 get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
4251  char *filename, int len)
4252 {
4253  int printed;
4254 
4255  /* NB -- pgstat_reset_remove_files knows about the pattern this uses */
4256  printed = snprintf(filename, len, "%s/db_%u.%s",
4257  permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY :
4258  pgstat_stat_directory,
4259  databaseid,
4260  tempname ? "tmp" : "stat");
4261  if (printed > len)
4262  elog(ERROR, "overlength pgstat path");
4263 }
4264 
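Editor's aside (not part of pgstat.c): get_dbstat_filename() treats an over-long result from snprintf() as an error. The same check in isolation, using the stricter ">= buffer size" truncation test; all names here are illustrative.

#include <stdio.h>

/* Illustrative: snprintf() returns the length it *wanted* to write, so a
 * result >= the buffer size means the path would have been truncated. */
static int build_db_stat_path(char *buf, size_t buflen,
                              const char *dir, unsigned int dboid, int temp)
{
    int printed = snprintf(buf, buflen, "%s/db_%u.%s",
                           dir, dboid, temp ? "tmp" : "stat");

    if (printed < 0 || (size_t) printed >= buflen)
        return -1;              /* caller should treat this as an error */
    return 0;
}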
4265 /* ----------
4266  * pgstat_write_db_statsfile() -
4267  * Write the stat file for a single database.
4268  *
4269  * If writing to the permanent file (happens when the collector is
4270  * shutting down only), remove the temporary file so that backends
4271  * starting up under a new postmaster can't read the old data before
4272  * the new collector is ready.
4273  * ----------
4274  */
4275 static void
4276 pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent)
4277 {
4278  HASH_SEQ_STATUS tstat;
4279  HASH_SEQ_STATUS fstat;
4280  PgStat_StatTabEntry *tabentry;
4281  PgStat_StatFuncEntry *funcentry;
4282  FILE *fpout;
4283  int32 format_id;
4284  Oid dbid = dbentry->databaseid;
4285  int rc;
4286  char tmpfile[MAXPGPATH];
4287  char statfile[MAXPGPATH];
4288 
4289  get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
4290  get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
4291 
4292  elog(DEBUG2, "writing stats file \"%s\"", statfile);
4293 
4294  /*
4295  * Open the statistics temp file to write out the current values.
4296  */
4297  fpout = AllocateFile(tmpfile, PG_BINARY_W);
4298  if (fpout == NULL)
4299  {
4300  ereport(LOG,
4301  (errcode_for_file_access(),
4302  errmsg("could not open temporary statistics file \"%s\": %m",
4303  tmpfile)));
4304  return;
4305  }
4306 
4307  /*
4308  * Write the file header --- currently just a format ID.
4309  */
4310  format_id = PGSTAT_FILE_FORMAT_ID;
4311  rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4312  (void) rc; /* we'll check for error with ferror */
4313 
4314  /*
4315  * Walk through the database's access stats per table.
4316  */
4317  hash_seq_init(&tstat, dbentry->tables);
4318  while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
4319  {
4320  fputc('T', fpout);
4321  rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
4322  (void) rc; /* we'll check for error with ferror */
4323  }
4324 
4325  /*
4326  * Walk through the database's function stats table.
4327  */
4328  hash_seq_init(&fstat, dbentry->functions);
4329  while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
4330  {
4331  fputc('F', fpout);
4332  rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
4333  (void) rc; /* we'll check for error with ferror */
4334  }
4335 
4336  /*
4337  * No more output to be done. Close the temp file and replace the old
4338  * pgstat.stat with it. The ferror() check replaces testing for error
4339  * after each individual fputc or fwrite above.
4340  */
4341  fputc('E', fpout);
4342 
4343  if (ferror(fpout))
4344  {
4345  ereport(LOG,
4346  (errcode_for_file_access(),
4347  errmsg("could not write temporary statistics file \"%s\": %m",
4348  tmpfile)));
4349  FreeFile(fpout);
4350  unlink(tmpfile);
4351  }
4352  else if (FreeFile(fpout) < 0)
4353  {
4354  ereport(LOG,
4355  (errcode_for_file_access(),
4356  errmsg("could not close temporary statistics file \"%s\": %m",
4357  tmpfile)));
4358  unlink(tmpfile);
4359  }
4360  else if (rename(tmpfile, statfile) < 0)
4361  {
4362  ereport(LOG,
4363  (errcode_for_file_access(),
4364  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4365  tmpfile, statfile)));
4366  unlink(tmpfile);
4367  }
4368 
4369  if (permanent)
4370  {
4371  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
4372 
4373  elog(DEBUG2, "removing temporary stats file \"%s\"", statfile);
4374  unlink(statfile);
4375  }
4376 }
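
/*
 * The per-database file written above is just the 32-bit format ID followed
 * by 'T' records (one PgStat_StatTabEntry each), 'F' records (one
 * PgStat_StatFuncEntry each), and a terminating 'E' byte.
 */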
4377 
4378 /* ----------
4379  * pgstat_read_statsfiles() -
4380  *
4381  * Reads in some existing statistics collector files and returns the
4382  * databases hash table that is the top level of the data.
4383  *
4384  * If 'onlydb' is not InvalidOid, it means we only want data for that DB
4385  * plus the shared catalogs ("DB 0"). We'll still populate the DB hash
4386  * table for all databases, but we don't bother even creating table/function
4387  * hash tables for other databases.
4388  *
4389  * 'permanent' specifies reading from the permanent files not temporary ones.
4390  * When true (happens only when the collector is starting up), remove the
4391  * files after reading; the in-memory status is now authoritative, and the
4392  * files would be out of date in case somebody else reads them.
4393  *
4394  * If a 'deep' read is requested, table/function stats are read, otherwise
4395  * the table/function hash tables remain empty.
4396  * ----------
4397  */
4398 static HTAB *
4399 pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
4400 {
4401  PgStat_StatDBEntry *dbentry;
4402  PgStat_StatDBEntry dbbuf;
4403  HASHCTL hash_ctl;
4404  HTAB *dbhash;
4405  FILE *fpin;
4406  int32 format_id;
4407  bool found;
4408  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4409 
4410  /*
4411  * The tables will live in pgStatLocalContext.
4412  */
4413  pgstat_setup_memcxt();
4414 
4415  /*
4416  * Create the DB hashtable
4417  */
4418  memset(&hash_ctl, 0, sizeof(hash_ctl));
4419  hash_ctl.keysize = sizeof(Oid);
4420  hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
4421  hash_ctl.hcxt = pgStatLocalContext;
4422  dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
4423  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4424 
4425  /*
4426  * Clear out global and archiver statistics so they start from zero in
4427  * case we can't load an existing statsfile.
4428  */
4429  memset(&globalStats, 0, sizeof(globalStats));
4430  memset(&archiverStats, 0, sizeof(archiverStats));
4431 
4432  /*
4433  * Set the current timestamp (will be kept only in case we can't load an
4434  * existing statsfile).
4435  */
4436  globalStats.stat_reset_timestamp = GetCurrentTimestamp();
4437  archiverStats.stat_reset_timestamp = globalStats.stat_reset_timestamp;
4438 
4439  /*
4440  * Try to open the stats file. If it doesn't exist, the backends simply
4441  * return zero for anything and the collector simply starts from scratch
4442  * with empty counters.
4443  *
4444  * ENOENT is a possibility if the stats collector is not running or has
4445  * not yet written the stats file the first time. Any other failure
4446  * condition is suspicious.
4447  */
4448  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4449  {
4450  if (errno != ENOENT)
4451  ereport(pgStatRunningInCollector ? LOG : WARNING,
4452  (errcode_for_file_access(),
4453  errmsg("could not open statistics file \"%s\": %m",
4454  statfile)));
4455  return dbhash;
4456  }
4457 
4458  /*
4459  * Verify it's of the expected format.
4460  */
4461  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4462  format_id != PGSTAT_FILE_FORMAT_ID)
4463  {
4465  (errmsg("corrupted statistics file \"%s\"", statfile)));
4466  goto done;
4467  }
4468 
4469  /*
4470  * Read global stats struct
4471  */
4472  if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
4473  {
4475  (errmsg("corrupted statistics file \"%s\"", statfile)));
4476  goto done;
4477  }
4478 
4479  /*
4480  * Read archiver stats struct
4481  */
4482  if (fread(&archiverStats, 1, sizeof(archiverStats), fpin) != sizeof(archiverStats))
4483  {
4485  (errmsg("corrupted statistics file \"%s\"", statfile)));
4486  goto done;
4487  }
4488 
4489  /*
4490  * We found an existing collector stats file. Read it and put all the
4491  * hashtable entries into place.
4492  */
4493  for (;;)
4494  {
4495  switch (fgetc(fpin))
4496  {
4497  /*
4498  * 'D' A PgStat_StatDBEntry struct describing a database
4499  * follows.
4500  */
4501  case 'D':
4502  if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
4503  fpin) != offsetof(PgStat_StatDBEntry, tables))
4504  {
4506  (errmsg("corrupted statistics file \"%s\"",
4507  statfile)));
4508  goto done;
4509  }
4510 
4511  /*
4512  * Add to the DB hash
4513  */
4514  dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
4515  (void *) &dbbuf.databaseid,
4516  HASH_ENTER,
4517  &found);
4518  if (found)
4519  {
4521  (errmsg("corrupted statistics file \"%s\"",
4522  statfile)));
4523  goto done;
4524  }
4525 
4526  memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
4527  dbentry->tables = NULL;
4528  dbentry->functions = NULL;
4529 
4530  /*
4531  * Don't create tables/functions hashtables for uninteresting
4532  * databases.
4533  */
4534  if (onlydb != InvalidOid)
4535  {
4536  if (dbbuf.databaseid != onlydb &&
4537  dbbuf.databaseid != InvalidOid)
4538  break;
4539  }
4540 
4541  memset(&hash_ctl, 0, sizeof(hash_ctl));
4542  hash_ctl.keysize = sizeof(Oid);
4543  hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4544  hash_ctl.hcxt = pgStatLocalContext;
4545  dbentry->tables = hash_create("Per-database table",
4546  PGSTAT_TAB_HASH_SIZE,
4547  &hash_ctl,
4548  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4549 
4550  hash_ctl.keysize = sizeof(Oid);
4551  hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4552  hash_ctl.hcxt = pgStatLocalContext;
4553  dbentry->functions = hash_create("Per-database function",
4554  PGSTAT_FUNCTION_HASH_SIZE,
4555  &hash_ctl,
4556  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4557 
4558  /*
4559  * If requested, read the data from the database-specific
4560  * file. Otherwise we just leave the hashtables empty.
4561  */
4562  if (deep)
4563  pgstat_read_db_statsfile(dbbuf.databaseid,
4564  dbentry->tables,
4565  dbentry->functions,
4566  permanent);
4567 
4568  break;
4569 
4570  case 'E':
4571  goto done;
4572 
4573  default:
4575  (errmsg("corrupted statistics file \"%s\"",
4576  statfile)));
4577  goto done;
4578  }
4579  }
4580 
4581 done:
4582  FreeFile(fpin);
4583 
4584  /* If requested to read the permanent file, also get rid of it. */
4585  if (permanent)
4586  {
4587  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
4588  unlink(statfile);
4589  }
4590 
4591  return dbhash;
4592 }
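
/*
 * Callers differ mainly in the 'deep' flag: a regular backend wants full
 * table/function data for its own database plus the shared catalogs, while
 * the autovacuum launcher only needs the per-database summaries; see the
 * calls in backend_read_statsfile() below.
 */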
4593 
4594 
4595 /* ----------
4596  * pgstat_read_db_statsfile() -
4597  *
4598  * Reads in the existing statistics collector file for the given database,
4599  * filling the passed-in tables and functions hash tables.
4600  *
4601  * As in pgstat_read_statsfiles, if the permanent file is requested, it is
4602  * removed after reading.
4603  *
4604  * Note: this code has the ability to skip storing per-table or per-function
4605  * data, if NULL is passed for the corresponding hashtable. That's not used
4606  * at the moment though.
4607  * ----------
4608  */
4609 static void
4610 pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
4611  bool permanent)
4612 {
4613  PgStat_StatTabEntry *tabentry;
4614  PgStat_StatTabEntry tabbuf;
4615  PgStat_StatFuncEntry funcbuf;
4616  PgStat_StatFuncEntry *funcentry;
4617  FILE *fpin;
4618  int32 format_id;
4619  bool found;
4620  char statfile[MAXPGPATH];
4621 
4622  get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
4623 
4624  /*
4625  * Try to open the stats file. If it doesn't exist, the backends simply
4626  * return zero for anything and the collector simply starts from scratch
4627  * with empty counters.
4628  *
4629  * ENOENT is a possibility if the stats collector is not running or has
4630  * not yet written the stats file the first time. Any other failure
4631  * condition is suspicious.
4632  */
4633  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4634  {
4635  if (errno != ENOENT)
4636  ereport(pgStatRunningInCollector ? LOG : WARNING,
4637  (errcode_for_file_access(),
4638  errmsg("could not open statistics file \"%s\": %m",
4639  statfile)));
4640  return;
4641  }
4642 
4643  /*
4644  * Verify it's of the expected format.
4645  */
4646  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4647  format_id != PGSTAT_FILE_FORMAT_ID)
4648  {
4650  (errmsg("corrupted statistics file \"%s\"", statfile)));
4651  goto done;
4652  }
4653 
4654  /*
4655  * We found an existing collector stats file. Read it and put all the
4656  * hashtable entries into place.
4657  */
4658  for (;;)
4659  {
4660  switch (fgetc(fpin))
4661  {
4662  /*
4663  * 'T' A PgStat_StatTabEntry follows.
4664  */
4665  case 'T':
4666  if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
4667  fpin) != sizeof(PgStat_StatTabEntry))
4668  {
4670  (errmsg("corrupted statistics file \"%s\"",
4671  statfile)));
4672  goto done;
4673  }
4674 
4675  /*
4676  * Skip if table data not wanted.
4677  */
4678  if (tabhash == NULL)
4679  break;
4680 
4681  tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
4682  (void *) &tabbuf.tableid,
4683  HASH_ENTER, &found);
4684 
4685  if (found)
4686  {
4688  (errmsg("corrupted statistics file \"%s\"",
4689  statfile)));
4690  goto done;
4691  }
4692 
4693  memcpy(tabentry, &tabbuf, sizeof(tabbuf));
4694  break;
4695 
4696  /*
4697  * 'F' A PgStat_StatFuncEntry follows.
4698  */
4699  case 'F':
4700  if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
4701  fpin) != sizeof(PgStat_StatFuncEntry))
4702  {
4704  (errmsg("corrupted statistics file \"%s\"",
4705  statfile)));
4706  goto done;
4707  }
4708 
4709  /*
4710  * Skip if function data not wanted.
4711  */
4712  if (funchash == NULL)
4713  break;
4714 
4715  funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
4716  (void *) &funcbuf.functionid,
4717  HASH_ENTER, &found);
4718 
4719  if (found)
4720  {
4722  (errmsg("corrupted statistics file \"%s\"",
4723  statfile)));
4724  goto done;
4725  }
4726 
4727  memcpy(funcentry, &funcbuf, sizeof(funcbuf));
4728  break;
4729 
4730  /*
4731  * 'E' The EOF marker of a complete stats file.
4732  */
4733  case 'E':
4734  goto done;
4735 
4736  default:
4738  (errmsg("corrupted statistics file \"%s\"",
4739  statfile)));
4740  goto done;
4741  }
4742  }
4743 
4744 done:
4745  FreeFile(fpin);
4746 
4747  if (permanent)
4748  {
4749  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
4750  unlink(statfile);
4751  }
4752 }
4753 
4754 /* ----------
4755  * pgstat_read_db_statsfile_timestamp() -
4756  *
4757  * Attempt to determine the timestamp of the last db statfile write.
4758  * Returns TRUE if successful; the timestamp is stored in *ts.
4759  *
4760  * This needs to be careful about handling databases for which no stats file
4761  * exists, such as databases without a stat entry or those not yet written:
4762  *
4763  * - if there's a database entry in the global file, return the corresponding
4764  * stats_timestamp value.
4765  *
4766  * - if there's no db stat entry (e.g. for a new or inactive database),
4767  * there's no stats_timestamp value, but also nothing to write so we return
4768  * the timestamp of the global statfile.
4769  * ----------
4770  */
4771 static bool
4772 pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent,
4773  TimestampTz *ts)
4774 {
4775  PgStat_StatDBEntry dbentry;
4776  PgStat_GlobalStats myGlobalStats;
4777  PgStat_ArchiverStats myArchiverStats;
4778  FILE *fpin;
4779  int32 format_id;
4780  const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4781 
4782  /*
4783  * Try to open the stats file. As above, anything but ENOENT is worthy of
4784  * complaining about.
4785  */
4786  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4787  {
4788  if (errno != ENOENT)
4789  ereport(pgStatRunningInCollector ? LOG : WARNING,
4790  (errcode_for_file_access(),
4791  errmsg("could not open statistics file \"%s\": %m",
4792  statfile)));
4793  return false;
4794  }
4795 
4796  /*
4797  * Verify it's of the expected format.
4798  */
4799  if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4800  format_id != PGSTAT_FILE_FORMAT_ID)
4801  {
4803  (errmsg("corrupted statistics file \"%s\"", statfile)));
4804  FreeFile(fpin);
4805  return false;
4806  }
4807 
4808  /*
4809  * Read global stats struct
4810  */
4811  if (fread(&myGlobalStats, 1, sizeof(myGlobalStats),
4812  fpin) != sizeof(myGlobalStats))
4813  {
4815  (errmsg("corrupted statistics file \"%s\"", statfile)));
4816  FreeFile(fpin);
4817  return false;
4818  }
4819 
4820  /*
4821  * Read archiver stats struct
4822  */
4823  if (fread(&myArchiverStats, 1, sizeof(myArchiverStats),
4824  fpin) != sizeof(myArchiverStats))
4825  {
4827  (errmsg("corrupted statistics file \"%s\"", statfile)));
4828  FreeFile(fpin);
4829  return false;
4830  }
4831 
4832  /* By default, we're going to return the timestamp of the global file. */
4833  *ts = myGlobalStats.stats_timestamp;
4834 
4835  /*
4836  * We found an existing collector stats file. Read it and look for a
4837  * record for the requested database. If found, use its timestamp.
4838  */
4839  for (;;)
4840  {
4841  switch (fgetc(fpin))
4842  {
4843  /*
4844  * 'D' A PgStat_StatDBEntry struct describing a database
4845  * follows.
4846  */
4847  case 'D':
4848  if (fread(&dbentry, 1, offsetof(PgStat_StatDBEntry, tables),
4849  fpin) != offsetof(PgStat_StatDBEntry, tables))
4850  {
4852  (errmsg("corrupted statistics file \"%s\"",
4853  statfile)));
4854  goto done;
4855  }
4856 
4857  /*
4858  * If this is the DB we're looking for, save its timestamp and
4859  * we're done.
4860  */
4861  if (dbentry.databaseid == databaseid)
4862  {
4863  *ts = dbentry.stats_timestamp;
4864  goto done;
4865  }
4866 
4867  break;
4868 
4869  case 'E':
4870  goto done;
4871 
4872  default:
4874  (errmsg("corrupted statistics file \"%s\"",
4875  statfile)));
4876  goto done;
4877  }
4878  }
4879 
4880 done:
4881  FreeFile(fpin);
4882  return true;
4883 }
4884 
4885 /*
4886  * If not already done, read the statistics collector stats file into
4887  * some hash tables. The results will be kept until pgstat_clear_snapshot()
4888  * is called (typically, at end of transaction).
4889  */
4890 static void
4891 backend_read_statsfile(void)
4892 {
4893  TimestampTz min_ts = 0;
4894  TimestampTz ref_ts = 0;
4895  Oid inquiry_db;
4896  int count;
4897 
4898  /* already read it? */
4899  if (pgStatDBHash)
4900  return;
4901  Assert(!pgStatRunningInCollector);
4902 
4903  /*
4904  * In a normal backend, we check staleness of the data for our own DB, and
4905  * so we send MyDatabaseId in inquiry messages. In the autovac launcher,
4906  * we check staleness of the shared-catalog data, and send InvalidOid in
4907  * inquiry messages so as not to force writing unnecessary data.
4908  */
4909  if (IsAutoVacuumLauncherProcess())
4910  inquiry_db = InvalidOid;
4911  else
4912  inquiry_db = MyDatabaseId;
4913 
4914  /*
4915  * Loop until a fresh-enough stats file is available or we run out of time.
4916  * The stats inquiry message is sent repeatedly in case the collector drops
4917  * it, but not every single time, as that would just swamp the collector.
4918  */
4919  for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
4920  {
4921  bool ok;
4922  TimestampTz file_ts = 0;
4923  TimestampTz cur_ts;
4924 
4925  CHECK_FOR_INTERRUPTS();
4926 
4927  ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);
4928 
4929  cur_ts = GetCurrentTimestamp();
4930  /* Calculate min acceptable timestamp, if we didn't already */
4931  if (count == 0 || cur_ts < ref_ts)
4932  {
4933  /*
4934  * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
4935  * msec before now. This indirectly ensures that the collector
4936  * needn't write the file more often than PGSTAT_STAT_INTERVAL. In
4937  * an autovacuum worker, however, we want a lower delay to avoid
4938  * using stale data, so we use PGSTAT_RETRY_DELAY (since the
4939  * number of workers is low, this shouldn't be a problem).
4940  *
4941  * We don't recompute min_ts after sleeping, except in the
4942  * unlikely case that cur_ts went backwards. So we might end up
4943  * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In
4944  * practice that shouldn't happen, though, as long as the sleep
4945  * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
4946  * tell the collector that our cutoff time is less than what we'd
4947  * actually accept.
4948  */
4949  ref_ts = cur_ts;
4950  if (IsAutoVacuumWorkerProcess())
4951  min_ts = TimestampTzPlusMilliseconds(ref_ts,
4952  -PGSTAT_RETRY_DELAY);
4953  else
4954  min_ts = TimestampTzPlusMilliseconds(ref_ts,
4955  -PGSTAT_STAT_INTERVAL);
4956  }
4957 
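/*
 * Concretely: PGSTAT_STAT_INTERVAL is 500 msec, so a normal backend accepts
 * a stats file stamped no more than half a second before ref_ts, while an
 * autovacuum worker only accepts one at most PGSTAT_RETRY_DELAY (10 msec)
 * old.
 */
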
4958  /*
4959  * If the file timestamp is actually newer than cur_ts, we must have
4960  * had a clock glitch (system time went backwards) or there is clock
4961  * skew between our processor and the stats collector's processor.
4962  * Accept the file, but send an inquiry message anyway to make
4963  * pgstat_recv_inquiry do a sanity check on the collector's time.
4964  */
4965  if (ok && file_ts > cur_ts)
4966  {
4967  /*
4968  * A small amount of clock skew between processors isn't terribly
4969  * surprising, but a large difference is worth logging. We
4970  * arbitrarily define "large" as 1000 msec.
4971  */
4972  if (file_ts >= TimestampTzPlusMilliseconds(cur_ts, 1000))
4973  {
4974  char *filetime;
4975  char *mytime;
4976 
4977  /* Copy because timestamptz_to_str returns a static buffer */
4978  filetime = pstrdup(timestamptz_to_str(file_ts));
4979  mytime = pstrdup(timestamptz_to_str(cur_ts));
4980  elog(LOG, "stats collector's time %s is later than backend local time %s",
4981  filetime, mytime);
4982  pfree(filetime);
4983  pfree(mytime);
4984  }
4985 
4986  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
4987  break;
4988  }
4989 
4990  /* Normal acceptance case: file is not older than cutoff time */
4991  if (ok && file_ts >= min_ts)
4992  break;
4993 
4994  /* Not there or too old, so kick the collector and wait a bit */
4995  if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
4996  pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
4997 
4998  pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
4999  }
5000 
5001  if (count >= PGSTAT_POLL_LOOP_COUNT)
5002  ereport(LOG,
5003  (errmsg("using stale statistics instead of current ones "
5004  "because stats collector is not responding")));
5005 
5006  /*
5007  * Autovacuum launcher wants stats about all databases, but a shallow read
5008  * is sufficient. Regular backends want a deep read for just the tables
5009  * they can see (MyDatabaseId + shared catalogs).
5010  */
5011  if (IsAutoVacuumLauncherProcess())
5012  pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
5013  else
5014  pgStatDBHash = pgstat_read_statsfiles(MyDatabaseId, false, true);
5015 }
5016 
5017 
5018 /* ----------
5019  * pgstat_setup_memcxt() -
5020  *
5021  * Create pgStatLocalContext, if not already done.
5022  * ----------
5023  */
5024 static void
5025 pgstat_setup_memcxt(void)
5026 {
5027  if (!pgStatLocalContext)
5028  pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
5029  "Statistics snapshot",
5030  ALLOCSET_SMALL_SIZES);
5031 }
5032 
5033 
5034 /* ----------
5035  * pgstat_clear_snapshot() -
5036  *
5037  * Discard any data collected in the current transaction. Any subsequent
5038  * request will cause new snapshots to be read.
5039  *
5040  * This is also invoked during transaction commit or abort to discard
5041  * the no-longer-wanted snapshot.
5042  * ----------
5043  */
5044 void
5045 pgstat_clear_snapshot(void)
5046 {
5047  /* Release memory, if any was allocated */
5048  if (pgStatLocalContext)
5049  MemoryContextDelete(pgStatLocalContext);
5050 
5051  /* Reset variables */
5052  pgStatLocalContext = NULL;
5053  pgStatDBHash = NULL;
5054  localBackendStatusTable = NULL;
5055  localNumBackends = 0;
5056 }
5057 
5058 
5059 /* ----------
5060  * pgstat_recv_inquiry() -
5061  *
5062  * Process stat inquiry requests.
5063  * ----------
5064  */
5065 static void
5066 pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
5067 {
5068  PgStat_StatDBEntry *dbentry;
5069 
5070  elog(DEBUG2, "received inquiry for database %u", msg->databaseid);
5071 
5072  /*
5073  * If there's already a write request for this DB, there's nothing to do.
5074  *
5075  * Note that if a request is found, we return early and skip the below
5076  * check for clock skew. This is okay, since the only way for a DB
5077  * request to be present in the list is that we have been here since the
5078  * last write round. It seems sufficient to check for clock skew once per
5079  * write round.
5080  */
5081  if (list_member_oid(pending_write_requests, msg->databaseid))
5082  return;
5083 
5084  /*
5085  * Check to see if we last wrote this database at a time >= the requested
5086  * cutoff time. If so, this is a stale request that was generated before
5087  * we updated the DB file, and we don't need to do so again.
5088  *
5089  * If the requestor's local clock time is older than stats_timestamp, we
5090  * should suspect a clock glitch, ie system time going backwards; though
5091  * the more likely explanation is just delayed message receipt. It is
5092  * worth expending a GetCurrentTimestamp call to be sure, since a large
5093  * retreat in the system clock reading could otherwise cause us to neglect
5094  * to update the stats file for a long time.
5095  */
5096  dbentry = pgstat_get_db_entry(msg->databaseid, false);
5097  if (dbentry == NULL)
5098  {
5099  /*
5100  * We have no data for this DB. Enter a write request anyway so that
5101  * the global stats will get updated. This is needed to prevent
5102  * backend_read_statsfile from waiting for data that we cannot supply,
5103  * in the case of a new DB that nobody has yet reported any stats for.
5104  * See the behavior of pgstat_read_db_statsfile_timestamp.
5105  */
5106  }
5107  else if (msg->clock_time < dbentry->stats_timestamp)
5108  {
5109  TimestampTz cur_ts = GetCurrentTimestamp();
5110 
5111  if (cur_ts < dbentry->stats_timestamp)
5112  {
5113  /*
5114  * Sure enough, time went backwards. Force a new stats file write
5115  * to get back in sync; but first, log a complaint.
5116  */
5117  char *writetime;
5118  char *mytime;
5119 
5120  /* Copy because timestamptz_to_str returns a static buffer */
5121  writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
5122  mytime = pstrdup(timestamptz_to_str(cur_ts));
5123  elog(LOG,
5124  "stats_timestamp %s is later than collector's time %s for database %u",
5125  writetime, mytime, dbentry->databaseid);
5126  pfree(writetime);
5127  pfree(mytime);
5128  }
5129  else
5130  {
5131  /*
5132  * Nope, it's just an old request. Assuming msg's clock_time is
5133  * >= its cutoff_time, it must be stale, so we can ignore it.
5134  */
5135  return;
5136  }
5137  }
5138  else if (msg->cutoff_time <= dbentry->stats_timestamp)
5139  {
5140  /* Stale request, ignore it */
5141  return;
5142  }
5143 
5144  /*
5145  * We need to write this DB, so create a request.
5146  */
5147  pending_write_requests = lappend_oid(pending_write_requests,
5148  msg->databaseid);
5149 }
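
/*
 * In short: an inquiry ends up queueing a write request unless an identical
 * request is already pending, or the database's stats file has already been
 * written at or after the requested cutoff time.
 */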
5150 
5151 
5152 /* ----------
5153  * pgstat_recv_tabstat() -
5154  *
5155  * Count what the backend has done.
5156  * ----------
5157  */
5158 static void
5159 pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
5160 {
5161  PgStat_StatDBEntry *dbentry;
5162  PgStat_StatTabEntry *tabentry;
5163  int i;
5164  bool found;
5165 
5166  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5167 
5168  /*
5169  * Update database-wide stats.
5170  */
5171  dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
5172  dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
5173  dbentry->n_block_read_time += msg->m_block_read_time;
5174  dbentry->n_block_write_time += msg->m_block_write_time;
5175 
5176  /*
5177  * Process all table entries in the message.
5178  */
5179  for (i = 0; i < msg->m_nentries; i++)
5180  {
5181  PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);
5182 
5183  tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
5184  (void *) &(tabmsg->t_id),
5185  HASH_ENTER, &found);
5186 
5187  if (!found)
5188  {
5189  /*
5190  * If it's a new table entry, initialize counters to the values we
5191  * just got.
5192  */
5193  tabentry->numscans = tabmsg->t_counts.t_numscans;
5194  tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
5195  tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
5196  tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
5197  tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
5198  tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
5199  tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
5200  tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
5201  tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
5202  tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
5203  tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
5204  tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
5205 
5206  tabentry->vacuum_timestamp = 0;
5207  tabentry->vacuum_count = 0;
5208  tabentry->autovac_vacuum_timestamp = 0;
5209  tabentry->autovac_vacuum_count = 0;
5210  tabentry->analyze_timestamp = 0;
5211  tabentry->analyze_count = 0;
5212  tabentry->autovac_analyze_timestamp = 0;
5213  tabentry->autovac_analyze_count = 0;
5214  }
5215  else
5216  {
5217  /*
5218  * Otherwise add the values to the existing entry.
5219  */
5220  tabentry->numscans += tabmsg->t_counts.t_numscans;
5221  tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
5222  tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5223  tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5224  tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
5225  tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5226  tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
5227  /* If table was truncated, first reset the live/dead counters */
5228  if (tabmsg->t_counts.t_truncated)
5229  {
5230  tabentry->n_live_tuples = 0;
5231  tabentry->n_dead_tuples = 0;
5232  }
5233  tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
5234  tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
5235  tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
5236  tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5237  tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
5238  }
5239 
5240  /* Clamp n_live_tuples in case of negative delta_live_tuples */
5241  tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
5242  /* Likewise for n_dead_tuples */
5243  tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
5244 
5245  /*
5246  * Add per-table stats to the per-database entry, too.
5247  */
5248  dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
5249  dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5250  dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5251  dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
5252  dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5253  dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5254  dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
5255  }
5256 }
5257 
5258 
5259 /* ----------
5260  * pgstat_recv_tabpurge() -
5261  *
5262  * Arrange for dead table removal.
5263  * ----------
5264  */
5265 static void
5266 pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
5267 {
5268  PgStat_StatDBEntry *dbentry;
5269  int i;
5270 
5271  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5272 
5273  /*
5274  * No need to purge if we don't even know the database.
5275  */
5276  if (!dbentry || !dbentry->tables)
5277  return;
5278 
5279  /*
5280  * Process all table entries in the message.
5281  */
5282  for (i = 0; i < msg->m_nentries; i++)
5283  {
5284  /* Remove from hashtable if present; we don't care if it's not. */
5285  (void) hash_search(dbentry->tables,
5286  (void *) &(msg->m_tableid[i]),
5287  HASH_REMOVE, NULL);
5288  }
5289 }
5290 
5291 
5292 /* ----------
5293  * pgstat_recv_dropdb() -
5294  *
5295  * Arrange for dead database removal
5296  * ----------
5297  */
5298 static void
5299 pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
5300 {
5301  Oid dbid = msg->m_databaseid;
5302  PgStat_StatDBEntry *dbentry;
5303 
5304  /*
5305  * Lookup the database in the hashtable.
5306  */
5307  dbentry = pgstat_get_db_entry(dbid, false);
5308 
5309  /*
5310  * If found, remove it (along with the db statfile).
5311  */
5312  if (dbentry)
5313  {
5314  char statfile[MAXPGPATH];
5315 
5316  get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
5317 
5318  elog(DEBUG2, "removing stats file \"%s\"", statfile);
5319  unlink(statfile);
5320 
5321  if (dbentry->tables != NULL)
5322  hash_destroy(dbentry->tables);
5323  if (dbentry->functions != NULL)
5324  hash_destroy(dbentry->functions);
5325 
5326  if (hash_search(pgStatDBHash,
5327  (void *) &dbid,
5328  HASH_REMOVE, NULL) == NULL)
5329  ereport(ERROR,
5330  (errmsg("database hash table corrupted during cleanup --- abort")));
5331  }
5332 }
5333 
5334 
5335 /* ----------
5336  * pgstat_recv_resetcounter() -
5337  *
5338  * Reset the statistics for the specified database.
5339  * ----------
5340  */
5341 static void
5342 pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
5343 {
5344  PgStat_StatDBEntry *dbentry;
5345 
5346  /*
5347  * Lookup the database in the hashtable. Nothing to do if not there.
5348  */
5349  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5350 
5351  if (!dbentry)
5352  return;
5353 
5354  /*
5355  * We simply throw away all the database's table entries by recreating a
5356  * new hash table for them.
5357  */
5358  if (dbentry->tables != NULL)
5359  hash_destroy(dbentry->tables);
5360  if (dbentry->functions != NULL)
5361  hash_destroy(dbentry->functions);
5362 
5363  dbentry->tables = NULL;
5364  dbentry->functions = NULL;
5365 
5366  /*
5367  * Reset database-level stats, too. This creates empty hash tables for
5368  * tables and functions.
5369  */
5370  reset_dbentry_counters(dbentry);
5371 }
5372 
5373 /* ----------
5374  * pgstat_recv_resetshared() -
5375  *
5376  * Reset some shared statistics of the cluster.
5377  * ----------
5378  */
5379 static void
5380 pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
5381 {
5382  if (msg->m_resettarget == RESET_BGWRITER)
5383  {
5384  /* Reset the global background writer statistics for the cluster. */
5385  memset(&globalStats, 0, sizeof(globalStats));
5386  globalStats.stat_reset_timestamp = GetCurrentTimestamp();
5387  }
5388  else if (msg->m_resettarget == RESET_ARCHIVER)
5389  {
5390  /* Reset the archiver statistics for the cluster. */
5391  memset(&archiverStats, 0, sizeof(archiverStats));
5392  archiverStats.stat_reset_timestamp = GetCurrentTimestamp();
5393  }
5394 
5395  /*
5396  * Presumably the sender of this message validated the target, don't
5397  * complain here if it's not valid
5398  */
5399 }
5400 
5401 /* ----------
5402  * pgstat_recv_resetsinglecounter() -
5403  *
5404  * Reset the statistics for a single object
5405  * ----------
5406  */
5407 static void
5408 pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len)
5409 {
5410  PgStat_StatDBEntry *dbentry;
5411 
5412  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5413 
5414  if (!dbentry)
5415  return;
5416 
5417  /* Set the reset timestamp for the whole database */
5418  dbentry->stat_reset_timestamp = GetCurrentTimestamp();
5419 
5420  /* Remove object if it exists, ignore it if not */
5421  if (msg->m_resettype == RESET_TABLE)
5422  (void) hash_search(dbentry->tables, (void *) &(msg->m_objectid),
5423  HASH_REMOVE, NULL);
5424  else if (msg->m_resettype == RESET_FUNCTION)
5425  (void) hash_search(dbentry->functions, (void *) &(msg->m_objectid),
5426  HASH_REMOVE, NULL);
5427 }
5428 
5429 /* ----------
5430  * pgstat_recv_autovac() -
5431  *
5432  * Process an autovacuum signalling message.
5433  * ----------
5434  */
5435 static void
5436 pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
5437 {
5438  PgStat_StatDBEntry *dbentry;
5439 
5440  /*
5441  * Store the last autovacuum time in the database's hashtable entry.
5442  */
5443  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5444 
5445  dbentry->last_autovac_time = msg->m_start_time;
5446 }
5447 
5448 /* ----------
5449  * pgstat_recv_vacuum() -
5450  *
5451  * Process a VACUUM message.
5452  * ----------
5453  */
5454 static void
5455 pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
5456 {
5457  PgStat_StatDBEntry *dbentry;
5458  PgStat_StatTabEntry *tabentry;
5459 
5460  /*
5461  * Store the data in the table's hashtable entry.
5462  */
5463  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5464 
5465  tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
5466 
5467  tabentry->n_live_tuples = msg->m_live_tuples;
5468  tabentry->n_dead_tuples = msg->m_dead_tuples;
5469 
5470  if (msg->m_autovacuum)
5471  {
5472  tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
5473  tabentry->autovac_vacuum_count++;
5474  }
5475  else
5476  {
5477  tabentry->vacuum_timestamp = msg->m_vacuumtime;
5478  tabentry->vacuum_count++;
5479  }
5480 }
5481 
5482 /* ----------
5483  * pgstat_recv_analyze() -
5484  *
5485  * Process an ANALYZE message.
5486  * ----------
5487  */
5488 static void
5489 pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
5490 {
5491  PgStat_StatDBEntry *dbentry;
5492  PgStat_StatTabEntry *tabentry;
5493 
5494  /*
5495  * Store the data in the table's hashtable entry.
5496  */
5497  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5498 
5499  tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
5500 
5501  tabentry->n_live_tuples = msg->m_live_tuples;
5502  tabentry->n_dead_tuples = msg->m_dead_tuples;
5503 
5504  /*
5505  * If commanded, reset changes_since_analyze to zero. This forgets any
5506  * changes that were committed while the ANALYZE was in progress, but we
5507  * have no good way to estimate how many of those there were.
5508  */
5509  if (msg->m_resetcounter)
5510  tabentry->changes_since_analyze = 0;
5511 
5512  if (msg->m_autovacuum)
5513  {
5514  tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
5515  tabentry->autovac_analyze_count++;
5516  }
5517  else
5518  {
5519  tabentry->analyze_timestamp = msg->m_analyzetime;
5520  tabentry->analyze_count++;
5521  }
5522 }
5523 
5524 
5525 /* ----------
5526  * pgstat_recv_archiver() -
5527  *
5528  * Process an ARCHIVER message.
5529  * ----------
5530  */
5531 static void
5532 pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len)
5533 {
5534  if (msg->m_failed)
5535  {
5536  /* Failed archival attempt */
5537  ++archiverStats.failed_count;
5538  memcpy(archiverStats.last_failed_wal, msg->m_xlog,
5539  sizeof(archiverStats.last_failed_wal));
5540  archiverStats.last_failed_timestamp = msg->m_timestamp;
5541  }
5542  else
5543  {
5544  /* Successful archival operation */
5545  ++archiverStats.archived_count;
5546  memcpy(archiverStats.last_archived_wal, msg->m_xlog,
5547  sizeof(archiverStats.last_archived_wal));
5548  archiverStats.last_archived_timestamp = msg->m_timestamp;
5549  }
5550 }
5551 
5552 /* ----------
5553  * pgstat_recv_bgwriter() -
5554  *
5555  * Process a BGWRITER message.
5556  * ----------
5557  */
5558 static void
5559 pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
5560 {
5561  globalStats.timed_checkpoints += msg->m_timed_checkpoints;
5562  globalStats.requested_checkpoints += msg->m_requested_checkpoints;
5563  globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
5564  globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
5565  globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
5566  globalStats.buf_written_clean += msg->m_buf_written_clean;
5567  globalStats.maxwritten_clean += msg->m_maxwritten_clean;
5568  globalStats.buf_written_backend += msg->m_buf_written_backend;
5569  globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
5570  globalStats.buf_alloc += msg->m_buf_alloc;
5571 }
5572 
5573 /* ----------
5574  * pgstat_recv_recoveryconflict() -
5575  *
5576  * Process a RECOVERYCONFLICT message.
5577  * ----------
5578  */
5579 static void
5580 pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
5581 {
5582  PgStat_StatDBEntry *dbentry;
5583 
5584  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5585 
5586  switch (msg->m_reason)
5587  {
5588  case PROCSIG_RECOVERY_CONFLICT_DATABASE:
5589 
5590  /*
5591  * Since we drop the information about the database as soon as it
5592  * replicates, there is no point in counting these conflicts.
5593  */
5594  break;
5595  case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
5596  dbentry->n_conflict_tablespace++;
5597  break;
5598  case PROCSIG_RECOVERY_CONFLICT_LOCK:
5599  dbentry->n_conflict_lock++;
5600  break;
5601  case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
5602  dbentry->n_conflict_snapshot++;
5603  break;
5604  case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
5605  dbentry->n_conflict_bufferpin++;
5606  break;
5607  case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
5608  dbentry->n_conflict_startup_deadlock++;
5609  break;
5610  }
5611 }
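
/*
 * These per-database conflict counters are what ultimately surface as the
 * confl_* columns of the pg_stat_database_conflicts view.
 */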
5612 
5613 /* ----------
5614  * pgstat_recv_deadlock() -
5615  *
5616  * Process a DEADLOCK message.
5617  * ----------
5618  */
5619 static void
5620 pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len)
5621 {
5622  PgStat_StatDBEntry *dbentry;
5623 
5624  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5625 
5626  dbentry->n_deadlocks++;
5627 }
5628 
5629 /* ----------
5630  * pgstat_recv_tempfile() -
5631  *
5632  * Process a TEMPFILE message.
5633  * ----------
5634  */
5635 static void
5636 pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len)
5637 {
5638  PgStat_StatDBEntry *dbentry;
5639 
5640  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5641 
5642  dbentry->n_temp_bytes += msg->m_filesize;
5643  dbentry->n_temp_files += 1;
5644 }
5645 
5646 /* ----------
5647  * pgstat_recv_funcstat() -
5648  *
5649  * Count what the backend has done.
5650  * ----------
5651  */
5652 static void
5653 pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len)
5654 {
5655  PgStat_FunctionEntry *funcmsg = &(msg->m_entry[0]);
5656  PgStat_StatDBEntry *dbentry;
5657  PgStat_StatFuncEntry *funcentry;
5658  int i;
5659  bool found;
5660 
5661  dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5662 
5663  /*
5664  * Process all function entries in the message.
5665  */
5666  for (i = 0; i < msg->m_nentries; i++, funcmsg++)
5667  {
5668  funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
5669  (void *) &(funcmsg->f_id),
5670  HASH_ENTER, &found);
5671 
5672  if (!found)
5673  {
5674  /*
5675  * If it's a new function entry, initialize counters to the values
5676  * we just got.
5677  */
5678  funcentry->f_numcalls = funcmsg->f_numcalls;
5679  funcentry->f_total_time = funcmsg->f_total_time;
5680  funcentry->f_self_time = funcmsg->f_self_time;
5681  }
5682  else
5683  {
5684  /*
5685  * Otherwise add the values to the existing entry.
5686  */
5687  funcentry->f_numcalls += funcmsg->f_numcalls;
5688  funcentry->f_total_time += funcmsg->f_total_time;
5689  funcentry->f_self_time += funcmsg->f_self_time;
5690  }
5691  }
5692 }
5693 
5694 /* ----------
5695  * pgstat_recv_funcpurge() -
5696  *
5697  * Arrange for dead function removal.
5698  * ----------
5699  */
5700 static void
5701 pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
5702 {
5703  PgStat_StatDBEntry *dbentry;
5704  int i;
5705 
5706  dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5707 
5708  /*
5709  * No need to purge if we don't even know the database.
5710  */
5711  if (!dbentry || !dbentry->functions)
5712  return;
5713 
5714  /*
5715  * Process all function entries in the message.
5716  */
5717  for (i = 0; i < msg->m_nentries; i++)
5718  {
5719  /* Remove from hashtable if present; we don't care if it's not. */
5720  (void) hash_search(dbentry->functions,
5721  (void *) &(msg->m_functionid[i]),
5722  HASH_REMOVE, NULL);
5723  }
5724 }
5725 
5726 /* ----------
5727  * pgstat_write_statsfile_needed() -
5728  *
5729  * Do we need to write out any stats files?
5730  * ----------
5731  */
5732 static bool
5733 pgstat_write_statsfile_needed(void)
5734 {
5735  if (pending_write_requests != NIL)
5736  return true;
5737 
5738  /* Everything was written recently */
5739  return false;
5740 }
5741 
5742 /* ----------
5743  * pgstat_db_requested() -
5744  *
5745  * Checks whether stats for a particular DB need to be written to a file.
5746  * ----------
5747  */
5748 static bool
5749 pgstat_db_requested(Oid databaseid)
5750 {
5751  /*
5752  * If any requests are outstanding at all, we should write the stats for
5753  * shared catalogs (the "database" with OID 0). This ensures that
5754  * backends will see up-to-date stats for shared catalogs, even though
5755  * they send inquiry messages mentioning only their own DB.
5756  */
5757  if (databaseid == InvalidOid && pending_write_requests != NIL)
5758  return true;
5759 
5760  /* Search to see if there's an open request to write this database. */
5761  if (list_member_oid(pending_write_requests, databaseid))
5762  return true;
5763 
5764  return false;
5765 }