PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement planning and execution times as well as resource
5  * usage across a whole database cluster.
6  *
7  * Execution costs are totaled for each distinct source query, and kept in
8  * a shared hashtable. (We track only as many distinct queries as will fit
9  * in the designated amount of shared memory.)
10  *
11  * Starting in Postgres 9.2, this module normalized query entries. As of
12  * Postgres 14, the normalization is done by the core if compute_query_id is
13  * enabled, or optionally by third-party modules.
14  *
15  * To facilitate presenting entries to users, we create "representative" query
16  * strings in which constants are replaced with parameter symbols ($n), to
17  * make it clearer what a normalized entry can represent. To save on shared
18  * memory, and to avoid having to truncate oversized query strings, we store
19  * these strings in a temporary external query-texts file. Offsets into this
20  * file are kept in shared memory.
21  *
22  * Note about locking issues: to create or delete an entry in the shared
23  * hashtable, one must hold pgss->lock exclusively. Modifying any field
24  * in an entry except the counters requires the same. To look up an entry,
25  * one must hold the lock shared. To read or update the counters within
26  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27  * disappear!) and also take the entry's mutex spinlock.
28  * The shared state variable pgss->extent (the next free spot in the external
29  * query-text file) should be accessed only while holding either the
30  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31  * allow reserving file space while holding only shared lock on pgss->lock.
32  * Rewriting the entire external query-text file, eg for garbage collection,
33  * requires holding pgss->lock exclusively; this allows individual entries
34  * in the file to be read or written while holding only shared lock.
35  *
36  *
37  * Copyright (c) 2008-2024, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  * contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <math.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49 
50 #include "access/parallel.h"
51 #include "catalog/pg_authid.h"
52 #include "common/hashfn.h"
53 #include "common/int.h"
54 #include "executor/instrument.h"
55 #include "funcapi.h"
56 #include "jit/jit.h"
57 #include "mb/pg_wchar.h"
58 #include "miscadmin.h"
59 #include "nodes/queryjumble.h"
60 #include "optimizer/planner.h"
61 #include "parser/analyze.h"
62 #include "parser/parsetree.h"
63 #include "parser/scanner.h"
64 #include "parser/scansup.h"
65 #include "pgstat.h"
66 #include "storage/fd.h"
67 #include "storage/ipc.h"
68 #include "storage/lwlock.h"
69 #include "storage/shmem.h"
70 #include "storage/spin.h"
71 #include "tcop/utility.h"
72 #include "utils/acl.h"
73 #include "utils/builtins.h"
74 #include "utils/memutils.h"
75 #include "utils/timestamp.h"
76 
78 
79 /* Location of permanent stats file (valid when database is shut down) */
80 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81 
82 /*
83  * Location of external query text file.
84  */
85 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86 
87 /* Magic number identifying the stats file format */
88 static const uint32 PGSS_FILE_HEADER = 0x20220408;
89 
90 /* PostgreSQL major version number, changes in which invalidate all entries */
91 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
92 
93 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
94 #define USAGE_EXEC(duration) (1.0)
95 #define USAGE_INIT (1.0) /* including initial planning */
96 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
97 #define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
98 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
99 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
100 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
/*
 * IS_STICKY(c): true when the Counters value "c" records zero planning calls
 * and zero execution calls.  The argument is parenthesized so that any
 * expression yielding a Counters struct (e.g. "*ptr") expands correctly.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102 
103 /*
104  * Extension version number, for supporting older extension versions' objects
105  */
106 typedef enum pgssVersion
107 {
118 
119 typedef enum pgssStoreKind
120 {
122 
123  /*
124  * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
125  * reference the underlying values in the arrays in the Counters struct,
126  * and this order is required in pg_stat_statements_internal().
127  */
131 
132 #define PGSS_NUMKIND (PGSS_EXEC + 1)
133 
134 /*
135  * Hashtable key that defines the identity of a hashtable entry. We separate
136  * queries by user and by database even if they are otherwise identical.
137  *
138  * If you add a new key to this struct, make sure to teach pgss_store() to
139  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
140  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
141 
142  */
143 typedef struct pgssHashKey
144 {
145  Oid userid; /* user OID */
146  Oid dbid; /* database OID */
147  uint64 queryid; /* query identifier */
148  bool toplevel; /* query executed at top level */
150 
151 /*
152  * The actual stats counters kept within pgssEntry.
153  */
154 typedef struct Counters
155 {
156  int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
157  double total_time[PGSS_NUMKIND]; /* total planning/execution time,
158  * in msec */
159  double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
160  * msec since min/max reset */
161  double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
162  * msec since min/max reset */
163  double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
164  * msec */
165  double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
166  * planning/execution time in msec */
167  int64 rows; /* total # of retrieved or affected rows */
168  int64 shared_blks_hit; /* # of shared buffer hits */
169  int64 shared_blks_read; /* # of shared disk blocks read */
170  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
171  int64 shared_blks_written; /* # of shared disk blocks written */
172  int64 local_blks_hit; /* # of local buffer hits */
173  int64 local_blks_read; /* # of local disk blocks read */
174  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
175  int64 local_blks_written; /* # of local disk blocks written */
176  int64 temp_blks_read; /* # of temp blocks read */
177  int64 temp_blks_written; /* # of temp blocks written */
178  double shared_blk_read_time; /* time spent reading shared blocks,
179  * in msec */
180  double shared_blk_write_time; /* time spent writing shared blocks,
181  * in msec */
182  double local_blk_read_time; /* time spent reading local blocks, in
183  * msec */
184  double local_blk_write_time; /* time spent writing local blocks, in
185  * msec */
186  double temp_blk_read_time; /* time spent reading temp blocks, in msec */
187  double temp_blk_write_time; /* time spent writing temp blocks, in
188  * msec */
189  double usage; /* usage factor */
190  int64 wal_records; /* # of WAL records generated */
191  int64 wal_fpi; /* # of WAL full page images generated */
192  uint64 wal_bytes; /* total amount of WAL generated in bytes */
193  int64 jit_functions; /* total number of JIT functions emitted */
194  double jit_generation_time; /* total time to generate jit code */
195  int64 jit_inlining_count; /* number of times inlining time has been
196  * > 0 */
197  double jit_deform_time; /* total time to deform tuples in jit code */
198  int64 jit_deform_count; /* number of times deform time has been >
199  * 0 */
200 
201  double jit_inlining_time; /* total time to inline jit code */
202  int64 jit_optimization_count; /* number of times optimization time
203  * has been > 0 */
204  double jit_optimization_time; /* total time to optimize jit code */
205  int64 jit_emission_count; /* number of times emission time has been
206  * > 0 */
207  double jit_emission_time; /* total time to emit jit code */
208  int64 parallel_workers_to_launch; /* # of parallel workers planned
209  * to be launched */
210  int64 parallel_workers_launched; /* # of parallel workers actually
211  * launched */
213 
214 /*
215  * Global statistics for pg_stat_statements
216  */
217 typedef struct pgssGlobalStats
218 {
219  int64 dealloc; /* # of times entries were deallocated */
220  TimestampTz stats_reset; /* timestamp with all stats reset */
222 
223 /*
224  * Statistics per statement
225  *
226  * Note: in event of a failure in garbage collection of the query text file,
227  * we reset query_offset to zero and query_len to -1. This will be seen as
228  * an invalid state by qtext_fetch().
229  */
230 typedef struct pgssEntry
231 {
232  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
233  Counters counters; /* the statistics for this query */
234  Size query_offset; /* query text offset in external file */
235  int query_len; /* # of valid bytes in query string, or -1 */
236  int encoding; /* query text encoding */
237  TimestampTz stats_since; /* timestamp of entry allocation */
238  TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
239  slock_t mutex; /* protects the counters only */
241 
242 /*
243  * Global shared state
244  */
245 typedef struct pgssSharedState
246 {
247  LWLock *lock; /* protects hashtable search/modification */
248  double cur_median_usage; /* current median usage in hashtable */
249  Size mean_query_len; /* current mean entry text length */
250  slock_t mutex; /* protects following fields only: */
251  Size extent; /* current extent of query file */
252  int n_writers; /* number of active writers to query file */
253  int gc_count; /* query file garbage collection cycle count */
254  pgssGlobalStats stats; /* global statistics for pgss */
256 
257 /*---- Local variables ----*/
258 
259 /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
260 static int nesting_level = 0;
261 
262 /* Saved hook values in case of unload */
272 
273 /* Links to shared memory state */
274 static pgssSharedState *pgss = NULL;
275 static HTAB *pgss_hash = NULL;
276 
277 /*---- GUC variables ----*/
278 
279 typedef enum
280 {
281  PGSS_TRACK_NONE, /* track no statements */
282  PGSS_TRACK_TOP, /* only top level statements */
283  PGSS_TRACK_ALL, /* all statements, including nested ones */
285 
286 static const struct config_enum_entry track_options[] =
287 {
288  {"none", PGSS_TRACK_NONE, false},
289  {"top", PGSS_TRACK_TOP, false},
290  {"all", PGSS_TRACK_ALL, false},
291  {NULL, 0, false}
292 };
293 
294 static int pgss_max = 5000; /* max # statements to track */
295 static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
296 static bool pgss_track_utility = true; /* whether to track utility commands */
297 static bool pgss_track_planning = false; /* whether to track planning
298  * duration */
299 static bool pgss_save = true; /* whether to save stats across shutdown */
300 
301 
302 #define pgss_enabled(level) \
303  (!IsParallelWorker() && \
304  (pgss_track == PGSS_TRACK_ALL || \
305  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
306 
307 #define record_gc_qtexts() \
308  do { \
309  SpinLockAcquire(&pgss->mutex); \
310  pgss->gc_count++; \
311  SpinLockRelease(&pgss->mutex); \
312  } while(0)
313 
314 /*---- Function declarations ----*/
315 
328 
329 static void pgss_shmem_request(void);
330 static void pgss_shmem_startup(void);
331 static void pgss_shmem_shutdown(int code, Datum arg);
332 static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
333  JumbleState *jstate);
335  const char *query_string,
336  int cursorOptions,
337  ParamListInfo boundParams);
338 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
339 static void pgss_ExecutorRun(QueryDesc *queryDesc,
340  ScanDirection direction,
341  uint64 count, bool execute_once);
342 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
343 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
344 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
345  bool readOnlyTree,
347  QueryEnvironment *queryEnv,
349 static void pgss_store(const char *query, uint64 queryId,
350  int query_location, int query_len,
351  pgssStoreKind kind,
352  double total_time, uint64 rows,
353  const BufferUsage *bufusage,
354  const WalUsage *walusage,
355  const struct JitInstrumentation *jitusage,
356  JumbleState *jstate,
357  int parallel_workers_to_launch,
358  int parallel_workers_launched);
360  pgssVersion api_version,
361  bool showtext);
362 static Size pgss_memsize(void);
363 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
364  int encoding, bool sticky);
365 static void entry_dealloc(void);
366 static bool qtext_store(const char *query, int query_len,
367  Size *query_offset, int *gc_count);
368 static char *qtext_load_file(Size *buffer_size);
369 static char *qtext_fetch(Size query_offset, int query_len,
370  char *buffer, Size buffer_size);
371 static bool need_gc_qtexts(void);
372 static void gc_qtexts(void);
373 static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
374 static char *generate_normalized_query(JumbleState *jstate, const char *query,
375  int query_loc, int *query_len_p);
376 static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
377  int query_loc);
378 static int comp_location(const void *a, const void *b);
379 
380 
381 /*
382  * Module load callback
383  */
384 void
385 _PG_init(void)
386 {
387  /*
388  * In order to create our shared memory area, we have to be loaded via
389  * shared_preload_libraries. If not, fall out without hooking into any of
390  * the main system. (We don't throw error here because it seems useful to
391  * allow the pg_stat_statements functions to be created even when the
392  * module isn't active. The functions must protect themselves against
393  * being called then, however.)
394  */
396  return;
397 
398  /*
399  * Inform the postmaster that we want to enable query_id calculation if
400  * compute_query_id is set to auto.
401  */
402  EnableQueryId();
403 
404  /*
405  * Define (or redefine) custom GUC variables.
406  */
407  DefineCustomIntVariable("pg_stat_statements.max",
408  "Sets the maximum number of statements tracked by pg_stat_statements.",
409  NULL,
410  &pgss_max,
411  5000,
412  100,
413  INT_MAX / 2,
415  0,
416  NULL,
417  NULL,
418  NULL);
419 
420  DefineCustomEnumVariable("pg_stat_statements.track",
421  "Selects which statements are tracked by pg_stat_statements.",
422  NULL,
423  &pgss_track,
426  PGC_SUSET,
427  0,
428  NULL,
429  NULL,
430  NULL);
431 
432  DefineCustomBoolVariable("pg_stat_statements.track_utility",
433  "Selects whether utility commands are tracked by pg_stat_statements.",
434  NULL,
436  true,
437  PGC_SUSET,
438  0,
439  NULL,
440  NULL,
441  NULL);
442 
443  DefineCustomBoolVariable("pg_stat_statements.track_planning",
444  "Selects whether planning duration is tracked by pg_stat_statements.",
445  NULL,
447  false,
448  PGC_SUSET,
449  0,
450  NULL,
451  NULL,
452  NULL);
453 
454  DefineCustomBoolVariable("pg_stat_statements.save",
455  "Save pg_stat_statements statistics across server shutdowns.",
456  NULL,
457  &pgss_save,
458  true,
459  PGC_SIGHUP,
460  0,
461  NULL,
462  NULL,
463  NULL);
464 
465  MarkGUCPrefixReserved("pg_stat_statements");
466 
467  /*
468  * Install hooks.
469  */
488 }
489 
490 /*
491  * shmem_request hook: request additional shared resources. We'll allocate or
492  * attach to the shared resources in pgss_shmem_startup().
493  */
494 static void
496 {
499 
501  RequestNamedLWLockTranche("pg_stat_statements", 1);
502 }
503 
504 /*
505  * shmem_startup hook: allocate or attach to shared memory,
506  * then load any pre-existing statistics from file.
507  * Also create and load the query-texts file, which is expected to exist
508  * (even if empty) while the module is enabled.
509  */
510 static void
512 {
513  bool found;
514  HASHCTL info;
515  FILE *file = NULL;
516  FILE *qfile = NULL;
517  uint32 header;
518  int32 num;
519  int32 pgver;
520  int32 i;
521  int buffer_size;
522  char *buffer = NULL;
523 
526 
527  /* reset in case this is a restart within the postmaster */
528  pgss = NULL;
529  pgss_hash = NULL;
530 
531  /*
532  * Create or attach to the shared memory state, including hash table
533  */
534  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
535 
536  pgss = ShmemInitStruct("pg_stat_statements",
537  sizeof(pgssSharedState),
538  &found);
539 
540  if (!found)
541  {
542  /* First time through ... */
543  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
547  pgss->extent = 0;
548  pgss->n_writers = 0;
549  pgss->gc_count = 0;
550  pgss->stats.dealloc = 0;
552  }
553 
554  info.keysize = sizeof(pgssHashKey);
555  info.entrysize = sizeof(pgssEntry);
556  pgss_hash = ShmemInitHash("pg_stat_statements hash",
558  &info,
560 
561  LWLockRelease(AddinShmemInitLock);
562 
563  /*
564  * If we're in the postmaster (or a standalone backend...), set up a shmem
565  * exit hook to dump the statistics to disk.
566  */
567  if (!IsUnderPostmaster)
569 
570  /*
571  * Done if some other process already completed our initialization.
572  */
573  if (found)
574  return;
575 
576  /*
577  * Note: we don't bother with locks here, because there should be no other
578  * processes running when this code is reached.
579  */
580 
581  /* Unlink query text file possibly left over from crash */
582  unlink(PGSS_TEXT_FILE);
583 
584  /* Allocate new query text temp file */
586  if (qfile == NULL)
587  goto write_error;
588 
589  /*
590  * If we were told not to load old statistics, we're done. (Note we do
591  * not try to unlink any old dump file in this case. This seems a bit
592  * questionable but it's the historical behavior.)
593  */
594  if (!pgss_save)
595  {
596  FreeFile(qfile);
597  return;
598  }
599 
600  /*
601  * Attempt to load old statistics from the dump file.
602  */
604  if (file == NULL)
605  {
606  if (errno != ENOENT)
607  goto read_error;
608  /* No existing persisted stats file, so we're done */
609  FreeFile(qfile);
610  return;
611  }
612 
613  buffer_size = 2048;
614  buffer = (char *) palloc(buffer_size);
615 
616  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
617  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
618  fread(&num, sizeof(int32), 1, file) != 1)
619  goto read_error;
620 
621  if (header != PGSS_FILE_HEADER ||
622  pgver != PGSS_PG_MAJOR_VERSION)
623  goto data_error;
624 
625  for (i = 0; i < num; i++)
626  {
627  pgssEntry temp;
628  pgssEntry *entry;
629  Size query_offset;
630 
631  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
632  goto read_error;
633 
634  /* Encoding is the only field we can easily sanity-check */
635  if (!PG_VALID_BE_ENCODING(temp.encoding))
636  goto data_error;
637 
638  /* Resize buffer as needed */
639  if (temp.query_len >= buffer_size)
640  {
641  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
642  buffer = repalloc(buffer, buffer_size);
643  }
644 
645  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
646  goto read_error;
647 
648  /* Should have a trailing null, but let's make sure */
649  buffer[temp.query_len] = '\0';
650 
651  /* Skip loading "sticky" entries */
652  if (IS_STICKY(temp.counters))
653  continue;
654 
655  /* Store the query text */
656  query_offset = pgss->extent;
657  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
658  goto write_error;
659  pgss->extent += temp.query_len + 1;
660 
661  /* make the hashtable entry (discards old entries if too many) */
662  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
663  temp.encoding,
664  false);
665 
666  /* copy in the actual stats */
667  entry->counters = temp.counters;
668  entry->stats_since = temp.stats_since;
670  }
671 
672  /* Read global statistics for pg_stat_statements */
673  if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
674  goto read_error;
675 
676  pfree(buffer);
677  FreeFile(file);
678  FreeFile(qfile);
679 
680  /*
681  * Remove the persisted stats file so it's not included in
682  * backups/replication standbys, etc. A new file will be written on next
683  * shutdown.
684  *
685  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
686  * because we remove that file on startup; it acts inversely to
687  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
688  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
689  * when the server is not running. Leaving the file creates no danger of
690  * a newly restored database having a spurious record of execution costs,
691  * which is what we're really concerned about here.
692  */
693  unlink(PGSS_DUMP_FILE);
694 
695  return;
696 
697 read_error:
698  ereport(LOG,
700  errmsg("could not read file \"%s\": %m",
701  PGSS_DUMP_FILE)));
702  goto fail;
703 data_error:
704  ereport(LOG,
705  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
706  errmsg("ignoring invalid data in file \"%s\"",
707  PGSS_DUMP_FILE)));
708  goto fail;
709 write_error:
710  ereport(LOG,
712  errmsg("could not write file \"%s\": %m",
713  PGSS_TEXT_FILE)));
714 fail:
715  if (buffer)
716  pfree(buffer);
717  if (file)
718  FreeFile(file);
719  if (qfile)
720  FreeFile(qfile);
721  /* If possible, throw away the bogus file; ignore any error */
722  unlink(PGSS_DUMP_FILE);
723 
724  /*
725  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
726  * server is running with pg_stat_statements enabled
727  */
728 }
729 
730 /*
731  * shmem_shutdown hook: Dump statistics into file.
732  *
733  * Note: we don't bother with acquiring lock, because there should be no
734  * other processes running when this is called.
735  */
736 static void
738 {
739  FILE *file;
740  char *qbuffer = NULL;
741  Size qbuffer_size = 0;
742  HASH_SEQ_STATUS hash_seq;
743  int32 num_entries;
744  pgssEntry *entry;
745 
746  /* Don't try to dump during a crash. */
747  if (code)
748  return;
749 
750  /* Safety check ... shouldn't get here unless shmem is set up. */
751  if (!pgss || !pgss_hash)
752  return;
753 
754  /* Don't dump if told not to. */
755  if (!pgss_save)
756  return;
757 
758  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
759  if (file == NULL)
760  goto error;
761 
762  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
763  goto error;
764  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
765  goto error;
766  num_entries = hash_get_num_entries(pgss_hash);
767  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
768  goto error;
769 
770  qbuffer = qtext_load_file(&qbuffer_size);
771  if (qbuffer == NULL)
772  goto error;
773 
774  /*
775  * When serializing to disk, we store query texts immediately after their
776  * entry data. Any orphaned query texts are thereby excluded.
777  */
778  hash_seq_init(&hash_seq, pgss_hash);
779  while ((entry = hash_seq_search(&hash_seq)) != NULL)
780  {
781  int len = entry->query_len;
782  char *qstr = qtext_fetch(entry->query_offset, len,
783  qbuffer, qbuffer_size);
784 
785  if (qstr == NULL)
786  continue; /* Ignore any entries with bogus texts */
787 
788  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
789  fwrite(qstr, 1, len + 1, file) != len + 1)
790  {
791  /* note: we assume hash_seq_term won't change errno */
792  hash_seq_term(&hash_seq);
793  goto error;
794  }
795  }
796 
797  /* Dump global statistics for pg_stat_statements */
798  if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
799  goto error;
800 
801  free(qbuffer);
802  qbuffer = NULL;
803 
804  if (FreeFile(file))
805  {
806  file = NULL;
807  goto error;
808  }
809 
810  /*
811  * Rename file into place, so we atomically replace any old one.
812  */
814 
815  /* Unlink query-texts file; it's not needed during shutdown */
816  unlink(PGSS_TEXT_FILE);
817 
818  return;
819 
820 error:
821  ereport(LOG,
823  errmsg("could not write file \"%s\": %m",
824  PGSS_DUMP_FILE ".tmp")));
825  free(qbuffer);
826  if (file)
827  FreeFile(file);
828  unlink(PGSS_DUMP_FILE ".tmp");
829  unlink(PGSS_TEXT_FILE);
830 }
831 
832 /*
833  * Post-parse-analysis hook: mark query with a queryId
834  */
835 static void
837 {
839  prev_post_parse_analyze_hook(pstate, query, jstate);
840 
841  /* Safety check... */
843  return;
844 
845  /*
846  * If it's EXECUTE, clear the queryId so that stats will accumulate for
847  * the underlying PREPARE. But don't do this if we're not tracking
848  * utility statements, to avoid messing up another extension that might be
849  * tracking them.
850  */
851  if (query->utilityStmt)
852  {
854  {
855  query->queryId = UINT64CONST(0);
856  return;
857  }
858  }
859 
860  /*
861  * If query jumbling were able to identify any ignorable constants, we
862  * immediately create a hash table entry for the query, so that we can
863  * record the normalized form of the query string. If there were no such
864  * constants, the normalized string would be the same as the query text
865  * anyway, so there's no need for an early entry.
866  */
867  if (jstate && jstate->clocations_count > 0)
868  pgss_store(pstate->p_sourcetext,
869  query->queryId,
870  query->stmt_location,
871  query->stmt_len,
872  PGSS_INVALID,
873  0,
874  0,
875  NULL,
876  NULL,
877  NULL,
878  jstate,
879  0,
880  0);
881 }
882 
883 /*
884  * Planner hook: forward to regular planner, but measure planning time
885  * if needed.
886  */
887 static PlannedStmt *
889  const char *query_string,
890  int cursorOptions,
891  ParamListInfo boundParams)
892 {
893  PlannedStmt *result;
894 
895  /*
896  * We can't process the query if no query_string is provided, as
897  * pgss_store needs it. We also ignore query without queryid, as it would
898  * be treated as a utility statement, which may not be the case.
899  */
901  && pgss_track_planning && query_string
902  && parse->queryId != UINT64CONST(0))
903  {
906  BufferUsage bufusage_start,
907  bufusage;
908  WalUsage walusage_start,
909  walusage;
910 
911  /* We need to track buffer usage as the planner can access them. */
912  bufusage_start = pgBufferUsage;
913 
914  /*
915  * Similarly the planner could write some WAL records in some cases
916  * (e.g. setting a hint bit with those being WAL-logged)
917  */
918  walusage_start = pgWalUsage;
920 
921  nesting_level++;
922  PG_TRY();
923  {
924  if (prev_planner_hook)
925  result = prev_planner_hook(parse, query_string, cursorOptions,
926  boundParams);
927  else
928  result = standard_planner(parse, query_string, cursorOptions,
929  boundParams);
930  }
931  PG_FINALLY();
932  {
933  nesting_level--;
934  }
935  PG_END_TRY();
936 
939 
940  /* calc differences of buffer counters. */
941  memset(&bufusage, 0, sizeof(BufferUsage));
942  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943 
944  /* calc differences of WAL counters. */
945  memset(&walusage, 0, sizeof(WalUsage));
946  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947 
948  pgss_store(query_string,
949  parse->queryId,
950  parse->stmt_location,
951  parse->stmt_len,
952  PGSS_PLAN,
954  0,
955  &bufusage,
956  &walusage,
957  NULL,
958  NULL,
959  0,
960  0);
961  }
962  else
963  {
964  /*
965  * Even though we're not tracking plan time for this statement, we
966  * must still increment the nesting level, to ensure that functions
967  * evaluated during planning are not seen as top-level calls.
968  */
969  nesting_level++;
970  PG_TRY();
971  {
972  if (prev_planner_hook)
973  result = prev_planner_hook(parse, query_string, cursorOptions,
974  boundParams);
975  else
976  result = standard_planner(parse, query_string, cursorOptions,
977  boundParams);
978  }
979  PG_FINALLY();
980  {
981  nesting_level--;
982  }
983  PG_END_TRY();
984  }
985 
986  return result;
987 }
988 
989 /*
990  * ExecutorStart hook: start up tracking if needed
991  */
992 static void
993 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
994 {
995  if (prev_ExecutorStart)
996  prev_ExecutorStart(queryDesc, eflags);
997  else
998  standard_ExecutorStart(queryDesc, eflags);
999 
1000  /*
1001  * If query has queryId zero, don't track it. This prevents double
1002  * counting of optimizable statements that are directly contained in
1003  * utility statements.
1004  */
1005  if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
1006  {
1007  /*
1008  * Set up to track total elapsed time in ExecutorRun. Make sure the
1009  * space is allocated in the per-query context so it will go away at
1010  * ExecutorEnd.
1011  */
1012  if (queryDesc->totaltime == NULL)
1013  {
1014  MemoryContext oldcxt;
1015 
1016  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1017  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1018  MemoryContextSwitchTo(oldcxt);
1019  }
1020  }
1021 }
1022 
1023 /*
1024  * ExecutorRun hook: all we need do is track nesting depth
1025  */
1026 static void
1027 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
1028  bool execute_once)
1029 {
1030  nesting_level++;
1031  PG_TRY();
1032  {
1033  if (prev_ExecutorRun)
1034  prev_ExecutorRun(queryDesc, direction, count, execute_once);
1035  else
1036  standard_ExecutorRun(queryDesc, direction, count, execute_once);
1037  }
1038  PG_FINALLY();
1039  {
1040  nesting_level--;
1041  }
1042  PG_END_TRY();
1043 }
1044 
1045 /*
1046  * ExecutorFinish hook: all we need do is track nesting depth
1047  */
1048 static void
1050 {
1051  nesting_level++;
1052  PG_TRY();
1053  {
1054  if (prev_ExecutorFinish)
1055  prev_ExecutorFinish(queryDesc);
1056  else
1057  standard_ExecutorFinish(queryDesc);
1058  }
1059  PG_FINALLY();
1060  {
1061  nesting_level--;
1062  }
1063  PG_END_TRY();
1064 }
1065 
1066 /*
1067  * ExecutorEnd hook: store results if needed
1068  */
1069 static void
1071 {
1072  uint64 queryId = queryDesc->plannedstmt->queryId;
1073 
1074  if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1076  {
1077  /*
1078  * Make sure stats accumulation is done. (Note: it's okay if several
1079  * levels of hook all do this.)
1080  */
1081  InstrEndLoop(queryDesc->totaltime);
1082 
1083  pgss_store(queryDesc->sourceText,
1084  queryId,
1085  queryDesc->plannedstmt->stmt_location,
1086  queryDesc->plannedstmt->stmt_len,
1087  PGSS_EXEC,
1088  queryDesc->totaltime->total * 1000.0, /* convert to msec */
1089  queryDesc->estate->es_total_processed,
1090  &queryDesc->totaltime->bufusage,
1091  &queryDesc->totaltime->walusage,
1092  queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1093  NULL,
1095  queryDesc->estate->es_parallel_workers_launched);
1096  }
1097 
1098  if (prev_ExecutorEnd)
1099  prev_ExecutorEnd(queryDesc);
1100  else
1101  standard_ExecutorEnd(queryDesc);
1102 }
1103 
1104 /*
1105  * ProcessUtility hook
1106  */
1107 static void
1108 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1109  bool readOnlyTree,
1111  ParamListInfo params, QueryEnvironment *queryEnv,
1113 {
1114  Node *parsetree = pstmt->utilityStmt;
1115  uint64 saved_queryId = pstmt->queryId;
1116  int saved_stmt_location = pstmt->stmt_location;
1117  int saved_stmt_len = pstmt->stmt_len;
1118  bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
1119 
1120  /*
1121  * Force utility statements to get queryId zero. We do this even in cases
1122  * where the statement contains an optimizable statement for which a
1123  * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1124  * cases, runtime control will first go through ProcessUtility and then
1125  * the executor, and we don't want the executor hooks to do anything,
1126  * since we are already measuring the statement's costs at the utility
1127  * level.
1128  *
1129  * Note that this is only done if pg_stat_statements is enabled and
1130  * configured to track utility statements, in the unlikely possibility
1131  * that user configured another extension to handle utility statements
1132  * only.
1133  */
1134  if (enabled)
1135  pstmt->queryId = UINT64CONST(0);
1136 
1137  /*
1138  * If it's an EXECUTE statement, we don't track it and don't increment the
1139  * nesting level. This allows the cycles to be charged to the underlying
1140  * PREPARE instead (by the Executor hooks), which is much more useful.
1141  *
1142  * We also don't track execution of PREPARE. If we did, we would get one
1143  * hash table entry for the PREPARE (with hash calculated from the query
1144  * string), and then a different one with the same query string (but hash
1145  * calculated from the query tree) would be used to accumulate costs of
1146  * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1147  * actually run the planner (only parse+rewrite), its costs are generally
1148  * pretty negligible and it seems okay to just ignore it.
1149  */
1150  if (enabled &&
1151  !IsA(parsetree, ExecuteStmt) &&
1152  !IsA(parsetree, PrepareStmt))
1153  {
1154  instr_time start;
1156  uint64 rows;
1157  BufferUsage bufusage_start,
1158  bufusage;
1159  WalUsage walusage_start,
1160  walusage;
1161 
1162  bufusage_start = pgBufferUsage;
1163  walusage_start = pgWalUsage;
1165 
1166  nesting_level++;
1167  PG_TRY();
1168  {
1169  if (prev_ProcessUtility)
1170  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1171  context, params, queryEnv,
1172  dest, qc);
1173  else
1174  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1175  context, params, queryEnv,
1176  dest, qc);
1177  }
1178  PG_FINALLY();
1179  {
1180  nesting_level--;
1181  }
1182  PG_END_TRY();
1183 
1184  /*
1185  * CAUTION: do not access the *pstmt data structure again below here.
1186  * If it was a ROLLBACK or similar, that data structure may have been
1187  * freed. We must copy everything we still need into local variables,
1188  * which we did above.
1189  *
1190  * For the same reason, we can't risk restoring pstmt->queryId to its
1191  * former value, which'd otherwise be a good idea.
1192  */
1193 
1196 
1197  /*
1198  * Track the total number of rows retrieved or affected by the utility
1199  * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1200  * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1201  */
1202  rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1203  qc->commandTag == CMDTAG_FETCH ||
1204  qc->commandTag == CMDTAG_SELECT ||
1205  qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1206  qc->nprocessed : 0;
1207 
1208  /* calc differences of buffer counters. */
1209  memset(&bufusage, 0, sizeof(BufferUsage));
1210  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1211 
1212  /* calc differences of WAL counters. */
1213  memset(&walusage, 0, sizeof(WalUsage));
1214  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1215 
1216  pgss_store(queryString,
1217  saved_queryId,
1218  saved_stmt_location,
1219  saved_stmt_len,
1220  PGSS_EXEC,
1222  rows,
1223  &bufusage,
1224  &walusage,
1225  NULL,
1226  NULL,
1227  0,
1228  0);
1229  }
1230  else
1231  {
1232  /*
1233  * Even though we're not tracking execution time for this statement,
1234  * we must still increment the nesting level, to ensure that functions
1235  * evaluated within it are not seen as top-level calls. But don't do
1236  * so for EXECUTE; that way, when control reaches pgss_planner or
1237  * pgss_ExecutorStart, we will treat the costs as top-level if
1238  * appropriate. Likewise, don't bump for PREPARE, so that parse
1239  * analysis will treat the statement as top-level if appropriate.
1240  *
1241  * To be absolutely certain we don't mess up the nesting level,
1242  * evaluate the bump_level condition just once.
1243  */
1244  bool bump_level =
1245  !IsA(parsetree, ExecuteStmt) &&
1246  !IsA(parsetree, PrepareStmt);
1247 
1248  if (bump_level)
1249  nesting_level++;
1250  PG_TRY();
1251  {
1252  if (prev_ProcessUtility)
1253  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1254  context, params, queryEnv,
1255  dest, qc);
1256  else
1257  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1258  context, params, queryEnv,
1259  dest, qc);
1260  }
1261  PG_FINALLY();
1262  {
1263  if (bump_level)
1264  nesting_level--;
1265  }
1266  PG_END_TRY();
1267  }
1268 }
1269 
1270 /*
1271  * Store some statistics for a statement.
1272  *
1273  * If jstate is not NULL then we're trying to create an entry for which
1274  * we have no statistics as yet; we just want to record the normalized
1275  * query string. total_time, rows, bufusage and walusage are ignored in this
1276  * case.
1277  *
1278  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1279  * for the arrays in the Counters field.
1280  */
1281 static void
1282 pgss_store(const char *query, uint64 queryId,
1283  int query_location, int query_len,
1284  pgssStoreKind kind,
1285  double total_time, uint64 rows,
1286  const BufferUsage *bufusage,
1287  const WalUsage *walusage,
1288  const struct JitInstrumentation *jitusage,
1289  JumbleState *jstate,
1290  int parallel_workers_to_launch,
1291  int parallel_workers_launched)
1292 {
1293  pgssHashKey key;
1294  pgssEntry *entry;
1295  char *norm_query = NULL;
1296  int encoding = GetDatabaseEncoding();
1297 
1298  Assert(query != NULL);
1299 
1300  /* Safety check... */
1301  if (!pgss || !pgss_hash)
1302  return;
1303 
1304  /*
1305  * Nothing to do if compute_query_id isn't enabled and no other module
1306  * computed a query identifier.
1307  */
1308  if (queryId == UINT64CONST(0))
1309  return;
1310 
1311  /*
1312  * Confine our attention to the relevant part of the string, if the query
1313  * is a portion of a multi-statement source string, and update query
1314  * location and length if needed.
1315  */
1316  query = CleanQuerytext(query, &query_location, &query_len);
1317 
1318  /* Set up key for hashtable search */
1319 
1320  /* clear padding */
1321  memset(&key, 0, sizeof(pgssHashKey));
1322 
1323  key.userid = GetUserId();
1324  key.dbid = MyDatabaseId;
1325  key.queryid = queryId;
1326  key.toplevel = (nesting_level == 0);
1327 
1328  /* Lookup the hash table entry with shared lock. */
1330 
1331  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1332 
1333  /* Create new entry, if not present */
1334  if (!entry)
1335  {
1336  Size query_offset;
1337  int gc_count;
1338  bool stored;
1339  bool do_gc;
1340 
1341  /*
1342  * Create a new, normalized query string if caller asked. We don't
1343  * need to hold the lock while doing this work. (Note: in any case,
1344  * it's possible that someone else creates a duplicate hashtable entry
1345  * in the interval where we don't hold the lock below. That case is
1346  * handled by entry_alloc.)
1347  */
1348  if (jstate)
1349  {
1351  norm_query = generate_normalized_query(jstate, query,
1352  query_location,
1353  &query_len);
1355  }
1356 
1357  /* Append new query text to file with only shared lock held */
1358  stored = qtext_store(norm_query ? norm_query : query, query_len,
1359  &query_offset, &gc_count);
1360 
1361  /*
1362  * Determine whether we need to garbage collect external query texts
1363  * while the shared lock is still held. This micro-optimization
1364  * avoids taking the time to decide this while holding exclusive lock.
1365  */
1366  do_gc = need_gc_qtexts();
1367 
1368  /* Need exclusive lock to make a new hashtable entry - promote */
1371 
1372  /*
1373  * A garbage collection may have occurred while we weren't holding the
1374  * lock. In the unlikely event that this happens, the query text we
1375  * stored above will have been garbage collected, so write it again.
1376  * This should be infrequent enough that doing it while holding
1377  * exclusive lock isn't a performance problem.
1378  */
1379  if (!stored || pgss->gc_count != gc_count)
1380  stored = qtext_store(norm_query ? norm_query : query, query_len,
1381  &query_offset, NULL);
1382 
1383  /* If we failed to write to the text file, give up */
1384  if (!stored)
1385  goto done;
1386 
1387  /* OK to create a new hashtable entry */
1388  entry = entry_alloc(&key, query_offset, query_len, encoding,
1389  jstate != NULL);
1390 
1391  /* If needed, perform garbage collection while exclusive lock held */
1392  if (do_gc)
1393  gc_qtexts();
1394  }
1395 
1396  /* Increment the counts, except when jstate is not NULL */
1397  if (!jstate)
1398  {
1399  Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1400 
1401  /*
1402  * Grab the spinlock while updating the counters (see comment about
1403  * locking rules at the head of the file)
1404  */
1405  SpinLockAcquire(&entry->mutex);
1406 
1407  /* "Unstick" entry if it was previously sticky */
1408  if (IS_STICKY(entry->counters))
1409  entry->counters.usage = USAGE_INIT;
1410 
1411  entry->counters.calls[kind] += 1;
1412  entry->counters.total_time[kind] += total_time;
1413 
1414  if (entry->counters.calls[kind] == 1)
1415  {
1416  entry->counters.min_time[kind] = total_time;
1417  entry->counters.max_time[kind] = total_time;
1418  entry->counters.mean_time[kind] = total_time;
1419  }
1420  else
1421  {
1422  /*
1423  * Welford's method for accurately computing variance. See
1424  * <http://www.johndcook.com/blog/standard_deviation/>
1425  */
1426  double old_mean = entry->counters.mean_time[kind];
1427 
1428  entry->counters.mean_time[kind] +=
1429  (total_time - old_mean) / entry->counters.calls[kind];
1430  entry->counters.sum_var_time[kind] +=
1431  (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1432 
1433  /*
1434  * Calculate min and max time. min = 0 and max = 0 means that the
1435  * min/max statistics were reset
1436  */
1437  if (entry->counters.min_time[kind] == 0
1438  && entry->counters.max_time[kind] == 0)
1439  {
1440  entry->counters.min_time[kind] = total_time;
1441  entry->counters.max_time[kind] = total_time;
1442  }
1443  else
1444  {
1445  if (entry->counters.min_time[kind] > total_time)
1446  entry->counters.min_time[kind] = total_time;
1447  if (entry->counters.max_time[kind] < total_time)
1448  entry->counters.max_time[kind] = total_time;
1449  }
1450  }
1451  entry->counters.rows += rows;
1452  entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1453  entry->counters.shared_blks_read += bufusage->shared_blks_read;
1454  entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1455  entry->counters.shared_blks_written += bufusage->shared_blks_written;
1456  entry->counters.local_blks_hit += bufusage->local_blks_hit;
1457  entry->counters.local_blks_read += bufusage->local_blks_read;
1458  entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1459  entry->counters.local_blks_written += bufusage->local_blks_written;
1460  entry->counters.temp_blks_read += bufusage->temp_blks_read;
1461  entry->counters.temp_blks_written += bufusage->temp_blks_written;
1468  entry->counters.usage += USAGE_EXEC(total_time);
1469  entry->counters.wal_records += walusage->wal_records;
1470  entry->counters.wal_fpi += walusage->wal_fpi;
1471  entry->counters.wal_bytes += walusage->wal_bytes;
1472  if (jitusage)
1473  {
1474  entry->counters.jit_functions += jitusage->created_functions;
1476 
1477  if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1478  entry->counters.jit_deform_count++;
1480 
1482  entry->counters.jit_inlining_count++;
1484 
1488 
1490  entry->counters.jit_emission_count++;
1492  }
1493 
1494  /* parallel worker counters */
1495  entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1496  entry->counters.parallel_workers_launched += parallel_workers_launched;
1497 
1498  SpinLockRelease(&entry->mutex);
1499  }
1500 
1501 done:
1503 
1504  /* We postpone this clean-up until we're out of the lock */
1505  if (norm_query)
1506  pfree(norm_query);
1507 }
1508 
1509 /*
1510  * Reset statement statistics corresponding to userid, dbid, and queryid.
1511  */
1512 Datum
1514 {
1515  Oid userid;
1516  Oid dbid;
1517  uint64 queryid;
1518 
1519  userid = PG_GETARG_OID(0);
1520  dbid = PG_GETARG_OID(1);
1521  queryid = (uint64) PG_GETARG_INT64(2);
1522 
1523  entry_reset(userid, dbid, queryid, false);
1524 
1525  PG_RETURN_VOID();
1526 }
1527 
1528 Datum
1530 {
1531  Oid userid;
1532  Oid dbid;
1533  uint64 queryid;
1534  bool minmax_only;
1535 
1536  userid = PG_GETARG_OID(0);
1537  dbid = PG_GETARG_OID(1);
1538  queryid = (uint64) PG_GETARG_INT64(2);
1539  minmax_only = PG_GETARG_BOOL(3);
1540 
1541  PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1542 }
1543 
1544 /*
1545  * Reset statement statistics.
1546  */
1547 Datum
1549 {
1550  entry_reset(0, 0, 0, false);
1551 
1552  PG_RETURN_VOID();
1553 }
1554 
/*
 * Number of output arguments (columns) for various API versions.
 *
 * PG_STAT_STATEMENTS_COLS must be the maximum of the per-version counts; it
 * sizes the values[]/nulls[] arrays used when building result rows.  It is
 * defined in terms of the newest version's macro so the two cannot drift
 * apart.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49
#define PG_STAT_STATEMENTS_COLS_V1_12	51
#define PG_STAT_STATEMENTS_COLS			PG_STAT_STATEMENTS_COLS_V1_12	/* maximum of above */
1566 
1567 /*
1568  * Retrieve statement statistics.
1569  *
1570  * The SQL API of this function has changed multiple times, and will likely
1571  * do so again in future. To support the case where a newer version of this
1572  * loadable module is being used with an old SQL declaration of the function,
1573  * we continue to support the older API versions. For 1.2 and later, the
1574  * expected API version is identified by embedding it in the C name of the
1575  * function. Unfortunately we weren't bright enough to do that for 1.1.
1576  */
1577 Datum
1579 {
1580  bool showtext = PG_GETARG_BOOL(0);
1581 
1582  pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1583 
1584  return (Datum) 0;
1585 }
1586 
1587 Datum
1589 {
1590  bool showtext = PG_GETARG_BOOL(0);
1591 
1592  pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1593 
1594  return (Datum) 0;
1595 }
1596 
1597 Datum
1599 {
1600  bool showtext = PG_GETARG_BOOL(0);
1601 
1602  pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1603 
1604  return (Datum) 0;
1605 }
1606 
1607 Datum
1609 {
1610  bool showtext = PG_GETARG_BOOL(0);
1611 
1612  pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1613 
1614  return (Datum) 0;
1615 }
1616 
1617 Datum
1619 {
1620  bool showtext = PG_GETARG_BOOL(0);
1621 
1622  pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1623 
1624  return (Datum) 0;
1625 }
1626 
1627 Datum
1629 {
1630  bool showtext = PG_GETARG_BOOL(0);
1631 
1632  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1633 
1634  return (Datum) 0;
1635 }
1636 
1637 Datum
1639 {
1640  bool showtext = PG_GETARG_BOOL(0);
1641 
1642  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1643 
1644  return (Datum) 0;
1645 }
1646 
1647 /*
1648  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1649  * This can be removed someday, perhaps.
1650  */
1651 Datum
1653 {
1654  /* If it's really API 1.1, we'll figure that out below */
1655  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1656 
1657  return (Datum) 0;
1658 }
1659 
1660 /* Common code for all versions of pg_stat_statements() */
1661 static void
1663  pgssVersion api_version,
1664  bool showtext)
1665 {
1666  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1667  Oid userid = GetUserId();
1668  bool is_allowed_role = false;
1669  char *qbuffer = NULL;
1670  Size qbuffer_size = 0;
1671  Size extent = 0;
1672  int gc_count = 0;
1673  HASH_SEQ_STATUS hash_seq;
1674  pgssEntry *entry;
1675 
1676  /*
1677  * Superusers or roles with the privileges of pg_read_all_stats members
1678  * are allowed
1679  */
1680  is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1681 
1682  /* hash table must exist already */
1683  if (!pgss || !pgss_hash)
1684  ereport(ERROR,
1685  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1686  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1687 
1688  InitMaterializedSRF(fcinfo, 0);
1689 
1690  /*
1691  * Check we have the expected number of output arguments. Aside from
1692  * being a good safety check, we need a kluge here to detect API version
1693  * 1.1, which was wedged into the code in an ill-considered way.
1694  */
1695  switch (rsinfo->setDesc->natts)
1696  {
1698  if (api_version != PGSS_V1_0)
1699  elog(ERROR, "incorrect number of output arguments");
1700  break;
1702  /* pg_stat_statements() should have told us 1.0 */
1703  if (api_version != PGSS_V1_0)
1704  elog(ERROR, "incorrect number of output arguments");
1705  api_version = PGSS_V1_1;
1706  break;
1708  if (api_version != PGSS_V1_2)
1709  elog(ERROR, "incorrect number of output arguments");
1710  break;
1712  if (api_version != PGSS_V1_3)
1713  elog(ERROR, "incorrect number of output arguments");
1714  break;
1716  if (api_version != PGSS_V1_8)
1717  elog(ERROR, "incorrect number of output arguments");
1718  break;
1720  if (api_version != PGSS_V1_9)
1721  elog(ERROR, "incorrect number of output arguments");
1722  break;
1724  if (api_version != PGSS_V1_10)
1725  elog(ERROR, "incorrect number of output arguments");
1726  break;
1728  if (api_version != PGSS_V1_11)
1729  elog(ERROR, "incorrect number of output arguments");
1730  break;
1732  if (api_version != PGSS_V1_12)
1733  elog(ERROR, "incorrect number of output arguments");
1734  break;
1735  default:
1736  elog(ERROR, "incorrect number of output arguments");
1737  }
1738 
1739  /*
1740  * We'd like to load the query text file (if needed) while not holding any
1741  * lock on pgss->lock. In the worst case we'll have to do this again
1742  * after we have the lock, but it's unlikely enough to make this a win
1743  * despite occasional duplicated work. We need to reload if anybody
1744  * writes to the file (either a retail qtext_store(), or a garbage
1745  * collection) between this point and where we've gotten shared lock. If
1746  * a qtext_store is actually in progress when we look, we might as well
1747  * skip the speculative load entirely.
1748  */
1749  if (showtext)
1750  {
1751  int n_writers;
1752 
1753  /* Take the mutex so we can examine variables */
1755  extent = pgss->extent;
1756  n_writers = pgss->n_writers;
1757  gc_count = pgss->gc_count;
1759 
1760  /* No point in loading file now if there are active writers */
1761  if (n_writers == 0)
1762  qbuffer = qtext_load_file(&qbuffer_size);
1763  }
1764 
1765  /*
1766  * Get shared lock, load or reload the query text file if we must, and
1767  * iterate over the hashtable entries.
1768  *
1769  * With a large hash table, we might be holding the lock rather longer
1770  * than one could wish. However, this only blocks creation of new hash
1771  * table entries, and the larger the hash table the less likely that is to
1772  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1773  * we need to partition the hash table to limit the time spent holding any
1774  * one lock.
1775  */
1777 
1778  if (showtext)
1779  {
1780  /*
1781  * Here it is safe to examine extent and gc_count without taking the
1782  * mutex. Note that although other processes might change
1783  * pgss->extent just after we look at it, the strings they then write
1784  * into the file cannot yet be referenced in the hashtable, so we
1785  * don't care whether we see them or not.
1786  *
1787  * If qtext_load_file fails, we just press on; we'll return NULL for
1788  * every query text.
1789  */
1790  if (qbuffer == NULL ||
1791  pgss->extent != extent ||
1792  pgss->gc_count != gc_count)
1793  {
1794  free(qbuffer);
1795  qbuffer = qtext_load_file(&qbuffer_size);
1796  }
1797  }
1798 
1799  hash_seq_init(&hash_seq, pgss_hash);
1800  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1801  {
1803  bool nulls[PG_STAT_STATEMENTS_COLS];
1804  int i = 0;
1805  Counters tmp;
1806  double stddev;
1807  int64 queryid = entry->key.queryid;
1808  TimestampTz stats_since;
1809  TimestampTz minmax_stats_since;
1810 
1811  memset(values, 0, sizeof(values));
1812  memset(nulls, 0, sizeof(nulls));
1813 
1814  values[i++] = ObjectIdGetDatum(entry->key.userid);
1815  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1816  if (api_version >= PGSS_V1_9)
1817  values[i++] = BoolGetDatum(entry->key.toplevel);
1818 
1819  if (is_allowed_role || entry->key.userid == userid)
1820  {
1821  if (api_version >= PGSS_V1_2)
1822  values[i++] = Int64GetDatumFast(queryid);
1823 
1824  if (showtext)
1825  {
1826  char *qstr = qtext_fetch(entry->query_offset,
1827  entry->query_len,
1828  qbuffer,
1829  qbuffer_size);
1830 
1831  if (qstr)
1832  {
1833  char *enc;
1834 
1835  enc = pg_any_to_server(qstr,
1836  entry->query_len,
1837  entry->encoding);
1838 
1840 
1841  if (enc != qstr)
1842  pfree(enc);
1843  }
1844  else
1845  {
1846  /* Just return a null if we fail to find the text */
1847  nulls[i++] = true;
1848  }
1849  }
1850  else
1851  {
1852  /* Query text not requested */
1853  nulls[i++] = true;
1854  }
1855  }
1856  else
1857  {
1858  /* Don't show queryid */
1859  if (api_version >= PGSS_V1_2)
1860  nulls[i++] = true;
1861 
1862  /*
1863  * Don't show query text, but hint as to the reason for not doing
1864  * so if it was requested
1865  */
1866  if (showtext)
1867  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1868  else
1869  nulls[i++] = true;
1870  }
1871 
1872  /* copy counters to a local variable to keep locking time short */
1873  SpinLockAcquire(&entry->mutex);
1874  tmp = entry->counters;
1875  stats_since = entry->stats_since;
1876  minmax_stats_since = entry->minmax_stats_since;
1877  SpinLockRelease(&entry->mutex);
1878 
1879  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1880  if (IS_STICKY(tmp))
1881  continue;
1882 
1883  /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1884  for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1885  {
1886  if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1887  {
1888  values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1889  values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1890  }
1891 
1892  if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1893  api_version >= PGSS_V1_8)
1894  {
1895  values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1896  values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1897  values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1898 
1899  /*
1900  * Note we are calculating the population variance here, not
1901  * the sample variance, as we have data for the whole
1902  * population, so Bessel's correction is not used, and we
1903  * don't divide by tmp.calls - 1.
1904  */
1905  if (tmp.calls[kind] > 1)
1906  stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1907  else
1908  stddev = 0.0;
1909  values[i++] = Float8GetDatumFast(stddev);
1910  }
1911  }
1912  values[i++] = Int64GetDatumFast(tmp.rows);
1915  if (api_version >= PGSS_V1_1)
1920  if (api_version >= PGSS_V1_1)
1925  if (api_version >= PGSS_V1_1)
1926  {
1929  }
1930  if (api_version >= PGSS_V1_11)
1931  {
1934  }
1935  if (api_version >= PGSS_V1_10)
1936  {
1939  }
1940  if (api_version >= PGSS_V1_8)
1941  {
1942  char buf[256];
1943  Datum wal_bytes;
1944 
1946  values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1947 
1948  snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1949 
1950  /* Convert to numeric. */
1951  wal_bytes = DirectFunctionCall3(numeric_in,
1953  ObjectIdGetDatum(0),
1954  Int32GetDatum(-1));
1955  values[i++] = wal_bytes;
1956  }
1957  if (api_version >= PGSS_V1_10)
1958  {
1967  }
1968  if (api_version >= PGSS_V1_11)
1969  {
1972  }
1973  if (api_version >= PGSS_V1_12)
1974  {
1977  }
1978  if (api_version >= PGSS_V1_11)
1979  {
1980  values[i++] = TimestampTzGetDatum(stats_since);
1981  values[i++] = TimestampTzGetDatum(minmax_stats_since);
1982  }
1983 
1984  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1985  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1986  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1987  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1988  api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1989  api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1990  api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
1991  api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
1992  api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
1993  -1 /* fail if you forget to update this assert */ ));
1994 
1995  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1996  }
1997 
1999 
2000  free(qbuffer);
2001 }
2002 
2003 /* Number of output arguments (columns) for pg_stat_statements_info */
2004 #define PG_STAT_STATEMENTS_INFO_COLS 2
2005 
2006 /*
2007  * Return statistics of pg_stat_statements.
2008  */
2009 Datum
2011 {
2012  pgssGlobalStats stats;
2013  TupleDesc tupdesc;
2015  bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2016 
2017  if (!pgss || !pgss_hash)
2018  ereport(ERROR,
2019  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2020  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2021 
2022  /* Build a tuple descriptor for our result type */
2023  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2024  elog(ERROR, "return type must be a row type");
2025 
2026  /* Read global statistics for pg_stat_statements */
2028  stats = pgss->stats;
2030 
2031  values[0] = Int64GetDatum(stats.dealloc);
2033 
2035 }
2036 
2037 /*
2038  * Estimate shared memory space needed.
2039  */
2040 static Size
2042 {
2043  Size size;
2044 
2045  size = MAXALIGN(sizeof(pgssSharedState));
2047 
2048  return size;
2049 }
2050 
2051 /*
2052  * Allocate a new hashtable entry.
2053  * caller must hold an exclusive lock on pgss->lock
2054  *
2055  * "query" need not be null-terminated; we rely on query_len instead
2056  *
2057  * If "sticky" is true, make the new entry artificially sticky so that it will
2058  * probably still be there when the query finishes execution. We do this by
2059  * giving it a median usage value rather than the normal value. (Strictly
2060  * speaking, query strings are normalized on a best effort basis, though it
2061  * would be difficult to demonstrate this even under artificial conditions.)
2062  *
2063  * Note: despite needing exclusive lock, it's not an error for the target
2064  * entry to already exist. This is because pgss_store releases and
2065  * reacquires lock after failing to find a match; so someone else could
2066  * have made the entry while we waited to get exclusive lock.
2067  */
2068 static pgssEntry *
2069 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2070  bool sticky)
2071 {
2072  pgssEntry *entry;
2073  bool found;
2074 
2075  /* Make space if needed */
2077  entry_dealloc();
2078 
2079  /* Find or create an entry with desired hash code */
2080  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2081 
2082  if (!found)
2083  {
2084  /* New entry, initialize it */
2085 
2086  /* reset the statistics */
2087  memset(&entry->counters, 0, sizeof(Counters));
2088  /* set the appropriate initial usage count */
2089  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2090  /* re-initialize the mutex each time ... we assume no one using it */
2091  SpinLockInit(&entry->mutex);
2092  /* ... and don't forget the query text metadata */
2093  Assert(query_len >= 0);
2094  entry->query_offset = query_offset;
2095  entry->query_len = query_len;
2096  entry->encoding = encoding;
2097  entry->stats_since = GetCurrentTimestamp();
2098  entry->minmax_stats_since = entry->stats_since;
2099  }
2100 
2101  return entry;
2102 }
2103 
2104 /*
2105  * qsort comparator for sorting into increasing usage order
2106  */
2107 static int
2108 entry_cmp(const void *lhs, const void *rhs)
2109 {
2110  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2111  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2112 
2113  if (l_usage < r_usage)
2114  return -1;
2115  else if (l_usage > r_usage)
2116  return +1;
2117  else
2118  return 0;
2119 }
2120 
2121 /*
2122  * Deallocate least-used entries.
2123  *
2124  * Caller must hold an exclusive lock on pgss->lock.
2125  */
2126 static void
2128 {
2129  HASH_SEQ_STATUS hash_seq;
2130  pgssEntry **entries;
2131  pgssEntry *entry;
2132  int nvictims;
2133  int i;
2134  Size tottextlen;
2135  int nvalidtexts;
2136 
2137  /*
2138  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2139  * While we're scanning the table, apply the decay factor to the usage
2140  * values, and update the mean query length.
2141  *
2142  * Note that the mean query length is almost immediately obsolete, since
2143  * we compute it before not after discarding the least-used entries.
2144  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2145  * making two passes to get a more current result. Likewise, the new
2146  * cur_median_usage includes the entries we're about to zap.
2147  */
2148 
2149  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2150 
2151  i = 0;
2152  tottextlen = 0;
2153  nvalidtexts = 0;
2154 
2155  hash_seq_init(&hash_seq, pgss_hash);
2156  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2157  {
2158  entries[i++] = entry;
2159  /* "Sticky" entries get a different usage decay rate. */
2160  if (IS_STICKY(entry->counters))
2162  else
2164  /* In the mean length computation, ignore dropped texts. */
2165  if (entry->query_len >= 0)
2166  {
2167  tottextlen += entry->query_len + 1;
2168  nvalidtexts++;
2169  }
2170  }
2171 
2172  /* Sort into increasing order by usage */
2173  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2174 
2175  /* Record the (approximate) median usage */
2176  if (i > 0)
2177  pgss->cur_median_usage = entries[i / 2]->counters.usage;
2178  /* Record the mean query length */
2179  if (nvalidtexts > 0)
2180  pgss->mean_query_len = tottextlen / nvalidtexts;
2181  else
2183 
2184  /* Now zap an appropriate fraction of lowest-usage entries */
2185  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2186  nvictims = Min(nvictims, i);
2187 
2188  for (i = 0; i < nvictims; i++)
2189  {
2190  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2191  }
2192 
2193  pfree(entries);
2194 
2195  /* Increment the number of times entries are deallocated */
2197  pgss->stats.dealloc += 1;
2199 }
2200 
2201 /*
2202  * Given a query string (not necessarily null-terminated), allocate a new
2203  * entry in the external query text file and store the string there.
2204  *
2205  * If successful, returns true, and stores the new entry's offset in the file
2206  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2207  * number of garbage collections that have occurred so far.
2208  *
2209  * On failure, returns false.
2210  *
2211  * At least a shared lock on pgss->lock must be held by the caller, so as
2212  * to prevent a concurrent garbage collection. Share-lock-holding callers
2213  * should pass a gc_count pointer to obtain the number of garbage collections,
2214  * so that they can recheck the count after obtaining exclusive lock to
2215  * detect whether a garbage collection occurred (and removed this entry).
2216  */
2217 static bool
2218 qtext_store(const char *query, int query_len,
2219  Size *query_offset, int *gc_count)
2220 {
2221  Size off;
2222  int fd;
2223 
2224  /*
2225  * We use a spinlock to protect extent/n_writers/gc_count, so that
2226  * multiple processes may execute this function concurrently.
2227  */
2229  off = pgss->extent;
2230  pgss->extent += query_len + 1;
2231  pgss->n_writers++;
2232  if (gc_count)
2233  *gc_count = pgss->gc_count;
2235 
2236  *query_offset = off;
2237 
2238  /*
2239  * Don't allow the file to grow larger than what qtext_load_file can
2240  * (theoretically) handle. This has been seen to be reachable on 32-bit
2241  * platforms.
2242  */
2243  if (unlikely(query_len >= MaxAllocHugeSize - off))
2244  {
2245  errno = EFBIG; /* not quite right, but it'll do */
2246  fd = -1;
2247  goto error;
2248  }
2249 
2250  /* Now write the data into the successfully-reserved part of the file */
2251  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2252  if (fd < 0)
2253  goto error;
2254 
2255  if (pg_pwrite(fd, query, query_len, off) != query_len)
2256  goto error;
2257  if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2258  goto error;
2259 
2261 
2262  /* Mark our write complete */
2264  pgss->n_writers--;
2266 
2267  return true;
2268 
2269 error:
2270  ereport(LOG,
2272  errmsg("could not write file \"%s\": %m",
2273  PGSS_TEXT_FILE)));
2274 
2275  if (fd >= 0)
2277 
2278  /* Mark our write complete */
2280  pgss->n_writers--;
2282 
2283  return false;
2284 }
2285 
2286 /*
2287  * Read the external query text file into a malloc'd buffer.
2288  *
2289  * Returns NULL (without throwing an error) if unable to read, eg
2290  * file not there or insufficient memory.
2291  *
2292  * On success, the buffer size is also returned into *buffer_size.
2293  *
2294  * This can be called without any lock on pgss->lock, but in that case
2295  * the caller is responsible for verifying that the result is sane.
2296  */
2297 static char *
2298 qtext_load_file(Size *buffer_size)
2299 {
2300  char *buf;
2301  int fd;
2302  struct stat stat;
2303  Size nread;
2304 
2306  if (fd < 0)
2307  {
2308  if (errno != ENOENT)
2309  ereport(LOG,
2311  errmsg("could not read file \"%s\": %m",
2312  PGSS_TEXT_FILE)));
2313  return NULL;
2314  }
2315 
2316  /* Get file length */
2317  if (fstat(fd, &stat))
2318  {
2319  ereport(LOG,
2321  errmsg("could not stat file \"%s\": %m",
2322  PGSS_TEXT_FILE)));
2324  return NULL;
2325  }
2326 
2327  /* Allocate buffer; beware that off_t might be wider than size_t */
2328  if (stat.st_size <= MaxAllocHugeSize)
2329  buf = (char *) malloc(stat.st_size);
2330  else
2331  buf = NULL;
2332  if (buf == NULL)
2333  {
2334  ereport(LOG,
2335  (errcode(ERRCODE_OUT_OF_MEMORY),
2336  errmsg("out of memory"),
2337  errdetail("Could not allocate enough memory to read file \"%s\".",
2338  PGSS_TEXT_FILE)));
2340  return NULL;
2341  }
2342 
2343  /*
2344  * OK, slurp in the file. Windows fails if we try to read more than
2345  * INT_MAX bytes at once, and other platforms might not like that either,
2346  * so read a very large file in 1GB segments.
2347  */
2348  nread = 0;
2349  while (nread < stat.st_size)
2350  {
2351  int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2352 
2353  /*
2354  * If we get a short read and errno doesn't get set, the reason is
2355  * probably that garbage collection truncated the file since we did
2356  * the fstat(), so we don't log a complaint --- but we don't return
2357  * the data, either, since it's most likely corrupt due to concurrent
2358  * writes from garbage collection.
2359  */
2360  errno = 0;
2361  if (read(fd, buf + nread, toread) != toread)
2362  {
2363  if (errno)
2364  ereport(LOG,
2366  errmsg("could not read file \"%s\": %m",
2367  PGSS_TEXT_FILE)));
2368  free(buf);
2370  return NULL;
2371  }
2372  nread += toread;
2373  }
2374 
2375  if (CloseTransientFile(fd) != 0)
2376  ereport(LOG,
2378  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2379 
2380  *buffer_size = nread;
2381  return buf;
2382 }
2383 
2384 /*
2385  * Locate a query text in the file image previously read by qtext_load_file().
2386  *
2387  * We validate the given offset/length, and return NULL if bogus. Otherwise,
2388  * the result points to a null-terminated string within the buffer.
2389  */
2390 static char *
2391 qtext_fetch(Size query_offset, int query_len,
2392  char *buffer, Size buffer_size)
2393 {
2394  /* File read failed? */
2395  if (buffer == NULL)
2396  return NULL;
2397  /* Bogus offset/length? */
2398  if (query_len < 0 ||
2399  query_offset + query_len >= buffer_size)
2400  return NULL;
2401  /* As a further sanity check, make sure there's a trailing null */
2402  if (buffer[query_offset + query_len] != '\0')
2403  return NULL;
2404  /* Looks OK */
2405  return buffer + query_offset;
2406 }
2407 
2408 /*
2409  * Do we need to garbage-collect the external query text file?
2410  *
2411  * Caller should hold at least a shared lock on pgss->lock.
2412  */
2413 static bool
2415 {
2416  Size extent;
2417 
2418  /* Read shared extent pointer */
2420  extent = pgss->extent;
2422 
2423  /*
2424  * Don't proceed if file does not exceed 512 bytes per possible entry.
2425  *
2426  * Here and in the next test, 32-bit machines have overflow hazards if
2427  * pgss_max and/or mean_query_len are large. Force the multiplications
2428  * and comparisons to be done in uint64 arithmetic to forestall trouble.
2429  */
2430  if ((uint64) extent < (uint64) 512 * pgss_max)
2431  return false;
2432 
2433  /*
2434  * Don't proceed if file is less than about 50% bloat. Nothing can or
2435  * should be done in the event of unusually large query texts accounting
2436  * for file's large size. We go to the trouble of maintaining the mean
2437  * query length in order to prevent garbage collection from thrashing
2438  * uselessly.
2439  */
2440  if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2441  return false;
2442 
2443  return true;
2444 }
2445 
2446 /*
2447  * Garbage-collect orphaned query texts in external file.
2448  *
2449  * This won't be called often in the typical case, since it's likely that
2450  * there won't be too much churn, and besides, a similar compaction process
2451  * occurs when serializing to disk at shutdown or as part of resetting.
2452  * Despite this, it seems prudent to plan for the edge case where the file
2453  * becomes unreasonably large, with no other method of compaction likely to
2454  * occur in the foreseeable future.
2455  *
2456  * The caller must hold an exclusive lock on pgss->lock.
2457  *
2458  * At the first sign of trouble we unlink the query text file to get a clean
2459  * slate (although existing statistics are retained), rather than risk
2460  * thrashing by allowing the same problem case to recur indefinitely.
2461  */
2462 static void
2464 {
2465  char *qbuffer;
2466  Size qbuffer_size;
2467  FILE *qfile = NULL;
2468  HASH_SEQ_STATUS hash_seq;
2469  pgssEntry *entry;
2470  Size extent;
2471  int nentries;
2472 
2473  /*
2474  * When called from pgss_store, some other session might have proceeded
2475  * with garbage collection in the no-lock-held interim of lock strength
2476  * escalation. Check once more that this is actually necessary.
2477  */
2478  if (!need_gc_qtexts())
2479  return;
2480 
2481  /*
2482  * Load the old texts file. If we fail (out of memory, for instance),
2483  * invalidate query texts. Hopefully this is rare. It might seem better
2484  * to leave things alone on an OOM failure, but the problem is that the
2485  * file is only going to get bigger; hoping for a future non-OOM result is
2486  * risky and can easily lead to complete denial of service.
2487  */
2488  qbuffer = qtext_load_file(&qbuffer_size);
2489  if (qbuffer == NULL)
2490  goto gc_fail;
2491 
2492  /*
2493  * We overwrite the query texts file in place, so as to reduce the risk of
2494  * an out-of-disk-space failure. Since the file is guaranteed not to get
2495  * larger, this should always work on traditional filesystems; though we
2496  * could still lose on copy-on-write filesystems.
2497  */
2499  if (qfile == NULL)
2500  {
2501  ereport(LOG,
2503  errmsg("could not write file \"%s\": %m",
2504  PGSS_TEXT_FILE)));
2505  goto gc_fail;
2506  }
2507 
2508  extent = 0;
2509  nentries = 0;
2510 
2511  hash_seq_init(&hash_seq, pgss_hash);
2512  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2513  {
2514  int query_len = entry->query_len;
2515  char *qry = qtext_fetch(entry->query_offset,
2516  query_len,
2517  qbuffer,
2518  qbuffer_size);
2519 
2520  if (qry == NULL)
2521  {
2522  /* Trouble ... drop the text */
2523  entry->query_offset = 0;
2524  entry->query_len = -1;
2525  /* entry will not be counted in mean query length computation */
2526  continue;
2527  }
2528 
2529  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2530  {
2531  ereport(LOG,
2533  errmsg("could not write file \"%s\": %m",
2534  PGSS_TEXT_FILE)));
2535  hash_seq_term(&hash_seq);
2536  goto gc_fail;
2537  }
2538 
2539  entry->query_offset = extent;
2540  extent += query_len + 1;
2541  nentries++;
2542  }
2543 
2544  /*
2545  * Truncate away any now-unused space. If this fails for some odd reason,
2546  * we log it, but there's no need to fail.
2547  */
2548  if (ftruncate(fileno(qfile), extent) != 0)
2549  ereport(LOG,
2551  errmsg("could not truncate file \"%s\": %m",
2552  PGSS_TEXT_FILE)));
2553 
2554  if (FreeFile(qfile))
2555  {
2556  ereport(LOG,
2558  errmsg("could not write file \"%s\": %m",
2559  PGSS_TEXT_FILE)));
2560  qfile = NULL;
2561  goto gc_fail;
2562  }
2563 
2564  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2565  pgss->extent, extent);
2566 
2567  /* Reset the shared extent pointer */
2568  pgss->extent = extent;
2569 
2570  /*
2571  * Also update the mean query length, to be sure that need_gc_qtexts()
2572  * won't still think we have a problem.
2573  */
2574  if (nentries > 0)
2575  pgss->mean_query_len = extent / nentries;
2576  else
2578 
2579  free(qbuffer);
2580 
2581  /*
2582  * OK, count a garbage collection cycle. (Note: even though we have
2583  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2584  * other processes may examine gc_count while holding only the mutex.
2585  * Also, we have to advance the count *after* we've rewritten the file,
2586  * else other processes might not realize they read a stale file.)
2587  */
2588  record_gc_qtexts();
2589 
2590  return;
2591 
2592 gc_fail:
2593  /* clean up resources */
2594  if (qfile)
2595  FreeFile(qfile);
2596  free(qbuffer);
2597 
2598  /*
2599  * Since the contents of the external file are now uncertain, mark all
2600  * hashtable entries as having invalid texts.
2601  */
2602  hash_seq_init(&hash_seq, pgss_hash);
2603  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2604  {
2605  entry->query_offset = 0;
2606  entry->query_len = -1;
2607  }
2608 
2609  /*
2610  * Destroy the query text file and create a new, empty one
2611  */
2612  (void) unlink(PGSS_TEXT_FILE);
2614  if (qfile == NULL)
2615  ereport(LOG,
2617  errmsg("could not recreate file \"%s\": %m",
2618  PGSS_TEXT_FILE)));
2619  else
2620  FreeFile(qfile);
2621 
2622  /* Reset the shared extent pointer */
2623  pgss->extent = 0;
2624 
2625  /* Reset mean_query_len to match the new state */
2627 
2628  /*
2629  * Bump the GC count even though we failed.
2630  *
2631  * This is needed to make concurrent readers of file without any lock on
2632  * pgss->lock notice existence of new version of file. Once readers
2633  * subsequently observe a change in GC count with pgss->lock held, that
2634  * forces a safe reopen of file. Writers also require that we bump here,
2635  * of course. (As required by locking protocol, readers and writers don't
2636  * trust earlier file contents until gc_count is found unchanged after
2637  * pgss->lock acquired in shared or exclusive mode respectively.)
2638  */
2639  record_gc_qtexts();
2640 }
2641 
/*
 * SINGLE_ENTRY_RESET(e): reset or remove one hashtable entry.
 *
 * Does nothing if "e" is NULL.  Otherwise, when minmax_only is true, zeroes
 * the entry's min_time/max_time for every kind and sets its
 * minmax_stats_since to stats_reset; when false, removes the entry from
 * pgss_hash and increments num_remove.
 *
 * This is not self-contained: "minmax_only", "stats_reset" and "num_remove"
 * must be in scope where the macro is expanded.  The argument is evaluated
 * more than once, so pass a plain variable.  The do/while(0) wrapper makes
 * the expansion a single statement that is safe in unbraced if/else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) \
	{ \
		if (minmax_only) \
		{ \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2660 
2661 /*
2662  * Reset entries corresponding to parameters passed.
2663  */
2664 static TimestampTz
2665 entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
2666 {
2667  HASH_SEQ_STATUS hash_seq;
2668  pgssEntry *entry;
2669  FILE *qfile;
2670  long num_entries;
2671  long num_remove = 0;
2672  pgssHashKey key;
2673  TimestampTz stats_reset;
2674 
2675  if (!pgss || !pgss_hash)
2676  ereport(ERROR,
2677  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2678  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2679 
2681  num_entries = hash_get_num_entries(pgss_hash);
2682 
2683  stats_reset = GetCurrentTimestamp();
2684 
2685  if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2686  {
2687  /* If all the parameters are available, use the fast path. */
2688  memset(&key, 0, sizeof(pgssHashKey));
2689  key.userid = userid;
2690  key.dbid = dbid;
2691  key.queryid = queryid;
2692 
2693  /*
2694  * Reset the entry if it exists, starting with the non-top-level
2695  * entry.
2696  */
2697  key.toplevel = false;
2698  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2699 
2700  SINGLE_ENTRY_RESET(entry);
2701 
2702  /* Also reset the top-level entry if it exists. */
2703  key.toplevel = true;
2704  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2705 
2706  SINGLE_ENTRY_RESET(entry);
2707  }
2708  else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2709  {
2710  /* Reset entries corresponding to valid parameters. */
2711  hash_seq_init(&hash_seq, pgss_hash);
2712  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2713  {
2714  if ((!userid || entry->key.userid == userid) &&
2715  (!dbid || entry->key.dbid == dbid) &&
2716  (!queryid || entry->key.queryid == queryid))
2717  {
2718  SINGLE_ENTRY_RESET(entry);
2719  }
2720  }
2721  }
2722  else
2723  {
2724  /* Reset all entries. */
2725  hash_seq_init(&hash_seq, pgss_hash);
2726  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2727  {
2728  SINGLE_ENTRY_RESET(entry);
2729  }
2730  }
2731 
2732  /* All entries are removed? */
2733  if (num_entries != num_remove)
2734  goto release_lock;
2735 
2736  /*
2737  * Reset global statistics for pg_stat_statements since all entries are
2738  * removed.
2739  */
2741  pgss->stats.dealloc = 0;
2742  pgss->stats.stats_reset = stats_reset;
2744 
2745  /*
2746  * Write new empty query file, perhaps even creating a new one to recover
2747  * if the file was missing.
2748  */
2750  if (qfile == NULL)
2751  {
2752  ereport(LOG,
2754  errmsg("could not create file \"%s\": %m",
2755  PGSS_TEXT_FILE)));
2756  goto done;
2757  }
2758 
2759  /* If ftruncate fails, log it, but it's not a fatal problem */
2760  if (ftruncate(fileno(qfile), 0) != 0)
2761  ereport(LOG,
2763  errmsg("could not truncate file \"%s\": %m",
2764  PGSS_TEXT_FILE)));
2765 
2766  FreeFile(qfile);
2767 
2768 done:
2769  pgss->extent = 0;
2770  /* This counts as a query text garbage collection for our purposes */
2771  record_gc_qtexts();
2772 
2773 release_lock:
2775 
2776  return stats_reset;
2777 }
2778 
2779 /*
2780  * Generate a normalized version of the query string that will be used to
2781  * represent all similar queries.
2782  *
2783  * Note that the normalized representation may well vary depending on
2784  * just which "equivalent" query is used to create the hashtable entry.
2785  * We assume this is OK.
2786  *
2787  * If query_loc > 0, then "query" has been advanced by that much compared to
2788  * the original string start, so we need to translate the provided locations
2789  * to compensate. (This lets us avoid re-scanning statements before the one
2790  * of interest, so it's worth doing.)
2791  *
2792  * *query_len_p contains the input string length, and is updated with
2793  * the result string length on exit. The resulting string might be longer
2794  * or shorter depending on what happens with replacement of constants.
2795  *
2796  * Returns a palloc'd string.
2797  */
2798 static char *
2799 generate_normalized_query(JumbleState *jstate, const char *query,
2800  int query_loc, int *query_len_p)
2801 {
2802  char *norm_query;
2803  int query_len = *query_len_p;
2804  int i,
2805  norm_query_buflen, /* Space allowed for norm_query */
2806  len_to_wrt, /* Length (in bytes) to write */
2807  quer_loc = 0, /* Source query byte location */
2808  n_quer_loc = 0, /* Normalized query byte location */
2809  last_off = 0, /* Offset from start for previous tok */
2810  last_tok_len = 0; /* Length (in bytes) of that tok */
2811 
2812  /*
2813  * Get constants' lengths (core system only gives us locations). Note
2814  * this also ensures the items are sorted by location.
2815  */
2816  fill_in_constant_lengths(jstate, query, query_loc);
2817 
2818  /*
2819  * Allow for $n symbols to be longer than the constants they replace.
2820  * Constants must take at least one byte in text form, while a $n symbol
2821  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2822  * could refine that limit based on the max value of n for the current
2823  * query, but it hardly seems worth any extra effort to do so.
2824  */
2825  norm_query_buflen = query_len + jstate->clocations_count * 10;
2826 
2827  /* Allocate result buffer */
2828  norm_query = palloc(norm_query_buflen + 1);
2829 
2830  for (i = 0; i < jstate->clocations_count; i++)
2831  {
2832  int off, /* Offset from start for cur tok */
2833  tok_len; /* Length (in bytes) of that tok */
2834 
2835  off = jstate->clocations[i].location;
2836  /* Adjust recorded location if we're dealing with partial string */
2837  off -= query_loc;
2838 
2839  tok_len = jstate->clocations[i].length;
2840 
2841  if (tok_len < 0)
2842  continue; /* ignore any duplicates */
2843 
2844  /* Copy next chunk (what precedes the next constant) */
2845  len_to_wrt = off - last_off;
2846  len_to_wrt -= last_tok_len;
2847 
2848  Assert(len_to_wrt >= 0);
2849  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2850  n_quer_loc += len_to_wrt;
2851 
2852  /* And insert a param symbol in place of the constant token */
2853  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2854  i + 1 + jstate->highest_extern_param_id);
2855 
2856  quer_loc = off + tok_len;
2857  last_off = off;
2858  last_tok_len = tok_len;
2859  }
2860 
2861  /*
2862  * We've copied up until the last ignorable constant. Copy over the
2863  * remaining bytes of the original query string.
2864  */
2865  len_to_wrt = query_len - quer_loc;
2866 
2867  Assert(len_to_wrt >= 0);
2868  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2869  n_quer_loc += len_to_wrt;
2870 
2871  Assert(n_quer_loc <= norm_query_buflen);
2872  norm_query[n_quer_loc] = '\0';
2873 
2874  *query_len_p = n_quer_loc;
2875  return norm_query;
2876 }
2877 
2878 /*
2879  * Given a valid SQL string and an array of constant-location records,
2880  * fill in the textual lengths of those constants.
2881  *
2882  * The constants may use any allowed constant syntax, such as float literals,
2883  * bit-strings, single-quoted strings and dollar-quoted strings. This is
2884  * accomplished by using the public API for the core scanner.
2885  *
2886  * It is the caller's job to ensure that the string is a valid SQL statement
2887  * with constants at the indicated locations. Since in practice the string
2888  * has already been parsed, and the locations that the caller provides will
2889  * have originated from within the authoritative parser, this should not be
2890  * a problem.
2891  *
2892  * Duplicate constant pointers are possible, and will have their lengths
2893  * marked as '-1', so that they are later ignored. (Actually, we assume the
2894  * lengths were initialized as -1 to start with, and don't change them here.)
2895  *
2896  * If query_loc > 0, then "query" has been advanced by that much compared to
2897  * the original string start, so we need to translate the provided locations
2898  * to compensate. (This lets us avoid re-scanning statements before the one
2899  * of interest, so it's worth doing.)
2900  *
2901  * N.B. There is an assumption that a '-' character at a Const location begins
2902  * a negative numeric constant. This precludes there ever being another
2903  * reason for a constant to start with a '-'.
2904  */
2905 static void
2906 fill_in_constant_lengths(JumbleState *jstate, const char *query,
2907  int query_loc)
2908 {
2909  LocationLen *locs;
2911  core_yy_extra_type yyextra;
2912  core_YYSTYPE yylval;
2913  YYLTYPE yylloc;
2914  int last_loc = -1;
2915  int i;
2916 
2917  /*
2918  * Sort the records by location so that we can process them in order while
2919  * scanning the query text.
2920  */
2921  if (jstate->clocations_count > 1)
2922  qsort(jstate->clocations, jstate->clocations_count,
2923  sizeof(LocationLen), comp_location);
2924  locs = jstate->clocations;
2925 
2926  /* initialize the flex scanner --- should match raw_parser() */
2927  yyscanner = scanner_init(query,
2928  &yyextra,
2929  &ScanKeywords,
2931 
2932  /* we don't want to re-emit any escape string warnings */
2933  yyextra.escape_string_warning = false;
2934 
2935  /* Search for each constant, in sequence */
2936  for (i = 0; i < jstate->clocations_count; i++)
2937  {
2938  int loc = locs[i].location;
2939  int tok;
2940 
2941  /* Adjust recorded location if we're dealing with partial string */
2942  loc -= query_loc;
2943 
2944  Assert(loc >= 0);
2945 
2946  if (loc <= last_loc)
2947  continue; /* Duplicate constant, ignore */
2948 
2949  /* Lex tokens until we find the desired constant */
2950  for (;;)
2951  {
2952  tok = core_yylex(&yylval, &yylloc, yyscanner);
2953 
2954  /* We should not hit end-of-string, but if we do, behave sanely */
2955  if (tok == 0)
2956  break; /* out of inner for-loop */
2957 
2958  /*
2959  * We should find the token position exactly, but if we somehow
2960  * run past it, work with that.
2961  */
2962  if (yylloc >= loc)
2963  {
2964  if (query[loc] == '-')
2965  {
2966  /*
2967  * It's a negative value - this is the one and only case
2968  * where we replace more than a single token.
2969  *
2970  * Do not compensate for the core system's special-case
2971  * adjustment of location to that of the leading '-'
2972  * operator in the event of a negative constant. It is
2973  * also useful for our purposes to start from the minus
2974  * symbol. In this way, queries like "select * from foo
2975  * where bar = 1" and "select * from foo where bar = -2"
2976  * will have identical normalized query strings.
2977  */
2978  tok = core_yylex(&yylval, &yylloc, yyscanner);
2979  if (tok == 0)
2980  break; /* out of inner for-loop */
2981  }
2982 
2983  /*
2984  * We now rely on the assumption that flex has placed a zero
2985  * byte after the text of the current token in scanbuf.
2986  */
2987  locs[i].length = strlen(yyextra.scanbuf + loc);
2988  break; /* out of inner for-loop */
2989  }
2990  }
2991 
2992  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2993  if (tok == 0)
2994  break;
2995 
2996  last_loc = loc;
2997  }
2998 
3000 }
3001 
3002 /*
3003  * comp_location: comparator for qsorting LocationLen structs by location
3004  */
3005 static int
3006 comp_location(const void *a, const void *b)
3007 {
3008  int l = ((const LocationLen *) a)->location;
3009  int r = ((const LocationLen *) b)->location;
3010 
3011  return pg_cmp_s32(l, r);
3012 }
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5268
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:636
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
static Datum values[MAXATTR]
Definition: bootstrap.c:150
#define CStringGetTextDatum(s)
Definition: builtins.h:97
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:995
#define PG_BINARY_R
Definition: c.h:1266
#define MAXALIGN(LEN)
Definition: c.h:802
signed int int32
Definition: c.h:496
#define Max(x, y)
Definition: c.h:989
#define Assert(condition)
Definition: c.h:849
#define PG_BINARY
Definition: c.h:1264
#define UINT64_FORMAT
Definition: c.h:540
#define unlikely(x)
Definition: c.h:314
#define PG_BINARY_W
Definition: c.h:1267
size_t Size
Definition: c.h:596
enc
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1514
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:783
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define PG_TRY(...)
Definition: elog.h:371
#define PG_END_TRY(...)
Definition: elog.h:396
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_FINALLY(...)
Definition: elog.h:388
#define ereport(elevel,...)
Definition: elog.h:149
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:69
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:68
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:66
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:139
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:306
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:67
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:474
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:414
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:79
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:86
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:75
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:90
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2606
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2832
int FreeFile(FILE *file)
Definition: fd.c:2804
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2656
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:645
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
bool IsUnderPostmaster
Definition: globals.c:119
Oid MyDatabaseId
Definition: globals.c:93
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5201
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5090
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5237
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5116
@ PGC_SUSET
Definition: guc.h:74
@ PGC_POSTMASTER
Definition: guc.h:70
@ PGC_SIGHUP
Definition: guc.h:71
return str start
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:191
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
@ INSTRUMENT_ALL
Definition: instrument.h:65
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:598
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:60
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:76
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
PGDLLIMPORT const ScanKeywordList ScanKeywords
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:573
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:670
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
void pfree(void *pointer)
Definition: mcxt.c:1521
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define MaxAllocHugeSize
Definition: memutils.h:45
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:507
Oid GetUserId(void)
Definition: miscinit.c:514
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1781
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1778
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:59
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
void _PG_init(void)
static void gc_qtexts(void)
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
PG_MODULE_MAGIC
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static char * buf
Definition: pg_test_fsync.c:73
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281
static int duration
Definition: pgbench.c:174
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
planner_hook_type planner_hook
Definition: planner.c:72
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:289
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
#define sprintf
Definition: port.h:240
#define pg_pwrite
Definition: port.h:226
#define snprintf
Definition: port.h:238
#define qsort(a, b, c, d)
Definition: port.h:447
#define Int64GetDatumFast(X)
Definition: postgres.h:554
uintptr_t Datum
Definition: postgres.h:64
#define Float8GetDatumFast(X)
Definition: postgres.h:556
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
tree context
Definition: radixtree.h:1835
MemoryContextSwitchTo(old_ctx)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
void scanner_finish(core_yyscan_t yyscanner)
PGDLLIMPORT const uint16 ScanKeywordTokens[]
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:25
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:332
static pg_noinline void Size size
Definition: slab.c:607
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
static void error(void)
Definition: sql-dyntest.c:147
instr_time local_blk_read_time
Definition: instrument.h:38
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
int64 local_blks_written
Definition: instrument.h:33
instr_time temp_blk_read_time
Definition: instrument.h:40
instr_time local_blk_write_time
Definition: instrument.h:39
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
int64 jit_deform_count
double jit_emission_time
int64 shared_blks_hit
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
double jit_deform_time
int64 parallel_workers_to_launch
int64 local_blks_read
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition: execnodes.h:711
struct JitContext * es_jit
Definition: execnodes.h:729
uint64 es_total_processed
Definition: execnodes.h:681
MemoryContext es_query_cxt
Definition: execnodes.h:675
int es_parallel_workers_launched
Definition: execnodes.h:713
fmNodePtr resultinfo
Definition: fmgr.h:89
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
WalUsage walusage
Definition: instrument.h:92
BufferUsage bufusage
Definition: instrument.h:91
JitInstrumentation instr
Definition: jit.h:62
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:42
instr_time deform_counter
Definition: jit.h:36
instr_time emission_counter
Definition: jit.h:45
instr_time inlining_counter
Definition: jit.h:39
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: lwlock.h:42
Definition: nodes.h:129
const char * p_sourcetext
Definition: parse_node.h:195
ParseLoc stmt_len
Definition: plannodes.h:99
ParseLoc stmt_location
Definition: plannodes.h:98
Node * utilityStmt
Definition: plannodes.h:95
uint64 queryId
Definition: plannodes.h:54
uint64 nprocessed
Definition: cmdtag.h:32
CommandTag commandTag
Definition: cmdtag.h:31
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
Node * utilityStmt
Definition: parsenodes.h:136
ParseLoc stmt_location
Definition: parsenodes.h:240
TupleDesc setDesc
Definition: execnodes.h:343
Tuplestorestate * setResult
Definition: execnodes.h:342
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
Definition: guc.h:170
bool escape_string_warning
Definition: scanner.h:88
char * scanbuf
Definition: scanner.h:72
Counters counters
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:273
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:540
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:70
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
#define fstat
Definition: win32_port.h:283
#define ftruncate(a, b)
Definition: win32_port.h:82