PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement planning and execution times as well as resource
5  * usage across a whole database cluster.
6  *
7  * Execution costs are totaled for each distinct source query, and kept in
8  * a shared hashtable. (We track only as many distinct queries as will fit
9  * in the designated amount of shared memory.)
10  *
11  * Starting in Postgres 9.2, this module normalized query entries. As of
12  * Postgres 14, the normalization is done by the core if compute_query_id is
13  * enabled, or optionally by third-party modules.
14  *
15  * To facilitate presenting entries to users, we create "representative" query
16  * strings in which constants are replaced with parameter symbols ($n), to
17  * make it clearer what a normalized entry can represent. To save on shared
18  * memory, and to avoid having to truncate oversized query strings, we store
19  * these strings in a temporary external query-texts file. Offsets into this
20  * file are kept in shared memory.
21  *
22  * Note about locking issues: to create or delete an entry in the shared
23  * hashtable, one must hold pgss->lock exclusively. Modifying any field
24  * in an entry except the counters requires the same. To look up an entry,
25  * one must hold the lock shared. To read or update the counters within
26  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27  * disappear!) and also take the entry's mutex spinlock.
28  * The shared state variable pgss->extent (the next free spot in the external
29  * query-text file) should be accessed only while holding either the
30  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31  * allow reserving file space while holding only shared lock on pgss->lock.
32  * Rewriting the entire external query-text file, eg for garbage collection,
33  * requires holding pgss->lock exclusively; this allows individual entries
34  * in the file to be read or written while holding only shared lock.
35  *
36  *
37  * Copyright (c) 2008-2024, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  * contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <math.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49 
50 #include "access/parallel.h"
51 #include "catalog/pg_authid.h"
52 #include "common/hashfn.h"
53 #include "common/int.h"
54 #include "executor/instrument.h"
55 #include "funcapi.h"
56 #include "jit/jit.h"
57 #include "mb/pg_wchar.h"
58 #include "miscadmin.h"
59 #include "nodes/queryjumble.h"
60 #include "optimizer/planner.h"
61 #include "parser/analyze.h"
62 #include "parser/parsetree.h"
63 #include "parser/scanner.h"
64 #include "parser/scansup.h"
65 #include "pgstat.h"
66 #include "storage/fd.h"
67 #include "storage/ipc.h"
68 #include "storage/lwlock.h"
69 #include "storage/shmem.h"
70 #include "storage/spin.h"
71 #include "tcop/utility.h"
72 #include "utils/acl.h"
73 #include "utils/builtins.h"
74 #include "utils/memutils.h"
75 #include "utils/timestamp.h"
76 
78 
79 /* Location of permanent stats file (valid when database is shut down) */
80 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81 
82 /*
83  * Location of external query text file.
84  */
85 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86 
87 /* Magic number identifying the stats file format */
88 static const uint32 PGSS_FILE_HEADER = 0x20220408;
89 
90 /* PostgreSQL major version number, changes in which invalidate all entries */
91 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
92 
/*
 * Tuning constants for the per-entry usage factor.
 */

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)    (1.0)   /* constant for now; argument is ignored */
#define USAGE_INIT              (1.0)   /* including initial planning */
#define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
#define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT   5       /* free this % of entries at once */

/*
 * An entry is "sticky" while it has been neither planned nor executed, i.e.
 * both call counters are still zero.
 *
 * "c" is any expression yielding a Counters struct.  It is parenthesized so
 * that arguments such as *ptr or arr[i] bind correctly to the member access;
 * note that it is evaluated twice, so it must not have side effects.
 */
#define IS_STICKY(c)    (((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102 
103 /*
104  * Extension version number, for supporting older extension versions' objects
105  */
106 typedef enum pgssVersion
107 {
117 
118 typedef enum pgssStoreKind
119 {
121 
122  /*
123  * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
124  * reference the underlying values in the arrays in the Counters struct,
125  * and this order is required in pg_stat_statements_internal().
126  */
129 
130  PGSS_NUMKIND /* Must be last value of this enum */
132 
133 /*
134  * Hashtable key that defines the identity of a hashtable entry. We separate
135  * queries by user and by database even if they are otherwise identical.
136  *
137  * If you add a new key to this struct, make sure to teach pgss_store() to
138  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
139  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
140 
141  */
142 typedef struct pgssHashKey
143 {
144  Oid userid; /* user OID */
145  Oid dbid; /* database OID */
146  uint64 queryid; /* query identifier */
147  bool toplevel; /* query executed at top level */
149 
150 /*
151  * The actual stats counters kept within pgssEntry.
152  */
153 typedef struct Counters
154 {
155  int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
156  double total_time[PGSS_NUMKIND]; /* total planning/execution time,
157  * in msec */
158  double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
159  * msec since min/max reset */
160  double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
161  * msec since min/max reset */
162  double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
163  * msec */
164  double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
165  * planning/execution time in msec */
166  int64 rows; /* total # of retrieved or affected rows */
167  int64 shared_blks_hit; /* # of shared buffer hits */
168  int64 shared_blks_read; /* # of shared disk blocks read */
169  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
170  int64 shared_blks_written; /* # of shared disk blocks written */
171  int64 local_blks_hit; /* # of local buffer hits */
172  int64 local_blks_read; /* # of local disk blocks read */
173  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
174  int64 local_blks_written; /* # of local disk blocks written */
175  int64 temp_blks_read; /* # of temp blocks read */
176  int64 temp_blks_written; /* # of temp blocks written */
177  double shared_blk_read_time; /* time spent reading shared blocks,
178  * in msec */
179  double shared_blk_write_time; /* time spent writing shared blocks,
180  * in msec */
181  double local_blk_read_time; /* time spent reading local blocks, in
182  * msec */
183  double local_blk_write_time; /* time spent writing local blocks, in
184  * msec */
185  double temp_blk_read_time; /* time spent reading temp blocks, in msec */
186  double temp_blk_write_time; /* time spent writing temp blocks, in
187  * msec */
188  double usage; /* usage factor */
189  int64 wal_records; /* # of WAL records generated */
190  int64 wal_fpi; /* # of WAL full page images generated */
191  uint64 wal_bytes; /* total amount of WAL generated in bytes */
192  int64 jit_functions; /* total number of JIT functions emitted */
193  double jit_generation_time; /* total time to generate jit code */
194  int64 jit_inlining_count; /* number of times inlining time has been
195  * > 0 */
196  double jit_deform_time; /* total time to deform tuples in jit code */
197  int64 jit_deform_count; /* number of times deform time has been >
198  * 0 */
199 
200  double jit_inlining_time; /* total time to inline jit code */
201  int64 jit_optimization_count; /* number of times optimization time
202  * has been > 0 */
203  double jit_optimization_time; /* total time to optimize jit code */
204  int64 jit_emission_count; /* number of times emission time has been
205  * > 0 */
206  double jit_emission_time; /* total time to emit jit code */
208 
209 /*
210  * Global statistics for pg_stat_statements
211  */
212 typedef struct pgssGlobalStats
213 {
214  int64 dealloc; /* # of times entries were deallocated */
215  TimestampTz stats_reset; /* timestamp with all stats reset */
217 
218 /*
219  * Statistics per statement
220  *
221  * Note: in event of a failure in garbage collection of the query text file,
222  * we reset query_offset to zero and query_len to -1. This will be seen as
223  * an invalid state by qtext_fetch().
224  */
225 typedef struct pgssEntry
226 {
227  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
228  Counters counters; /* the statistics for this query */
229  Size query_offset; /* query text offset in external file */
230  int query_len; /* # of valid bytes in query string, or -1 */
231  int encoding; /* query text encoding */
232  TimestampTz stats_since; /* timestamp of entry allocation */
233  TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
234  slock_t mutex; /* protects the counters only */
236 
237 /*
238  * Global shared state
239  */
240 typedef struct pgssSharedState
241 {
242  LWLock *lock; /* protects hashtable search/modification */
243  double cur_median_usage; /* current median usage in hashtable */
244  Size mean_query_len; /* current mean entry text length */
245  slock_t mutex; /* protects following fields only: */
246  Size extent; /* current extent of query file */
247  int n_writers; /* number of active writers to query file */
248  int gc_count; /* query file garbage collection cycle count */
249  pgssGlobalStats stats; /* global statistics for pgss */
251 
252 /*---- Local variables ----*/
253 
254 /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
255 static int nesting_level = 0;
256 
257 /* Saved hook values in case of unload */
267 
268 /* Links to shared memory state */
269 static pgssSharedState *pgss = NULL;
270 static HTAB *pgss_hash = NULL;
271 
272 /*---- GUC variables ----*/
273 
274 typedef enum
275 {
276  PGSS_TRACK_NONE, /* track no statements */
277  PGSS_TRACK_TOP, /* only top level statements */
278  PGSS_TRACK_ALL, /* all statements, including nested ones */
280 
281 static const struct config_enum_entry track_options[] =
282 {
283  {"none", PGSS_TRACK_NONE, false},
284  {"top", PGSS_TRACK_TOP, false},
285  {"all", PGSS_TRACK_ALL, false},
286  {NULL, 0, false}
287 };
288 
289 static int pgss_max = 5000; /* max # statements to track */
290 static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
291 static bool pgss_track_utility = true; /* whether to track utility commands */
292 static bool pgss_track_planning = false; /* whether to track planning
293  * duration */
294 static bool pgss_save = true; /* whether to save stats across shutdown */
295 
296 
/*
 * pgss_enabled(level): should a statement at the given nesting level be
 * tracked?
 *
 * Never in a parallel worker.  Otherwise the answer follows the
 * pg_stat_statements.track setting: "top" tracks only level 0, "all" tracks
 * every level, and anything else tracks nothing.  "level" is evaluated only
 * when the setting is "top".
 */
#define pgss_enabled(level) \
    (!IsParallelWorker() && \
     ((pgss_track == PGSS_TRACK_TOP && (level) == 0) || \
      pgss_track == PGSS_TRACK_ALL))
301 
/*
 * record_gc_qtexts(): bump the query-text garbage collection cycle counter.
 *
 * The counter lives in shared state, so the increment is done while holding
 * the pgss->mutex spinlock.  Wrapped in do/while(0) so the macro behaves as
 * a single statement.
 */
#define record_gc_qtexts() \
    do { \
        SpinLockAcquire(&pgss->mutex); \
        pgss->gc_count += 1; \
        SpinLockRelease(&pgss->mutex); \
    } while (0)
308 
309 /*---- Function declarations ----*/
310 
322 
323 static void pgss_shmem_request(void);
324 static void pgss_shmem_startup(void);
325 static void pgss_shmem_shutdown(int code, Datum arg);
326 static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
327  JumbleState *jstate);
329  const char *query_string,
330  int cursorOptions,
331  ParamListInfo boundParams);
332 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
333 static void pgss_ExecutorRun(QueryDesc *queryDesc,
334  ScanDirection direction,
335  uint64 count, bool execute_once);
336 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
337 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
338 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
339  bool readOnlyTree,
341  QueryEnvironment *queryEnv,
343 static void pgss_store(const char *query, uint64 queryId,
344  int query_location, int query_len,
345  pgssStoreKind kind,
346  double total_time, uint64 rows,
347  const BufferUsage *bufusage,
348  const WalUsage *walusage,
349  const struct JitInstrumentation *jitusage,
350  JumbleState *jstate);
352  pgssVersion api_version,
353  bool showtext);
354 static Size pgss_memsize(void);
355 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
356  int encoding, bool sticky);
357 static void entry_dealloc(void);
358 static bool qtext_store(const char *query, int query_len,
359  Size *query_offset, int *gc_count);
360 static char *qtext_load_file(Size *buffer_size);
361 static char *qtext_fetch(Size query_offset, int query_len,
362  char *buffer, Size buffer_size);
363 static bool need_gc_qtexts(void);
364 static void gc_qtexts(void);
365 static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
366 static char *generate_normalized_query(JumbleState *jstate, const char *query,
367  int query_loc, int *query_len_p);
368 static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
369  int query_loc);
370 static int comp_location(const void *a, const void *b);
371 
372 
373 /*
374  * Module load callback
375  */
376 void
377 _PG_init(void)
378 {
379  /*
380  * In order to create our shared memory area, we have to be loaded via
381  * shared_preload_libraries. If not, fall out without hooking into any of
382  * the main system. (We don't throw error here because it seems useful to
383  * allow the pg_stat_statements functions to be created even when the
384  * module isn't active. The functions must protect themselves against
385  * being called then, however.)
386  */
388  return;
389 
390  /*
391  * Inform the postmaster that we want to enable query_id calculation if
392  * compute_query_id is set to auto.
393  */
394  EnableQueryId();
395 
396  /*
397  * Define (or redefine) custom GUC variables.
398  */
399  DefineCustomIntVariable("pg_stat_statements.max",
400  "Sets the maximum number of statements tracked by pg_stat_statements.",
401  NULL,
402  &pgss_max,
403  5000,
404  100,
405  INT_MAX / 2,
407  0,
408  NULL,
409  NULL,
410  NULL);
411 
412  DefineCustomEnumVariable("pg_stat_statements.track",
413  "Selects which statements are tracked by pg_stat_statements.",
414  NULL,
415  &pgss_track,
418  PGC_SUSET,
419  0,
420  NULL,
421  NULL,
422  NULL);
423 
424  DefineCustomBoolVariable("pg_stat_statements.track_utility",
425  "Selects whether utility commands are tracked by pg_stat_statements.",
426  NULL,
428  true,
429  PGC_SUSET,
430  0,
431  NULL,
432  NULL,
433  NULL);
434 
435  DefineCustomBoolVariable("pg_stat_statements.track_planning",
436  "Selects whether planning duration is tracked by pg_stat_statements.",
437  NULL,
439  false,
440  PGC_SUSET,
441  0,
442  NULL,
443  NULL,
444  NULL);
445 
446  DefineCustomBoolVariable("pg_stat_statements.save",
447  "Save pg_stat_statements statistics across server shutdowns.",
448  NULL,
449  &pgss_save,
450  true,
451  PGC_SIGHUP,
452  0,
453  NULL,
454  NULL,
455  NULL);
456 
457  MarkGUCPrefixReserved("pg_stat_statements");
458 
459  /*
460  * Install hooks.
461  */
480 }
481 
482 /*
483  * shmem_request hook: request additional shared resources. We'll allocate or
484  * attach to the shared resources in pgss_shmem_startup().
485  */
486 static void
488 {
491 
493  RequestNamedLWLockTranche("pg_stat_statements", 1);
494 }
495 
496 /*
497  * shmem_startup hook: allocate or attach to shared memory,
498  * then load any pre-existing statistics from file.
499  * Also create and load the query-texts file, which is expected to exist
500  * (even if empty) while the module is enabled.
501  */
502 static void
504 {
505  bool found;
506  HASHCTL info;
507  FILE *file = NULL;
508  FILE *qfile = NULL;
509  uint32 header;
510  int32 num;
511  int32 pgver;
512  int32 i;
513  int buffer_size;
514  char *buffer = NULL;
515 
518 
519  /* reset in case this is a restart within the postmaster */
520  pgss = NULL;
521  pgss_hash = NULL;
522 
523  /*
524  * Create or attach to the shared memory state, including hash table
525  */
526  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
527 
528  pgss = ShmemInitStruct("pg_stat_statements",
529  sizeof(pgssSharedState),
530  &found);
531 
532  if (!found)
533  {
534  /* First time through ... */
535  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
539  pgss->extent = 0;
540  pgss->n_writers = 0;
541  pgss->gc_count = 0;
542  pgss->stats.dealloc = 0;
544  }
545 
546  info.keysize = sizeof(pgssHashKey);
547  info.entrysize = sizeof(pgssEntry);
548  pgss_hash = ShmemInitHash("pg_stat_statements hash",
550  &info,
552 
553  LWLockRelease(AddinShmemInitLock);
554 
555  /*
556  * If we're in the postmaster (or a standalone backend...), set up a shmem
557  * exit hook to dump the statistics to disk.
558  */
559  if (!IsUnderPostmaster)
561 
562  /*
563  * Done if some other process already completed our initialization.
564  */
565  if (found)
566  return;
567 
568  /*
569  * Note: we don't bother with locks here, because there should be no other
570  * processes running when this code is reached.
571  */
572 
573  /* Unlink query text file possibly left over from crash */
574  unlink(PGSS_TEXT_FILE);
575 
576  /* Allocate new query text temp file */
578  if (qfile == NULL)
579  goto write_error;
580 
581  /*
582  * If we were told not to load old statistics, we're done. (Note we do
583  * not try to unlink any old dump file in this case. This seems a bit
584  * questionable but it's the historical behavior.)
585  */
586  if (!pgss_save)
587  {
588  FreeFile(qfile);
589  return;
590  }
591 
592  /*
593  * Attempt to load old statistics from the dump file.
594  */
596  if (file == NULL)
597  {
598  if (errno != ENOENT)
599  goto read_error;
600  /* No existing persisted stats file, so we're done */
601  FreeFile(qfile);
602  return;
603  }
604 
605  buffer_size = 2048;
606  buffer = (char *) palloc(buffer_size);
607 
608  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
609  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
610  fread(&num, sizeof(int32), 1, file) != 1)
611  goto read_error;
612 
613  if (header != PGSS_FILE_HEADER ||
614  pgver != PGSS_PG_MAJOR_VERSION)
615  goto data_error;
616 
617  for (i = 0; i < num; i++)
618  {
619  pgssEntry temp;
620  pgssEntry *entry;
621  Size query_offset;
622 
623  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
624  goto read_error;
625 
626  /* Encoding is the only field we can easily sanity-check */
627  if (!PG_VALID_BE_ENCODING(temp.encoding))
628  goto data_error;
629 
630  /* Resize buffer as needed */
631  if (temp.query_len >= buffer_size)
632  {
633  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
634  buffer = repalloc(buffer, buffer_size);
635  }
636 
637  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
638  goto read_error;
639 
640  /* Should have a trailing null, but let's make sure */
641  buffer[temp.query_len] = '\0';
642 
643  /* Skip loading "sticky" entries */
644  if (IS_STICKY(temp.counters))
645  continue;
646 
647  /* Store the query text */
648  query_offset = pgss->extent;
649  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
650  goto write_error;
651  pgss->extent += temp.query_len + 1;
652 
653  /* make the hashtable entry (discards old entries if too many) */
654  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
655  temp.encoding,
656  false);
657 
658  /* copy in the actual stats */
659  entry->counters = temp.counters;
660  entry->stats_since = temp.stats_since;
662  }
663 
664  /* Read global statistics for pg_stat_statements */
665  if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
666  goto read_error;
667 
668  pfree(buffer);
669  FreeFile(file);
670  FreeFile(qfile);
671 
672  /*
673  * Remove the persisted stats file so it's not included in
674  * backups/replication standbys, etc. A new file will be written on next
675  * shutdown.
676  *
677  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
678  * because we remove that file on startup; it acts inversely to
679  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
680  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
681  * when the server is not running. Leaving the file creates no danger of
682  * a newly restored database having a spurious record of execution costs,
683  * which is what we're really concerned about here.
684  */
685  unlink(PGSS_DUMP_FILE);
686 
687  return;
688 
689 read_error:
690  ereport(LOG,
692  errmsg("could not read file \"%s\": %m",
693  PGSS_DUMP_FILE)));
694  goto fail;
695 data_error:
696  ereport(LOG,
697  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
698  errmsg("ignoring invalid data in file \"%s\"",
699  PGSS_DUMP_FILE)));
700  goto fail;
701 write_error:
702  ereport(LOG,
704  errmsg("could not write file \"%s\": %m",
705  PGSS_TEXT_FILE)));
706 fail:
707  if (buffer)
708  pfree(buffer);
709  if (file)
710  FreeFile(file);
711  if (qfile)
712  FreeFile(qfile);
713  /* If possible, throw away the bogus file; ignore any error */
714  unlink(PGSS_DUMP_FILE);
715 
716  /*
717  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
718  * server is running with pg_stat_statements enabled
719  */
720 }
721 
722 /*
723  * shmem_shutdown hook: Dump statistics into file.
724  *
725  * Note: we don't bother with acquiring lock, because there should be no
726  * other processes running when this is called.
727  */
728 static void
730 {
731  FILE *file;
732  char *qbuffer = NULL;
733  Size qbuffer_size = 0;
734  HASH_SEQ_STATUS hash_seq;
735  int32 num_entries;
736  pgssEntry *entry;
737 
738  /* Don't try to dump during a crash. */
739  if (code)
740  return;
741 
742  /* Safety check ... shouldn't get here unless shmem is set up. */
743  if (!pgss || !pgss_hash)
744  return;
745 
746  /* Don't dump if told not to. */
747  if (!pgss_save)
748  return;
749 
750  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
751  if (file == NULL)
752  goto error;
753 
754  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
755  goto error;
756  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
757  goto error;
758  num_entries = hash_get_num_entries(pgss_hash);
759  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
760  goto error;
761 
762  qbuffer = qtext_load_file(&qbuffer_size);
763  if (qbuffer == NULL)
764  goto error;
765 
766  /*
767  * When serializing to disk, we store query texts immediately after their
768  * entry data. Any orphaned query texts are thereby excluded.
769  */
770  hash_seq_init(&hash_seq, pgss_hash);
771  while ((entry = hash_seq_search(&hash_seq)) != NULL)
772  {
773  int len = entry->query_len;
774  char *qstr = qtext_fetch(entry->query_offset, len,
775  qbuffer, qbuffer_size);
776 
777  if (qstr == NULL)
778  continue; /* Ignore any entries with bogus texts */
779 
780  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
781  fwrite(qstr, 1, len + 1, file) != len + 1)
782  {
783  /* note: we assume hash_seq_term won't change errno */
784  hash_seq_term(&hash_seq);
785  goto error;
786  }
787  }
788 
789  /* Dump global statistics for pg_stat_statements */
790  if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
791  goto error;
792 
793  free(qbuffer);
794  qbuffer = NULL;
795 
796  if (FreeFile(file))
797  {
798  file = NULL;
799  goto error;
800  }
801 
802  /*
803  * Rename file into place, so we atomically replace any old one.
804  */
806 
807  /* Unlink query-texts file; it's not needed while shutdown */
808  unlink(PGSS_TEXT_FILE);
809 
810  return;
811 
812 error:
813  ereport(LOG,
815  errmsg("could not write file \"%s\": %m",
816  PGSS_DUMP_FILE ".tmp")));
817  free(qbuffer);
818  if (file)
819  FreeFile(file);
820  unlink(PGSS_DUMP_FILE ".tmp");
821  unlink(PGSS_TEXT_FILE);
822 }
823 
824 /*
825  * Post-parse-analysis hook: mark query with a queryId
826  */
827 static void
829 {
831  prev_post_parse_analyze_hook(pstate, query, jstate);
832 
833  /* Safety check... */
835  return;
836 
837  /*
838  * If it's EXECUTE, clear the queryId so that stats will accumulate for
839  * the underlying PREPARE. But don't do this if we're not tracking
840  * utility statements, to avoid messing up another extension that might be
841  * tracking them.
842  */
843  if (query->utilityStmt)
844  {
846  {
847  query->queryId = UINT64CONST(0);
848  return;
849  }
850  }
851 
852  /*
853  * If query jumbling were able to identify any ignorable constants, we
854  * immediately create a hash table entry for the query, so that we can
855  * record the normalized form of the query string. If there were no such
856  * constants, the normalized string would be the same as the query text
857  * anyway, so there's no need for an early entry.
858  */
859  if (jstate && jstate->clocations_count > 0)
860  pgss_store(pstate->p_sourcetext,
861  query->queryId,
862  query->stmt_location,
863  query->stmt_len,
864  PGSS_INVALID,
865  0,
866  0,
867  NULL,
868  NULL,
869  NULL,
870  jstate);
871 }
872 
873 /*
874  * Planner hook: forward to regular planner, but measure planning time
875  * if needed.
876  */
877 static PlannedStmt *
879  const char *query_string,
880  int cursorOptions,
881  ParamListInfo boundParams)
882 {
883  PlannedStmt *result;
884 
885  /*
886  * We can't process the query if no query_string is provided, as
887  * pgss_store needs it. We also ignore query without queryid, as it would
888  * be treated as a utility statement, which may not be the case.
889  */
891  && pgss_track_planning && query_string
892  && parse->queryId != UINT64CONST(0))
893  {
896  BufferUsage bufusage_start,
897  bufusage;
898  WalUsage walusage_start,
899  walusage;
900 
901  /* We need to track buffer usage as the planner can access them. */
902  bufusage_start = pgBufferUsage;
903 
904  /*
905  * Similarly the planner could write some WAL records in some cases
906  * (e.g. setting a hint bit with those being WAL-logged)
907  */
908  walusage_start = pgWalUsage;
910 
911  nesting_level++;
912  PG_TRY();
913  {
914  if (prev_planner_hook)
915  result = prev_planner_hook(parse, query_string, cursorOptions,
916  boundParams);
917  else
918  result = standard_planner(parse, query_string, cursorOptions,
919  boundParams);
920  }
921  PG_FINALLY();
922  {
923  nesting_level--;
924  }
925  PG_END_TRY();
926 
929 
930  /* calc differences of buffer counters. */
931  memset(&bufusage, 0, sizeof(BufferUsage));
932  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
933 
934  /* calc differences of WAL counters. */
935  memset(&walusage, 0, sizeof(WalUsage));
936  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
937 
938  pgss_store(query_string,
939  parse->queryId,
940  parse->stmt_location,
941  parse->stmt_len,
942  PGSS_PLAN,
944  0,
945  &bufusage,
946  &walusage,
947  NULL,
948  NULL);
949  }
950  else
951  {
952  /*
953  * Even though we're not tracking plan time for this statement, we
954  * must still increment the nesting level, to ensure that functions
955  * evaluated during planning are not seen as top-level calls.
956  */
957  nesting_level++;
958  PG_TRY();
959  {
960  if (prev_planner_hook)
961  result = prev_planner_hook(parse, query_string, cursorOptions,
962  boundParams);
963  else
964  result = standard_planner(parse, query_string, cursorOptions,
965  boundParams);
966  }
967  PG_FINALLY();
968  {
969  nesting_level--;
970  }
971  PG_END_TRY();
972  }
973 
974  return result;
975 }
976 
977 /*
978  * ExecutorStart hook: start up tracking if needed
979  */
980 static void
981 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
982 {
983  if (prev_ExecutorStart)
984  prev_ExecutorStart(queryDesc, eflags);
985  else
986  standard_ExecutorStart(queryDesc, eflags);
987 
988  /*
989  * If query has queryId zero, don't track it. This prevents double
990  * counting of optimizable statements that are directly contained in
991  * utility statements.
992  */
993  if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
994  {
995  /*
996  * Set up to track total elapsed time in ExecutorRun. Make sure the
997  * space is allocated in the per-query context so it will go away at
998  * ExecutorEnd.
999  */
1000  if (queryDesc->totaltime == NULL)
1001  {
1002  MemoryContext oldcxt;
1003 
1004  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1005  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1006  MemoryContextSwitchTo(oldcxt);
1007  }
1008  }
1009 }
1010 
1011 /*
1012  * ExecutorRun hook: all we need do is track nesting depth
1013  */
1014 static void
1015 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
1016  bool execute_once)
1017 {
1018  nesting_level++;
1019  PG_TRY();
1020  {
1021  if (prev_ExecutorRun)
1022  prev_ExecutorRun(queryDesc, direction, count, execute_once);
1023  else
1024  standard_ExecutorRun(queryDesc, direction, count, execute_once);
1025  }
1026  PG_FINALLY();
1027  {
1028  nesting_level--;
1029  }
1030  PG_END_TRY();
1031 }
1032 
1033 /*
1034  * ExecutorFinish hook: all we need do is track nesting depth
1035  */
1036 static void
1038 {
1039  nesting_level++;
1040  PG_TRY();
1041  {
1042  if (prev_ExecutorFinish)
1043  prev_ExecutorFinish(queryDesc);
1044  else
1045  standard_ExecutorFinish(queryDesc);
1046  }
1047  PG_FINALLY();
1048  {
1049  nesting_level--;
1050  }
1051  PG_END_TRY();
1052 }
1053 
1054 /*
1055  * ExecutorEnd hook: store results if needed
1056  */
1057 static void
1059 {
1060  uint64 queryId = queryDesc->plannedstmt->queryId;
1061 
1062  if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1064  {
1065  /*
1066  * Make sure stats accumulation is done. (Note: it's okay if several
1067  * levels of hook all do this.)
1068  */
1069  InstrEndLoop(queryDesc->totaltime);
1070 
1071  pgss_store(queryDesc->sourceText,
1072  queryId,
1073  queryDesc->plannedstmt->stmt_location,
1074  queryDesc->plannedstmt->stmt_len,
1075  PGSS_EXEC,
1076  queryDesc->totaltime->total * 1000.0, /* convert to msec */
1077  queryDesc->estate->es_total_processed,
1078  &queryDesc->totaltime->bufusage,
1079  &queryDesc->totaltime->walusage,
1080  queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1081  NULL);
1082  }
1083 
1084  if (prev_ExecutorEnd)
1085  prev_ExecutorEnd(queryDesc);
1086  else
1087  standard_ExecutorEnd(queryDesc);
1088 }
1089 
1090 /*
1091  * ProcessUtility hook
1092  */
1093 static void
1094 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1095  bool readOnlyTree,
1097  ParamListInfo params, QueryEnvironment *queryEnv,
1099 {
1100  Node *parsetree = pstmt->utilityStmt;
1101  uint64 saved_queryId = pstmt->queryId;
1102  int saved_stmt_location = pstmt->stmt_location;
1103  int saved_stmt_len = pstmt->stmt_len;
1104  bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
1105 
1106  /*
1107  * Force utility statements to get queryId zero. We do this even in cases
1108  * where the statement contains an optimizable statement for which a
1109  * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1110  * cases, runtime control will first go through ProcessUtility and then
1111  * the executor, and we don't want the executor hooks to do anything,
1112  * since we are already measuring the statement's costs at the utility
1113  * level.
1114  *
1115  * Note that this is only done if pg_stat_statements is enabled and
1116  * configured to track utility statements, in the unlikely possibility
1117  * that user configured another extension to handle utility statements
1118  * only.
1119  */
1120  if (enabled)
1121  pstmt->queryId = UINT64CONST(0);
1122 
1123  /*
1124  * If it's an EXECUTE statement, we don't track it and don't increment the
1125  * nesting level. This allows the cycles to be charged to the underlying
1126  * PREPARE instead (by the Executor hooks), which is much more useful.
1127  *
1128  * We also don't track execution of PREPARE. If we did, we would get one
1129  * hash table entry for the PREPARE (with hash calculated from the query
1130  * string), and then a different one with the same query string (but hash
1131  * calculated from the query tree) would be used to accumulate costs of
1132  * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1133  * actually run the planner (only parse+rewrite), its costs are generally
1134  * pretty negligible and it seems okay to just ignore it.
1135  */
1136  if (enabled &&
1137  !IsA(parsetree, ExecuteStmt) &&
1138  !IsA(parsetree, PrepareStmt))
1139  {
1140  instr_time start;
1142  uint64 rows;
1143  BufferUsage bufusage_start,
1144  bufusage;
1145  WalUsage walusage_start,
1146  walusage;
1147 
1148  bufusage_start = pgBufferUsage;
1149  walusage_start = pgWalUsage;
1151 
1152  nesting_level++;
1153  PG_TRY();
1154  {
1155  if (prev_ProcessUtility)
1156  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1157  context, params, queryEnv,
1158  dest, qc);
1159  else
1160  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1161  context, params, queryEnv,
1162  dest, qc);
1163  }
1164  PG_FINALLY();
1165  {
1166  nesting_level--;
1167  }
1168  PG_END_TRY();
1169 
1170  /*
1171  * CAUTION: do not access the *pstmt data structure again below here.
1172  * If it was a ROLLBACK or similar, that data structure may have been
1173  * freed. We must copy everything we still need into local variables,
1174  * which we did above.
1175  *
1176  * For the same reason, we can't risk restoring pstmt->queryId to its
1177  * former value, which'd otherwise be a good idea.
1178  */
1179 
1182 
1183  /*
1184  * Track the total number of rows retrieved or affected by the utility
1185  * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1186  * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1187  */
1188  rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1189  qc->commandTag == CMDTAG_FETCH ||
1190  qc->commandTag == CMDTAG_SELECT ||
1191  qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1192  qc->nprocessed : 0;
1193 
1194  /* calc differences of buffer counters. */
1195  memset(&bufusage, 0, sizeof(BufferUsage));
1196  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1197 
1198  /* calc differences of WAL counters. */
1199  memset(&walusage, 0, sizeof(WalUsage));
1200  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1201 
1202  pgss_store(queryString,
1203  saved_queryId,
1204  saved_stmt_location,
1205  saved_stmt_len,
1206  PGSS_EXEC,
1208  rows,
1209  &bufusage,
1210  &walusage,
1211  NULL,
1212  NULL);
1213  }
1214  else
1215  {
1216  /*
1217  * Even though we're not tracking execution time for this statement,
1218  * we must still increment the nesting level, to ensure that functions
1219  * evaluated within it are not seen as top-level calls. But don't do
1220  * so for EXECUTE; that way, when control reaches pgss_planner or
1221  * pgss_ExecutorStart, we will treat the costs as top-level if
1222  * appropriate. Likewise, don't bump for PREPARE, so that parse
1223  * analysis will treat the statement as top-level if appropriate.
1224  *
1225  * To be absolutely certain we don't mess up the nesting level,
1226  * evaluate the bump_level condition just once.
1227  */
1228  bool bump_level =
1229  !IsA(parsetree, ExecuteStmt) &&
1230  !IsA(parsetree, PrepareStmt);
1231 
1232  if (bump_level)
1233  nesting_level++;
1234  PG_TRY();
1235  {
1236  if (prev_ProcessUtility)
1237  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1238  context, params, queryEnv,
1239  dest, qc);
1240  else
1241  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1242  context, params, queryEnv,
1243  dest, qc);
1244  }
1245  PG_FINALLY();
1246  {
1247  if (bump_level)
1248  nesting_level--;
1249  }
1250  PG_END_TRY();
1251  }
1252 }
1253 
1254 /*
1255  * Store some statistics for a statement.
1256  *
1257  * If jstate is not NULL then we're trying to create an entry for which
1258  * we have no statistics as yet; we just want to record the normalized
1259  * query string. total_time, rows, bufusage and walusage are ignored in this
1260  * case.
1261  *
1262  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1263  * for the arrays in the Counters field.
1264  */
1265 static void
1266 pgss_store(const char *query, uint64 queryId,
1267  int query_location, int query_len,
1268  pgssStoreKind kind,
1269  double total_time, uint64 rows,
1270  const BufferUsage *bufusage,
1271  const WalUsage *walusage,
1272  const struct JitInstrumentation *jitusage,
1273  JumbleState *jstate)
1274 {
1275  pgssHashKey key;
1276  pgssEntry *entry;
1277  char *norm_query = NULL;
1278  int encoding = GetDatabaseEncoding();
1279 
1280  Assert(query != NULL);
1281 
1282  /* Safety check... */
1283  if (!pgss || !pgss_hash)
1284  return;
1285 
1286  /*
1287  * Nothing to do if compute_query_id isn't enabled and no other module
1288  * computed a query identifier.
1289  */
1290  if (queryId == UINT64CONST(0))
1291  return;
1292 
1293  /*
1294  * Confine our attention to the relevant part of the string, if the query
1295  * is a portion of a multi-statement source string, and update query
1296  * location and length if needed.
1297  */
1298  query = CleanQuerytext(query, &query_location, &query_len);
1299 
1300  /* Set up key for hashtable search */
1301 
1302  /* clear padding */
1303  memset(&key, 0, sizeof(pgssHashKey));
1304 
1305  key.userid = GetUserId();
1306  key.dbid = MyDatabaseId;
1307  key.queryid = queryId;
1308  key.toplevel = (nesting_level == 0);
1309 
1310  /* Lookup the hash table entry with shared lock. */
1312 
1313  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1314 
1315  /* Create new entry, if not present */
1316  if (!entry)
1317  {
1318  Size query_offset;
1319  int gc_count;
1320  bool stored;
1321  bool do_gc;
1322 
1323  /*
1324  * Create a new, normalized query string if caller asked. We don't
1325  * need to hold the lock while doing this work. (Note: in any case,
1326  * it's possible that someone else creates a duplicate hashtable entry
1327  * in the interval where we don't hold the lock below. That case is
1328  * handled by entry_alloc.)
1329  */
1330  if (jstate)
1331  {
1333  norm_query = generate_normalized_query(jstate, query,
1334  query_location,
1335  &query_len);
1337  }
1338 
1339  /* Append new query text to file with only shared lock held */
1340  stored = qtext_store(norm_query ? norm_query : query, query_len,
1341  &query_offset, &gc_count);
1342 
1343  /*
1344  * Determine whether we need to garbage collect external query texts
1345  * while the shared lock is still held. This micro-optimization
1346  * avoids taking the time to decide this while holding exclusive lock.
1347  */
1348  do_gc = need_gc_qtexts();
1349 
1350  /* Need exclusive lock to make a new hashtable entry - promote */
1353 
1354  /*
1355  * A garbage collection may have occurred while we weren't holding the
1356  * lock. In the unlikely event that this happens, the query text we
1357  * stored above will have been garbage collected, so write it again.
1358  * This should be infrequent enough that doing it while holding
1359  * exclusive lock isn't a performance problem.
1360  */
1361  if (!stored || pgss->gc_count != gc_count)
1362  stored = qtext_store(norm_query ? norm_query : query, query_len,
1363  &query_offset, NULL);
1364 
1365  /* If we failed to write to the text file, give up */
1366  if (!stored)
1367  goto done;
1368 
1369  /* OK to create a new hashtable entry */
1370  entry = entry_alloc(&key, query_offset, query_len, encoding,
1371  jstate != NULL);
1372 
1373  /* If needed, perform garbage collection while exclusive lock held */
1374  if (do_gc)
1375  gc_qtexts();
1376  }
1377 
1378  /* Increment the counts, except when jstate is not NULL */
1379  if (!jstate)
1380  {
1381  Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1382 
1383  /*
1384  * Grab the spinlock while updating the counters (see comment about
1385  * locking rules at the head of the file)
1386  */
1387  SpinLockAcquire(&entry->mutex);
1388 
1389  /* "Unstick" entry if it was previously sticky */
1390  if (IS_STICKY(entry->counters))
1391  entry->counters.usage = USAGE_INIT;
1392 
1393  entry->counters.calls[kind] += 1;
1394  entry->counters.total_time[kind] += total_time;
1395 
1396  if (entry->counters.calls[kind] == 1)
1397  {
1398  entry->counters.min_time[kind] = total_time;
1399  entry->counters.max_time[kind] = total_time;
1400  entry->counters.mean_time[kind] = total_time;
1401  }
1402  else
1403  {
1404  /*
1405  * Welford's method for accurately computing variance. See
1406  * <http://www.johndcook.com/blog/standard_deviation/>
1407  */
1408  double old_mean = entry->counters.mean_time[kind];
1409 
1410  entry->counters.mean_time[kind] +=
1411  (total_time - old_mean) / entry->counters.calls[kind];
1412  entry->counters.sum_var_time[kind] +=
1413  (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1414 
1415  /*
1416  * Calculate min and max time. min = 0 and max = 0 means that the
1417  * min/max statistics were reset
1418  */
1419  if (entry->counters.min_time[kind] == 0
1420  && entry->counters.max_time[kind] == 0)
1421  {
1422  entry->counters.min_time[kind] = total_time;
1423  entry->counters.max_time[kind] = total_time;
1424  }
1425  else
1426  {
1427  if (entry->counters.min_time[kind] > total_time)
1428  entry->counters.min_time[kind] = total_time;
1429  if (entry->counters.max_time[kind] < total_time)
1430  entry->counters.max_time[kind] = total_time;
1431  }
1432  }
1433  entry->counters.rows += rows;
1434  entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1435  entry->counters.shared_blks_read += bufusage->shared_blks_read;
1436  entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1437  entry->counters.shared_blks_written += bufusage->shared_blks_written;
1438  entry->counters.local_blks_hit += bufusage->local_blks_hit;
1439  entry->counters.local_blks_read += bufusage->local_blks_read;
1440  entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1441  entry->counters.local_blks_written += bufusage->local_blks_written;
1442  entry->counters.temp_blks_read += bufusage->temp_blks_read;
1443  entry->counters.temp_blks_written += bufusage->temp_blks_written;
1450  entry->counters.usage += USAGE_EXEC(total_time);
1451  entry->counters.wal_records += walusage->wal_records;
1452  entry->counters.wal_fpi += walusage->wal_fpi;
1453  entry->counters.wal_bytes += walusage->wal_bytes;
1454  if (jitusage)
1455  {
1456  entry->counters.jit_functions += jitusage->created_functions;
1458 
1459  if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1460  entry->counters.jit_deform_count++;
1462 
1464  entry->counters.jit_inlining_count++;
1466 
1470 
1472  entry->counters.jit_emission_count++;
1474  }
1475 
1476  SpinLockRelease(&entry->mutex);
1477  }
1478 
1479 done:
1481 
1482  /* We postpone this clean-up until we're out of the lock */
1483  if (norm_query)
1484  pfree(norm_query);
1485 }
1486 
1487 /*
1488  * Reset statement statistics corresponding to userid, dbid, and queryid.
1489  */
1490 Datum
1492 {
1493  Oid userid;
1494  Oid dbid;
1495  uint64 queryid;
1496 
1497  userid = PG_GETARG_OID(0);
1498  dbid = PG_GETARG_OID(1);
1499  queryid = (uint64) PG_GETARG_INT64(2);
1500 
1501  entry_reset(userid, dbid, queryid, false);
1502 
1503  PG_RETURN_VOID();
1504 }
1505 
1506 Datum
1508 {
1509  Oid userid;
1510  Oid dbid;
1511  uint64 queryid;
1512  bool minmax_only;
1513 
1514  userid = PG_GETARG_OID(0);
1515  dbid = PG_GETARG_OID(1);
1516  queryid = (uint64) PG_GETARG_INT64(2);
1517  minmax_only = PG_GETARG_BOOL(3);
1518 
1519  PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1520 }
1521 
1522 /*
1523  * Reset statement statistics.
1524  */
1525 Datum
1527 {
1528  entry_reset(0, 0, 0, false);
1529 
1530  PG_RETURN_VOID();
1531 }
1532 
/*
 * Each per-version constant is the number of values that
 * pg_stat_statements_internal() must have emitted for one row under that
 * API version (checked by the Assert at the end of its per-entry loop).
 * PG_STAT_STATEMENTS_COLS sizes that function's per-row arrays, so it has
 * to stay equal to the largest per-version value.
 */
1533 /* Number of output arguments (columns) for various API versions */
1534 #define PG_STAT_STATEMENTS_COLS_V1_0 14
1535 #define PG_STAT_STATEMENTS_COLS_V1_1 18
1536 #define PG_STAT_STATEMENTS_COLS_V1_2 19
1537 #define PG_STAT_STATEMENTS_COLS_V1_3 23
1538 #define PG_STAT_STATEMENTS_COLS_V1_8 32
1539 #define PG_STAT_STATEMENTS_COLS_V1_9 33
1540 #define PG_STAT_STATEMENTS_COLS_V1_10 43
1541 #define PG_STAT_STATEMENTS_COLS_V1_11 49
1542 #define PG_STAT_STATEMENTS_COLS 49 /* maximum of above */
1543 
1544 /*
1545  * Retrieve statement statistics.
1546  *
1547  * The SQL API of this function has changed multiple times, and will likely
1548  * do so again in future. To support the case where a newer version of this
1549  * loadable module is being used with an old SQL declaration of the function,
1550  * we continue to support the older API versions. For 1.2 and later, the
1551  * expected API version is identified by embedding it in the C name of the
1552  * function. Unfortunately we weren't bright enough to do that for 1.1.
1553  */
1554 Datum
1556 {
1557  bool showtext = PG_GETARG_BOOL(0);
1558 
1559  pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1560 
1561  return (Datum) 0;
1562 }
1563 
1564 Datum
1566 {
1567  bool showtext = PG_GETARG_BOOL(0);
1568 
1569  pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1570 
1571  return (Datum) 0;
1572 }
1573 
1574 Datum
1576 {
1577  bool showtext = PG_GETARG_BOOL(0);
1578 
1579  pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1580 
1581  return (Datum) 0;
1582 }
1583 
1584 Datum
1586 {
1587  bool showtext = PG_GETARG_BOOL(0);
1588 
1589  pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1590 
1591  return (Datum) 0;
1592 }
1593 
1594 Datum
1596 {
1597  bool showtext = PG_GETARG_BOOL(0);
1598 
1599  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1600 
1601  return (Datum) 0;
1602 }
1603 
1604 Datum
1606 {
1607  bool showtext = PG_GETARG_BOOL(0);
1608 
1609  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1610 
1611  return (Datum) 0;
1612 }
1613 
1614 /*
1615  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1616  * This can be removed someday, perhaps.
1617  */
1618 Datum
1620 {
1621  /* If it's really API 1.1, we'll figure that out below */
1622  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1623 
1624  return (Datum) 0;
1625 }
1626 
1627 /* Common code for all versions of pg_stat_statements() */
1628 static void
1630  pgssVersion api_version,
1631  bool showtext)
1632 {
1633  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1634  Oid userid = GetUserId();
1635  bool is_allowed_role = false;
1636  char *qbuffer = NULL;
1637  Size qbuffer_size = 0;
1638  Size extent = 0;
1639  int gc_count = 0;
1640  HASH_SEQ_STATUS hash_seq;
1641  pgssEntry *entry;
1642 
1643  /*
1644  * Superusers or roles with the privileges of pg_read_all_stats members
1645  * are allowed
1646  */
1647  is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1648 
1649  /* hash table must exist already */
1650  if (!pgss || !pgss_hash)
1651  ereport(ERROR,
1652  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1653  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1654 
1655  InitMaterializedSRF(fcinfo, 0);
1656 
1657  /*
1658  * Check we have the expected number of output arguments. Aside from
1659  * being a good safety check, we need a kluge here to detect API version
1660  * 1.1, which was wedged into the code in an ill-considered way.
1661  */
1662  switch (rsinfo->setDesc->natts)
1663  {
1665  if (api_version != PGSS_V1_0)
1666  elog(ERROR, "incorrect number of output arguments");
1667  break;
1669  /* pg_stat_statements() should have told us 1.0 */
1670  if (api_version != PGSS_V1_0)
1671  elog(ERROR, "incorrect number of output arguments");
1672  api_version = PGSS_V1_1;
1673  break;
1675  if (api_version != PGSS_V1_2)
1676  elog(ERROR, "incorrect number of output arguments");
1677  break;
1679  if (api_version != PGSS_V1_3)
1680  elog(ERROR, "incorrect number of output arguments");
1681  break;
1683  if (api_version != PGSS_V1_8)
1684  elog(ERROR, "incorrect number of output arguments");
1685  break;
1687  if (api_version != PGSS_V1_9)
1688  elog(ERROR, "incorrect number of output arguments");
1689  break;
1691  if (api_version != PGSS_V1_10)
1692  elog(ERROR, "incorrect number of output arguments");
1693  break;
1695  if (api_version != PGSS_V1_11)
1696  elog(ERROR, "incorrect number of output arguments");
1697  break;
1698  default:
1699  elog(ERROR, "incorrect number of output arguments");
1700  }
1701 
1702  /*
1703  * We'd like to load the query text file (if needed) while not holding any
1704  * lock on pgss->lock. In the worst case we'll have to do this again
1705  * after we have the lock, but it's unlikely enough to make this a win
1706  * despite occasional duplicated work. We need to reload if anybody
1707  * writes to the file (either a retail qtext_store(), or a garbage
1708  * collection) between this point and where we've gotten shared lock. If
1709  * a qtext_store is actually in progress when we look, we might as well
1710  * skip the speculative load entirely.
1711  */
1712  if (showtext)
1713  {
1714  int n_writers;
1715 
1716  /* Take the mutex so we can examine variables */
1718  extent = pgss->extent;
1719  n_writers = pgss->n_writers;
1720  gc_count = pgss->gc_count;
1722 
1723  /* No point in loading file now if there are active writers */
1724  if (n_writers == 0)
1725  qbuffer = qtext_load_file(&qbuffer_size);
1726  }
1727 
1728  /*
1729  * Get shared lock, load or reload the query text file if we must, and
1730  * iterate over the hashtable entries.
1731  *
1732  * With a large hash table, we might be holding the lock rather longer
1733  * than one could wish. However, this only blocks creation of new hash
1734  * table entries, and the larger the hash table the less likely that is to
1735  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1736  * we need to partition the hash table to limit the time spent holding any
1737  * one lock.
1738  */
1740 
1741  if (showtext)
1742  {
1743  /*
1744  * Here it is safe to examine extent and gc_count without taking the
1745  * mutex. Note that although other processes might change
1746  * pgss->extent just after we look at it, the strings they then write
1747  * into the file cannot yet be referenced in the hashtable, so we
1748  * don't care whether we see them or not.
1749  *
1750  * If qtext_load_file fails, we just press on; we'll return NULL for
1751  * every query text.
1752  */
1753  if (qbuffer == NULL ||
1754  pgss->extent != extent ||
1755  pgss->gc_count != gc_count)
1756  {
1757  free(qbuffer);
1758  qbuffer = qtext_load_file(&qbuffer_size);
1759  }
1760  }
1761 
1762  hash_seq_init(&hash_seq, pgss_hash);
1763  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1764  {
1766  bool nulls[PG_STAT_STATEMENTS_COLS];
1767  int i = 0;
1768  Counters tmp;
1769  double stddev;
1770  int64 queryid = entry->key.queryid;
1771  TimestampTz stats_since;
1772  TimestampTz minmax_stats_since;
1773 
1774  memset(values, 0, sizeof(values));
1775  memset(nulls, 0, sizeof(nulls));
1776 
1777  values[i++] = ObjectIdGetDatum(entry->key.userid);
1778  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1779  if (api_version >= PGSS_V1_9)
1780  values[i++] = BoolGetDatum(entry->key.toplevel);
1781 
1782  if (is_allowed_role || entry->key.userid == userid)
1783  {
1784  if (api_version >= PGSS_V1_2)
1785  values[i++] = Int64GetDatumFast(queryid);
1786 
1787  if (showtext)
1788  {
1789  char *qstr = qtext_fetch(entry->query_offset,
1790  entry->query_len,
1791  qbuffer,
1792  qbuffer_size);
1793 
1794  if (qstr)
1795  {
1796  char *enc;
1797 
1798  enc = pg_any_to_server(qstr,
1799  entry->query_len,
1800  entry->encoding);
1801 
1803 
1804  if (enc != qstr)
1805  pfree(enc);
1806  }
1807  else
1808  {
1809  /* Just return a null if we fail to find the text */
1810  nulls[i++] = true;
1811  }
1812  }
1813  else
1814  {
1815  /* Query text not requested */
1816  nulls[i++] = true;
1817  }
1818  }
1819  else
1820  {
1821  /* Don't show queryid */
1822  if (api_version >= PGSS_V1_2)
1823  nulls[i++] = true;
1824 
1825  /*
1826  * Don't show query text, but hint as to the reason for not doing
1827  * so if it was requested
1828  */
1829  if (showtext)
1830  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1831  else
1832  nulls[i++] = true;
1833  }
1834 
1835  /* copy counters to a local variable to keep locking time short */
1836  SpinLockAcquire(&entry->mutex);
1837  tmp = entry->counters;
1838  stats_since = entry->stats_since;
1839  minmax_stats_since = entry->minmax_stats_since;
1840  SpinLockRelease(&entry->mutex);
1841 
1842  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1843  if (IS_STICKY(tmp))
1844  continue;
1845 
1846  /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1847  for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1848  {
1849  if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1850  {
1851  values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1852  values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1853  }
1854 
1855  if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1856  api_version >= PGSS_V1_8)
1857  {
1858  values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1859  values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1860  values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1861 
1862  /*
1863  * Note we are calculating the population variance here, not
1864  * the sample variance, as we have data for the whole
1865  * population, so Bessel's correction is not used, and we
1866  * don't divide by tmp.calls - 1.
1867  */
1868  if (tmp.calls[kind] > 1)
1869  stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1870  else
1871  stddev = 0.0;
1872  values[i++] = Float8GetDatumFast(stddev);
1873  }
1874  }
1875  values[i++] = Int64GetDatumFast(tmp.rows);
1878  if (api_version >= PGSS_V1_1)
1883  if (api_version >= PGSS_V1_1)
1888  if (api_version >= PGSS_V1_1)
1889  {
1892  }
1893  if (api_version >= PGSS_V1_11)
1894  {
1897  }
1898  if (api_version >= PGSS_V1_10)
1899  {
1902  }
1903  if (api_version >= PGSS_V1_8)
1904  {
1905  char buf[256];
1906  Datum wal_bytes;
1907 
1909  values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1910 
1911  snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1912 
1913  /* Convert to numeric. */
1914  wal_bytes = DirectFunctionCall3(numeric_in,
1916  ObjectIdGetDatum(0),
1917  Int32GetDatum(-1));
1918  values[i++] = wal_bytes;
1919  }
1920  if (api_version >= PGSS_V1_10)
1921  {
1930  }
1931  if (api_version >= PGSS_V1_11)
1932  {
1935  values[i++] = TimestampTzGetDatum(stats_since);
1936  values[i++] = TimestampTzGetDatum(minmax_stats_since);
1937  }
1938 
1939  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1940  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1941  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1942  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1943  api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1944  api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1945  api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
1946  api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
1947  -1 /* fail if you forget to update this assert */ ));
1948 
1949  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1950  }
1951 
1953 
1954  free(qbuffer);
1955 }
1956 
1957 /* Number of output arguments (columns) for pg_stat_statements_info */
1958 #define PG_STAT_STATEMENTS_INFO_COLS 2
1959 
1960 /*
1961  * Return statistics of pg_stat_statements.
1962  */
1963 Datum
1965 {
1966  pgssGlobalStats stats;
1967  TupleDesc tupdesc;
1969  bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
1970 
1971  if (!pgss || !pgss_hash)
1972  ereport(ERROR,
1973  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1974  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1975 
1976  /* Build a tuple descriptor for our result type */
1977  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1978  elog(ERROR, "return type must be a row type");
1979 
1980  /* Read global statistics for pg_stat_statements */
1982  stats = pgss->stats;
1984 
1985  values[0] = Int64GetDatum(stats.dealloc);
1987 
1989 }
1990 
1991 /*
1992  * Estimate shared memory space needed.
1993  */
1994 static Size
1996 {
1997  Size size;
1998 
1999  size = MAXALIGN(sizeof(pgssSharedState));
2001 
2002  return size;
2003 }
2004 
2005 /*
2006  * Allocate a new hashtable entry.
2007  * caller must hold an exclusive lock on pgss->lock
2008  *
2009  * "query" need not be null-terminated; we rely on query_len instead
2010  *
2011  * If "sticky" is true, make the new entry artificially sticky so that it will
2012  * probably still be there when the query finishes execution. We do this by
2013  * giving it a median usage value rather than the normal value. (Strictly
2014  * speaking, query strings are normalized on a best effort basis, though it
2015  * would be difficult to demonstrate this even under artificial conditions.)
2016  *
2017  * Note: despite needing exclusive lock, it's not an error for the target
2018  * entry to already exist. This is because pgss_store releases and
2019  * reacquires lock after failing to find a match; so someone else could
2020  * have made the entry while we waited to get exclusive lock.
2021  */
2022 static pgssEntry *
2023 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2024  bool sticky)
2025 {
2026  pgssEntry *entry;
2027  bool found;
2028 
2029  /* Make space if needed */
2031  entry_dealloc();
2032 
2033  /* Find or create an entry with desired hash code */
2034  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2035 
2036  if (!found)
2037  {
2038  /* New entry, initialize it */
2039 
2040  /* reset the statistics */
2041  memset(&entry->counters, 0, sizeof(Counters));
2042  /* set the appropriate initial usage count */
2043  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2044  /* re-initialize the mutex each time ... we assume no one using it */
2045  SpinLockInit(&entry->mutex);
2046  /* ... and don't forget the query text metadata */
2047  Assert(query_len >= 0);
2048  entry->query_offset = query_offset;
2049  entry->query_len = query_len;
2050  entry->encoding = encoding;
2051  entry->stats_since = GetCurrentTimestamp();
2052  entry->minmax_stats_since = entry->stats_since;
2053  }
2054 
2055  return entry;
2056 }
2057 
2058 /*
2059  * qsort comparator for sorting into increasing usage order
2060  */
2061 static int
2062 entry_cmp(const void *lhs, const void *rhs)
2063 {
2064  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2065  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2066 
2067  if (l_usage < r_usage)
2068  return -1;
2069  else if (l_usage > r_usage)
2070  return +1;
2071  else
2072  return 0;
2073 }
2074 
2075 /*
2076  * Deallocate least-used entries.
2077  *
2078  * Caller must hold an exclusive lock on pgss->lock.
2079  */
2080 static void
2082 {
2083  HASH_SEQ_STATUS hash_seq;
2084  pgssEntry **entries;
2085  pgssEntry *entry;
2086  int nvictims;
2087  int i;
2088  Size tottextlen;
2089  int nvalidtexts;
2090 
2091  /*
2092  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2093  * While we're scanning the table, apply the decay factor to the usage
2094  * values, and update the mean query length.
2095  *
2096  * Note that the mean query length is almost immediately obsolete, since
2097  * we compute it before not after discarding the least-used entries.
2098  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2099  * making two passes to get a more current result. Likewise, the new
2100  * cur_median_usage includes the entries we're about to zap.
2101  */
2102 
2103  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2104 
2105  i = 0;
2106  tottextlen = 0;
2107  nvalidtexts = 0;
2108 
2109  hash_seq_init(&hash_seq, pgss_hash);
2110  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2111  {
2112  entries[i++] = entry;
2113  /* "Sticky" entries get a different usage decay rate. */
2114  if (IS_STICKY(entry->counters))
2116  else
2118  /* In the mean length computation, ignore dropped texts. */
2119  if (entry->query_len >= 0)
2120  {
2121  tottextlen += entry->query_len + 1;
2122  nvalidtexts++;
2123  }
2124  }
2125 
2126  /* Sort into increasing order by usage */
2127  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2128 
2129  /* Record the (approximate) median usage */
2130  if (i > 0)
2131  pgss->cur_median_usage = entries[i / 2]->counters.usage;
2132  /* Record the mean query length */
2133  if (nvalidtexts > 0)
2134  pgss->mean_query_len = tottextlen / nvalidtexts;
2135  else
2137 
2138  /* Now zap an appropriate fraction of lowest-usage entries */
2139  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2140  nvictims = Min(nvictims, i);
2141 
2142  for (i = 0; i < nvictims; i++)
2143  {
2144  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2145  }
2146 
2147  pfree(entries);
2148 
2149  /* Increment the number of times entries are deallocated */
2151  pgss->stats.dealloc += 1;
2153 }
2154 
2155 /*
2156  * Given a query string (not necessarily null-terminated), allocate a new
2157  * entry in the external query text file and store the string there.
2158  *
2159  * If successful, returns true, and stores the new entry's offset in the file
2160  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2161  * number of garbage collections that have occurred so far.
2162  *
2163  * On failure, returns false.
2164  *
2165  * At least a shared lock on pgss->lock must be held by the caller, so as
2166  * to prevent a concurrent garbage collection. Share-lock-holding callers
2167  * should pass a gc_count pointer to obtain the number of garbage collections,
2168  * so that they can recheck the count after obtaining exclusive lock to
2169  * detect whether a garbage collection occurred (and removed this entry).
2170  */
2171 static bool
2172 qtext_store(const char *query, int query_len,
2173  Size *query_offset, int *gc_count)
2174 {
2175  Size off;
2176  int fd;
2177 
2178  /*
2179  * We use a spinlock to protect extent/n_writers/gc_count, so that
2180  * multiple processes may execute this function concurrently.
2181  */
2183  off = pgss->extent;
2184  pgss->extent += query_len + 1;
2185  pgss->n_writers++;
2186  if (gc_count)
2187  *gc_count = pgss->gc_count;
2189 
2190  *query_offset = off;
2191 
2192  /*
2193  * Don't allow the file to grow larger than what qtext_load_file can
2194  * (theoretically) handle. This has been seen to be reachable on 32-bit
2195  * platforms.
2196  */
2197  if (unlikely(query_len >= MaxAllocHugeSize - off))
2198  {
2199  errno = EFBIG; /* not quite right, but it'll do */
2200  fd = -1;
2201  goto error;
2202  }
2203 
2204  /* Now write the data into the successfully-reserved part of the file */
2205  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2206  if (fd < 0)
2207  goto error;
2208 
2209  if (pg_pwrite(fd, query, query_len, off) != query_len)
2210  goto error;
2211  if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2212  goto error;
2213 
2215 
2216  /* Mark our write complete */
2218  pgss->n_writers--;
2220 
2221  return true;
2222 
2223 error:
2224  ereport(LOG,
2226  errmsg("could not write file \"%s\": %m",
2227  PGSS_TEXT_FILE)));
2228 
2229  if (fd >= 0)
2231 
2232  /* Mark our write complete */
2234  pgss->n_writers--;
2236 
2237  return false;
2238 }
2239 
2240 /*
2241  * Read the external query text file into a malloc'd buffer.
2242  *
2243  * Returns NULL (without throwing an error) if unable to read, eg
2244  * file not there or insufficient memory.
2245  *
2246  * On success, the buffer size is also returned into *buffer_size.
2247  *
2248  * This can be called without any lock on pgss->lock, but in that case
2249  * the caller is responsible for verifying that the result is sane.
2250  */
2251 static char *
2252 qtext_load_file(Size *buffer_size)
2253 {
2254  char *buf;
2255  int fd;
2256  struct stat stat;
2257  Size nread;
2258 
2260  if (fd < 0)
2261  {
2262  if (errno != ENOENT)
2263  ereport(LOG,
2265  errmsg("could not read file \"%s\": %m",
2266  PGSS_TEXT_FILE)));
2267  return NULL;
2268  }
2269 
2270  /* Get file length */
2271  if (fstat(fd, &stat))
2272  {
2273  ereport(LOG,
2275  errmsg("could not stat file \"%s\": %m",
2276  PGSS_TEXT_FILE)));
2278  return NULL;
2279  }
2280 
2281  /* Allocate buffer; beware that off_t might be wider than size_t */
2282  if (stat.st_size <= MaxAllocHugeSize)
2283  buf = (char *) malloc(stat.st_size);
2284  else
2285  buf = NULL;
2286  if (buf == NULL)
2287  {
2288  ereport(LOG,
2289  (errcode(ERRCODE_OUT_OF_MEMORY),
2290  errmsg("out of memory"),
2291  errdetail("Could not allocate enough memory to read file \"%s\".",
2292  PGSS_TEXT_FILE)));
2294  return NULL;
2295  }
2296 
2297  /*
2298  * OK, slurp in the file. Windows fails if we try to read more than
2299  * INT_MAX bytes at once, and other platforms might not like that either,
2300  * so read a very large file in 1GB segments.
2301  */
2302  nread = 0;
2303  while (nread < stat.st_size)
2304  {
2305  int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2306 
2307  /*
2308  * If we get a short read and errno doesn't get set, the reason is
2309  * probably that garbage collection truncated the file since we did
2310  * the fstat(), so we don't log a complaint --- but we don't return
2311  * the data, either, since it's most likely corrupt due to concurrent
2312  * writes from garbage collection.
2313  */
2314  errno = 0;
2315  if (read(fd, buf + nread, toread) != toread)
2316  {
2317  if (errno)
2318  ereport(LOG,
2320  errmsg("could not read file \"%s\": %m",
2321  PGSS_TEXT_FILE)));
2322  free(buf);
2324  return NULL;
2325  }
2326  nread += toread;
2327  }
2328 
2329  if (CloseTransientFile(fd) != 0)
2330  ereport(LOG,
2332  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2333 
2334  *buffer_size = nread;
2335  return buf;
2336 }
2337 
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len give the position and length (excluding the
 * trailing null) of the text; buffer/buffer_size describe the file image.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
            char *buffer, Size buffer_size)
{
    /* File read failed? */
    if (buffer == NULL)
        return NULL;

    /* Bogus length? */
    if (query_len < 0)
        return NULL;

    /*
     * Bogus offset/length?  The text plus its terminator must lie entirely
     * within the buffer.  Compare against the space remaining after
     * query_offset rather than computing query_offset + query_len, since
     * that sum could wrap around for a garbage offset and slip past the
     * check.
     */
    if (query_offset >= buffer_size ||
        (Size) query_len >= buffer_size - query_offset)
        return NULL;

    /* As a further sanity check, make sure there's a trailing null */
    if (buffer[query_offset + query_len] != '\0')
        return NULL;

    /* Looks OK */
    return buffer + query_offset;
}
2361 
2362 /*
2363  * Do we need to garbage-collect the external query text file?
2364  *
2365  * Caller should hold at least a shared lock on pgss->lock.
2366  */
2367 static bool
2369 {
2370  Size extent;
2371 
2372  /* Read shared extent pointer */
2374  extent = pgss->extent;
2376 
2377  /*
2378  * Don't proceed if file does not exceed 512 bytes per possible entry.
2379  *
2380  * Here and in the next test, 32-bit machines have overflow hazards if
2381  * pgss_max and/or mean_query_len are large. Force the multiplications
2382  * and comparisons to be done in uint64 arithmetic to forestall trouble.
2383  */
2384  if ((uint64) extent < (uint64) 512 * pgss_max)
2385  return false;
2386 
2387  /*
2388  * Don't proceed if file is less than about 50% bloat. Nothing can or
2389  * should be done in the event of unusually large query texts accounting
2390  * for file's large size. We go to the trouble of maintaining the mean
2391  * query length in order to prevent garbage collection from thrashing
2392  * uselessly.
2393  */
2394  if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2395  return false;
2396 
2397  return true;
2398 }
2399 
2400 /*
2401  * Garbage-collect orphaned query texts in external file.
2402  *
2403  * This won't be called often in the typical case, since it's likely that
2404  * there won't be too much churn, and besides, a similar compaction process
2405  * occurs when serializing to disk at shutdown or as part of resetting.
2406  * Despite this, it seems prudent to plan for the edge case where the file
2407  * becomes unreasonably large, with no other method of compaction likely to
2408  * occur in the foreseeable future.
2409  *
2410  * The caller must hold an exclusive lock on pgss->lock.
2411  *
2412  * At the first sign of trouble we unlink the query text file to get a clean
2413  * slate (although existing statistics are retained), rather than risk
2414  * thrashing by allowing the same problem case to recur indefinitely.
2415  */
2416 static void
2418 {
2419  char *qbuffer;
2420  Size qbuffer_size;
2421  FILE *qfile = NULL;
2422  HASH_SEQ_STATUS hash_seq;
2423  pgssEntry *entry;
2424  Size extent;
2425  int nentries;
2426 
2427  /*
2428  * When called from pgss_store, some other session might have proceeded
2429  * with garbage collection in the no-lock-held interim of lock strength
2430  * escalation. Check once more that this is actually necessary.
2431  */
2432  if (!need_gc_qtexts())
2433  return;
2434 
2435  /*
2436  * Load the old texts file. If we fail (out of memory, for instance),
2437  * invalidate query texts. Hopefully this is rare. It might seem better
2438  * to leave things alone on an OOM failure, but the problem is that the
2439  * file is only going to get bigger; hoping for a future non-OOM result is
2440  * risky and can easily lead to complete denial of service.
2441  */
2442  qbuffer = qtext_load_file(&qbuffer_size);
2443  if (qbuffer == NULL)
2444  goto gc_fail;
2445 
2446  /*
2447  * We overwrite the query texts file in place, so as to reduce the risk of
2448  * an out-of-disk-space failure. Since the file is guaranteed not to get
2449  * larger, this should always work on traditional filesystems; though we
2450  * could still lose on copy-on-write filesystems.
2451  */
2453  if (qfile == NULL)
2454  {
2455  ereport(LOG,
2457  errmsg("could not write file \"%s\": %m",
2458  PGSS_TEXT_FILE)));
2459  goto gc_fail;
2460  }
2461 
2462  extent = 0;
2463  nentries = 0;
2464 
2465  hash_seq_init(&hash_seq, pgss_hash);
2466  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2467  {
2468  int query_len = entry->query_len;
2469  char *qry = qtext_fetch(entry->query_offset,
2470  query_len,
2471  qbuffer,
2472  qbuffer_size);
2473 
2474  if (qry == NULL)
2475  {
2476  /* Trouble ... drop the text */
2477  entry->query_offset = 0;
2478  entry->query_len = -1;
2479  /* entry will not be counted in mean query length computation */
2480  continue;
2481  }
2482 
2483  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2484  {
2485  ereport(LOG,
2487  errmsg("could not write file \"%s\": %m",
2488  PGSS_TEXT_FILE)));
2489  hash_seq_term(&hash_seq);
2490  goto gc_fail;
2491  }
2492 
2493  entry->query_offset = extent;
2494  extent += query_len + 1;
2495  nentries++;
2496  }
2497 
2498  /*
2499  * Truncate away any now-unused space. If this fails for some odd reason,
2500  * we log it, but there's no need to fail.
2501  */
2502  if (ftruncate(fileno(qfile), extent) != 0)
2503  ereport(LOG,
2505  errmsg("could not truncate file \"%s\": %m",
2506  PGSS_TEXT_FILE)));
2507 
2508  if (FreeFile(qfile))
2509  {
2510  ereport(LOG,
2512  errmsg("could not write file \"%s\": %m",
2513  PGSS_TEXT_FILE)));
2514  qfile = NULL;
2515  goto gc_fail;
2516  }
2517 
2518  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2519  pgss->extent, extent);
2520 
2521  /* Reset the shared extent pointer */
2522  pgss->extent = extent;
2523 
2524  /*
2525  * Also update the mean query length, to be sure that need_gc_qtexts()
2526  * won't still think we have a problem.
2527  */
2528  if (nentries > 0)
2529  pgss->mean_query_len = extent / nentries;
2530  else
2532 
2533  free(qbuffer);
2534 
2535  /*
2536  * OK, count a garbage collection cycle. (Note: even though we have
2537  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2538  * other processes may examine gc_count while holding only the mutex.
2539  * Also, we have to advance the count *after* we've rewritten the file,
2540  * else other processes might not realize they read a stale file.)
2541  */
2542  record_gc_qtexts();
2543 
2544  return;
2545 
2546 gc_fail:
2547  /* clean up resources */
2548  if (qfile)
2549  FreeFile(qfile);
2550  free(qbuffer);
2551 
2552  /*
2553  * Since the contents of the external file are now uncertain, mark all
2554  * hashtable entries as having invalid texts.
2555  */
2556  hash_seq_init(&hash_seq, pgss_hash);
2557  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2558  {
2559  entry->query_offset = 0;
2560  entry->query_len = -1;
2561  }
2562 
2563  /*
2564  * Destroy the query text file and create a new, empty one
2565  */
2566  (void) unlink(PGSS_TEXT_FILE);
2568  if (qfile == NULL)
2569  ereport(LOG,
2571  errmsg("could not recreate file \"%s\": %m",
2572  PGSS_TEXT_FILE)));
2573  else
2574  FreeFile(qfile);
2575 
2576  /* Reset the shared extent pointer */
2577  pgss->extent = 0;
2578 
2579  /* Reset mean_query_len to match the new state */
2581 
2582  /*
2583  * Bump the GC count even though we failed.
2584  *
2585  * This is needed to make concurrent readers of file without any lock on
2586  * pgss->lock notice existence of new version of file. Once readers
2587  * subsequently observe a change in GC count with pgss->lock held, that
2588  * forces a safe reopen of file. Writers also require that we bump here,
2589  * of course. (As required by locking protocol, readers and writers don't
2590  * trust earlier file contents until gc_count is found unchanged after
2591  * pgss->lock acquired in shared or exclusive mode respectively.)
2592  */
2593  record_gc_qtexts();
2594 }
2595 
/*
 * Reset or remove one hashtable entry "e"; does nothing if "e" is NULL.
 *
 * This relies on names from the expansion site: minmax_only, stats_reset,
 * num_remove and pgss_hash.  With minmax_only set, only the entry's min/max
 * times are zeroed and its minmax_stats_since is stamped; otherwise the
 * entry is removed from pgss_hash and num_remove is incremented.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe in an unbraced if/else).  "e" is evaluated more
 * than once, so pass a plain pointer expression without side effects.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
    if (e) \
    { \
        if (minmax_only) \
        { \
            /* When requested reset only min/max statistics of an entry */ \
            for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
            { \
                (e)->counters.max_time[kind] = 0; \
                (e)->counters.min_time[kind] = 0; \
            } \
            (e)->minmax_stats_since = stats_reset; \
        } \
        else \
        { \
            /* Remove the key otherwise */ \
            hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
            num_remove++; \
        } \
    } \
} while (0)
2614 
2615 /*
2616  * Reset entries corresponding to parameters passed.
2617  */
2618 static TimestampTz
2619 entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
2620 {
2621  HASH_SEQ_STATUS hash_seq;
2622  pgssEntry *entry;
2623  FILE *qfile;
2624  long num_entries;
2625  long num_remove = 0;
2626  pgssHashKey key;
2627  TimestampTz stats_reset;
2628 
2629  if (!pgss || !pgss_hash)
2630  ereport(ERROR,
2631  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2632  errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2633 
2635  num_entries = hash_get_num_entries(pgss_hash);
2636 
2637  stats_reset = GetCurrentTimestamp();
2638 
2639  if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2640  {
2641  /* If all the parameters are available, use the fast path. */
2642  memset(&key, 0, sizeof(pgssHashKey));
2643  key.userid = userid;
2644  key.dbid = dbid;
2645  key.queryid = queryid;
2646 
2647  /*
2648  * Reset the entry if it exists, starting with the non-top-level
2649  * entry.
2650  */
2651  key.toplevel = false;
2652  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2653 
2654  SINGLE_ENTRY_RESET(entry);
2655 
2656  /* Also reset the top-level entry if it exists. */
2657  key.toplevel = true;
2658  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2659 
2660  SINGLE_ENTRY_RESET(entry);
2661  }
2662  else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2663  {
2664  /* Reset entries corresponding to valid parameters. */
2665  hash_seq_init(&hash_seq, pgss_hash);
2666  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2667  {
2668  if ((!userid || entry->key.userid == userid) &&
2669  (!dbid || entry->key.dbid == dbid) &&
2670  (!queryid || entry->key.queryid == queryid))
2671  {
2672  SINGLE_ENTRY_RESET(entry);
2673  }
2674  }
2675  }
2676  else
2677  {
2678  /* Reset all entries. */
2679  hash_seq_init(&hash_seq, pgss_hash);
2680  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2681  {
2682  SINGLE_ENTRY_RESET(entry);
2683  }
2684  }
2685 
2686  /* All entries are removed? */
2687  if (num_entries != num_remove)
2688  goto release_lock;
2689 
2690  /*
2691  * Reset global statistics for pg_stat_statements since all entries are
2692  * removed.
2693  */
2695  pgss->stats.dealloc = 0;
2696  pgss->stats.stats_reset = stats_reset;
2698 
2699  /*
2700  * Write new empty query file, perhaps even creating a new one to recover
2701  * if the file was missing.
2702  */
2704  if (qfile == NULL)
2705  {
2706  ereport(LOG,
2708  errmsg("could not create file \"%s\": %m",
2709  PGSS_TEXT_FILE)));
2710  goto done;
2711  }
2712 
2713  /* If ftruncate fails, log it, but it's not a fatal problem */
2714  if (ftruncate(fileno(qfile), 0) != 0)
2715  ereport(LOG,
2717  errmsg("could not truncate file \"%s\": %m",
2718  PGSS_TEXT_FILE)));
2719 
2720  FreeFile(qfile);
2721 
2722 done:
2723  pgss->extent = 0;
2724  /* This counts as a query text garbage collection for our purposes */
2725  record_gc_qtexts();
2726 
2727 release_lock:
2729 
2730  return stats_reset;
2731 }
2732 
2733 /*
2734  * Generate a normalized version of the query string that will be used to
2735  * represent all similar queries.
2736  *
2737  * Note that the normalized representation may well vary depending on
2738  * just which "equivalent" query is used to create the hashtable entry.
2739  * We assume this is OK.
2740  *
2741  * If query_loc > 0, then "query" has been advanced by that much compared to
2742  * the original string start, so we need to translate the provided locations
2743  * to compensate. (This lets us avoid re-scanning statements before the one
2744  * of interest, so it's worth doing.)
2745  *
2746  * *query_len_p contains the input string length, and is updated with
2747  * the result string length on exit. The resulting string might be longer
2748  * or shorter depending on what happens with replacement of constants.
2749  *
2750  * Returns a palloc'd string.
2751  */
2752 static char *
2753 generate_normalized_query(JumbleState *jstate, const char *query,
2754  int query_loc, int *query_len_p)
2755 {
2756  char *norm_query;
2757  int query_len = *query_len_p;
2758  int i,
2759  norm_query_buflen, /* Space allowed for norm_query */
2760  len_to_wrt, /* Length (in bytes) to write */
2761  quer_loc = 0, /* Source query byte location */
2762  n_quer_loc = 0, /* Normalized query byte location */
2763  last_off = 0, /* Offset from start for previous tok */
2764  last_tok_len = 0; /* Length (in bytes) of that tok */
2765 
2766  /*
2767  * Get constants' lengths (core system only gives us locations). Note
2768  * this also ensures the items are sorted by location.
2769  */
2770  fill_in_constant_lengths(jstate, query, query_loc);
2771 
2772  /*
2773  * Allow for $n symbols to be longer than the constants they replace.
2774  * Constants must take at least one byte in text form, while a $n symbol
2775  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2776  * could refine that limit based on the max value of n for the current
2777  * query, but it hardly seems worth any extra effort to do so.
2778  */
2779  norm_query_buflen = query_len + jstate->clocations_count * 10;
2780 
2781  /* Allocate result buffer */
2782  norm_query = palloc(norm_query_buflen + 1);
2783 
2784  for (i = 0; i < jstate->clocations_count; i++)
2785  {
2786  int off, /* Offset from start for cur tok */
2787  tok_len; /* Length (in bytes) of that tok */
2788 
2789  off = jstate->clocations[i].location;
2790  /* Adjust recorded location if we're dealing with partial string */
2791  off -= query_loc;
2792 
2793  tok_len = jstate->clocations[i].length;
2794 
2795  if (tok_len < 0)
2796  continue; /* ignore any duplicates */
2797 
2798  /* Copy next chunk (what precedes the next constant) */
2799  len_to_wrt = off - last_off;
2800  len_to_wrt -= last_tok_len;
2801 
2802  Assert(len_to_wrt >= 0);
2803  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2804  n_quer_loc += len_to_wrt;
2805 
2806  /* And insert a param symbol in place of the constant token */
2807  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2808  i + 1 + jstate->highest_extern_param_id);
2809 
2810  quer_loc = off + tok_len;
2811  last_off = off;
2812  last_tok_len = tok_len;
2813  }
2814 
2815  /*
2816  * We've copied up until the last ignorable constant. Copy over the
2817  * remaining bytes of the original query string.
2818  */
2819  len_to_wrt = query_len - quer_loc;
2820 
2821  Assert(len_to_wrt >= 0);
2822  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2823  n_quer_loc += len_to_wrt;
2824 
2825  Assert(n_quer_loc <= norm_query_buflen);
2826  norm_query[n_quer_loc] = '\0';
2827 
2828  *query_len_p = n_quer_loc;
2829  return norm_query;
2830 }
2831 
2832 /*
2833  * Given a valid SQL string and an array of constant-location records,
2834  * fill in the textual lengths of those constants.
2835  *
2836  * The constants may use any allowed constant syntax, such as float literals,
2837  * bit-strings, single-quoted strings and dollar-quoted strings. This is
2838  * accomplished by using the public API for the core scanner.
2839  *
2840  * It is the caller's job to ensure that the string is a valid SQL statement
2841  * with constants at the indicated locations. Since in practice the string
2842  * has already been parsed, and the locations that the caller provides will
2843  * have originated from within the authoritative parser, this should not be
2844  * a problem.
2845  *
2846  * Duplicate constant pointers are possible, and will have their lengths
2847  * marked as '-1', so that they are later ignored. (Actually, we assume the
2848  * lengths were initialized as -1 to start with, and don't change them here.)
2849  *
2850  * If query_loc > 0, then "query" has been advanced by that much compared to
2851  * the original string start, so we need to translate the provided locations
2852  * to compensate. (This lets us avoid re-scanning statements before the one
2853  * of interest, so it's worth doing.)
2854  *
2855  * N.B. There is an assumption that a '-' character at a Const location begins
2856  * a negative numeric constant. This precludes there ever being another
2857  * reason for a constant to start with a '-'.
2858  */
2859 static void
2860 fill_in_constant_lengths(JumbleState *jstate, const char *query,
2861  int query_loc)
2862 {
2863  LocationLen *locs;
2865  core_yy_extra_type yyextra;
2866  core_YYSTYPE yylval;
2867  YYLTYPE yylloc;
2868  int last_loc = -1;
2869  int i;
2870 
2871  /*
2872  * Sort the records by location so that we can process them in order while
2873  * scanning the query text.
2874  */
2875  if (jstate->clocations_count > 1)
2876  qsort(jstate->clocations, jstate->clocations_count,
2877  sizeof(LocationLen), comp_location);
2878  locs = jstate->clocations;
2879 
2880  /* initialize the flex scanner --- should match raw_parser() */
2881  yyscanner = scanner_init(query,
2882  &yyextra,
2883  &ScanKeywords,
2885 
2886  /* we don't want to re-emit any escape string warnings */
2887  yyextra.escape_string_warning = false;
2888 
2889  /* Search for each constant, in sequence */
2890  for (i = 0; i < jstate->clocations_count; i++)
2891  {
2892  int loc = locs[i].location;
2893  int tok;
2894 
2895  /* Adjust recorded location if we're dealing with partial string */
2896  loc -= query_loc;
2897 
2898  Assert(loc >= 0);
2899 
2900  if (loc <= last_loc)
2901  continue; /* Duplicate constant, ignore */
2902 
2903  /* Lex tokens until we find the desired constant */
2904  for (;;)
2905  {
2906  tok = core_yylex(&yylval, &yylloc, yyscanner);
2907 
2908  /* We should not hit end-of-string, but if we do, behave sanely */
2909  if (tok == 0)
2910  break; /* out of inner for-loop */
2911 
2912  /*
2913  * We should find the token position exactly, but if we somehow
2914  * run past it, work with that.
2915  */
2916  if (yylloc >= loc)
2917  {
2918  if (query[loc] == '-')
2919  {
2920  /*
2921  * It's a negative value - this is the one and only case
2922  * where we replace more than a single token.
2923  *
2924  * Do not compensate for the core system's special-case
2925  * adjustment of location to that of the leading '-'
2926  * operator in the event of a negative constant. It is
2927  * also useful for our purposes to start from the minus
2928  * symbol. In this way, queries like "select * from foo
2929  * where bar = 1" and "select * from foo where bar = -2"
2930  * will have identical normalized query strings.
2931  */
2932  tok = core_yylex(&yylval, &yylloc, yyscanner);
2933  if (tok == 0)
2934  break; /* out of inner for-loop */
2935  }
2936 
2937  /*
2938  * We now rely on the assumption that flex has placed a zero
2939  * byte after the text of the current token in scanbuf.
2940  */
2941  locs[i].length = strlen(yyextra.scanbuf + loc);
2942  break; /* out of inner for-loop */
2943  }
2944  }
2945 
2946  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2947  if (tok == 0)
2948  break;
2949 
2950  last_loc = loc;
2951  }
2952 
2954 }
2955 
2956 /*
2957  * comp_location: comparator for qsorting LocationLen structs by location
2958  */
2959 static int
2960 comp_location(const void *a, const void *b)
2961 {
2962  int l = ((const LocationLen *) a)->location;
2963  int r = ((const LocationLen *) b)->location;
2964 
2965  return pg_cmp_s32(l, r);
2966 }
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5268
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:639
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
static Datum values[MAXATTR]
Definition: bootstrap.c:150
#define CStringGetTextDatum(s)
Definition: builtins.h:97
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define PG_BINARY_R
Definition: c.h:1275
#define MAXALIGN(LEN)
Definition: c.h:811
signed int int32
Definition: c.h:494
#define Max(x, y)
Definition: c.h:998
#define Assert(condition)
Definition: c.h:858
#define PG_BINARY
Definition: c.h:1273
#define UINT64_FORMAT
Definition: c.h:549
#define unlikely(x)
Definition: c.h:311
#define PG_BINARY_W
Definition: c.h:1276
size_t Size
Definition: c.h:605
enc
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1514
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:783
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define PG_TRY(...)
Definition: elog.h:371
#define PG_END_TRY(...)
Definition: elog.h:396
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_FINALLY(...)
Definition: elog.h:388
#define ereport(elevel,...)
Definition: elog.h:149
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:69
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:68
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:66
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:139
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:309
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:67
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:483
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:420
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:79
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:86
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:75
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:90
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2606
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2832
int FreeFile(FILE *file)
Definition: fd.c:2804
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2656
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:645
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
bool IsUnderPostmaster
Definition: globals.c:119
Oid MyDatabaseId
Definition: globals.c:93
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5202
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5091
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5238
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5117
@ PGC_SUSET
Definition: guc.h:74
@ PGC_POSTMASTER
Definition: guc.h:70
@ PGC_SIGHUP
Definition: guc.h:71
return str start
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:191
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
@ INSTRUMENT_ALL
Definition: instrument.h:65
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:598
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:60
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:76
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
PGDLLIMPORT const ScanKeywordList ScanKeywords
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:573
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:670
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
void pfree(void *pointer)
Definition: mcxt.c:1521
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define MaxAllocHugeSize
Definition: memutils.h:45
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:506
Oid GetUserId(void)
Definition: miscinit.c:514
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1781
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1778
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:59
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate)
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
void _PG_init(void)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
PG_MODULE_MAGIC
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
@ PGSS_NUMKIND
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static char * buf
Definition: pg_test_fsync.c:73
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281
static int duration
Definition: pgbench.c:174
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
planner_hook_type planner_hook
Definition: planner.c:72
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:289
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
#define sprintf
Definition: port.h:240
#define pg_pwrite
Definition: port.h:226
#define snprintf
Definition: port.h:238
#define qsort(a, b, c, d)
Definition: port.h:447
#define Int64GetDatumFast(X)
Definition: postgres.h:554
uintptr_t Datum
Definition: postgres.h:64
#define Float8GetDatumFast(X)
Definition: postgres.h:556
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
tree context
Definition: radixtree.h:1835
MemoryContextSwitchTo(old_ctx)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
#define YYLTYPE
Definition: scanner.h:44
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
void scanner_finish(core_yyscan_t yyscanner)
PGDLLIMPORT const uint16 ScanKeywordTokens[]
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:25
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:332
static pg_noinline void Size size
Definition: slab.c:607
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
static void error(void)
Definition: sql-dyntest.c:147
instr_time local_blk_read_time
Definition: instrument.h:38
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
int64 local_blks_written
Definition: instrument.h:33
instr_time temp_blk_read_time
Definition: instrument.h:40
instr_time local_blk_write_time
Definition: instrument.h:39
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
int64 jit_deform_count
double jit_emission_time
int64 shared_blks_hit
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
double jit_deform_time
int64 local_blks_read
double shared_blk_read_time
double jit_inlining_time
struct JitContext * es_jit
Definition: execnodes.h:721
uint64 es_total_processed
Definition: execnodes.h:678
MemoryContext es_query_cxt
Definition: execnodes.h:672
fmNodePtr resultinfo
Definition: fmgr.h:89
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
WalUsage walusage
Definition: instrument.h:92
BufferUsage bufusage
Definition: instrument.h:91
JitInstrumentation instr
Definition: jit.h:62
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:42
instr_time deform_counter
Definition: jit.h:36
instr_time emission_counter
Definition: jit.h:45
instr_time inlining_counter
Definition: jit.h:39
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: lwlock.h:42
Definition: nodes.h:129
const char * p_sourcetext
Definition: parse_node.h:195
ParseLoc stmt_len
Definition: plannodes.h:99
ParseLoc stmt_location
Definition: plannodes.h:98
Node * utilityStmt
Definition: plannodes.h:95
uint64 queryId
Definition: plannodes.h:54
uint64 nprocessed
Definition: cmdtag.h:32
CommandTag commandTag
Definition: cmdtag.h:31
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
Node * utilityStmt
Definition: parsenodes.h:136
ParseLoc stmt_location
Definition: parsenodes.h:240
TupleDesc setDesc
Definition: execnodes.h:343
Tuplestorestate * setResult
Definition: execnodes.h:342
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
Definition: guc.h:170
bool escape_string_warning
Definition: scanner.h:88
char * scanbuf
Definition: scanner.h:72
Counters counters
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:273
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:540
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:70
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
#define fstat
Definition: win32_port.h:283
#define ftruncate(a, b)
Definition: win32_port.h:82