PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement planning and execution times as well as resource
5  * usage across a whole database cluster.
6  *
7  * Execution costs are totaled for each distinct source query, and kept in
8  * a shared hashtable. (We track only as many distinct queries as will fit
9  * in the designated amount of shared memory.)
10  *
11  * Starting in Postgres 9.2, this module normalized query entries. As of
12  * Postgres 14, the normalization is done by the core if compute_query_id is
13  * enabled, or optionally by third-party modules.
14  *
15  * To facilitate presenting entries to users, we create "representative" query
16  * strings in which constants are replaced with parameter symbols ($n), to
17  * make it clearer what a normalized entry can represent. To save on shared
18  * memory, and to avoid having to truncate oversized query strings, we store
19  * these strings in a temporary external query-texts file. Offsets into this
20  * file are kept in shared memory.
21  *
22  * Note about locking issues: to create or delete an entry in the shared
23  * hashtable, one must hold pgss->lock exclusively. Modifying any field
24  * in an entry except the counters requires the same. To look up an entry,
25  * one must hold the lock shared. To read or update the counters within
26  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27  * disappear!) and also take the entry's mutex spinlock.
28  * The shared state variable pgss->extent (the next free spot in the external
29  * query-text file) should be accessed only while holding either the
30  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31  * allow reserving file space while holding only shared lock on pgss->lock.
32  * Rewriting the entire external query-text file, eg for garbage collection,
33  * requires holding pgss->lock exclusively; this allows individual entries
34  * in the file to be read or written while holding only shared lock.
35  *
36  *
37  * Copyright (c) 2008-2021, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  * contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <math.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49 
50 #include "access/parallel.h"
51 #include "catalog/pg_authid.h"
52 #include "common/hashfn.h"
53 #include "executor/instrument.h"
54 #include "funcapi.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "optimizer/planner.h"
58 #include "parser/analyze.h"
59 #include "parser/parsetree.h"
60 #include "parser/scanner.h"
61 #include "parser/scansup.h"
62 #include "pgstat.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/lwlock.h"
66 #include "storage/shmem.h"
67 #include "storage/spin.h"
68 #include "tcop/utility.h"
69 #include "utils/acl.h"
70 #include "utils/builtins.h"
71 #include "utils/queryjumble.h"
72 #include "utils/memutils.h"
73 #include "utils/timestamp.h"
74 
76 
77 /* Location of permanent stats file (valid when database is shut down) */
78 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
79 
80 /*
81  * Location of external query text file. We don't keep it in the core
82  * system's stats_temp_directory. The core system can safely use that GUC
83  * setting, because the statistics collector temp file paths are set only once
84  * as part of changing the GUC, but pg_stat_statements has no way of avoiding
85  * race conditions. Besides, we only expect modest, infrequent I/O for query
86  * strings, so placing the file on a faster filesystem is not compelling.
87  */
88 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
89 
90 /* Magic number identifying the stats file format */
91 static const uint32 PGSS_FILE_HEADER = 0x20201227;
92 
93 /* PostgreSQL major version number, changes in which invalidate all entries */
94 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
95 
96 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
/* As written, USAGE_EXEC yields the same constant whatever "duration" is. */
97 #define USAGE_EXEC(duration) (1.0)
98 #define USAGE_INIT (1.0) /* including initial planning */
99 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
100 #define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
101 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
102 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
103 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
/*
 * An entry is "sticky" while it has recorded neither a planning call nor an
 * execution call. The argument is a Counters struct (not a pointer to one).
 * NOTE(review): "c" is expanded without surrounding parentheses, so pass only
 * a simple expression such as a variable or a member access.
 */
104 #define IS_STICKY(c) ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
105 
105 
106 /*
107  * True for the utility statements that pgss_ProcessUtility and pgss_post_parse_analyze
108  * do not ignore, i.e. every statement except EXECUTE, PREPARE and DEALLOCATE.
109  */
110 #define PGSS_HANDLED_UTILITY(n) (!IsA(n, ExecuteStmt) && \
111  !IsA(n, PrepareStmt) && \
112  !IsA(n, DeallocateStmt))
113 
114 /*
115  * Extension version number, for supporting older extension versions' objects
116  */
117 typedef enum pgssVersion
118 {
125 } pgssVersion;
126 
127 typedef enum pgssStoreKind
128 {
130 
131  /*
132  * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
133  * reference the underlying values in the arrays in the Counters struct,
134  * and this order is required in pg_stat_statements_internal().
135  */
138 
139  PGSS_NUMKIND /* Must be last value of this enum */
140 } pgssStoreKind;
141 
142 /*
143  * Hashtable key that defines the identity of a hashtable entry. We separate
144  * queries by user and by database even if they are otherwise identical.
 * The "toplevel" flag is part of the key as well, so the same query is
 * tracked separately for top-level and nested execution.
145  *
146  * If you add a new key to this struct, make sure to teach pgss_store() to
147  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
148  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
149  *
 * NOTE(review): "toplevel" is a trailing bool after a uint64, so the struct
 * presumably ends with padding bytes on common ABIs; that is the padding the
 * paragraph above is concerned with. Confirm for the target platform.
150  */
151 typedef struct pgssHashKey
152 {
153  Oid userid; /* user OID */
154  Oid dbid; /* database OID */
155  uint64 queryid; /* query identifier */
156  bool toplevel; /* query executed at top level */
157 } pgssHashKey;
158 
159 /*
160  * The actual stats counters kept within pgssEntry.
 *
 * The arrays sized PGSS_NUMKIND are indexed by pgssStoreKind (PGSS_PLAN and
 * PGSS_EXEC), so planning and execution figures are kept side by side; all
 * other members are single per-entry values. Times are in milliseconds.
 * Note that wal_bytes is unsigned (uint64) while the other integer counters
 * are int64.
161  */
162 typedef struct Counters
163 {
164  int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
165  double total_time[PGSS_NUMKIND]; /* total planning/execution time,
166  * in msec */
167  double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
168  * msec */
169  double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
170  * msec */
171  double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
172  * msec */
173  double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
174  * planning/execution time in msec */
175  int64 rows; /* total # of retrieved or affected rows */
176  int64 shared_blks_hit; /* # of shared buffer hits */
177  int64 shared_blks_read; /* # of shared disk blocks read */
178  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
179  int64 shared_blks_written; /* # of shared disk blocks written */
180  int64 local_blks_hit; /* # of local buffer hits */
181  int64 local_blks_read; /* # of local disk blocks read */
182  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
183  int64 local_blks_written; /* # of local disk blocks written */
184  int64 temp_blks_read; /* # of temp blocks read */
185  int64 temp_blks_written; /* # of temp blocks written */
186  double blk_read_time; /* time spent reading, in msec */
187  double blk_write_time; /* time spent writing, in msec */
188  double usage; /* usage factor */
189  int64 wal_records; /* # of WAL records generated */
190  int64 wal_fpi; /* # of WAL full page images generated */
191  uint64 wal_bytes; /* total amount of WAL generated in bytes */
192 } Counters;
193 
194 /*
195  * Global statistics for pg_stat_statements
196  */
197 typedef struct pgssGlobalStats
198 {
199  int64 dealloc; /* # of times entries were deallocated */
200  TimestampTz stats_reset; /* timestamp with all stats reset */
202 
203 /*
204  * Statistics per statement
205  *
206  * Note: in the event of a failure in garbage collection of the query text file,
207  * we reset query_offset to zero and query_len to -1. This will be seen as
208  * an invalid state by qtext_fetch().
 *
 * Locking: the per-entry mutex covers "counters" only. Per the locking notes
 * at the top of this file, changing any other field requires holding
 * pgss->lock exclusively.
 *
 * The struct is also written to and read back from the dump file as raw
 * bytes (fwrite/fread of sizeof(pgssEntry)), so its layout is part of the
 * on-disk format identified by PGSS_FILE_HEADER.
209  */
210 typedef struct pgssEntry
211 {
212  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
213  Counters counters; /* the statistics for this query */
214  Size query_offset; /* query text offset in external file */
215  int query_len; /* # of valid bytes in query string, or -1 */
216  int encoding; /* query text encoding */
217  slock_t mutex; /* protects the counters only */
218 } pgssEntry;
219 
220 /*
221  * Global shared state
222  */
223 typedef struct pgssSharedState
224 {
225  LWLock *lock; /* protects hashtable search/modification */
226  double cur_median_usage; /* current median usage in hashtable */
227  Size mean_query_len; /* current mean entry text length */
228  slock_t mutex; /* protects following fields only: */
229  Size extent; /* current extent of query file */
230  int n_writers; /* number of active writers to query file */
231  int gc_count; /* query file garbage collection cycle count */
232  pgssGlobalStats stats; /* global statistics for pgss */
234 
235 /*---- Local variables ----*/
236 
237 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
238 static int exec_nested_level = 0;
239 
240 /* Current nesting depth of planner calls */
241 static int plan_nested_level = 0;
242 
243 /* Saved hook values in case of unload */
252 
253 /* Links to shared memory state */
254 static pgssSharedState *pgss = NULL;
255 static HTAB *pgss_hash = NULL;
256 
257 /*---- GUC variables ----*/
258 
259 typedef enum
260 {
261  PGSS_TRACK_NONE, /* track no statements */
262  PGSS_TRACK_TOP, /* only top level statements */
263  PGSS_TRACK_ALL /* all statements, including nested ones */
265 
/*
 * Name-to-value table handed to DefineCustomEnumVariable() for the
 * "pg_stat_statements.track" setting: each row maps an accepted setting name
 * to its tracking level. The last row has a NULL name.
 */
266 static const struct config_enum_entry track_options[] =
267 {
268  {"none", PGSS_TRACK_NONE, false},
269  {"top", PGSS_TRACK_TOP, false},
270  {"all", PGSS_TRACK_ALL, false},
271  {NULL, 0, false}
272 };
273 
274 static int pgss_max; /* max # statements to track */
275 static int pgss_track; /* tracking level */
276 static bool pgss_track_utility; /* whether to track utility commands */
277 static bool pgss_track_planning; /* whether to track planning duration */
278 static bool pgss_save; /* whether to save stats across shutdown */
279 
280 
/*
 * Should a statement at nesting depth "level" be tracked?
 * Never inside a parallel worker; otherwise yes when pgss_track is
 * PGSS_TRACK_ALL, or when it is PGSS_TRACK_TOP and "level" is 0.
 */
281 #define pgss_enabled(level) \
282  (!IsParallelWorker() && \
283  (pgss_track == PGSS_TRACK_ALL || \
284  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
285 
/*
 * Increment the shared gc_count (query file garbage collection cycle count)
 * while holding the pgss->mutex spinlock. The shared state is reached through
 * a volatile-qualified pointer for the duration of the update.
 */
286 #define record_gc_qtexts() \
287  do { \
288  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
289  SpinLockAcquire(&s->mutex); \
290  s->gc_count++; \
291  SpinLockRelease(&s->mutex); \
292  } while(0)
293 
294 /*---- Function declarations ----*/
295 
296 void _PG_init(void);
297 void _PG_fini(void);
298 
307 
308 static void pgss_shmem_startup(void);
309 static void pgss_shmem_shutdown(int code, Datum arg);
310 static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
311  JumbleState *jstate);
313  const char *query_string,
314  int cursorOptions,
315  ParamListInfo boundParams);
316 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
317 static void pgss_ExecutorRun(QueryDesc *queryDesc,
318  ScanDirection direction,
319  uint64 count, bool execute_once);
320 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
321 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
322 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
323  ProcessUtilityContext context, ParamListInfo params,
324  QueryEnvironment *queryEnv,
326 static void pgss_store(const char *query, uint64 queryId,
327  int query_location, int query_len,
328  pgssStoreKind kind,
329  double total_time, uint64 rows,
330  const BufferUsage *bufusage,
331  const WalUsage *walusage,
332  JumbleState *jstate);
334  pgssVersion api_version,
335  bool showtext);
336 static Size pgss_memsize(void);
337 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
338  int encoding, bool sticky);
339 static void entry_dealloc(void);
340 static bool qtext_store(const char *query, int query_len,
341  Size *query_offset, int *gc_count);
342 static char *qtext_load_file(Size *buffer_size);
343 static char *qtext_fetch(Size query_offset, int query_len,
344  char *buffer, Size buffer_size);
345 static bool need_gc_qtexts(void);
346 static void gc_qtexts(void);
347 static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
348 static char *generate_normalized_query(JumbleState *jstate, const char *query,
349  int query_loc, int *query_len_p);
350 static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
351  int query_loc);
352 static int comp_location(const void *a, const void *b);
353 
354 
355 /*
356  * Module load callback
357  */
358 void
359 _PG_init(void)
360 {
361  /*
362  * In order to create our shared memory area, we have to be loaded via
363  * shared_preload_libraries. If not, fall out without hooking into any of
364  * the main system. (We don't throw error here because it seems useful to
365  * allow the pg_stat_statements functions to be created even when the
366  * module isn't active. The functions must protect themselves against
367  * being called then, however.)
368  */
370  return;
371 
372  /*
373  * Define (or redefine) custom GUC variables.
374  */
375  DefineCustomIntVariable("pg_stat_statements.max",
376  "Sets the maximum number of statements tracked by pg_stat_statements.",
377  NULL,
378  &pgss_max,
379  5000,
380  100,
381  INT_MAX,
383  0,
384  NULL,
385  NULL,
386  NULL);
387 
388  DefineCustomEnumVariable("pg_stat_statements.track",
389  "Selects which statements are tracked by pg_stat_statements.",
390  NULL,
391  &pgss_track,
393  track_options,
394  PGC_SUSET,
395  0,
396  NULL,
397  NULL,
398  NULL);
399 
400  DefineCustomBoolVariable("pg_stat_statements.track_utility",
401  "Selects whether utility commands are tracked by pg_stat_statements.",
402  NULL,
404  true,
405  PGC_SUSET,
406  0,
407  NULL,
408  NULL,
409  NULL);
410 
411  DefineCustomBoolVariable("pg_stat_statements.track_planning",
412  "Selects whether planning duration is tracked by pg_stat_statements.",
413  NULL,
415  false,
416  PGC_SUSET,
417  0,
418  NULL,
419  NULL,
420  NULL);
421 
422  DefineCustomBoolVariable("pg_stat_statements.save",
423  "Save pg_stat_statements statistics across server shutdowns.",
424  NULL,
425  &pgss_save,
426  true,
427  PGC_SIGHUP,
428  0,
429  NULL,
430  NULL,
431  NULL);
432 
433  EmitWarningsOnPlaceholders("pg_stat_statements");
434 
435  /*
436  * Request additional shared resources. (These are no-ops if we're not in
437  * the postmaster process.) We'll allocate or attach to the shared
438  * resources in pgss_shmem_startup().
439  */
441  RequestNamedLWLockTranche("pg_stat_statements", 1);
442 
443  /*
444  * Install hooks.
445  */
462 }
463 
464 /*
465  * Module unload callback
466  */
467 void
468 _PG_fini(void)
469 {
470  /* Uninstall hooks. */
479 }
480 
481 /*
482  * shmem_startup hook: allocate or attach to shared memory,
483  * then load any pre-existing statistics from file.
484  * Also create and load the query-texts file, which is expected to exist
485  * (even if empty) while the module is enabled.
 *
 * Outline of the code below: the shared struct and the hash table are set
 * up while holding AddinShmemInitLock. If the shared struct already existed
 * ("found"), nothing more is done. Otherwise a possibly left-over
 * query-texts file is unlinked and, unless pgss_save is off or no dump file
 * exists, every dumped entry is read back: its text is appended to the new
 * query-texts file and the entry is re-created through entry_alloc().
 * Sticky entries are not reloaded. The dump file is unlinked both after a
 * successful load and on the failure path.
 *
 * Read, data and write problems jump to labels at the end of the function
 * that report the problem with ereport(LOG) and then release the buffer and
 * the open files.
486  */
487 static void
489 {
490  bool found;
491  HASHCTL info;
492  FILE *file = NULL;
493  FILE *qfile = NULL;
494  uint32 header;
495  int32 num;
496  int32 pgver;
497  int32 i;
498  int buffer_size;
499  char *buffer = NULL;
500 
503 
504  /* reset in case this is a restart within the postmaster */
505  pgss = NULL;
506  pgss_hash = NULL;
507 
508  /*
509  * Create or attach to the shared memory state, including hash table
510  */
511  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
512 
513  pgss = ShmemInitStruct("pg_stat_statements",
514  sizeof(pgssSharedState),
515  &found);
516 
517  if (!found)
518  {
519  /* First time through ... */
520  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
523  SpinLockInit(&pgss->mutex);
524  pgss->extent = 0;
525  pgss->n_writers = 0;
526  pgss->gc_count = 0;
527  pgss->stats.dealloc = 0;
529  }
530 
531  info.keysize = sizeof(pgssHashKey);
532  info.entrysize = sizeof(pgssEntry);
533  pgss_hash = ShmemInitHash("pg_stat_statements hash",
535  &info,
537 
538  LWLockRelease(AddinShmemInitLock);
539 
540  /*
541  * If we're in the postmaster (or a standalone backend...), set up a shmem
542  * exit hook to dump the statistics to disk.
543  */
544  if (!IsUnderPostmaster)
546 
547  /*
548  * Done if some other process already completed our initialization.
549  */
550  if (found)
551  return;
552 
553  /*
554  * Note: we don't bother with locks here, because there should be no other
555  * processes running when this code is reached.
556  */
557 
558  /* Unlink query text file possibly left over from crash */
559  unlink(PGSS_TEXT_FILE);
560 
561  /* Allocate new query text temp file */
563  if (qfile == NULL)
564  goto write_error;
565 
566  /*
567  * If we were told not to load old statistics, we're done. (Note we do
568  * not try to unlink any old dump file in this case. This seems a bit
569  * questionable but it's the historical behavior.)
570  */
571  if (!pgss_save)
572  {
573  FreeFile(qfile);
574  return;
575  }
576 
577  /*
578  * Attempt to load old statistics from the dump file.
579  */
581  if (file == NULL)
582  {
583  if (errno != ENOENT)
584  goto read_error;
585  /* No existing persisted stats file, so we're done */
586  FreeFile(qfile);
587  return;
588  }
589 
 /* Scratch buffer for one query text; grown on demand in the loop below */
590  buffer_size = 2048;
591  buffer = (char *) palloc(buffer_size);
592 
 /* File starts with: magic number, PostgreSQL major version, entry count */
593  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
594  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
595  fread(&num, sizeof(int32), 1, file) != 1)
596  goto read_error;
597 
598  if (header != PGSS_FILE_HEADER ||
599  pgver != PGSS_PG_MAJOR_VERSION)
600  goto data_error;
601 
602  for (i = 0; i < num; i++)
603  {
604  pgssEntry temp;
605  pgssEntry *entry;
606  Size query_offset;
607 
608  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
609  goto read_error;
610 
611  /* Encoding is the only field we can easily sanity-check */
612  if (!PG_VALID_BE_ENCODING(temp.encoding))
613  goto data_error;
614 
 /*
  * NOTE(review): temp.query_len comes straight from the file and is not
  * range-checked. A negative value never triggers the resize below; with
  * query_len == -1 the terminator store further down would write to
  * buffer[-1], and with smaller values the byte count handed to fread()
  * converts to a very large unsigned number. Consider treating
  * query_len < 0 as invalid data.
  */
615  /* Resize buffer as needed */
616  if (temp.query_len >= buffer_size)
617  {
618  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
619  buffer = repalloc(buffer, buffer_size);
620  }
621 
 /* The text is stored with its terminating NUL, hence query_len + 1 bytes */
622  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
623  goto read_error;
624 
625  /* Should have a trailing null, but let's make sure */
626  buffer[temp.query_len] = '\0';
627 
628  /* Skip loading "sticky" entries */
629  if (IS_STICKY(temp.counters))
630  continue;
631 
632  /* Store the query text */
633  query_offset = pgss->extent;
634  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
635  goto write_error;
636  pgss->extent += temp.query_len + 1;
637 
638  /* make the hashtable entry (discards old entries if too many) */
639  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
640  temp.encoding,
641  false);
642 
643  /* copy in the actual stats */
644  entry->counters = temp.counters;
645  }
646 
647  /* Read global statistics for pg_stat_statements */
648  if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
649  goto read_error;
650 
651  pfree(buffer);
652  FreeFile(file);
653  FreeFile(qfile);
654 
655  /*
656  * Remove the persisted stats file so it's not included in
657  * backups/replication standbys, etc. A new file will be written on next
658  * shutdown.
659  *
660  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
661  * because we remove that file on startup; it acts inversely to
662  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
663  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
664  * when the server is not running. Leaving the file creates no danger of
665  * a newly restored database having a spurious record of execution costs,
666  * which is what we're really concerned about here.
667  */
668  unlink(PGSS_DUMP_FILE);
669 
670  return;
671 
672 read_error:
673  ereport(LOG,
675  errmsg("could not read file \"%s\": %m",
676  PGSS_DUMP_FILE)));
677  goto fail;
678 data_error:
679  ereport(LOG,
680  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
681  errmsg("ignoring invalid data in file \"%s\"",
682  PGSS_DUMP_FILE)));
683  goto fail;
684 write_error:
685  ereport(LOG,
687  errmsg("could not write file \"%s\": %m",
688  PGSS_TEXT_FILE)));
689 fail:
 /* Common cleanup: release whatever had been acquired when we bailed out */
690  if (buffer)
691  pfree(buffer);
692  if (file)
693  FreeFile(file);
694  if (qfile)
695  FreeFile(qfile);
696  /* If possible, throw away the bogus file; ignore any error */
697  unlink(PGSS_DUMP_FILE);
698 
699  /*
700  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
701  * server is running with pg_stat_statements enabled
702  */
703 }
704 
705 /*
706  * shmem_shutdown hook: Dump statistics into file.
707  *
708  * Note: we don't bother with acquiring lock, because there should be no
709  * other processes running when this is called.
 *
 * Nothing is written when "code" is nonzero, when the shared state was never
 * set up, or when pgss_save is off.
 *
 * Layout written to PGSS_DUMP_FILE ".tmp": the PGSS_FILE_HEADER magic
 * number, the PostgreSQL major version, the entry count, then for every
 * entry the raw pgssEntry bytes followed by the query text including its
 * terminating NUL, and finally the pgssGlobalStats struct.
 *
 * On any failure the problem is reported with ereport(LOG) and both the
 * temporary dump file and the query-texts file are unlinked.
710  */
711 static void
713 {
714  FILE *file;
715  char *qbuffer = NULL;
716  Size qbuffer_size = 0;
717  HASH_SEQ_STATUS hash_seq;
718  int32 num_entries;
719  pgssEntry *entry;
720 
721  /* Don't try to dump during a crash. */
722  if (code)
723  return;
724 
725  /* Safety check ... shouldn't get here unless shmem is set up. */
726  if (!pgss || !pgss_hash)
727  return;
728 
729  /* Don't dump if told not to. */
730  if (!pgss_save)
731  return;
732 
733  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
734  if (file == NULL)
735  goto error;
736 
737  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
738  goto error;
739  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
740  goto error;
 /*
  * NOTE(review): num_entries counts every entry in the hash table, but the
  * loop below skips entries whose text cannot be fetched, so the file may
  * hold fewer entries than this count announces.
  */
741  num_entries = hash_get_num_entries(pgss_hash);
742  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
743  goto error;
744 
 /* qbuffer is released with free() below, not pfree() */
745  qbuffer = qtext_load_file(&qbuffer_size);
746  if (qbuffer == NULL)
747  goto error;
748 
749  /*
750  * When serializing to disk, we store query texts immediately after their
751  * entry data. Any orphaned query texts are thereby excluded.
752  */
753  hash_seq_init(&hash_seq, pgss_hash);
754  while ((entry = hash_seq_search(&hash_seq)) != NULL)
755  {
756  int len = entry->query_len;
757  char *qstr = qtext_fetch(entry->query_offset, len,
758  qbuffer, qbuffer_size);
759 
760  if (qstr == NULL)
761  continue; /* Ignore any entries with bogus texts */
762 
763  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
764  fwrite(qstr, 1, len + 1, file) != len + 1)
765  {
766  /* note: we assume hash_seq_term won't change errno */
767  hash_seq_term(&hash_seq);
768  goto error;
769  }
770  }
771 
772  /* Dump global statistics for pg_stat_statements */
773  if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
774  goto error;
775 
776  free(qbuffer);
777  qbuffer = NULL;
778 
 /* Clear "file" before jumping so the error path does not close it again */
779  if (FreeFile(file))
780  {
781  file = NULL;
782  goto error;
783  }
784 
785  /*
786  * Rename file into place, so we atomically replace any old one.
787  */
789 
790  /* Unlink query-texts file; it's not needed while shutdown */
791  unlink(PGSS_TEXT_FILE);
792 
793  return;
794 
795 error:
796  ereport(LOG,
798  errmsg("could not write file \"%s\": %m",
799  PGSS_DUMP_FILE ".tmp")));
800  if (qbuffer)
801  free(qbuffer);
802  if (file)
803  FreeFile(file);
804  unlink(PGSS_DUMP_FILE ".tmp");
805  unlink(PGSS_TEXT_FILE);
806 }
807 
808 /*
809  * Post-parse-analysis hook: clear the queryId of prepared-statement utility commands, or store a normalized entry early
810  */
811 static void
813 {
815  prev_post_parse_analyze_hook(pstate, query, jstate);
816 
817  /* Safety check... */
818  if (!pgss || !pgss_hash || !pgss_enabled(exec_nested_level))
819  return;
820 
821  /*
822  * Clear queryId for prepared statements related utility, as those will
823  * inherit from the underlying statement's one (except DEALLOCATE which is
824  * entirely untracked).
825  */
826  if (query->utilityStmt)
827  {
829  query->queryId = UINT64CONST(0);
830  return;
831  }
832 
833  /*
834  * If query jumbling were able to identify any ignorable constants, we
835  * immediately create a hash table entry for the query, so that we can
836  * record the normalized form of the query string. If there were no such
837  * constants, the normalized string would be the same as the query text
838  * anyway, so there's no need for an early entry.
839  */
840  if (jstate && jstate->clocations_count > 0)
841  pgss_store(pstate->p_sourcetext,
842  query->queryId,
843  query->stmt_location,
844  query->stmt_len,
845  PGSS_INVALID,
846  0,
847  0,
848  NULL,
849  NULL,
850  jstate);
851 }
852 
853 /*
854  * Planner hook: forward to regular planner, but measure planning time
855  * if needed.
856  */
857 static PlannedStmt *
859  const char *query_string,
860  int cursorOptions,
861  ParamListInfo boundParams)
862 {
863  PlannedStmt *result;
864 
865  /*
866  * We can't process the query if no query_string is provided, as
867  * pgss_store needs it. We also ignore query without queryid, as it would
868  * be treated as a utility statement, which may not be the case.
869  *
870  * Note that planner_hook can be called from the planner itself, so we
871  * have a specific nesting level for the planner. However, utility
872  * commands containing optimizable statements can also call the planner,
873  * same for regular DML (for instance for underlying foreign key queries).
874  * So testing the planner nesting level only is not enough to detect real
875  * top level planner call.
876  */
878  && pgss_track_planning && query_string
879  && parse->queryId != UINT64CONST(0))
880  {
881  instr_time start;
883  BufferUsage bufusage_start,
884  bufusage;
885  WalUsage walusage_start,
886  walusage;
887 
888  /* We need to track buffer usage as the planner can access them. */
889  bufusage_start = pgBufferUsage;
890 
891  /*
892  * Similarly the planner could write some WAL records in some cases
893  * (e.g. setting a hint bit with those being WAL-logged)
894  */
895  walusage_start = pgWalUsage;
896  INSTR_TIME_SET_CURRENT(start);
897 
899  PG_TRY();
900  {
901  if (prev_planner_hook)
902  result = prev_planner_hook(parse, query_string, cursorOptions,
903  boundParams);
904  else
905  result = standard_planner(parse, query_string, cursorOptions,
906  boundParams);
907  }
908  PG_FINALLY();
909  {
911  }
912  PG_END_TRY();
913 
914  INSTR_TIME_SET_CURRENT(duration);
915  INSTR_TIME_SUBTRACT(duration, start);
916 
917  /* calc differences of buffer counters. */
918  memset(&bufusage, 0, sizeof(BufferUsage));
919  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
920 
921  /* calc differences of WAL counters. */
922  memset(&walusage, 0, sizeof(WalUsage));
923  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
924 
925  pgss_store(query_string,
926  parse->queryId,
927  parse->stmt_location,
928  parse->stmt_len,
929  PGSS_PLAN,
930  INSTR_TIME_GET_MILLISEC(duration),
931  0,
932  &bufusage,
933  &walusage,
934  NULL);
935  }
936  else
937  {
938  if (prev_planner_hook)
939  result = prev_planner_hook(parse, query_string, cursorOptions,
940  boundParams);
941  else
942  result = standard_planner(parse, query_string, cursorOptions,
943  boundParams);
944  }
945 
946  return result;
947 }
948 
949 /*
950  * ExecutorStart hook: start up tracking if needed
 *
 * First hands control to the previously installed ExecutorStart hook if
 * there is one, otherwise to standard_ExecutorStart(). Afterwards, for a
 * statement that is going to be tracked, makes sure queryDesc->totaltime
 * is allocated.
 *
 * queryDesc: descriptor of the query being started
 * eflags: passed through unchanged to the underlying ExecutorStart
951  */
952 static void
953 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
954 {
955  if (prev_ExecutorStart)
956  prev_ExecutorStart(queryDesc, eflags);
957  else
958  standard_ExecutorStart(queryDesc, eflags);
959 
960  /*
961  * If query has queryId zero, don't track it. This prevents double
962  * counting of optimizable statements that are directly contained in
963  * utility statements.
964  */
965  if (pgss_enabled(exec_nested_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
966  {
967  /*
968  * Set up to track total elapsed time in ExecutorRun. Make sure the
969  * space is allocated in the per-query context so it will go away at
970  * ExecutorEnd.
 *
 * An existing totaltime is left alone; we allocate only when none is
 * set yet.
971  */
972  if (queryDesc->totaltime == NULL)
973  {
974  MemoryContext oldcxt;
975 
976  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
977  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
978  MemoryContextSwitchTo(oldcxt);
979  }
980  }
981 }
982 
983 /*
984  * ExecutorRun hook: all we need do is track nesting depth
985  */
986 static void
987 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
988  bool execute_once)
989 {
991  PG_TRY();
992  {
993  if (prev_ExecutorRun)
994  prev_ExecutorRun(queryDesc, direction, count, execute_once);
995  else
996  standard_ExecutorRun(queryDesc, direction, count, execute_once);
997  }
998  PG_FINALLY();
999  {
1001  }
1002  PG_END_TRY();
1003 }
1004 
1005 /*
1006  * ExecutorFinish hook: all we need do is track nesting depth
1007  */
1008 static void
1010 {
1012  PG_TRY();
1013  {
1014  if (prev_ExecutorFinish)
1015  prev_ExecutorFinish(queryDesc);
1016  else
1017  standard_ExecutorFinish(queryDesc);
1018  }
1019  PG_FINALLY();
1020  {
1022  }
1023  PG_END_TRY();
1024 }
1025 
1026 /*
1027  * ExecutorEnd hook: store results if needed
1028  */
1029 static void
1031 {
1032  uint64 queryId = queryDesc->plannedstmt->queryId;
1033 
1034  if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1036  {
1037  /*
1038  * Make sure stats accumulation is done. (Note: it's okay if several
1039  * levels of hook all do this.)
1040  */
1041  InstrEndLoop(queryDesc->totaltime);
1042 
1043  pgss_store(queryDesc->sourceText,
1044  queryId,
1045  queryDesc->plannedstmt->stmt_location,
1046  queryDesc->plannedstmt->stmt_len,
1047  PGSS_EXEC,
1048  queryDesc->totaltime->total * 1000.0, /* convert to msec */
1049  queryDesc->estate->es_processed,
1050  &queryDesc->totaltime->bufusage,
1051  &queryDesc->totaltime->walusage,
1052  NULL);
1053  }
1054 
1055  if (prev_ExecutorEnd)
1056  prev_ExecutorEnd(queryDesc);
1057  else
1058  standard_ExecutorEnd(queryDesc);
1059 }
1060 
1061 /*
1062  * ProcessUtility hook
1063  */
1064 static void
1065 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1066  ProcessUtilityContext context,
1067  ParamListInfo params, QueryEnvironment *queryEnv,
1069 {
1070  Node *parsetree = pstmt->utilityStmt;
1071  uint64 saved_queryId = pstmt->queryId;
1072 
1073  /*
1074  * Force utility statements to get queryId zero. We do this even in cases
1075  * where the statement contains an optimizable statement for which a
1076  * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1077  * cases, runtime control will first go through ProcessUtility and then
1078  * the executor, and we don't want the executor hooks to do anything,
1079  * since we are already measuring the statement's costs at the utility
1080  * level.
1081  *
1082  * Note that this is only done if pg_stat_statements is enabled and
1083  * configured to track utility statements, in the unlikely possibility
1084  * that user configured another extension to handle utility statements
1085  * only.
1086  */
1088  pstmt->queryId = UINT64CONST(0);
1089 
1090  /*
1091  * If it's an EXECUTE statement, we don't track it and don't increment the
1092  * nesting level. This allows the cycles to be charged to the underlying
1093  * PREPARE instead (by the Executor hooks), which is much more useful.
1094  *
1095  * We also don't track execution of PREPARE. If we did, we would get one
1096  * hash table entry for the PREPARE (with hash calculated from the query
1097  * string), and then a different one with the same query string (but hash
1098  * calculated from the query tree) would be used to accumulate costs of
1099  * ensuing EXECUTEs. This would be confusing, and inconsistent with other
1100  * cases where planning time is not included at all.
1101  *
1102  * Likewise, we don't track execution of DEALLOCATE.
1103  */
1105  PGSS_HANDLED_UTILITY(parsetree))
1106  {
1107  instr_time start;
1109  uint64 rows;
1110  BufferUsage bufusage_start,
1111  bufusage;
1112  WalUsage walusage_start,
1113  walusage;
1114 
1115  bufusage_start = pgBufferUsage;
1116  walusage_start = pgWalUsage;
1117  INSTR_TIME_SET_CURRENT(start);
1118 
1120  PG_TRY();
1121  {
1122  if (prev_ProcessUtility)
1123  prev_ProcessUtility(pstmt, queryString,
1124  context, params, queryEnv,
1125  dest, qc);
1126  else
1127  standard_ProcessUtility(pstmt, queryString,
1128  context, params, queryEnv,
1129  dest, qc);
1130  }
1131  PG_FINALLY();
1132  {
1134  }
1135  PG_END_TRY();
1136 
1137  INSTR_TIME_SET_CURRENT(duration);
1138  INSTR_TIME_SUBTRACT(duration, start);
1139 
1140  /*
1141  * Track the total number of rows retrieved or affected by the utility
1142  * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1143  * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1144  */
1145  rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1146  qc->commandTag == CMDTAG_FETCH ||
1147  qc->commandTag == CMDTAG_SELECT ||
1148  qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1149  qc->nprocessed : 0;
1150 
1151  /* calc differences of buffer counters. */
1152  memset(&bufusage, 0, sizeof(BufferUsage));
1153  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1154 
1155  /* calc differences of WAL counters. */
1156  memset(&walusage, 0, sizeof(WalUsage));
1157  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1158 
1159  pgss_store(queryString,
1160  saved_queryId,
1161  pstmt->stmt_location,
1162  pstmt->stmt_len,
1163  PGSS_EXEC,
1164  INSTR_TIME_GET_MILLISEC(duration),
1165  rows,
1166  &bufusage,
1167  &walusage,
1168  NULL);
1169  }
1170  else
1171  {
1172  if (prev_ProcessUtility)
1173  prev_ProcessUtility(pstmt, queryString,
1174  context, params, queryEnv,
1175  dest, qc);
1176  else
1177  standard_ProcessUtility(pstmt, queryString,
1178  context, params, queryEnv,
1179  dest, qc);
1180  }
1181 }
1182 
1183 /*
1184  * Store some statistics for a statement.
1185  *
1186  * If queryId is 0 then this is a utility statement for which we couldn't
1187  * compute a queryId during parse analysis, and we should compute a suitable
1188  * queryId internally.
1189  *
1190  * If jstate is not NULL then we're trying to create an entry for which
1191  * we have no statistics as yet; we just want to record the normalized
1192  * query string. total_time, rows, bufusage and walusage are ignored in this
1193  * case.
1194  *
1195  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1196  * for the arrays in the Counters field.
1197  */
1198 static void
1199 pgss_store(const char *query, uint64 queryId,
1200  int query_location, int query_len,
1201  pgssStoreKind kind,
1202  double total_time, uint64 rows,
1203  const BufferUsage *bufusage,
1204  const WalUsage *walusage,
1205  JumbleState *jstate)
1206 {
1207  pgssHashKey key;
1208  pgssEntry *entry;
1209  char *norm_query = NULL;
1210  int encoding = GetDatabaseEncoding();
1211 
1212  Assert(query != NULL);
1213 
1214  /* Safety check... */
1215  if (!pgss || !pgss_hash)
1216  return;
1217 
1218  /*
1219  * Nothing to do if compute_query_id isn't enabled and no other module
1220  * computed a query identifier.
1221  */
1222  if (queryId == UINT64CONST(0))
1223  return;
1224 
1225  /*
1226  * Confine our attention to the relevant part of the string, if the query
1227  * is a portion of a multi-statement source string, and update query
1228  * location and length if needed.
1229  */
1230  query = CleanQuerytext(query, &query_location, &query_len);
1231 
1232  /* Set up key for hashtable search */
1233 
1234  /* memset() is required when pgssHashKey is without padding only */
1235  memset(&key, 0, sizeof(pgssHashKey));
1236 
1237  key.userid = GetUserId();
1238  key.dbid = MyDatabaseId;
1239  key.queryid = queryId;
1240  key.toplevel = (exec_nested_level == 0);
1241 
1242  /* Lookup the hash table entry with shared lock. */
1243  LWLockAcquire(pgss->lock, LW_SHARED);
1244 
1245  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1246 
1247  /* Create new entry, if not present */
1248  if (!entry)
1249  {
1250  Size query_offset;
1251  int gc_count;
1252  bool stored;
1253  bool do_gc;
1254 
1255  /*
1256  * Create a new, normalized query string if caller asked. We don't
1257  * need to hold the lock while doing this work. (Note: in any case,
1258  * it's possible that someone else creates a duplicate hashtable entry
1259  * in the interval where we don't hold the lock below. That case is
1260  * handled by entry_alloc.)
1261  */
1262  if (jstate)
1263  {
1264  LWLockRelease(pgss->lock);
1265  norm_query = generate_normalized_query(jstate, query,
1266  query_location,
1267  &query_len);
1268  LWLockAcquire(pgss->lock, LW_SHARED);
1269  }
1270 
1271  /* Append new query text to file with only shared lock held */
1272  stored = qtext_store(norm_query ? norm_query : query, query_len,
1273  &query_offset, &gc_count);
1274 
1275  /*
1276  * Determine whether we need to garbage collect external query texts
1277  * while the shared lock is still held. This micro-optimization
1278  * avoids taking the time to decide this while holding exclusive lock.
1279  */
1280  do_gc = need_gc_qtexts();
1281 
1282  /* Need exclusive lock to make a new hashtable entry - promote */
1283  LWLockRelease(pgss->lock);
1285 
1286  /*
1287  * A garbage collection may have occurred while we weren't holding the
1288  * lock. In the unlikely event that this happens, the query text we
1289  * stored above will have been garbage collected, so write it again.
1290  * This should be infrequent enough that doing it while holding
1291  * exclusive lock isn't a performance problem.
1292  */
1293  if (!stored || pgss->gc_count != gc_count)
1294  stored = qtext_store(norm_query ? norm_query : query, query_len,
1295  &query_offset, NULL);
1296 
1297  /* If we failed to write to the text file, give up */
1298  if (!stored)
1299  goto done;
1300 
1301  /* OK to create a new hashtable entry */
1302  entry = entry_alloc(&key, query_offset, query_len, encoding,
1303  jstate != NULL);
1304 
1305  /* If needed, perform garbage collection while exclusive lock held */
1306  if (do_gc)
1307  gc_qtexts();
1308  }
1309 
1310  /* Increment the counts, except when jstate is not NULL */
1311  if (!jstate)
1312  {
1313  /*
1314  * Grab the spinlock while updating the counters (see comment about
1315  * locking rules at the head of the file)
1316  */
1317  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1318 
1319  Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1320 
1321  SpinLockAcquire(&e->mutex);
1322 
1323  /* "Unstick" entry if it was previously sticky */
1324  if (IS_STICKY(e->counters))
1325  e->counters.usage = USAGE_INIT;
1326 
1327  e->counters.calls[kind] += 1;
1328  e->counters.total_time[kind] += total_time;
1329 
1330  if (e->counters.calls[kind] == 1)
1331  {
1332  e->counters.min_time[kind] = total_time;
1333  e->counters.max_time[kind] = total_time;
1334  e->counters.mean_time[kind] = total_time;
1335  }
1336  else
1337  {
1338  /*
1339  * Welford's method for accurately computing variance. See
1340  * <http://www.johndcook.com/blog/standard_deviation/>
1341  */
1342  double old_mean = e->counters.mean_time[kind];
1343 
1344  e->counters.mean_time[kind] +=
1345  (total_time - old_mean) / e->counters.calls[kind];
1346  e->counters.sum_var_time[kind] +=
1347  (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);
1348 
1349  /* calculate min and max time */
1350  if (e->counters.min_time[kind] > total_time)
1351  e->counters.min_time[kind] = total_time;
1352  if (e->counters.max_time[kind] < total_time)
1353  e->counters.max_time[kind] = total_time;
1354  }
1355  e->counters.rows += rows;
1356  e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1357  e->counters.shared_blks_read += bufusage->shared_blks_read;
1360  e->counters.local_blks_hit += bufusage->local_blks_hit;
1361  e->counters.local_blks_read += bufusage->local_blks_read;
1364  e->counters.temp_blks_read += bufusage->temp_blks_read;
1368  e->counters.usage += USAGE_EXEC(total_time);
1369  e->counters.wal_records += walusage->wal_records;
1370  e->counters.wal_fpi += walusage->wal_fpi;
1371  e->counters.wal_bytes += walusage->wal_bytes;
1372 
1373  SpinLockRelease(&e->mutex);
1374  }
1375 
1376 done:
1377  LWLockRelease(pgss->lock);
1378 
1379  /* We postpone this clean-up until we're out of the lock */
1380  if (norm_query)
1381  pfree(norm_query);
1382 }
1383 
1384 /*
1385  * Reset statement statistics corresponding to userid, dbid, and queryid.
1386  */
1387 Datum
1389 {
1390  Oid userid;
1391  Oid dbid;
1392  uint64 queryid;
1393 
1394  userid = PG_GETARG_OID(0);
1395  dbid = PG_GETARG_OID(1);
1396  queryid = (uint64) PG_GETARG_INT64(2);
1397 
1398  entry_reset(userid, dbid, queryid);
1399 
1400  PG_RETURN_VOID();
1401 }
1402 
1403 /*
1404  * Reset statement statistics.
1405  */
1406 Datum
1408 {
1409  entry_reset(0, 0, 0);
1410 
1411  PG_RETURN_VOID();
1412 }
1413 
/*
 * Output column counts of pg_stat_statements(), one per supported SQL API
 * version.  PG_STAT_STATEMENTS_COLS sizes the values[]/nulls[] arrays and so
 * must be the largest of them.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS			33	/* maximum of above */
1422 
1423 /*
1424  * Retrieve statement statistics.
1425  *
1426  * The SQL API of this function has changed multiple times, and will likely
1427  * do so again in future. To support the case where a newer version of this
1428  * loadable module is being used with an old SQL declaration of the function,
1429  * we continue to support the older API versions. For 1.2 and later, the
1430  * expected API version is identified by embedding it in the C name of the
1431  * function. Unfortunately we weren't bright enough to do that for 1.1.
1432  */
1433 Datum
1435 {
1436  bool showtext = PG_GETARG_BOOL(0);
1437 
1438  pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1439 
1440  return (Datum) 0;
1441 }
1442 
1443 Datum
1445 {
1446  bool showtext = PG_GETARG_BOOL(0);
1447 
1448  pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1449 
1450  return (Datum) 0;
1451 }
1452 
1453 Datum
1455 {
1456  bool showtext = PG_GETARG_BOOL(0);
1457 
1458  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1459 
1460  return (Datum) 0;
1461 }
1462 
1463 Datum
1465 {
1466  bool showtext = PG_GETARG_BOOL(0);
1467 
1468  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1469 
1470  return (Datum) 0;
1471 }
1472 
1473 /*
1474  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1475  * This can be removed someday, perhaps.
1476  */
1477 Datum
1479 {
1480  /* If it's really API 1.1, we'll figure that out below */
1481  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1482 
1483  return (Datum) 0;
1484 }
1485 
1486 /* Common code for all versions of pg_stat_statements() */
1487 static void
1489  pgssVersion api_version,
1490  bool showtext)
1491 {
1492  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1493  TupleDesc tupdesc;
1494  Tuplestorestate *tupstore;
1495  MemoryContext per_query_ctx;
1496  MemoryContext oldcontext;
1497  Oid userid = GetUserId();
1498  bool is_allowed_role = false;
1499  char *qbuffer = NULL;
1500  Size qbuffer_size = 0;
1501  Size extent = 0;
1502  int gc_count = 0;
1503  HASH_SEQ_STATUS hash_seq;
1504  pgssEntry *entry;
1505 
1506  /* Superusers or members of pg_read_all_stats members are allowed */
1507  is_allowed_role = is_member_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS);
1508 
1509  /* hash table must exist already */
1510  if (!pgss || !pgss_hash)
1511  ereport(ERROR,
1512  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1513  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1514 
1515  /* check to see if caller supports us returning a tuplestore */
1516  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1517  ereport(ERROR,
1518  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1519  errmsg("set-valued function called in context that cannot accept a set")));
1520  if (!(rsinfo->allowedModes & SFRM_Materialize))
1521  ereport(ERROR,
1522  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1523  errmsg("materialize mode required, but it is not allowed in this context")));
1524 
1525  /* Switch into long-lived context to construct returned data structures */
1526  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1527  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1528 
1529  /* Build a tuple descriptor for our result type */
1530  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1531  elog(ERROR, "return type must be a row type");
1532 
1533  /*
1534  * Check we have the expected number of output arguments. Aside from
1535  * being a good safety check, we need a kluge here to detect API version
1536  * 1.1, which was wedged into the code in an ill-considered way.
1537  */
1538  switch (tupdesc->natts)
1539  {
1541  if (api_version != PGSS_V1_0)
1542  elog(ERROR, "incorrect number of output arguments");
1543  break;
1545  /* pg_stat_statements() should have told us 1.0 */
1546  if (api_version != PGSS_V1_0)
1547  elog(ERROR, "incorrect number of output arguments");
1548  api_version = PGSS_V1_1;
1549  break;
1551  if (api_version != PGSS_V1_2)
1552  elog(ERROR, "incorrect number of output arguments");
1553  break;
1555  if (api_version != PGSS_V1_3)
1556  elog(ERROR, "incorrect number of output arguments");
1557  break;
1559  if (api_version != PGSS_V1_8)
1560  elog(ERROR, "incorrect number of output arguments");
1561  break;
1563  if (api_version != PGSS_V1_9)
1564  elog(ERROR, "incorrect number of output arguments");
1565  break;
1566  default:
1567  elog(ERROR, "incorrect number of output arguments");
1568  }
1569 
1570  tupstore = tuplestore_begin_heap(true, false, work_mem);
1571  rsinfo->returnMode = SFRM_Materialize;
1572  rsinfo->setResult = tupstore;
1573  rsinfo->setDesc = tupdesc;
1574 
1575  MemoryContextSwitchTo(oldcontext);
1576 
1577  /*
1578  * We'd like to load the query text file (if needed) while not holding any
1579  * lock on pgss->lock. In the worst case we'll have to do this again
1580  * after we have the lock, but it's unlikely enough to make this a win
1581  * despite occasional duplicated work. We need to reload if anybody
1582  * writes to the file (either a retail qtext_store(), or a garbage
1583  * collection) between this point and where we've gotten shared lock. If
1584  * a qtext_store is actually in progress when we look, we might as well
1585  * skip the speculative load entirely.
1586  */
1587  if (showtext)
1588  {
1589  int n_writers;
1590 
1591  /* Take the mutex so we can examine variables */
1592  {
1593  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1594 
1595  SpinLockAcquire(&s->mutex);
1596  extent = s->extent;
1597  n_writers = s->n_writers;
1598  gc_count = s->gc_count;
1599  SpinLockRelease(&s->mutex);
1600  }
1601 
1602  /* No point in loading file now if there are active writers */
1603  if (n_writers == 0)
1604  qbuffer = qtext_load_file(&qbuffer_size);
1605  }
1606 
1607  /*
1608  * Get shared lock, load or reload the query text file if we must, and
1609  * iterate over the hashtable entries.
1610  *
1611  * With a large hash table, we might be holding the lock rather longer
1612  * than one could wish. However, this only blocks creation of new hash
1613  * table entries, and the larger the hash table the less likely that is to
1614  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1615  * we need to partition the hash table to limit the time spent holding any
1616  * one lock.
1617  */
1618  LWLockAcquire(pgss->lock, LW_SHARED);
1619 
1620  if (showtext)
1621  {
1622  /*
1623  * Here it is safe to examine extent and gc_count without taking the
1624  * mutex. Note that although other processes might change
1625  * pgss->extent just after we look at it, the strings they then write
1626  * into the file cannot yet be referenced in the hashtable, so we
1627  * don't care whether we see them or not.
1628  *
1629  * If qtext_load_file fails, we just press on; we'll return NULL for
1630  * every query text.
1631  */
1632  if (qbuffer == NULL ||
1633  pgss->extent != extent ||
1634  pgss->gc_count != gc_count)
1635  {
1636  if (qbuffer)
1637  free(qbuffer);
1638  qbuffer = qtext_load_file(&qbuffer_size);
1639  }
1640  }
1641 
1642  hash_seq_init(&hash_seq, pgss_hash);
1643  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1644  {
1646  bool nulls[PG_STAT_STATEMENTS_COLS];
1647  int i = 0;
1648  Counters tmp;
1649  double stddev;
1650  int64 queryid = entry->key.queryid;
1651 
1652  memset(values, 0, sizeof(values));
1653  memset(nulls, 0, sizeof(nulls));
1654 
1655  values[i++] = ObjectIdGetDatum(entry->key.userid);
1656  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1657  if (api_version >= PGSS_V1_9)
1658  values[i++] = BoolGetDatum(entry->key.toplevel);
1659 
1660  if (is_allowed_role || entry->key.userid == userid)
1661  {
1662  if (api_version >= PGSS_V1_2)
1663  values[i++] = Int64GetDatumFast(queryid);
1664 
1665  if (showtext)
1666  {
1667  char *qstr = qtext_fetch(entry->query_offset,
1668  entry->query_len,
1669  qbuffer,
1670  qbuffer_size);
1671 
1672  if (qstr)
1673  {
1674  char *enc;
1675 
1676  enc = pg_any_to_server(qstr,
1677  entry->query_len,
1678  entry->encoding);
1679 
1680  values[i++] = CStringGetTextDatum(enc);
1681 
1682  if (enc != qstr)
1683  pfree(enc);
1684  }
1685  else
1686  {
1687  /* Just return a null if we fail to find the text */
1688  nulls[i++] = true;
1689  }
1690  }
1691  else
1692  {
1693  /* Query text not requested */
1694  nulls[i++] = true;
1695  }
1696  }
1697  else
1698  {
1699  /* Don't show queryid */
1700  if (api_version >= PGSS_V1_2)
1701  nulls[i++] = true;
1702 
1703  /*
1704  * Don't show query text, but hint as to the reason for not doing
1705  * so if it was requested
1706  */
1707  if (showtext)
1708  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1709  else
1710  nulls[i++] = true;
1711  }
1712 
1713  /* copy counters to a local variable to keep locking time short */
1714  {
1715  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1716 
1717  SpinLockAcquire(&e->mutex);
1718  tmp = e->counters;
1719  SpinLockRelease(&e->mutex);
1720  }
1721 
1722  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1723  if (IS_STICKY(tmp))
1724  continue;
1725 
1726  /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1727  for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1728  {
1729  if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1730  {
1731  values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1732  values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1733  }
1734 
1735  if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1736  api_version >= PGSS_V1_8)
1737  {
1738  values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1739  values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1740  values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1741 
1742  /*
1743  * Note we are calculating the population variance here, not
1744  * the sample variance, as we have data for the whole
1745  * population, so Bessel's correction is not used, and we
1746  * don't divide by tmp.calls - 1.
1747  */
1748  if (tmp.calls[kind] > 1)
1749  stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1750  else
1751  stddev = 0.0;
1752  values[i++] = Float8GetDatumFast(stddev);
1753  }
1754  }
1755  values[i++] = Int64GetDatumFast(tmp.rows);
1756  values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1757  values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1758  if (api_version >= PGSS_V1_1)
1759  values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1760  values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1761  values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1762  values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1763  if (api_version >= PGSS_V1_1)
1764  values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1765  values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1766  values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1767  values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1768  if (api_version >= PGSS_V1_1)
1769  {
1770  values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1771  values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1772  }
1773  if (api_version >= PGSS_V1_8)
1774  {
1775  char buf[256];
1776  Datum wal_bytes;
1777 
1778  values[i++] = Int64GetDatumFast(tmp.wal_records);
1779  values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1780 
1781  snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1782 
1783  /* Convert to numeric. */
1784  wal_bytes = DirectFunctionCall3(numeric_in,
1785  CStringGetDatum(buf),
1786  ObjectIdGetDatum(0),
1787  Int32GetDatum(-1));
1788  values[i++] = wal_bytes;
1789  }
1790 
1791  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1792  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1793  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1794  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1795  api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1796  api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1797  -1 /* fail if you forget to update this assert */ ));
1798 
1799  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1800  }
1801 
1802  /* clean up and return the tuplestore */
1803  LWLockRelease(pgss->lock);
1804 
1805  if (qbuffer)
1806  free(qbuffer);
1807 
1808  tuplestore_donestoring(tupstore);
1809 }
1810 
1811 /* Number of output arguments (columns) for pg_stat_statements_info */
1812 #define PG_STAT_STATEMENTS_INFO_COLS 2
1813 
1814 /*
1815  * Return statistics of pg_stat_statements.
1816  */
1817 Datum
1819 {
1820  pgssGlobalStats stats;
1821  TupleDesc tupdesc;
1823  bool nulls[PG_STAT_STATEMENTS_INFO_COLS];
1824 
1825  if (!pgss || !pgss_hash)
1826  ereport(ERROR,
1827  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1828  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1829 
1830  /* Build a tuple descriptor for our result type */
1831  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1832  elog(ERROR, "return type must be a row type");
1833 
1834  MemSet(values, 0, sizeof(values));
1835  MemSet(nulls, 0, sizeof(nulls));
1836 
1837  /* Read global statistics for pg_stat_statements */
1838  {
1839  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1840 
1841  SpinLockAcquire(&s->mutex);
1842  stats = s->stats;
1843  SpinLockRelease(&s->mutex);
1844  }
1845 
1846  values[0] = Int64GetDatum(stats.dealloc);
1847  values[1] = TimestampTzGetDatum(stats.stats_reset);
1848 
1849  PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
1850 }
1851 
1852 /*
1853  * Estimate shared memory space needed.
1854  */
1855 static Size
1857 {
1858  Size size;
1859 
1860  size = MAXALIGN(sizeof(pgssSharedState));
1861  size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
1862 
1863  return size;
1864 }
1865 
1866 /*
1867  * Allocate a new hashtable entry.
1868  * caller must hold an exclusive lock on pgss->lock
1869  *
1870  * "query" need not be null-terminated; we rely on query_len instead
1871  *
1872  * If "sticky" is true, make the new entry artificially sticky so that it will
1873  * probably still be there when the query finishes execution. We do this by
1874  * giving it a median usage value rather than the normal value. (Strictly
1875  * speaking, query strings are normalized on a best effort basis, though it
1876  * would be difficult to demonstrate this even under artificial conditions.)
1877  *
1878  * Note: despite needing exclusive lock, it's not an error for the target
1879  * entry to already exist. This is because pgss_store releases and
1880  * reacquires lock after failing to find a match; so someone else could
1881  * have made the entry while we waited to get exclusive lock.
1882  */
1883 static pgssEntry *
1884 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
1885  bool sticky)
1886 {
1887  pgssEntry *entry;
1888  bool found;
1889 
1890  /* Make space if needed */
1891  while (hash_get_num_entries(pgss_hash) >= pgss_max)
1892  entry_dealloc();
1893 
1894  /* Find or create an entry with desired hash code */
1895  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1896 
1897  if (!found)
1898  {
1899  /* New entry, initialize it */
1900 
1901  /* reset the statistics */
1902  memset(&entry->counters, 0, sizeof(Counters));
1903  /* set the appropriate initial usage count */
1904  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1905  /* re-initialize the mutex each time ... we assume no one using it */
1906  SpinLockInit(&entry->mutex);
1907  /* ... and don't forget the query text metadata */
1908  Assert(query_len >= 0);
1909  entry->query_offset = query_offset;
1910  entry->query_len = query_len;
1911  entry->encoding = encoding;
1912  }
1913 
1914  return entry;
1915 }
1916 
1917 /*
1918  * qsort comparator for sorting into increasing usage order
1919  */
1920 static int
1921 entry_cmp(const void *lhs, const void *rhs)
1922 {
1923  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1924  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1925 
1926  if (l_usage < r_usage)
1927  return -1;
1928  else if (l_usage > r_usage)
1929  return +1;
1930  else
1931  return 0;
1932 }
1933 
1934 /*
1935  * Deallocate least-used entries.
1936  *
1937  * Caller must hold an exclusive lock on pgss->lock.
1938  */
1939 static void
1941 {
1942  HASH_SEQ_STATUS hash_seq;
1943  pgssEntry **entries;
1944  pgssEntry *entry;
1945  int nvictims;
1946  int i;
1947  Size tottextlen;
1948  int nvalidtexts;
1949 
1950  /*
1951  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1952  * While we're scanning the table, apply the decay factor to the usage
1953  * values, and update the mean query length.
1954  *
1955  * Note that the mean query length is almost immediately obsolete, since
1956  * we compute it before not after discarding the least-used entries.
1957  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
1958  * making two passes to get a more current result. Likewise, the new
1959  * cur_median_usage includes the entries we're about to zap.
1960  */
1961 
1962  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1963 
1964  i = 0;
1965  tottextlen = 0;
1966  nvalidtexts = 0;
1967 
1968  hash_seq_init(&hash_seq, pgss_hash);
1969  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1970  {
1971  entries[i++] = entry;
1972  /* "Sticky" entries get a different usage decay rate. */
1973  if (IS_STICKY(entry->counters))
1975  else
1977  /* In the mean length computation, ignore dropped texts. */
1978  if (entry->query_len >= 0)
1979  {
1980  tottextlen += entry->query_len + 1;
1981  nvalidtexts++;
1982  }
1983  }
1984 
1985  /* Sort into increasing order by usage */
1986  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1987 
1988  /* Record the (approximate) median usage */
1989  if (i > 0)
1990  pgss->cur_median_usage = entries[i / 2]->counters.usage;
1991  /* Record the mean query length */
1992  if (nvalidtexts > 0)
1993  pgss->mean_query_len = tottextlen / nvalidtexts;
1994  else
1996 
1997  /* Now zap an appropriate fraction of lowest-usage entries */
1998  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1999  nvictims = Min(nvictims, i);
2000 
2001  for (i = 0; i < nvictims; i++)
2002  {
2003  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2004  }
2005 
2006  pfree(entries);
2007 
2008  /* Increment the number of times entries are deallocated */
2009  {
2010  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2011 
2012  SpinLockAcquire(&s->mutex);
2013  s->stats.dealloc += 1;
2014  SpinLockRelease(&s->mutex);
2015  }
2016 }
2017 
2018 /*
2019  * Given a query string (not necessarily null-terminated), allocate a new
2020  * entry in the external query text file and store the string there.
2021  *
2022  * If successful, returns true, and stores the new entry's offset in the file
2023  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2024  * number of garbage collections that have occurred so far.
2025  *
2026  * On failure, returns false.
2027  *
2028  * At least a shared lock on pgss->lock must be held by the caller, so as
2029  * to prevent a concurrent garbage collection. Share-lock-holding callers
2030  * should pass a gc_count pointer to obtain the number of garbage collections,
2031  * so that they can recheck the count after obtaining exclusive lock to
2032  * detect whether a garbage collection occurred (and removed this entry).
2033  */
2034 static bool
2035 qtext_store(const char *query, int query_len,
2036  Size *query_offset, int *gc_count)
2037 {
2038  Size off;
2039  int fd;
2040 
2041  /*
2042  * We use a spinlock to protect extent/n_writers/gc_count, so that
2043  * multiple processes may execute this function concurrently.
2044  */
2045  {
2046  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2047 
2048  SpinLockAcquire(&s->mutex);
2049  off = s->extent;
2050  s->extent += query_len + 1;
2051  s->n_writers++;
2052  if (gc_count)
2053  *gc_count = s->gc_count;
2054  SpinLockRelease(&s->mutex);
2055  }
2056 
2057  *query_offset = off;
2058 
2059  /* Now write the data into the successfully-reserved part of the file */
2060  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2061  if (fd < 0)
2062  goto error;
2063 
2064  if (pg_pwrite(fd, query, query_len, off) != query_len)
2065  goto error;
2066  if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2067  goto error;
2068 
2069  CloseTransientFile(fd);
2070 
2071  /* Mark our write complete */
2072  {
2073  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2074 
2075  SpinLockAcquire(&s->mutex);
2076  s->n_writers--;
2077  SpinLockRelease(&s->mutex);
2078  }
2079 
2080  return true;
2081 
2082 error:
2083  ereport(LOG,
2085  errmsg("could not write file \"%s\": %m",
2086  PGSS_TEXT_FILE)));
2087 
2088  if (fd >= 0)
2089  CloseTransientFile(fd);
2090 
2091  /* Mark our write complete */
2092  {
2093  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2094 
2095  SpinLockAcquire(&s->mutex);
2096  s->n_writers--;
2097  SpinLockRelease(&s->mutex);
2098  }
2099 
2100  return false;
2101 }
2102 
2103 /*
2104  * Read the external query text file into a malloc'd buffer.
2105  *
2106  * Returns NULL (without throwing an error) if unable to read, eg
2107  * file not there or insufficient memory.
2108  *
2109  * On success, the buffer size is also returned into *buffer_size.
2110  *
2111  * This can be called without any lock on pgss->lock, but in that case
2112  * the caller is responsible for verifying that the result is sane.
2113  */
2114 static char *
2115 qtext_load_file(Size *buffer_size)
2116 {
2117  char *buf;
2118  int fd;
2119  struct stat stat;
2120 
2121  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
2122  if (fd < 0)
2123  {
2124  if (errno != ENOENT)
2125  ereport(LOG,
2127  errmsg("could not read file \"%s\": %m",
2128  PGSS_TEXT_FILE)));
2129  return NULL;
2130  }
2131 
2132  /* Get file length */
2133  if (fstat(fd, &stat))
2134  {
2135  ereport(LOG,
2137  errmsg("could not stat file \"%s\": %m",
2138  PGSS_TEXT_FILE)));
2139  CloseTransientFile(fd);
2140  return NULL;
2141  }
2142 
2143  /* Allocate buffer; beware that off_t might be wider than size_t */
2144  if (stat.st_size <= MaxAllocHugeSize)
2145  buf = (char *) malloc(stat.st_size);
2146  else
2147  buf = NULL;
2148  if (buf == NULL)
2149  {
2150  ereport(LOG,
2151  (errcode(ERRCODE_OUT_OF_MEMORY),
2152  errmsg("out of memory"),
2153  errdetail("Could not allocate enough memory to read file \"%s\".",
2154  PGSS_TEXT_FILE)));
2155  CloseTransientFile(fd);
2156  return NULL;
2157  }
2158 
2159  /*
2160  * OK, slurp in the file. If we get a short read and errno doesn't get
2161  * set, the reason is probably that garbage collection truncated the file
2162  * since we did the fstat(), so we don't log a complaint --- but we don't
2163  * return the data, either, since it's most likely corrupt due to
2164  * concurrent writes from garbage collection.
2165  */
2166  errno = 0;
2167  if (read(fd, buf, stat.st_size) != stat.st_size)
2168  {
2169  if (errno)
2170  ereport(LOG,
2172  errmsg("could not read file \"%s\": %m",
2173  PGSS_TEXT_FILE)));
2174  free(buf);
2175  CloseTransientFile(fd);
2176  return NULL;
2177  }
2178 
2179  if (CloseTransientFile(fd) != 0)
2180  ereport(LOG,
2182  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2183 
2184  *buffer_size = stat.st_size;
2185  return buf;
2186 }
2187 
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len describe the text (length excludes the trailing
 * null); buffer/buffer_size describe the loaded file image.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminator must fit, ie
	 * query_offset + query_len < buffer_size.  Test that without forming the
	 * sum, since a garbage offset close to the maximum Size value would make
	 * the unsigned addition wrap around and slip past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;
	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;
	/* Looks OK */
	return buffer + query_offset;
}
2211 
2212 /*
2213  * Do we need to garbage-collect the external query text file?
2214  *
2215  * Caller should hold at least a shared lock on pgss->lock.
2216  */
2217 static bool
2219 {
2220  Size extent;
2221 
2222  /* Read shared extent pointer */
2223  {
2224  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2225 
2226  SpinLockAcquire(&s->mutex);
2227  extent = s->extent;
2228  SpinLockRelease(&s->mutex);
2229  }
2230 
2231  /* Don't proceed if file does not exceed 512 bytes per possible entry */
2232  if (extent < 512 * pgss_max)
2233  return false;
2234 
2235  /*
2236  * Don't proceed if file is less than about 50% bloat. Nothing can or
2237  * should be done in the event of unusually large query texts accounting
2238  * for file's large size. We go to the trouble of maintaining the mean
2239  * query length in order to prevent garbage collection from thrashing
2240  * uselessly.
2241  */
2242  if (extent < pgss->mean_query_len * pgss_max * 2)
2243  return false;
2244 
2245  return true;
2246 }
2247 
2248 /*
2249  * Garbage-collect orphaned query texts in external file.
2250  *
2251  * This won't be called often in the typical case, since it's likely that
2252  * there won't be too much churn, and besides, a similar compaction process
2253  * occurs when serializing to disk at shutdown or as part of resetting.
2254  * Despite this, it seems prudent to plan for the edge case where the file
2255  * becomes unreasonably large, with no other method of compaction likely to
2256  * occur in the foreseeable future.
2257  *
2258  * The caller must hold an exclusive lock on pgss->lock.
2259  *
2260  * At the first sign of trouble we unlink the query text file to get a clean
2261  * slate (although existing statistics are retained), rather than risk
2262  * thrashing by allowing the same problem case to recur indefinitely.
2263  */
2264 static void
2266 {
2267  char *qbuffer;
2268  Size qbuffer_size;
2269  FILE *qfile = NULL;
2270  HASH_SEQ_STATUS hash_seq;
2271  pgssEntry *entry;
2272  Size extent;
2273  int nentries;
2274 
2275  /*
2276  * When called from pgss_store, some other session might have proceeded
2277  * with garbage collection in the no-lock-held interim of lock strength
2278  * escalation. Check once more that this is actually necessary.
2279  */
2280  if (!need_gc_qtexts())
2281  return;
2282 
2283  /*
2284  * Load the old texts file. If we fail (out of memory, for instance),
2285  * invalidate query texts. Hopefully this is rare. It might seem better
2286  * to leave things alone on an OOM failure, but the problem is that the
2287  * file is only going to get bigger; hoping for a future non-OOM result is
2288  * risky and can easily lead to complete denial of service.
2289  */
2290  qbuffer = qtext_load_file(&qbuffer_size);
2291  if (qbuffer == NULL)
2292  goto gc_fail;
2293 
2294  /*
2295  * We overwrite the query texts file in place, so as to reduce the risk of
2296  * an out-of-disk-space failure. Since the file is guaranteed not to get
2297  * larger, this should always work on traditional filesystems; though we
2298  * could still lose on copy-on-write filesystems.
2299  */
2301  if (qfile == NULL)
2302  {
2303  ereport(LOG,
2305  errmsg("could not write file \"%s\": %m",
2306  PGSS_TEXT_FILE)));
2307  goto gc_fail;
2308  }
2309 
2310  extent = 0;
2311  nentries = 0;
2312 
2313  hash_seq_init(&hash_seq, pgss_hash);
2314  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2315  {
2316  int query_len = entry->query_len;
2317  char *qry = qtext_fetch(entry->query_offset,
2318  query_len,
2319  qbuffer,
2320  qbuffer_size);
2321 
2322  if (qry == NULL)
2323  {
2324  /* Trouble ... drop the text */
2325  entry->query_offset = 0;
2326  entry->query_len = -1;
2327  /* entry will not be counted in mean query length computation */
2328  continue;
2329  }
2330 
2331  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2332  {
2333  ereport(LOG,
2335  errmsg("could not write file \"%s\": %m",
2336  PGSS_TEXT_FILE)));
2337  hash_seq_term(&hash_seq);
2338  goto gc_fail;
2339  }
2340 
2341  entry->query_offset = extent;
2342  extent += query_len + 1;
2343  nentries++;
2344  }
2345 
2346  /*
2347  * Truncate away any now-unused space. If this fails for some odd reason,
2348  * we log it, but there's no need to fail.
2349  */
2350  if (ftruncate(fileno(qfile), extent) != 0)
2351  ereport(LOG,
2353  errmsg("could not truncate file \"%s\": %m",
2354  PGSS_TEXT_FILE)));
2355 
2356  if (FreeFile(qfile))
2357  {
2358  ereport(LOG,
2360  errmsg("could not write file \"%s\": %m",
2361  PGSS_TEXT_FILE)));
2362  qfile = NULL;
2363  goto gc_fail;
2364  }
2365 
2366  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2367  pgss->extent, extent);
2368 
2369  /* Reset the shared extent pointer */
2370  pgss->extent = extent;
2371 
2372  /*
2373  * Also update the mean query length, to be sure that need_gc_qtexts()
2374  * won't still think we have a problem.
2375  */
2376  if (nentries > 0)
2377  pgss->mean_query_len = extent / nentries;
2378  else
2380 
2381  free(qbuffer);
2382 
2383  /*
2384  * OK, count a garbage collection cycle. (Note: even though we have
2385  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2386  * other processes may examine gc_count while holding only the mutex.
2387  * Also, we have to advance the count *after* we've rewritten the file,
2388  * else other processes might not realize they read a stale file.)
2389  */
2390  record_gc_qtexts();
2391 
2392  return;
2393 
2394 gc_fail:
2395  /* clean up resources */
2396  if (qfile)
2397  FreeFile(qfile);
2398  if (qbuffer)
2399  free(qbuffer);
2400 
2401  /*
2402  * Since the contents of the external file are now uncertain, mark all
2403  * hashtable entries as having invalid texts.
2404  */
2405  hash_seq_init(&hash_seq, pgss_hash);
2406  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2407  {
2408  entry->query_offset = 0;
2409  entry->query_len = -1;
2410  }
2411 
2412  /*
2413  * Destroy the query text file and create a new, empty one
2414  */
2415  (void) unlink(PGSS_TEXT_FILE);
2417  if (qfile == NULL)
2418  ereport(LOG,
2420  errmsg("could not recreate file \"%s\": %m",
2421  PGSS_TEXT_FILE)));
2422  else
2423  FreeFile(qfile);
2424 
2425  /* Reset the shared extent pointer */
2426  pgss->extent = 0;
2427 
2428  /* Reset mean_query_len to match the new state */
2430 
2431  /*
2432  * Bump the GC count even though we failed.
2433  *
2434  * This is needed to make concurrent readers of file without any lock on
2435  * pgss->lock notice existence of new version of file. Once readers
2436  * subsequently observe a change in GC count with pgss->lock held, that
2437  * forces a safe reopen of file. Writers also require that we bump here,
2438  * of course. (As required by locking protocol, readers and writers don't
2439  * trust earlier file contents until gc_count is found unchanged after
2440  * pgss->lock acquired in shared or exclusive mode respectively.)
2441  */
2442  record_gc_qtexts();
2443 }
2444 
2445 /*
2446  * Release entries corresponding to parameters passed.
2447  */
2448 static void
2450 {
2451  HASH_SEQ_STATUS hash_seq;
2452  pgssEntry *entry;
2453  FILE *qfile;
2454  long num_entries;
2455  long num_remove = 0;
2456  pgssHashKey key;
2457 
2458  if (!pgss || !pgss_hash)
2459  ereport(ERROR,
2460  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2461  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
2462 
2464  num_entries = hash_get_num_entries(pgss_hash);
2465 
2466  if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2467  {
2468  /* If all the parameters are available, use the fast path. */
2469  memset(&key, 0, sizeof(pgssHashKey));
2470  key.userid = userid;
2471  key.dbid = dbid;
2472  key.queryid = queryid;
2473 
2474  /* Remove the key if it exists, starting with the top-level entry */
2475  key.toplevel = false;
2476  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2477  if (entry) /* found */
2478  num_remove++;
2479 
2480  /* Also remove entries for top level statements */
2481  key.toplevel = true;
2482 
2483  /* Remove the key if exists */
2484  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2485  if (entry) /* found */
2486  num_remove++;
2487  }
2488  else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2489  {
2490  /* Remove entries corresponding to valid parameters. */
2491  hash_seq_init(&hash_seq, pgss_hash);
2492  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2493  {
2494  if ((!userid || entry->key.userid == userid) &&
2495  (!dbid || entry->key.dbid == dbid) &&
2496  (!queryid || entry->key.queryid == queryid))
2497  {
2498  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2499  num_remove++;
2500  }
2501  }
2502  }
2503  else
2504  {
2505  /* Remove all entries. */
2506  hash_seq_init(&hash_seq, pgss_hash);
2507  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2508  {
2509  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2510  num_remove++;
2511  }
2512  }
2513 
2514  /* All entries are removed? */
2515  if (num_entries != num_remove)
2516  goto release_lock;
2517 
2518  /*
2519  * Reset global statistics for pg_stat_statements since all entries are
2520  * removed.
2521  */
2522  {
2523  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2524  TimestampTz stats_reset = GetCurrentTimestamp();
2525 
2526  SpinLockAcquire(&s->mutex);
2527  s->stats.dealloc = 0;
2528  s->stats.stats_reset = stats_reset;
2529  SpinLockRelease(&s->mutex);
2530  }
2531 
2532  /*
2533  * Write new empty query file, perhaps even creating a new one to recover
2534  * if the file was missing.
2535  */
2537  if (qfile == NULL)
2538  {
2539  ereport(LOG,
2541  errmsg("could not create file \"%s\": %m",
2542  PGSS_TEXT_FILE)));
2543  goto done;
2544  }
2545 
2546  /* If ftruncate fails, log it, but it's not a fatal problem */
2547  if (ftruncate(fileno(qfile), 0) != 0)
2548  ereport(LOG,
2550  errmsg("could not truncate file \"%s\": %m",
2551  PGSS_TEXT_FILE)));
2552 
2553  FreeFile(qfile);
2554 
2555 done:
2556  pgss->extent = 0;
2557  /* This counts as a query text garbage collection for our purposes */
2558  record_gc_qtexts();
2559 
2560 release_lock:
2561  LWLockRelease(pgss->lock);
2562 }
2563 
2564 /*
2565  * Generate a normalized version of the query string that will be used to
2566  * represent all similar queries.
2567  *
2568  * Note that the normalized representation may well vary depending on
2569  * just which "equivalent" query is used to create the hashtable entry.
2570  * We assume this is OK.
2571  *
2572  * If query_loc > 0, then "query" has been advanced by that much compared to
2573  * the original string start, so we need to translate the provided locations
2574  * to compensate. (This lets us avoid re-scanning statements before the one
2575  * of interest, so it's worth doing.)
2576  *
2577  * *query_len_p contains the input string length, and is updated with
2578  * the result string length on exit. The resulting string might be longer
2579  * or shorter depending on what happens with replacement of constants.
2580  *
2581  * Returns a palloc'd string.
2582  */
2583 static char *
2584 generate_normalized_query(JumbleState *jstate, const char *query,
2585  int query_loc, int *query_len_p)
2586 {
2587  char *norm_query;
2588  int query_len = *query_len_p;
2589  int i,
2590  norm_query_buflen, /* Space allowed for norm_query */
2591  len_to_wrt, /* Length (in bytes) to write */
2592  quer_loc = 0, /* Source query byte location */
2593  n_quer_loc = 0, /* Normalized query byte location */
2594  last_off = 0, /* Offset from start for previous tok */
2595  last_tok_len = 0; /* Length (in bytes) of that tok */
2596 
2597  /*
2598  * Get constants' lengths (core system only gives us locations). Note
2599  * this also ensures the items are sorted by location.
2600  */
2601  fill_in_constant_lengths(jstate, query, query_loc);
2602 
2603  /*
2604  * Allow for $n symbols to be longer than the constants they replace.
2605  * Constants must take at least one byte in text form, while a $n symbol
2606  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2607  * could refine that limit based on the max value of n for the current
2608  * query, but it hardly seems worth any extra effort to do so.
2609  */
2610  norm_query_buflen = query_len + jstate->clocations_count * 10;
2611 
2612  /* Allocate result buffer */
2613  norm_query = palloc(norm_query_buflen + 1);
2614 
2615  for (i = 0; i < jstate->clocations_count; i++)
2616  {
2617  int off, /* Offset from start for cur tok */
2618  tok_len; /* Length (in bytes) of that tok */
2619 
2620  off = jstate->clocations[i].location;
2621  /* Adjust recorded location if we're dealing with partial string */
2622  off -= query_loc;
2623 
2624  tok_len = jstate->clocations[i].length;
2625 
2626  if (tok_len < 0)
2627  continue; /* ignore any duplicates */
2628 
2629  /* Copy next chunk (what precedes the next constant) */
2630  len_to_wrt = off - last_off;
2631  len_to_wrt -= last_tok_len;
2632 
2633  Assert(len_to_wrt >= 0);
2634  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2635  n_quer_loc += len_to_wrt;
2636 
2637  /* And insert a param symbol in place of the constant token */
2638  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2639  i + 1 + jstate->highest_extern_param_id);
2640 
2641  quer_loc = off + tok_len;
2642  last_off = off;
2643  last_tok_len = tok_len;
2644  }
2645 
2646  /*
2647  * We've copied up until the last ignorable constant. Copy over the
2648  * remaining bytes of the original query string.
2649  */
2650  len_to_wrt = query_len - quer_loc;
2651 
2652  Assert(len_to_wrt >= 0);
2653  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2654  n_quer_loc += len_to_wrt;
2655 
2656  Assert(n_quer_loc <= norm_query_buflen);
2657  norm_query[n_quer_loc] = '\0';
2658 
2659  *query_len_p = n_quer_loc;
2660  return norm_query;
2661 }
2662 
2663 /*
2664  * Given a valid SQL string and an array of constant-location records,
2665  * fill in the textual lengths of those constants.
2666  *
2667  * The constants may use any allowed constant syntax, such as float literals,
2668  * bit-strings, single-quoted strings and dollar-quoted strings. This is
2669  * accomplished by using the public API for the core scanner.
2670  *
2671  * It is the caller's job to ensure that the string is a valid SQL statement
2672  * with constants at the indicated locations. Since in practice the string
2673  * has already been parsed, and the locations that the caller provides will
2674  * have originated from within the authoritative parser, this should not be
2675  * a problem.
2676  *
2677  * Duplicate constant pointers are possible, and will have their lengths
2678  * marked as '-1', so that they are later ignored. (Actually, we assume the
2679  * lengths were initialized as -1 to start with, and don't change them here.)
2680  *
2681  * If query_loc > 0, then "query" has been advanced by that much compared to
2682  * the original string start, so we need to translate the provided locations
2683  * to compensate. (This lets us avoid re-scanning statements before the one
2684  * of interest, so it's worth doing.)
2685  *
2686  * N.B. There is an assumption that a '-' character at a Const location begins
2687  * a negative numeric constant. This precludes there ever being another
2688  * reason for a constant to start with a '-'.
2689  */
2690 static void
2691 fill_in_constant_lengths(JumbleState *jstate, const char *query,
2692  int query_loc)
2693 {
2694  LocationLen *locs;
2696  core_yy_extra_type yyextra;
2697  core_YYSTYPE yylval;
2698  YYLTYPE yylloc;
2699  int last_loc = -1;
2700  int i;
2701 
2702  /*
2703  * Sort the records by location so that we can process them in order while
2704  * scanning the query text.
2705  */
2706  if (jstate->clocations_count > 1)
2707  qsort(jstate->clocations, jstate->clocations_count,
2708  sizeof(LocationLen), comp_location);
2709  locs = jstate->clocations;
2710 
2711  /* initialize the flex scanner --- should match raw_parser() */
2712  yyscanner = scanner_init(query,
2713  &yyextra,
2714  &ScanKeywords,
2716 
2717  /* we don't want to re-emit any escape string warnings */
2718  yyextra.escape_string_warning = false;
2719 
2720  /* Search for each constant, in sequence */
2721  for (i = 0; i < jstate->clocations_count; i++)
2722  {
2723  int loc = locs[i].location;
2724  int tok;
2725 
2726  /* Adjust recorded location if we're dealing with partial string */
2727  loc -= query_loc;
2728 
2729  Assert(loc >= 0);
2730 
2731  if (loc <= last_loc)
2732  continue; /* Duplicate constant, ignore */
2733 
2734  /* Lex tokens until we find the desired constant */
2735  for (;;)
2736  {
2737  tok = core_yylex(&yylval, &yylloc, yyscanner);
2738 
2739  /* We should not hit end-of-string, but if we do, behave sanely */
2740  if (tok == 0)
2741  break; /* out of inner for-loop */
2742 
2743  /*
2744  * We should find the token position exactly, but if we somehow
2745  * run past it, work with that.
2746  */
2747  if (yylloc >= loc)
2748  {
2749  if (query[loc] == '-')
2750  {
2751  /*
2752  * It's a negative value - this is the one and only case
2753  * where we replace more than a single token.
2754  *
2755  * Do not compensate for the core system's special-case
2756  * adjustment of location to that of the leading '-'
2757  * operator in the event of a negative constant. It is
2758  * also useful for our purposes to start from the minus
2759  * symbol. In this way, queries like "select * from foo
2760  * where bar = 1" and "select * from foo where bar = -2"
2761  * will have identical normalized query strings.
2762  */
2763  tok = core_yylex(&yylval, &yylloc, yyscanner);
2764  if (tok == 0)
2765  break; /* out of inner for-loop */
2766  }
2767 
2768  /*
2769  * We now rely on the assumption that flex has placed a zero
2770  * byte after the text of the current token in scanbuf.
2771  */
2772  locs[i].length = strlen(yyextra.scanbuf + loc);
2773  break; /* out of inner for-loop */
2774  }
2775  }
2776 
2777  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2778  if (tok == 0)
2779  break;
2780 
2781  last_loc = loc;
2782  }
2783 
2784  scanner_finish(yyscanner);
2785 }
2786 
2787 /*
2788  * comp_location: comparator for qsorting LocationLen structs by location
2789  */
2790 static int
2791 comp_location(const void *a, const void *b)
2792 {
2793  int l = ((const LocationLen *) a)->location;
2794  int r = ((const LocationLen *) b)->location;
2795 
2796  if (l < r)
2797  return -1;
2798  else if (l > r)
2799  return +1;
2800  else
2801  return 0;
2802 }
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
int slock_t
Definition: s_lock.h:934
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9141
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:69
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:309
void _PG_init(void)
Definition: lwlock.h:31
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:71
#define PG_STAT_STATEMENTS_COLS_V1_3
#define DEBUG1
Definition: elog.h:25
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:207
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
double total_time[PGSS_NUMKIND]
WalUsage walusage
Definition: instrument.h:76
#define PG_STAT_STATEMENTS_COLS_V1_8
int stmt_location
Definition: parsenodes.h:192
EState * estate
Definition: execdesc.h:48
void * core_yyscan_t
Definition: scanner.h:121
#define HASH_ELEM
Definition: hsearch.h:95
static void error(void)
Definition: sql-dyntest.c:147
#define PG_STAT_STATEMENTS_COLS_V1_0
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1598
#define USAGE_DEALLOC_PERCENT
int64 shared_blks_read
static const uint32 PGSS_PG_MAJOR_VERSION
int64 local_blks_written
Definition: guc.h:164
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:700
Oid GetUserId(void)
Definition: miscinit.c:478
instr_time blk_read_time
Definition: instrument.h:31
static ProcessUtility_hook_type prev_ProcessUtility
struct pgssSharedState pgssSharedState
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
int64 TimestampTz
Definition: timestamp.h:39
WalUsage pgWalUsage
Definition: instrument.c:22
int64 shared_blks_read
Definition: instrument.h:22
int64 shared_blks_dirtied
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_INFO_COLS
static void pgss_shmem_startup(void)
#define SpinLockInit(lock)
Definition: spin.h:60
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:202
PGDLLIMPORT const uint16 ScanKeywordTokens[]
struct timeval instr_time
Definition: instr_time.h:150
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:147
#define Min(x, y)
Definition: c.h:986
int64 wal_fpi
Definition: instrument.h:38
double max_time[PGSS_NUMKIND]
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:274
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static ExecutorRun_hook_type prev_ExecutorRun
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
static ExecutorEnd_hook_type prev_ExecutorEnd
Size entrysize
Definition: hsearch.h:76
#define PGSS_DUMP_FILE
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:76
#define pgss_enabled(level)
Definition: nodes.h:539
static int pgss_track
static int entry_cmp(const void *lhs, const void *rhs)
int errcode(int sqlerrcode)
Definition: elog.c:698
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:468
struct pgssEntry pgssEntry
#define PG_BINARY_W
Definition: c.h:1274
ProcessUtilityContext
Definition: utility.h:20
static ExecutorStart_hook_type prev_ExecutorStart
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:76
#define MemSet(start, val, len)
Definition: c.h:1008
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1382
int64 temp_blks_written
Definition: instrument.h:30
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
Counters counters
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static int exec_nested_level
int64 temp_blks_read
#define PG_STAT_STATEMENTS_COLS_V1_1
int64 shared_blks_dirtied
Definition: instrument.h:23
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Node * utilityStmt
Definition: parsenodes.h:128
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
#define PG_BINARY_R
Definition: c.h:1273
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1271
int stmt_len
Definition: plannodes.h:90
char * scanbuf
Definition: scanner.h:72
ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset)
Definition: pwrite.c:27
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:408
int duration
Definition: pgbench.c:182
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9226
#define MaxAllocHugeSize
Definition: memutils.h:44
signed int int32
Definition: c.h:429
int clocations_count
Definition: queryjumble.h:49
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:244
int64 local_blks_read
Definition: instrument.h:26
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:71
static planner_hook_type prev_planner_hook
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
PGDLLIMPORT const ScanKeywordList ScanKeywords
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1816
#define ASSUMED_LENGTH_INIT
static bool pgss_track_planning
#define sprintf
Definition: port.h:218
static pgssSharedState * pgss
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:219
#define fstat
Definition: win32_port.h:274
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
void pfree(void *pointer)
Definition: mcxt.c:1169
MemoryContext es_query_cxt
Definition: execnodes.h:599
int64 shared_blks_hit
static int pgss_max
static void entry_dealloc(void)
static HTAB * pgss_hash
int64 local_blks_hit
Definition: instrument.h:25
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
uint64 nprocessed
Definition: cmdtag.h:31
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:72
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:74
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:542
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2423
struct pg_encoding enc
Definition: encode.c:516
fmNodePtr resultinfo
Definition: fmgr.h:89
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
#define TimestampTzGetDatum(X)
Definition: timestamp.h:32
Definition: guc.h:75
void EmitWarningsOnPlaceholders(const char *className)
Definition: guc.c:9254
int highest_extern_param_id
Definition: queryjumble.h:52
#define USAGE_EXEC(duration)
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
int stmt_location
Definition: plannodes.h:89
pgssStoreKind
static char * buf
Definition: pg_test_fsync.c:68
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:21
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
bool IsUnderPostmaster
Definition: globals.c:112
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, JumbleState *jstate)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
int errdetail(const char *fmt,...)
Definition: elog.c:1042
int errcode_for_file_access(void)
Definition: elog.c:721
ScanDirection
Definition: sdir.h:22
#define CStringGetDatum(X)
Definition: postgres.h:622
Node * utilityStmt
Definition: plannodes.h:86
static char * qtext_load_file(Size *buffer_size)
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2373
#define YYLTYPE
Definition: scanner.h:44
unsigned int uint32
Definition: c.h:441
#define IS_STICKY(c)
int64 local_blks_dirtied
Definition: instrument.h:27
static ExecutorFinish_hook_type prev_ExecutorFinish
void _PG_fini(void)
planner_hook_type planner_hook
Definition: planner.c:74
__int64 st_size
Definition: win32_port.h:265
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:621
const char * p_sourcetext
Definition: parse_node.h:181
int64 temp_blks_written
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1697
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
int64 shared_blks_written
#define STICKY_DECREASE_FACTOR
#define USAGE_DECREASE_FACTOR
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:692
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:630
Definition: guc.h:72
struct pgssGlobalStats pgssGlobalStats
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
static Size pgss_memsize(void)
uint64 queryId
Definition: parsenodes.h:124
int CloseTransientFile(int fd)
Definition: fd.c:2600
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:53
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:780
pgssGlobalStats stats
static int comp_location(const void *a, const void *b)
#define SpinLockRelease(lock)
Definition: spin.h:64
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
#define HASH_BLOBS
Definition: hsearch.h:97
int64 local_blks_read
static bool need_gc_qtexts(void)
#define PGSS_HANDLED_UTILITY(n)
#define PG_FINALLY()
Definition: elog.h:330
uintptr_t Datum
Definition: postgres.h:411
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
static void pgss_shmem_shutdown(int code, Datum arg)
static void gc_qtexts(void)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
double mean_time[PGSS_NUMKIND]
Oid MyDatabaseId
Definition: globals.c:88
PGSSTrackLevel
#define Int64GetDatumFast(X)
Definition: postgres.h:804
static void entry_reset(Oid userid, Oid dbid, uint64 queryid)
Size keysize
Definition: hsearch.h:75
int work_mem
Definition: globals.c:124
BufferUsage bufusage
Definition: instrument.h:75
#define USAGE_INIT
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
static zic_t const max_time
Definition: zic.c:584
#define BoolGetDatum(X)
Definition: postgres.h:446
pgssHashKey key
CommandTag commandTag
Definition: cmdtag.h:30
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
#define ereport(elevel,...)
Definition: elog.h:157
int allowedModes
Definition: execnodes.h:305
#define PG_STAT_STATEMENTS_COLS
#define free(a)
Definition: header.h:65
int64 calls[PGSS_NUMKIND]
#define PG_RETURN_VOID()
Definition: fmgr.h:349
bool is_member_of_role(Oid member, Oid role)
Definition: acl.c:4869
#define Float8GetDatumFast(X)
Definition: postgres.h:805
SetFunctionReturnMode returnMode
Definition: execnodes.h:307
double min_time[PGSS_NUMKIND]
struct Instrumentation * totaltime
Definition: execdesc.h:55
#define Max(x, y)
Definition: c.h:980
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:80
double blk_read_time
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
const char * CleanQuerytext(const char *query, int *location, int *len)
Definition: queryjumble.c:56
static bool pgss_track_utility
#define Assert(condition)
Definition: c.h:804
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:594
int64 local_blks_dirtied
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
uint64 es_processed
Definition: execnodes.h:603
PG_MODULE_MAGIC
instr_time blk_write_time
Definition: instrument.h:32
int64 local_blks_hit
size_t Size
Definition: c.h:540
struct Counters Counters
TimestampTz stats_reset
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1203
double blk_write_time
#define MAXALIGN(LEN)
Definition: c.h:757
#define record_gc_qtexts()
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:221
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1436
int32 encoding
Definition: pg_database.h:41
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:233
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:210
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
double sum_var_time[PGSS_NUMKIND]
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1426
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
static int plan_nested_level
Tuplestorestate * setResult
Definition: execnodes.h:310
static shmem_startup_hook_type prev_shmem_startup_hook
int64 wal_records
Definition: instrument.h:37
const char * sourceText
Definition: execdesc.h:38
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
int FreeFile(FILE *file)
Definition: fd.c:2572
static Datum values[MAXATTR]
Definition: bootstrap.c:166
ExprContext * econtext
Definition: execnodes.h:303
LocationLen * clocations
Definition: queryjumble.h:43
#define Int32GetDatum(X)
Definition: postgres.h:523
e
Definition: preproc-init.c:82
int64 local_blks_written
Definition: instrument.h:28
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
TupleDesc setDesc
Definition: execnodes.h:311
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
void * palloc(Size size)
Definition: mcxt.c:1062
uint64 queryId
Definition: plannodes.h:48
int errmsg(const char *fmt,...)
Definition: elog.c:909
Datum pg_stat_statements(PG_FUNCTION_ARGS)
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:65
int64 shared_blks_hit
Definition: instrument.h:21
#define elog(elevel,...)
Definition: elog.h:232
int i
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:276
bool escape_string_warning
Definition: scanner.h:88
static const uint32 PGSS_FILE_HEADER
#define CStringGetTextDatum(s)
Definition: builtins.h:82
void * arg
int64 temp_blks_read
Definition: instrument.h:29
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void scanner_finish(core_yyscan_t yyscanner)
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:73
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:341
struct pgssHashKey pgssHashKey
PlannedStmt * plannedstmt
Definition: execdesc.h:37
#define qsort(a, b, c, d)
Definition: port.h:504
uint64 wal_bytes
Definition: instrument.h:39
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
#define PG_TRY()
Definition: elog.h:313
static zic_t const min_time
Definition: zic.c:583
int64 shared_blks_written
Definition: instrument.h:24
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
#define PGSS_TEXT_FILE
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9115
#define snprintf
Definition: port.h:216
int stmt_len
Definition: parsenodes.h:193
#define UINT64_FORMAT
Definition: c.h:484
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:56
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define PG_END_TRY()
Definition: elog.h:338
#define ASSUMED_MEDIAN_INIT
#define read(a, b, c)
Definition: win32.h:13
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1512
static const struct config_enum_entry track_options[]
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define PG_STAT_STATEMENTS_COLS_V1_2
static bool pgss_save
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static struct subre * parse(struct vars *, int, int, struct state *, struct state *)
Definition: regcomp.c:665
#define PG_STAT_STATEMENTS_COLS_V1_9
#define ftruncate(a, b)
Definition: win32_port.h:65