PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement planning and execution times as well as resource
5  * usage across a whole database cluster.
6  *
7  * Execution costs are totaled for each distinct source query, and kept in
8  * a shared hashtable. (We track only as many distinct queries as will fit
9  * in the designated amount of shared memory.)
10  *
11  * Starting in Postgres 9.2, this module normalized query entries. As of
12  * Postgres 14, the normalization is done by the core if compute_query_id is
13  * enabled, or optionally by third-party modules.
14  *
15  * To facilitate presenting entries to users, we create "representative" query
16  * strings in which constants are replaced with parameter symbols ($n), to
17  * make it clearer what a normalized entry can represent. To save on shared
18  * memory, and to avoid having to truncate oversized query strings, we store
19  * these strings in a temporary external query-texts file. Offsets into this
20  * file are kept in shared memory.
21  *
22  * Note about locking issues: to create or delete an entry in the shared
23  * hashtable, one must hold pgss->lock exclusively. Modifying any field
24  * in an entry except the counters requires the same. To look up an entry,
25  * one must hold the lock shared. To read or update the counters within
26  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27  * disappear!) and also take the entry's mutex spinlock.
28  * The shared state variable pgss->extent (the next free spot in the external
29  * query-text file) should be accessed only while holding either the
30  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31  * allow reserving file space while holding only shared lock on pgss->lock.
32  * Rewriting the entire external query-text file, eg for garbage collection,
33  * requires holding pgss->lock exclusively; this allows individual entries
34  * in the file to be read or written while holding only shared lock.
35  *
36  *
37  * Copyright (c) 2008-2022, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  * contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <math.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49 
50 #include "access/parallel.h"
51 #include "catalog/pg_authid.h"
52 #include "common/hashfn.h"
53 #include "executor/instrument.h"
54 #include "funcapi.h"
55 #include "jit/jit.h"
56 #include "mb/pg_wchar.h"
57 #include "miscadmin.h"
58 #include "optimizer/planner.h"
59 #include "parser/analyze.h"
60 #include "parser/parsetree.h"
61 #include "parser/scanner.h"
62 #include "parser/scansup.h"
63 #include "pgstat.h"
64 #include "storage/fd.h"
65 #include "storage/ipc.h"
66 #include "storage/lwlock.h"
67 #include "storage/shmem.h"
68 #include "storage/spin.h"
69 #include "tcop/utility.h"
70 #include "utils/acl.h"
71 #include "utils/builtins.h"
72 #include "utils/queryjumble.h"
73 #include "utils/memutils.h"
74 #include "utils/timestamp.h"
75 
77 
78 /* Location of permanent stats file (valid when database is shut down) */
79 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
80 
81 /*
82  * Location of external query text file.
83  */
84 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
85 
86 /* Magic number identifying the stats file format */
87 static const uint32 PGSS_FILE_HEADER = 0x20220408;
88 
89 /* PostgreSQL major version number, changes in which invalidate all entries */
90 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
91 
/*
 * Tunables for the per-entry "usage" factor (Counters.usage).
 */
/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * An entry is "sticky" while both its planning and execution call counters
 * are still zero.
 *
 * "c" is a Counters struct (not a pointer).  It is parenthesized so that any
 * expression yielding the struct, such as *ptr, expands correctly.  Note that
 * the argument is evaluated twice, so it must not have side effects.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
101 
102 /*
103  * Utility statements that pgss_ProcessUtility and pgss_post_parse_analyze
104  * ignore.
105  */
/*
 * PGSS_HANDLED_UTILITY(n) is true when node n is none of ExecuteStmt,
 * PrepareStmt or DeallocateStmt, i.e. when the statement is NOT one of the
 * ignored kinds.  The argument is expanded three times.
 */
106 #define PGSS_HANDLED_UTILITY(n) (!IsA(n, ExecuteStmt) && \
107  !IsA(n, PrepareStmt) && \
108  !IsA(n, DeallocateStmt))
109 
110 /*
111  * Extension version number, for supporting older extension versions' objects
112  */
113 typedef enum pgssVersion
114 {
121  PGSS_V1_10
123 
124 typedef enum pgssStoreKind
125 {
127 
128  /*
129  * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
130  * reference the underlying values in the arrays in the Counters struct,
131  * and this order is required in pg_stat_statements_internal().
132  */
135 
136  PGSS_NUMKIND /* Must be last value of this enum */
138 
139 /*
140  * Hashtable key that defines the identity of a hashtable entry. We separate
141  * queries by user and by database even if they are otherwise identical.
142  *
143  * If you add a new key to this struct, make sure to teach pgss_store() to
144  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
145  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
146 
147  */
148 typedef struct pgssHashKey
149 {
150  Oid userid; /* user OID */
151  Oid dbid; /* database OID */
152  uint64 queryid; /* query identifier */
153  bool toplevel; /* query executed at top level */
155 
156 /*
157  * The actual stats counters kept within pgssEntry.
158  */
159 typedef struct Counters
160 {
161  int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
162  double total_time[PGSS_NUMKIND]; /* total planning/execution time,
163  * in msec */
164  double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
165  * msec */
166  double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
167  * msec */
168  double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
169  * msec */
170  double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
171  * planning/execution time in msec */
172  int64 rows; /* total # of retrieved or affected rows */
173  int64 shared_blks_hit; /* # of shared buffer hits */
174  int64 shared_blks_read; /* # of shared disk blocks read */
175  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
176  int64 shared_blks_written; /* # of shared disk blocks written */
177  int64 local_blks_hit; /* # of local buffer hits */
178  int64 local_blks_read; /* # of local disk blocks read */
179  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
180  int64 local_blks_written; /* # of local disk blocks written */
181  int64 temp_blks_read; /* # of temp blocks read */
182  int64 temp_blks_written; /* # of temp blocks written */
183  double blk_read_time; /* time spent reading blocks, in msec */
184  double blk_write_time; /* time spent writing blocks, in msec */
185  double temp_blk_read_time; /* time spent reading temp blocks, in msec */
186  double temp_blk_write_time; /* time spent writing temp blocks, in
187  * msec */
188  double usage; /* usage factor */
189  int64 wal_records; /* # of WAL records generated */
190  int64 wal_fpi; /* # of WAL full page images generated */
191  uint64 wal_bytes; /* total amount of WAL generated in bytes */
192  int64 jit_functions; /* total number of JIT functions emitted */
193  double jit_generation_time; /* total time to generate jit code */
194  int64 jit_inlining_count; /* number of times inlining time has been
195  * > 0 */
196  double jit_inlining_time; /* total time to inline jit code */
197  int64 jit_optimization_count; /* number of times optimization time
198  * has been > 0 */
199  double jit_optimization_time; /* total time to optimize jit code */
200  int64 jit_emission_count; /* number of times emission time has been
201  * > 0 */
202  double jit_emission_time; /* total time to emit jit code */
204 
205 /*
206  * Global statistics for pg_stat_statements
207  */
208 typedef struct pgssGlobalStats
209 {
210  int64 dealloc; /* # of times entries were deallocated */
211  TimestampTz stats_reset; /* timestamp with all stats reset */
213 
214 /*
215  * Statistics per statement
216  *
217  * Note: in event of a failure in garbage collection of the query text file,
218  * we reset query_offset to zero and query_len to -1. This will be seen as
219  * an invalid state by qtext_fetch().
220  */
221 typedef struct pgssEntry
222 {
223  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
224  Counters counters; /* the statistics for this query */
225  Size query_offset; /* query text offset in external file */
226  int query_len; /* # of valid bytes in query string, or -1 */
227  int encoding; /* query text encoding */
228  slock_t mutex; /* protects the counters only */
230 
231 /*
232  * Global shared state
233  */
234 typedef struct pgssSharedState
235 {
236  LWLock *lock; /* protects hashtable search/modification */
237  double cur_median_usage; /* current median usage in hashtable */
238  Size mean_query_len; /* current mean entry text length */
239  slock_t mutex; /* protects following fields only: */
240  Size extent; /* current extent of query file */
241  int n_writers; /* number of active writers to query file */
242  int gc_count; /* query file garbage collection cycle count */
243  pgssGlobalStats stats; /* global statistics for pgss */
245 
246 /*---- Local variables ----*/
247 
248 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
249 static int exec_nested_level = 0;
250 
251 /* Current nesting depth of planner calls */
252 static int plan_nested_level = 0;
253 
254 /* Saved hook values in case of unload */
264 
265 /* Links to shared memory state */
266 static pgssSharedState *pgss = NULL;
267 static HTAB *pgss_hash = NULL;
268 
269 /*---- GUC variables ----*/
270 
271 typedef enum
272 {
273  PGSS_TRACK_NONE, /* track no statements */
274  PGSS_TRACK_TOP, /* only top level statements */
275  PGSS_TRACK_ALL /* all statements, including nested ones */
277 
/*
 * Name-to-value table for the tracking levels: "none", "top" and "all" map
 * to PGSS_TRACK_NONE, PGSS_TRACK_TOP and PGSS_TRACK_ALL.  The list ends with
 * a NULL-name sentinel entry.
 * NOTE(review): presumably this is the option list passed when defining the
 * pg_stat_statements.track GUC; the line that would show it is not visible
 * here -- confirm.
 */
278 static const struct config_enum_entry track_options[] =
279 {
280  {"none", PGSS_TRACK_NONE, false},
281  {"top", PGSS_TRACK_TOP, false},
282  {"all", PGSS_TRACK_ALL, false},
283  {NULL, 0, false}
284 };
285 
286 static int pgss_max; /* max # statements to track */
287 static int pgss_track; /* tracking level */
288 static bool pgss_track_utility; /* whether to track utility commands */
289 static bool pgss_track_planning; /* whether to track planning duration */
290 static bool pgss_save; /* whether to save stats across shutdown */
291 
292 
/*
 * pgss_enabled(level): should a statement at nesting depth "level" be
 * tracked?
 *
 * Always false inside a parallel worker (IsParallelWorker()).  Otherwise
 * true when pgss_track is PGSS_TRACK_ALL, or when it is PGSS_TRACK_TOP and
 * "level" is zero.  With any other setting the result is false.
 */
293 #define pgss_enabled(level) \
294  (!IsParallelWorker() && \
295  (pgss_track == PGSS_TRACK_ALL || \
296  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
297 
/*
 * record_gc_qtexts(): increment pgss->gc_count, the query-file garbage
 * collection cycle count, while holding the pgss->mutex spinlock.
 *
 * The shared state is accessed through a volatile-qualified pointer.  The
 * body is wrapped in do { ... } while(0) so the macro can be used as a
 * single statement.
 */
298 #define record_gc_qtexts() \
299  do { \
300  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
301  SpinLockAcquire(&s->mutex); \
302  s->gc_count++; \
303  SpinLockRelease(&s->mutex); \
304  } while(0)
305 
306 /*---- Function declarations ----*/
307 
308 void _PG_init(void);
309 
319 
320 static void pgss_shmem_request(void);
321 static void pgss_shmem_startup(void);
322 static void pgss_shmem_shutdown(int code, Datum arg);
323 static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
324  JumbleState *jstate);
326  const char *query_string,
327  int cursorOptions,
328  ParamListInfo boundParams);
329 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
330 static void pgss_ExecutorRun(QueryDesc *queryDesc,
331  ScanDirection direction,
332  uint64 count, bool execute_once);
333 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
334 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
335 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
336  bool readOnlyTree,
337  ProcessUtilityContext context, ParamListInfo params,
338  QueryEnvironment *queryEnv,
340 static void pgss_store(const char *query, uint64 queryId,
341  int query_location, int query_len,
342  pgssStoreKind kind,
343  double total_time, uint64 rows,
344  const BufferUsage *bufusage,
345  const WalUsage *walusage,
346  const struct JitInstrumentation *jitusage,
347  JumbleState *jstate);
349  pgssVersion api_version,
350  bool showtext);
351 static Size pgss_memsize(void);
352 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
353  int encoding, bool sticky);
354 static void entry_dealloc(void);
355 static bool qtext_store(const char *query, int query_len,
356  Size *query_offset, int *gc_count);
357 static char *qtext_load_file(Size *buffer_size);
358 static char *qtext_fetch(Size query_offset, int query_len,
359  char *buffer, Size buffer_size);
360 static bool need_gc_qtexts(void);
361 static void gc_qtexts(void);
362 static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
363 static char *generate_normalized_query(JumbleState *jstate, const char *query,
364  int query_loc, int *query_len_p);
365 static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
366  int query_loc);
367 static int comp_location(const void *a, const void *b);
368 
369 
370 /*
371  * Module load callback
372  */
373 void
374 _PG_init(void)
375 {
376  /*
377  * In order to create our shared memory area, we have to be loaded via
378  * shared_preload_libraries. If not, fall out without hooking into any of
379  * the main system. (We don't throw error here because it seems useful to
380  * allow the pg_stat_statements functions to be created even when the
381  * module isn't active. The functions must protect themselves against
382  * being called then, however.)
383  */
385  return;
386 
387  /*
388  * Inform the postmaster that we want to enable query_id calculation if
389  * compute_query_id is set to auto.
390  */
391  EnableQueryId();
392 
393  /*
394  * Define (or redefine) custom GUC variables.
395  */
396  DefineCustomIntVariable("pg_stat_statements.max",
397  "Sets the maximum number of statements tracked by pg_stat_statements.",
398  NULL,
399  &pgss_max,
400  5000,
401  100,
402  INT_MAX,
404  0,
405  NULL,
406  NULL,
407  NULL);
408 
409  DefineCustomEnumVariable("pg_stat_statements.track",
410  "Selects which statements are tracked by pg_stat_statements.",
411  NULL,
412  &pgss_track,
415  PGC_SUSET,
416  0,
417  NULL,
418  NULL,
419  NULL);
420 
421  DefineCustomBoolVariable("pg_stat_statements.track_utility",
422  "Selects whether utility commands are tracked by pg_stat_statements.",
423  NULL,
425  true,
426  PGC_SUSET,
427  0,
428  NULL,
429  NULL,
430  NULL);
431 
432  DefineCustomBoolVariable("pg_stat_statements.track_planning",
433  "Selects whether planning duration is tracked by pg_stat_statements.",
434  NULL,
436  false,
437  PGC_SUSET,
438  0,
439  NULL,
440  NULL,
441  NULL);
442 
443  DefineCustomBoolVariable("pg_stat_statements.save",
444  "Save pg_stat_statements statistics across server shutdowns.",
445  NULL,
446  &pgss_save,
447  true,
448  PGC_SIGHUP,
449  0,
450  NULL,
451  NULL,
452  NULL);
453 
454  MarkGUCPrefixReserved("pg_stat_statements");
455 
456  /*
457  * Install hooks.
458  */
477 }
478 
479 /*
480  * shmem_request hook: request additional shared resources. We'll allocate or
481  * attach to the shared resources in pgss_shmem_startup().
482  */
483 static void
485 {
488 
490  RequestNamedLWLockTranche("pg_stat_statements", 1);
491 }
492 
493 /*
494  * shmem_startup hook: allocate or attach to shared memory,
495  * then load any pre-existing statistics from file.
496  * Also create and load the query-texts file, which is expected to exist
497  * (even if empty) while the module is enabled.
498  */
499 static void
501 {
502  bool found;
503  HASHCTL info;
504  FILE *file = NULL;
505  FILE *qfile = NULL;
506  uint32 header;
507  int32 num;
508  int32 pgver;
509  int32 i;
510  int buffer_size;
511  char *buffer = NULL;
512 
515 
516  /* reset in case this is a restart within the postmaster */
517  pgss = NULL;
518  pgss_hash = NULL;
519 
520  /*
521  * Create or attach to the shared memory state, including hash table
522  */
523  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
524 
525  pgss = ShmemInitStruct("pg_stat_statements",
526  sizeof(pgssSharedState),
527  &found);
528 
529  if (!found)
530  {
531  /* First time through ... */
532  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
536  pgss->extent = 0;
537  pgss->n_writers = 0;
538  pgss->gc_count = 0;
539  pgss->stats.dealloc = 0;
541  }
542 
543  info.keysize = sizeof(pgssHashKey);
544  info.entrysize = sizeof(pgssEntry);
545  pgss_hash = ShmemInitHash("pg_stat_statements hash",
547  &info,
549 
550  LWLockRelease(AddinShmemInitLock);
551 
552  /*
553  * If we're in the postmaster (or a standalone backend...), set up a shmem
554  * exit hook to dump the statistics to disk.
555  */
556  if (!IsUnderPostmaster)
558 
559  /*
560  * Done if some other process already completed our initialization.
561  */
562  if (found)
563  return;
564 
565  /*
566  * Note: we don't bother with locks here, because there should be no other
567  * processes running when this code is reached.
568  */
569 
570  /* Unlink query text file possibly left over from crash */
571  unlink(PGSS_TEXT_FILE);
572 
573  /* Allocate new query text temp file */
575  if (qfile == NULL)
576  goto write_error;
577 
578  /*
579  * If we were told not to load old statistics, we're done. (Note we do
580  * not try to unlink any old dump file in this case. This seems a bit
581  * questionable but it's the historical behavior.)
582  */
583  if (!pgss_save)
584  {
585  FreeFile(qfile);
586  return;
587  }
588 
589  /*
590  * Attempt to load old statistics from the dump file.
591  */
593  if (file == NULL)
594  {
595  if (errno != ENOENT)
596  goto read_error;
597  /* No existing persisted stats file, so we're done */
598  FreeFile(qfile);
599  return;
600  }
601 
602  buffer_size = 2048;
603  buffer = (char *) palloc(buffer_size);
604 
605  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
606  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
607  fread(&num, sizeof(int32), 1, file) != 1)
608  goto read_error;
609 
610  if (header != PGSS_FILE_HEADER ||
611  pgver != PGSS_PG_MAJOR_VERSION)
612  goto data_error;
613 
614  for (i = 0; i < num; i++)
615  {
616  pgssEntry temp;
617  pgssEntry *entry;
618  Size query_offset;
619 
620  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
621  goto read_error;
622 
623  /* Encoding is the only field we can easily sanity-check */
624  if (!PG_VALID_BE_ENCODING(temp.encoding))
625  goto data_error;
626 
627  /* Resize buffer as needed */
628  if (temp.query_len >= buffer_size)
629  {
630  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
631  buffer = repalloc(buffer, buffer_size);
632  }
633 
634  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
635  goto read_error;
636 
637  /* Should have a trailing null, but let's make sure */
638  buffer[temp.query_len] = '\0';
639 
640  /* Skip loading "sticky" entries */
641  if (IS_STICKY(temp.counters))
642  continue;
643 
644  /* Store the query text */
645  query_offset = pgss->extent;
646  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
647  goto write_error;
648  pgss->extent += temp.query_len + 1;
649 
650  /* make the hashtable entry (discards old entries if too many) */
651  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
652  temp.encoding,
653  false);
654 
655  /* copy in the actual stats */
656  entry->counters = temp.counters;
657  }
658 
659  /* Read global statistics for pg_stat_statements */
660  if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
661  goto read_error;
662 
663  pfree(buffer);
664  FreeFile(file);
665  FreeFile(qfile);
666 
667  /*
668  * Remove the persisted stats file so it's not included in
669  * backups/replication standbys, etc. A new file will be written on next
670  * shutdown.
671  *
672  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
673  * because we remove that file on startup; it acts inversely to
674  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
675  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
676  * when the server is not running. Leaving the file creates no danger of
677  * a newly restored database having a spurious record of execution costs,
678  * which is what we're really concerned about here.
679  */
680  unlink(PGSS_DUMP_FILE);
681 
682  return;
683 
684 read_error:
685  ereport(LOG,
687  errmsg("could not read file \"%s\": %m",
688  PGSS_DUMP_FILE)));
689  goto fail;
690 data_error:
691  ereport(LOG,
692  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
693  errmsg("ignoring invalid data in file \"%s\"",
694  PGSS_DUMP_FILE)));
695  goto fail;
696 write_error:
697  ereport(LOG,
699  errmsg("could not write file \"%s\": %m",
700  PGSS_TEXT_FILE)));
701 fail:
702  if (buffer)
703  pfree(buffer);
704  if (file)
705  FreeFile(file);
706  if (qfile)
707  FreeFile(qfile);
708  /* If possible, throw away the bogus file; ignore any error */
709  unlink(PGSS_DUMP_FILE);
710 
711  /*
712  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
713  * server is running with pg_stat_statements enabled
714  */
715 }
716 
717 /*
718  * shmem_shutdown hook: Dump statistics into file.
719  *
720  * Note: we don't bother with acquiring lock, because there should be no
721  * other processes running when this is called.
722  */
723 static void
725 {
726  FILE *file;
727  char *qbuffer = NULL;
728  Size qbuffer_size = 0;
729  HASH_SEQ_STATUS hash_seq;
730  int32 num_entries;
731  pgssEntry *entry;
732 
733  /* Don't try to dump during a crash. */
734  if (code)
735  return;
736 
737  /* Safety check ... shouldn't get here unless shmem is set up. */
738  if (!pgss || !pgss_hash)
739  return;
740 
741  /* Don't dump if told not to. */
742  if (!pgss_save)
743  return;
744 
745  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
746  if (file == NULL)
747  goto error;
748 
749  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
750  goto error;
751  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
752  goto error;
753  num_entries = hash_get_num_entries(pgss_hash);
754  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
755  goto error;
756 
757  qbuffer = qtext_load_file(&qbuffer_size);
758  if (qbuffer == NULL)
759  goto error;
760 
761  /*
762  * When serializing to disk, we store query texts immediately after their
763  * entry data. Any orphaned query texts are thereby excluded.
764  */
765  hash_seq_init(&hash_seq, pgss_hash);
766  while ((entry = hash_seq_search(&hash_seq)) != NULL)
767  {
768  int len = entry->query_len;
769  char *qstr = qtext_fetch(entry->query_offset, len,
770  qbuffer, qbuffer_size);
771 
772  if (qstr == NULL)
773  continue; /* Ignore any entries with bogus texts */
774 
775  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
776  fwrite(qstr, 1, len + 1, file) != len + 1)
777  {
778  /* note: we assume hash_seq_term won't change errno */
779  hash_seq_term(&hash_seq);
780  goto error;
781  }
782  }
783 
784  /* Dump global statistics for pg_stat_statements */
785  if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
786  goto error;
787 
788  free(qbuffer);
789  qbuffer = NULL;
790 
791  if (FreeFile(file))
792  {
793  file = NULL;
794  goto error;
795  }
796 
797  /*
798  * Rename file into place, so we atomically replace any old one.
799  */
801 
802  /* Unlink query-texts file; it's not needed while the server is shut down */
803  unlink(PGSS_TEXT_FILE);
804 
805  return;
806 
807 error:
808  ereport(LOG,
810  errmsg("could not write file \"%s\": %m",
811  PGSS_DUMP_FILE ".tmp")));
812  if (qbuffer)
813  free(qbuffer);
814  if (file)
815  FreeFile(file);
816  unlink(PGSS_DUMP_FILE ".tmp");
817  unlink(PGSS_TEXT_FILE);
818 }
819 
820 /*
821  * Post-parse-analysis hook: mark query with a queryId
822  */
823 static void
825 {
827  prev_post_parse_analyze_hook(pstate, query, jstate);
828 
829  /* Safety check... */
831  return;
832 
833  /*
834  * Clear queryId for prepared statements related utility, as those will
835  * inherit from the underlying statement's one (except DEALLOCATE which is
836  * entirely untracked).
837  */
838  if (query->utilityStmt)
839  {
841  query->queryId = UINT64CONST(0);
842  return;
843  }
844 
845  /*
846  * If query jumbling was able to identify any ignorable constants, we
847  * immediately create a hash table entry for the query, so that we can
848  * record the normalized form of the query string. If there were no such
849  * constants, the normalized string would be the same as the query text
850  * anyway, so there's no need for an early entry.
851  */
852  if (jstate && jstate->clocations_count > 0)
853  pgss_store(pstate->p_sourcetext,
854  query->queryId,
855  query->stmt_location,
856  query->stmt_len,
857  PGSS_INVALID,
858  0,
859  0,
860  NULL,
861  NULL,
862  NULL,
863  jstate);
864 }
865 
866 /*
867  * Planner hook: forward to regular planner, but measure planning time
868  * if needed.
869  */
870 static PlannedStmt *
872  const char *query_string,
873  int cursorOptions,
874  ParamListInfo boundParams)
875 {
876  PlannedStmt *result;
877 
878  /*
879  * We can't process the query if no query_string is provided, as
880  * pgss_store needs it. We also ignore queries without a queryid, as they
881  * would be treated as utility statements, which may not be the case.
882  *
883  * Note that planner_hook can be called from the planner itself, so we
884  * have a specific nesting level for the planner. However, utility
885  * commands containing optimizable statements can also call the planner,
886  * same for regular DML (for instance for underlying foreign key queries).
887  * So testing the planner nesting level only is not enough to detect real
888  * top level planner call.
889  */
891  && pgss_track_planning && query_string
892  && parse->queryId != UINT64CONST(0))
893  {
894  instr_time start;
896  BufferUsage bufusage_start,
897  bufusage;
898  WalUsage walusage_start,
899  walusage;
900 
901  /* We need to track buffer usage as the planner can access them. */
902  bufusage_start = pgBufferUsage;
903 
904  /*
905  * Similarly the planner could write some WAL records in some cases
906  * (e.g. setting a hint bit with those being WAL-logged)
907  */
908  walusage_start = pgWalUsage;
909  INSTR_TIME_SET_CURRENT(start);
910 
912  PG_TRY();
913  {
914  if (prev_planner_hook)
915  result = prev_planner_hook(parse, query_string, cursorOptions,
916  boundParams);
917  else
918  result = standard_planner(parse, query_string, cursorOptions,
919  boundParams);
920  }
921  PG_FINALLY();
922  {
924  }
925  PG_END_TRY();
926 
929 
930  /* calc differences of buffer counters. */
931  memset(&bufusage, 0, sizeof(BufferUsage));
932  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
933 
934  /* calc differences of WAL counters. */
935  memset(&walusage, 0, sizeof(WalUsage));
936  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
937 
938  pgss_store(query_string,
939  parse->queryId,
940  parse->stmt_location,
941  parse->stmt_len,
942  PGSS_PLAN,
944  0,
945  &bufusage,
946  &walusage,
947  NULL,
948  NULL);
949  }
950  else
951  {
952  if (prev_planner_hook)
953  result = prev_planner_hook(parse, query_string, cursorOptions,
954  boundParams);
955  else
956  result = standard_planner(parse, query_string, cursorOptions,
957  boundParams);
958  }
959 
960  return result;
961 }
962 
963 /*
964  * ExecutorStart hook: start up tracking if needed
965  */
966 static void
967 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
968 {
969  if (prev_ExecutorStart)
970  prev_ExecutorStart(queryDesc, eflags);
971  else
972  standard_ExecutorStart(queryDesc, eflags);
973 
974  /*
975  * If query has queryId zero, don't track it. This prevents double
976  * counting of optimizable statements that are directly contained in
977  * utility statements.
978  */
979  if (pgss_enabled(exec_nested_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
980  {
981  /*
982  * Set up to track total elapsed time in ExecutorRun. Make sure the
983  * space is allocated in the per-query context so it will go away at
984  * ExecutorEnd.
985  */
986  if (queryDesc->totaltime == NULL)
987  {
988  MemoryContext oldcxt;
989 
990  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
991  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
992  MemoryContextSwitchTo(oldcxt);
993  }
994  }
995 }
996 
997 /*
998  * ExecutorRun hook: all we need do is track nesting depth
999  */
1000 static void
1001 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
1002  bool execute_once)
1003 {
1005  PG_TRY();
1006  {
1007  if (prev_ExecutorRun)
1008  prev_ExecutorRun(queryDesc, direction, count, execute_once);
1009  else
1010  standard_ExecutorRun(queryDesc, direction, count, execute_once);
1011  }
1012  PG_FINALLY();
1013  {
1015  }
1016  PG_END_TRY();
1017 }
1018 
1019 /*
1020  * ExecutorFinish hook: all we need do is track nesting depth
1021  */
1022 static void
1024 {
1026  PG_TRY();
1027  {
1028  if (prev_ExecutorFinish)
1029  prev_ExecutorFinish(queryDesc);
1030  else
1031  standard_ExecutorFinish(queryDesc);
1032  }
1033  PG_FINALLY();
1034  {
1036  }
1037  PG_END_TRY();
1038 }
1039 
1040 /*
1041  * ExecutorEnd hook: store results if needed
1042  */
1043 static void
1045 {
1046  uint64 queryId = queryDesc->plannedstmt->queryId;
1047 
1048  if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1050  {
1051  /*
1052  * Make sure stats accumulation is done. (Note: it's okay if several
1053  * levels of hook all do this.)
1054  */
1055  InstrEndLoop(queryDesc->totaltime);
1056 
1057  pgss_store(queryDesc->sourceText,
1058  queryId,
1059  queryDesc->plannedstmt->stmt_location,
1060  queryDesc->plannedstmt->stmt_len,
1061  PGSS_EXEC,
1062  queryDesc->totaltime->total * 1000.0, /* convert to msec */
1063  queryDesc->estate->es_processed,
1064  &queryDesc->totaltime->bufusage,
1065  &queryDesc->totaltime->walusage,
1066  queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1067  NULL);
1068  }
1069 
1070  if (prev_ExecutorEnd)
1071  prev_ExecutorEnd(queryDesc);
1072  else
1073  standard_ExecutorEnd(queryDesc);
1074 }
1075 
1076 /*
1077  * ProcessUtility hook
1078  */
1079 static void
1080 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1081  bool readOnlyTree,
1082  ProcessUtilityContext context,
1083  ParamListInfo params, QueryEnvironment *queryEnv,
1085 {
1086  Node *parsetree = pstmt->utilityStmt;
1087  uint64 saved_queryId = pstmt->queryId;
1088 
1089  /*
1090  * Force utility statements to get queryId zero. We do this even in cases
1091  * where the statement contains an optimizable statement for which a
1092  * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1093  * cases, runtime control will first go through ProcessUtility and then
1094  * the executor, and we don't want the executor hooks to do anything,
1095  * since we are already measuring the statement's costs at the utility
1096  * level.
1097  *
1098  * Note that this is only done if pg_stat_statements is enabled and
1099  * configured to track utility statements, in the unlikely possibility
1100  * that user configured another extension to handle utility statements
1101  * only.
1102  */
1104  pstmt->queryId = UINT64CONST(0);
1105 
1106  /*
1107  * If it's an EXECUTE statement, we don't track it and don't increment the
1108  * nesting level. This allows the cycles to be charged to the underlying
1109  * PREPARE instead (by the Executor hooks), which is much more useful.
1110  *
1111  * We also don't track execution of PREPARE. If we did, we would get one
1112  * hash table entry for the PREPARE (with hash calculated from the query
1113  * string), and then a different one with the same query string (but hash
1114  * calculated from the query tree) would be used to accumulate costs of
1115  * ensuing EXECUTEs. This would be confusing, and inconsistent with other
1116  * cases where planning time is not included at all.
1117  *
1118  * Likewise, we don't track execution of DEALLOCATE.
1119  */
1121  PGSS_HANDLED_UTILITY(parsetree))
1122  {
1123  instr_time start;
1125  uint64 rows;
1126  BufferUsage bufusage_start,
1127  bufusage;
1128  WalUsage walusage_start,
1129  walusage;
1130 
1131  bufusage_start = pgBufferUsage;
1132  walusage_start = pgWalUsage;
1133  INSTR_TIME_SET_CURRENT(start);
1134 
1136  PG_TRY();
1137  {
1138  if (prev_ProcessUtility)
1139  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1140  context, params, queryEnv,
1141  dest, qc);
1142  else
1143  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1144  context, params, queryEnv,
1145  dest, qc);
1146  }
1147  PG_FINALLY();
1148  {
1150  }
1151  PG_END_TRY();
1152 
1154  INSTR_TIME_SUBTRACT(duration, start);
1155 
1156  /*
1157  * Track the total number of rows retrieved or affected by the utility
1158  * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1159  * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1160  */
1161  rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1162  qc->commandTag == CMDTAG_FETCH ||
1163  qc->commandTag == CMDTAG_SELECT ||
1164  qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1165  qc->nprocessed : 0;
1166 
1167  /* calc differences of buffer counters. */
1168  memset(&bufusage, 0, sizeof(BufferUsage));
1169  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1170 
1171  /* calc differences of WAL counters. */
1172  memset(&walusage, 0, sizeof(WalUsage));
1173  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1174 
1175  pgss_store(queryString,
1176  saved_queryId,
1177  pstmt->stmt_location,
1178  pstmt->stmt_len,
1179  PGSS_EXEC,
1181  rows,
1182  &bufusage,
1183  &walusage,
1184  NULL,
1185  NULL);
1186  }
1187  else
1188  {
1189  if (prev_ProcessUtility)
1190  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1191  context, params, queryEnv,
1192  dest, qc);
1193  else
1194  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1195  context, params, queryEnv,
1196  dest, qc);
1197  }
1198 }
1199 
1200 /*
1201  * Store some statistics for a statement.
1202  *
1203  * If jstate is not NULL then we're trying to create an entry for which
1204  * we have no statistics as yet; we just want to record the normalized
1205  * query string. total_time, rows, bufusage and walusage are ignored in this
1206  * case.
1207  *
1208  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1209  * for the arrays in the Counters field.
1210  */
1211 static void
1212 pgss_store(const char *query, uint64 queryId,
1213  int query_location, int query_len,
1214  pgssStoreKind kind,
1215  double total_time, uint64 rows,
1216  const BufferUsage *bufusage,
1217  const WalUsage *walusage,
1218  const struct JitInstrumentation *jitusage,
1219  JumbleState *jstate)
1220 {
1221  pgssHashKey key;
1222  pgssEntry *entry;
1223  char *norm_query = NULL;
1224  int encoding = GetDatabaseEncoding();
1225 
1226  Assert(query != NULL);
1227 
1228  /* Safety check... */
1229  if (!pgss || !pgss_hash)
1230  return;
1231 
1232  /*
1233  * Nothing to do if compute_query_id isn't enabled and no other module
1234  * computed a query identifier.
1235  */
1236  if (queryId == UINT64CONST(0))
1237  return;
1238 
1239  /*
1240  * Confine our attention to the relevant part of the string, if the query
1241  * is a portion of a multi-statement source string, and update query
1242  * location and length if needed.
1243  */
1244  query = CleanQuerytext(query, &query_location, &query_len);
1245 
1246  /* Set up key for hashtable search */
1247 
1248  /* memset() is required when pgssHashKey is without padding only */
1249  memset(&key, 0, sizeof(pgssHashKey));
1250 
1251  key.userid = GetUserId();
1252  key.dbid = MyDatabaseId;
1253  key.queryid = queryId;
1254  key.toplevel = (exec_nested_level == 0);
1255 
1256  /* Lookup the hash table entry with shared lock. */
1258 
1259  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1260 
1261  /* Create new entry, if not present */
1262  if (!entry)
1263  {
1264  Size query_offset;
1265  int gc_count;
1266  bool stored;
1267  bool do_gc;
1268 
1269  /*
1270  * Create a new, normalized query string if caller asked. We don't
1271  * need to hold the lock while doing this work. (Note: in any case,
1272  * it's possible that someone else creates a duplicate hashtable entry
1273  * in the interval where we don't hold the lock below. That case is
1274  * handled by entry_alloc.)
1275  */
1276  if (jstate)
1277  {
1279  norm_query = generate_normalized_query(jstate, query,
1280  query_location,
1281  &query_len);
1283  }
1284 
1285  /* Append new query text to file with only shared lock held */
1286  stored = qtext_store(norm_query ? norm_query : query, query_len,
1287  &query_offset, &gc_count);
1288 
1289  /*
1290  * Determine whether we need to garbage collect external query texts
1291  * while the shared lock is still held. This micro-optimization
1292  * avoids taking the time to decide this while holding exclusive lock.
1293  */
1294  do_gc = need_gc_qtexts();
1295 
1296  /* Need exclusive lock to make a new hashtable entry - promote */
1299 
1300  /*
1301  * A garbage collection may have occurred while we weren't holding the
1302  * lock. In the unlikely event that this happens, the query text we
1303  * stored above will have been garbage collected, so write it again.
1304  * This should be infrequent enough that doing it while holding
1305  * exclusive lock isn't a performance problem.
1306  */
1307  if (!stored || pgss->gc_count != gc_count)
1308  stored = qtext_store(norm_query ? norm_query : query, query_len,
1309  &query_offset, NULL);
1310 
1311  /* If we failed to write to the text file, give up */
1312  if (!stored)
1313  goto done;
1314 
1315  /* OK to create a new hashtable entry */
1316  entry = entry_alloc(&key, query_offset, query_len, encoding,
1317  jstate != NULL);
1318 
1319  /* If needed, perform garbage collection while exclusive lock held */
1320  if (do_gc)
1321  gc_qtexts();
1322  }
1323 
1324  /* Increment the counts, except when jstate is not NULL */
1325  if (!jstate)
1326  {
1327  /*
1328  * Grab the spinlock while updating the counters (see comment about
1329  * locking rules at the head of the file)
1330  */
1331  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1332 
1333  Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1334 
1335  SpinLockAcquire(&e->mutex);
1336 
1337  /* "Unstick" entry if it was previously sticky */
1338  if (IS_STICKY(e->counters))
1339  e->counters.usage = USAGE_INIT;
1340 
1341  e->counters.calls[kind] += 1;
1342  e->counters.total_time[kind] += total_time;
1343 
1344  if (e->counters.calls[kind] == 1)
1345  {
1346  e->counters.min_time[kind] = total_time;
1347  e->counters.max_time[kind] = total_time;
1348  e->counters.mean_time[kind] = total_time;
1349  }
1350  else
1351  {
1352  /*
1353  * Welford's method for accurately computing variance. See
1354  * <http://www.johndcook.com/blog/standard_deviation/>
1355  */
1356  double old_mean = e->counters.mean_time[kind];
1357 
1358  e->counters.mean_time[kind] +=
1359  (total_time - old_mean) / e->counters.calls[kind];
1360  e->counters.sum_var_time[kind] +=
1361  (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);
1362 
1363  /* calculate min and max time */
1364  if (e->counters.min_time[kind] > total_time)
1365  e->counters.min_time[kind] = total_time;
1366  if (e->counters.max_time[kind] < total_time)
1367  e->counters.max_time[kind] = total_time;
1368  }
1369  e->counters.rows += rows;
1370  e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1371  e->counters.shared_blks_read += bufusage->shared_blks_read;
1372  e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1373  e->counters.shared_blks_written += bufusage->shared_blks_written;
1374  e->counters.local_blks_hit += bufusage->local_blks_hit;
1375  e->counters.local_blks_read += bufusage->local_blks_read;
1376  e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1377  e->counters.local_blks_written += bufusage->local_blks_written;
1378  e->counters.temp_blks_read += bufusage->temp_blks_read;
1379  e->counters.temp_blks_written += bufusage->temp_blks_written;
1380  e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1381  e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1382  e->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
1383  e->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
1384  e->counters.usage += USAGE_EXEC(total_time);
1385  e->counters.wal_records += walusage->wal_records;
1386  e->counters.wal_fpi += walusage->wal_fpi;
1387  e->counters.wal_bytes += walusage->wal_bytes;
1388  if (jitusage)
1389  {
1390  e->counters.jit_functions += jitusage->created_functions;
1391  e->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1392 
1394  e->counters.jit_inlining_count++;
1395  e->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1396 
1398  e->counters.jit_optimization_count++;
1399  e->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1400 
1402  e->counters.jit_emission_count++;
1403  e->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1404  }
1405 
1406  SpinLockRelease(&e->mutex);
1407  }
1408 
1409 done:
1411 
1412  /* We postpone this clean-up until we're out of the lock */
1413  if (norm_query)
1414  pfree(norm_query);
1415 }
1416 
1417 /*
1418  * Reset statement statistics corresponding to userid, dbid, and queryid.
1419  */
1420 Datum
1422 {
1423  Oid userid;
1424  Oid dbid;
1425  uint64 queryid;
1426 
1427  userid = PG_GETARG_OID(0);
1428  dbid = PG_GETARG_OID(1);
1429  queryid = (uint64) PG_GETARG_INT64(2);
1430 
1431  entry_reset(userid, dbid, queryid);
1432 
1433  PG_RETURN_VOID();
1434 }
1435 
1436 /*
1437  * Reset statement statistics.
1438  */
1439 Datum
1441 {
1442  entry_reset(0, 0, 0);
1443 
1444  PG_RETURN_VOID();
1445 }
1446 
/*
 * Number of output arguments (columns) for various API versions.
 *
 * pg_stat_statements_internal() checks the caller's tuple descriptor against
 * these, and sizes its per-row arrays with PG_STAT_STATEMENTS_COLS.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0    14
#define PG_STAT_STATEMENTS_COLS_V1_1    18
#define PG_STAT_STATEMENTS_COLS_V1_2    19
#define PG_STAT_STATEMENTS_COLS_V1_3    23
#define PG_STAT_STATEMENTS_COLS_V1_8    32
#define PG_STAT_STATEMENTS_COLS_V1_9    33
#define PG_STAT_STATEMENTS_COLS_V1_10   43
#define PG_STAT_STATEMENTS_COLS         43  /* maximum of above */
1456 
1457 /*
1458  * Retrieve statement statistics.
1459  *
1460  * The SQL API of this function has changed multiple times, and will likely
1461  * do so again in future. To support the case where a newer version of this
1462  * loadable module is being used with an old SQL declaration of the function,
1463  * we continue to support the older API versions. For 1.2 and later, the
1464  * expected API version is identified by embedding it in the C name of the
1465  * function. Unfortunately we weren't bright enough to do that for 1.1.
1466  */
1467 Datum
1469 {
1470  bool showtext = PG_GETARG_BOOL(0);
1471 
1472  pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1473 
1474  return (Datum) 0;
1475 }
1476 
1477 Datum
1479 {
1480  bool showtext = PG_GETARG_BOOL(0);
1481 
1482  pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1483 
1484  return (Datum) 0;
1485 }
1486 
1487 Datum
1489 {
1490  bool showtext = PG_GETARG_BOOL(0);
1491 
1492  pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1493 
1494  return (Datum) 0;
1495 }
1496 
1497 Datum
1499 {
1500  bool showtext = PG_GETARG_BOOL(0);
1501 
1502  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1503 
1504  return (Datum) 0;
1505 }
1506 
1507 Datum
1509 {
1510  bool showtext = PG_GETARG_BOOL(0);
1511 
1512  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1513 
1514  return (Datum) 0;
1515 }
1516 
1517 /*
1518  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1519  * This can be removed someday, perhaps.
1520  */
1521 Datum
1523 {
1524  /* If it's really API 1.1, we'll figure that out below */
1525  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1526 
1527  return (Datum) 0;
1528 }
1529 
1530 /* Common code for all versions of pg_stat_statements() */
1531 static void
1533  pgssVersion api_version,
1534  bool showtext)
1535 {
1536  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1537  Oid userid = GetUserId();
1538  bool is_allowed_role = false;
1539  char *qbuffer = NULL;
1540  Size qbuffer_size = 0;
1541  Size extent = 0;
1542  int gc_count = 0;
1543  HASH_SEQ_STATUS hash_seq;
1544  pgssEntry *entry;
1545 
1546  /*
1547  * Superusers or roles with the privileges of pg_read_all_stats members
1548  * are allowed
1549  */
1550  is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1551 
1552  /* hash table must exist already */
1553  if (!pgss || !pgss_hash)
1554  ereport(ERROR,
1555  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1556  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1557 
1558  SetSingleFuncCall(fcinfo, 0);
1559 
1560  /*
1561  * Check we have the expected number of output arguments. Aside from
1562  * being a good safety check, we need a kluge here to detect API version
1563  * 1.1, which was wedged into the code in an ill-considered way.
1564  */
1565  switch (rsinfo->setDesc->natts)
1566  {
1568  if (api_version != PGSS_V1_0)
1569  elog(ERROR, "incorrect number of output arguments");
1570  break;
1572  /* pg_stat_statements() should have told us 1.0 */
1573  if (api_version != PGSS_V1_0)
1574  elog(ERROR, "incorrect number of output arguments");
1575  api_version = PGSS_V1_1;
1576  break;
1578  if (api_version != PGSS_V1_2)
1579  elog(ERROR, "incorrect number of output arguments");
1580  break;
1582  if (api_version != PGSS_V1_3)
1583  elog(ERROR, "incorrect number of output arguments");
1584  break;
1586  if (api_version != PGSS_V1_8)
1587  elog(ERROR, "incorrect number of output arguments");
1588  break;
1590  if (api_version != PGSS_V1_9)
1591  elog(ERROR, "incorrect number of output arguments");
1592  break;
1594  if (api_version != PGSS_V1_10)
1595  elog(ERROR, "incorrect number of output arguments");
1596  break;
1597  default:
1598  elog(ERROR, "incorrect number of output arguments");
1599  }
1600 
1601  /*
1602  * We'd like to load the query text file (if needed) while not holding any
1603  * lock on pgss->lock. In the worst case we'll have to do this again
1604  * after we have the lock, but it's unlikely enough to make this a win
1605  * despite occasional duplicated work. We need to reload if anybody
1606  * writes to the file (either a retail qtext_store(), or a garbage
1607  * collection) between this point and where we've gotten shared lock. If
1608  * a qtext_store is actually in progress when we look, we might as well
1609  * skip the speculative load entirely.
1610  */
1611  if (showtext)
1612  {
1613  int n_writers;
1614 
1615  /* Take the mutex so we can examine variables */
1616  {
1617  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1618 
1619  SpinLockAcquire(&s->mutex);
1620  extent = s->extent;
1621  n_writers = s->n_writers;
1622  gc_count = s->gc_count;
1623  SpinLockRelease(&s->mutex);
1624  }
1625 
1626  /* No point in loading file now if there are active writers */
1627  if (n_writers == 0)
1628  qbuffer = qtext_load_file(&qbuffer_size);
1629  }
1630 
1631  /*
1632  * Get shared lock, load or reload the query text file if we must, and
1633  * iterate over the hashtable entries.
1634  *
1635  * With a large hash table, we might be holding the lock rather longer
1636  * than one could wish. However, this only blocks creation of new hash
1637  * table entries, and the larger the hash table the less likely that is to
1638  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1639  * we need to partition the hash table to limit the time spent holding any
1640  * one lock.
1641  */
1643 
1644  if (showtext)
1645  {
1646  /*
1647  * Here it is safe to examine extent and gc_count without taking the
1648  * mutex. Note that although other processes might change
1649  * pgss->extent just after we look at it, the strings they then write
1650  * into the file cannot yet be referenced in the hashtable, so we
1651  * don't care whether we see them or not.
1652  *
1653  * If qtext_load_file fails, we just press on; we'll return NULL for
1654  * every query text.
1655  */
1656  if (qbuffer == NULL ||
1657  pgss->extent != extent ||
1658  pgss->gc_count != gc_count)
1659  {
1660  if (qbuffer)
1661  free(qbuffer);
1662  qbuffer = qtext_load_file(&qbuffer_size);
1663  }
1664  }
1665 
1666  hash_seq_init(&hash_seq, pgss_hash);
1667  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1668  {
1670  bool nulls[PG_STAT_STATEMENTS_COLS];
1671  int i = 0;
1672  Counters tmp;
1673  double stddev;
1674  int64 queryid = entry->key.queryid;
1675 
1676  memset(values, 0, sizeof(values));
1677  memset(nulls, 0, sizeof(nulls));
1678 
1679  values[i++] = ObjectIdGetDatum(entry->key.userid);
1680  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1681  if (api_version >= PGSS_V1_9)
1682  values[i++] = BoolGetDatum(entry->key.toplevel);
1683 
1684  if (is_allowed_role || entry->key.userid == userid)
1685  {
1686  if (api_version >= PGSS_V1_2)
1687  values[i++] = Int64GetDatumFast(queryid);
1688 
1689  if (showtext)
1690  {
1691  char *qstr = qtext_fetch(entry->query_offset,
1692  entry->query_len,
1693  qbuffer,
1694  qbuffer_size);
1695 
1696  if (qstr)
1697  {
1698  char *enc;
1699 
1700  enc = pg_any_to_server(qstr,
1701  entry->query_len,
1702  entry->encoding);
1703 
1705 
1706  if (enc != qstr)
1707  pfree(enc);
1708  }
1709  else
1710  {
1711  /* Just return a null if we fail to find the text */
1712  nulls[i++] = true;
1713  }
1714  }
1715  else
1716  {
1717  /* Query text not requested */
1718  nulls[i++] = true;
1719  }
1720  }
1721  else
1722  {
1723  /* Don't show queryid */
1724  if (api_version >= PGSS_V1_2)
1725  nulls[i++] = true;
1726 
1727  /*
1728  * Don't show query text, but hint as to the reason for not doing
1729  * so if it was requested
1730  */
1731  if (showtext)
1732  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1733  else
1734  nulls[i++] = true;
1735  }
1736 
1737  /* copy counters to a local variable to keep locking time short */
1738  {
1739  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1740 
1741  SpinLockAcquire(&e->mutex);
1742  tmp = e->counters;
1743  SpinLockRelease(&e->mutex);
1744  }
1745 
1746  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1747  if (IS_STICKY(tmp))
1748  continue;
1749 
1750  /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1751  for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1752  {
1753  if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1754  {
1755  values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1756  values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1757  }
1758 
1759  if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1760  api_version >= PGSS_V1_8)
1761  {
1762  values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1763  values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1764  values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1765 
1766  /*
1767  * Note we are calculating the population variance here, not
1768  * the sample variance, as we have data for the whole
1769  * population, so Bessel's correction is not used, and we
1770  * don't divide by tmp.calls - 1.
1771  */
1772  if (tmp.calls[kind] > 1)
1773  stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1774  else
1775  stddev = 0.0;
1776  values[i++] = Float8GetDatumFast(stddev);
1777  }
1778  }
1779  values[i++] = Int64GetDatumFast(tmp.rows);
1782  if (api_version >= PGSS_V1_1)
1787  if (api_version >= PGSS_V1_1)
1792  if (api_version >= PGSS_V1_1)
1793  {
1796  }
1797  if (api_version >= PGSS_V1_10)
1798  {
1801  }
1802  if (api_version >= PGSS_V1_8)
1803  {
1804  char buf[256];
1805  Datum wal_bytes;
1806 
1808  values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1809 
1810  snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1811 
1812  /* Convert to numeric. */
1813  wal_bytes = DirectFunctionCall3(numeric_in,
1815  ObjectIdGetDatum(0),
1816  Int32GetDatum(-1));
1817  values[i++] = wal_bytes;
1818  }
1819  if (api_version >= PGSS_V1_10)
1820  {
1829  }
1830 
1831  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1832  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1833  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1834  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1835  api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1836  api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1837  api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
1838  -1 /* fail if you forget to update this assert */ ));
1839 
1840  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1841  }
1842 
1844 
1845  if (qbuffer)
1846  free(qbuffer);
1847 }
1848 
1849 /* Number of output arguments (columns) for pg_stat_statements_info */
1850 #define PG_STAT_STATEMENTS_INFO_COLS 2
1851 
1852 /*
1853  * Return statistics of pg_stat_statements.
1854  */
1855 Datum
1857 {
1858  pgssGlobalStats stats;
1859  TupleDesc tupdesc;
1861  bool nulls[PG_STAT_STATEMENTS_INFO_COLS];
1862 
1863  if (!pgss || !pgss_hash)
1864  ereport(ERROR,
1865  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1866  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1867 
1868  /* Build a tuple descriptor for our result type */
1869  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1870  elog(ERROR, "return type must be a row type");
1871 
1872  MemSet(values, 0, sizeof(values));
1873  MemSet(nulls, 0, sizeof(nulls));
1874 
1875  /* Read global statistics for pg_stat_statements */
1876  {
1877  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1878 
1879  SpinLockAcquire(&s->mutex);
1880  stats = s->stats;
1881  SpinLockRelease(&s->mutex);
1882  }
1883 
1884  values[0] = Int64GetDatum(stats.dealloc);
1886 
1888 }
1889 
1890 /*
1891  * Estimate shared memory space needed.
1892  */
1893 static Size
1895 {
1896  Size size;
1897 
1898  size = MAXALIGN(sizeof(pgssSharedState));
1899  size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
1900 
1901  return size;
1902 }
1903 
1904 /*
1905  * Allocate a new hashtable entry.
1906  * caller must hold an exclusive lock on pgss->lock
1907  *
1908  * "query" need not be null-terminated; we rely on query_len instead
1909  *
1910  * If "sticky" is true, make the new entry artificially sticky so that it will
1911  * probably still be there when the query finishes execution. We do this by
1912  * giving it a median usage value rather than the normal value. (Strictly
1913  * speaking, query strings are normalized on a best effort basis, though it
1914  * would be difficult to demonstrate this even under artificial conditions.)
1915  *
1916  * Note: despite needing exclusive lock, it's not an error for the target
1917  * entry to already exist. This is because pgss_store releases and
1918  * reacquires lock after failing to find a match; so someone else could
1919  * have made the entry while we waited to get exclusive lock.
1920  */
1921 static pgssEntry *
1922 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
1923  bool sticky)
1924 {
1925  pgssEntry *entry;
1926  bool found;
1927 
1928  /* Make space if needed */
1930  entry_dealloc();
1931 
1932  /* Find or create an entry with desired hash code */
1933  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1934 
1935  if (!found)
1936  {
1937  /* New entry, initialize it */
1938 
1939  /* reset the statistics */
1940  memset(&entry->counters, 0, sizeof(Counters));
1941  /* set the appropriate initial usage count */
1942  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1943  /* re-initialize the mutex each time ... we assume no one using it */
1944  SpinLockInit(&entry->mutex);
1945  /* ... and don't forget the query text metadata */
1946  Assert(query_len >= 0);
1947  entry->query_offset = query_offset;
1948  entry->query_len = query_len;
1949  entry->encoding = encoding;
1950  }
1951 
1952  return entry;
1953 }
1954 
1955 /*
1956  * qsort comparator for sorting into increasing usage order
1957  */
1958 static int
1959 entry_cmp(const void *lhs, const void *rhs)
1960 {
1961  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1962  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1963 
1964  if (l_usage < r_usage)
1965  return -1;
1966  else if (l_usage > r_usage)
1967  return +1;
1968  else
1969  return 0;
1970 }
1971 
1972 /*
1973  * Deallocate least-used entries.
1974  *
1975  * Caller must hold an exclusive lock on pgss->lock.
1976  */
1977 static void
1979 {
1980  HASH_SEQ_STATUS hash_seq;
1981  pgssEntry **entries;
1982  pgssEntry *entry;
1983  int nvictims;
1984  int i;
1985  Size tottextlen;
1986  int nvalidtexts;
1987 
1988  /*
1989  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1990  * While we're scanning the table, apply the decay factor to the usage
1991  * values, and update the mean query length.
1992  *
1993  * Note that the mean query length is almost immediately obsolete, since
1994  * we compute it before not after discarding the least-used entries.
1995  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
1996  * making two passes to get a more current result. Likewise, the new
1997  * cur_median_usage includes the entries we're about to zap.
1998  */
1999 
2000  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2001 
2002  i = 0;
2003  tottextlen = 0;
2004  nvalidtexts = 0;
2005 
2006  hash_seq_init(&hash_seq, pgss_hash);
2007  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2008  {
2009  entries[i++] = entry;
2010  /* "Sticky" entries get a different usage decay rate. */
2011  if (IS_STICKY(entry->counters))
2013  else
2015  /* In the mean length computation, ignore dropped texts. */
2016  if (entry->query_len >= 0)
2017  {
2018  tottextlen += entry->query_len + 1;
2019  nvalidtexts++;
2020  }
2021  }
2022 
2023  /* Sort into increasing order by usage */
2024  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2025 
2026  /* Record the (approximate) median usage */
2027  if (i > 0)
2028  pgss->cur_median_usage = entries[i / 2]->counters.usage;
2029  /* Record the mean query length */
2030  if (nvalidtexts > 0)
2031  pgss->mean_query_len = tottextlen / nvalidtexts;
2032  else
2034 
2035  /* Now zap an appropriate fraction of lowest-usage entries */
2036  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2037  nvictims = Min(nvictims, i);
2038 
2039  for (i = 0; i < nvictims; i++)
2040  {
2041  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2042  }
2043 
2044  pfree(entries);
2045 
2046  /* Increment the number of times entries are deallocated */
2047  {
2048  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2049 
2050  SpinLockAcquire(&s->mutex);
2051  s->stats.dealloc += 1;
2052  SpinLockRelease(&s->mutex);
2053  }
2054 }
2055 
2056 /*
2057  * Given a query string (not necessarily null-terminated), allocate a new
2058  * entry in the external query text file and store the string there.
2059  *
2060  * If successful, returns true, and stores the new entry's offset in the file
2061  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2062  * number of garbage collections that have occurred so far.
2063  *
2064  * On failure, returns false.
2065  *
2066  * At least a shared lock on pgss->lock must be held by the caller, so as
2067  * to prevent a concurrent garbage collection. Share-lock-holding callers
2068  * should pass a gc_count pointer to obtain the number of garbage collections,
2069  * so that they can recheck the count after obtaining exclusive lock to
2070  * detect whether a garbage collection occurred (and removed this entry).
2071  */
2072 static bool
2073 qtext_store(const char *query, int query_len,
2074  Size *query_offset, int *gc_count)
2075 {
2076  Size off;
2077  int fd;
2078 
2079  /*
2080  * We use a spinlock to protect extent/n_writers/gc_count, so that
2081  * multiple processes may execute this function concurrently.
2082  */
2083  {
2084  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2085 
2086  SpinLockAcquire(&s->mutex);
2087  off = s->extent;
2088  s->extent += query_len + 1;
2089  s->n_writers++;
2090  if (gc_count)
2091  *gc_count = s->gc_count;
2092  SpinLockRelease(&s->mutex);
2093  }
2094 
2095  *query_offset = off;
2096 
2097  /* Now write the data into the successfully-reserved part of the file */
2098  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2099  if (fd < 0)
2100  goto error;
2101 
2102  if (pg_pwrite(fd, query, query_len, off) != query_len)
2103  goto error;
2104  if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2105  goto error;
2106 
2108 
2109  /* Mark our write complete */
2110  {
2111  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2112 
2113  SpinLockAcquire(&s->mutex);
2114  s->n_writers--;
2115  SpinLockRelease(&s->mutex);
2116  }
2117 
2118  return true;
2119 
2120 error:
2121  ereport(LOG,
2123  errmsg("could not write file \"%s\": %m",
2124  PGSS_TEXT_FILE)));
2125 
2126  if (fd >= 0)
2128 
2129  /* Mark our write complete */
2130  {
2131  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2132 
2133  SpinLockAcquire(&s->mutex);
2134  s->n_writers--;
2135  SpinLockRelease(&s->mutex);
2136  }
2137 
2138  return false;
2139 }
2140 
2141 /*
2142  * Read the external query text file into a malloc'd buffer.
2143  *
2144  * Returns NULL (without throwing an error) if unable to read, eg
2145  * file not there or insufficient memory.
2146  *
2147  * On success, the buffer size is also returned into *buffer_size.
2148  *
2149  * This can be called without any lock on pgss->lock, but in that case
2150  * the caller is responsible for verifying that the result is sane.
2151  */
2152 static char *
2153 qtext_load_file(Size *buffer_size)
2154 {
2155  char *buf;
2156  int fd;
2157  struct stat stat;
2158  Size nread;
2159 
2161  if (fd < 0)
2162  {
2163  if (errno != ENOENT)
2164  ereport(LOG,
2166  errmsg("could not read file \"%s\": %m",
2167  PGSS_TEXT_FILE)));
2168  return NULL;
2169  }
2170 
2171  /* Get file length */
2172  if (fstat(fd, &stat))
2173  {
2174  ereport(LOG,
2176  errmsg("could not stat file \"%s\": %m",
2177  PGSS_TEXT_FILE)));
2179  return NULL;
2180  }
2181 
2182  /* Allocate buffer; beware that off_t might be wider than size_t */
2183  if (stat.st_size <= MaxAllocHugeSize)
2184  buf = (char *) malloc(stat.st_size);
2185  else
2186  buf = NULL;
2187  if (buf == NULL)
2188  {
2189  ereport(LOG,
2190  (errcode(ERRCODE_OUT_OF_MEMORY),
2191  errmsg("out of memory"),
2192  errdetail("Could not allocate enough memory to read file \"%s\".",
2193  PGSS_TEXT_FILE)));
2195  return NULL;
2196  }
2197 
2198  /*
2199  * OK, slurp in the file. Windows fails if we try to read more than
2200  * INT_MAX bytes at once, and other platforms might not like that either,
2201  * so read a very large file in 1GB segments.
2202  */
2203  nread = 0;
2204  while (nread < stat.st_size)
2205  {
2206  int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2207 
2208  /*
2209  * If we get a short read and errno doesn't get set, the reason is
2210  * probably that garbage collection truncated the file since we did
2211  * the fstat(), so we don't log a complaint --- but we don't return
2212  * the data, either, since it's most likely corrupt due to concurrent
2213  * writes from garbage collection.
2214  */
2215  errno = 0;
2216  if (read(fd, buf + nread, toread) != toread)
2217  {
2218  if (errno)
2219  ereport(LOG,
2221  errmsg("could not read file \"%s\": %m",
2222  PGSS_TEXT_FILE)));
2223  free(buf);
2225  return NULL;
2226  }
2227  nread += toread;
2228  }
2229 
2230  if (CloseTransientFile(fd) != 0)
2231  ereport(LOG,
2233  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2234 
2235  *buffer_size = nread;
2236  return buf;
2237 }
2238 
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len describe where the text is expected to sit inside
 * "buffer", which holds buffer_size bytes.  We validate the given
 * offset/length, and return NULL if bogus (or if buffer itself is NULL,
 * meaning the file read failed).  Otherwise, the result points to a
 * null-terminated string within the buffer; it is not a copy.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminator must lie entirely
	 * inside the buffer.  The test is phrased as a subtraction from
	 * buffer_size so that a huge offset cannot wrap around in Size
	 * arithmetic and sneak past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2262 
2263 /*
2264  * Do we need to garbage-collect the external query text file?
2265  *
2266  * Caller should hold at least a shared lock on pgss->lock.
2267  */
2268 static bool
2270 {
2271  Size extent;
2272 
2273  /* Read shared extent pointer */
2274  {
2275  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2276 
2277  SpinLockAcquire(&s->mutex);
2278  extent = s->extent;
2279  SpinLockRelease(&s->mutex);
2280  }
2281 
2282  /* Don't proceed if file does not exceed 512 bytes per possible entry */
2283  if (extent < 512 * pgss_max)
2284  return false;
2285 
2286  /*
2287  * Don't proceed if file is less than about 50% bloat. Nothing can or
2288  * should be done in the event of unusually large query texts accounting
2289  * for file's large size. We go to the trouble of maintaining the mean
2290  * query length in order to prevent garbage collection from thrashing
2291  * uselessly.
2292  */
2293  if (extent < pgss->mean_query_len * pgss_max * 2)
2294  return false;
2295 
2296  return true;
2297 }
2298 
2299 /*
2300  * Garbage-collect orphaned query texts in external file.
2301  *
2302  * This won't be called often in the typical case, since it's likely that
2303  * there won't be too much churn, and besides, a similar compaction process
2304  * occurs when serializing to disk at shutdown or as part of resetting.
2305  * Despite this, it seems prudent to plan for the edge case where the file
2306  * becomes unreasonably large, with no other method of compaction likely to
2307  * occur in the foreseeable future.
2308  *
2309  * The caller must hold an exclusive lock on pgss->lock.
2310  *
2311  * At the first sign of trouble we unlink the query text file to get a clean
2312  * slate (although existing statistics are retained), rather than risk
2313  * thrashing by allowing the same problem case to recur indefinitely.
2314  */
2315 static void
2317 {
2318  char *qbuffer;
2319  Size qbuffer_size;
2320  FILE *qfile = NULL;
2321  HASH_SEQ_STATUS hash_seq;
2322  pgssEntry *entry;
2323  Size extent;
2324  int nentries;
2325 
2326  /*
2327  * When called from pgss_store, some other session might have proceeded
2328  * with garbage collection in the no-lock-held interim of lock strength
2329  * escalation. Check once more that this is actually necessary.
2330  */
2331  if (!need_gc_qtexts())
2332  return;
2333 
2334  /*
2335  * Load the old texts file. If we fail (out of memory, for instance),
2336  * invalidate query texts. Hopefully this is rare. It might seem better
2337  * to leave things alone on an OOM failure, but the problem is that the
2338  * file is only going to get bigger; hoping for a future non-OOM result is
2339  * risky and can easily lead to complete denial of service.
2340  */
2341  qbuffer = qtext_load_file(&qbuffer_size);
2342  if (qbuffer == NULL)
2343  goto gc_fail;
2344 
2345  /*
2346  * We overwrite the query texts file in place, so as to reduce the risk of
2347  * an out-of-disk-space failure. Since the file is guaranteed not to get
2348  * larger, this should always work on traditional filesystems; though we
2349  * could still lose on copy-on-write filesystems.
2350  */
2352  if (qfile == NULL)
2353  {
2354  ereport(LOG,
2356  errmsg("could not write file \"%s\": %m",
2357  PGSS_TEXT_FILE)));
2358  goto gc_fail;
2359  }
2360 
2361  extent = 0;
2362  nentries = 0;
2363 
2364  hash_seq_init(&hash_seq, pgss_hash);
2365  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2366  {
2367  int query_len = entry->query_len;
2368  char *qry = qtext_fetch(entry->query_offset,
2369  query_len,
2370  qbuffer,
2371  qbuffer_size);
2372 
2373  if (qry == NULL)
2374  {
2375  /* Trouble ... drop the text */
2376  entry->query_offset = 0;
2377  entry->query_len = -1;
2378  /* entry will not be counted in mean query length computation */
2379  continue;
2380  }
2381 
2382  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2383  {
2384  ereport(LOG,
2386  errmsg("could not write file \"%s\": %m",
2387  PGSS_TEXT_FILE)));
2388  hash_seq_term(&hash_seq);
2389  goto gc_fail;
2390  }
2391 
2392  entry->query_offset = extent;
2393  extent += query_len + 1;
2394  nentries++;
2395  }
2396 
2397  /*
2398  * Truncate away any now-unused space. If this fails for some odd reason,
2399  * we log it, but there's no need to fail.
2400  */
2401  if (ftruncate(fileno(qfile), extent) != 0)
2402  ereport(LOG,
2404  errmsg("could not truncate file \"%s\": %m",
2405  PGSS_TEXT_FILE)));
2406 
2407  if (FreeFile(qfile))
2408  {
2409  ereport(LOG,
2411  errmsg("could not write file \"%s\": %m",
2412  PGSS_TEXT_FILE)));
2413  qfile = NULL;
2414  goto gc_fail;
2415  }
2416 
2417  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2418  pgss->extent, extent);
2419 
2420  /* Reset the shared extent pointer */
2421  pgss->extent = extent;
2422 
2423  /*
2424  * Also update the mean query length, to be sure that need_gc_qtexts()
2425  * won't still think we have a problem.
2426  */
2427  if (nentries > 0)
2428  pgss->mean_query_len = extent / nentries;
2429  else
2431 
2432  free(qbuffer);
2433 
2434  /*
2435  * OK, count a garbage collection cycle. (Note: even though we have
2436  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2437  * other processes may examine gc_count while holding only the mutex.
2438  * Also, we have to advance the count *after* we've rewritten the file,
2439  * else other processes might not realize they read a stale file.)
2440  */
2441  record_gc_qtexts();
2442 
2443  return;
2444 
2445 gc_fail:
2446  /* clean up resources */
2447  if (qfile)
2448  FreeFile(qfile);
2449  if (qbuffer)
2450  free(qbuffer);
2451 
2452  /*
2453  * Since the contents of the external file are now uncertain, mark all
2454  * hashtable entries as having invalid texts.
2455  */
2456  hash_seq_init(&hash_seq, pgss_hash);
2457  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2458  {
2459  entry->query_offset = 0;
2460  entry->query_len = -1;
2461  }
2462 
2463  /*
2464  * Destroy the query text file and create a new, empty one
2465  */
2466  (void) unlink(PGSS_TEXT_FILE);
2468  if (qfile == NULL)
2469  ereport(LOG,
2471  errmsg("could not recreate file \"%s\": %m",
2472  PGSS_TEXT_FILE)));
2473  else
2474  FreeFile(qfile);
2475 
2476  /* Reset the shared extent pointer */
2477  pgss->extent = 0;
2478 
2479  /* Reset mean_query_len to match the new state */
2481 
2482  /*
2483  * Bump the GC count even though we failed.
2484  *
2485  * This is needed to make concurrent readers of file without any lock on
2486  * pgss->lock notice existence of new version of file. Once readers
2487  * subsequently observe a change in GC count with pgss->lock held, that
2488  * forces a safe reopen of file. Writers also require that we bump here,
2489  * of course. (As required by locking protocol, readers and writers don't
2490  * trust earlier file contents until gc_count is found unchanged after
2491  * pgss->lock acquired in shared or exclusive mode respectively.)
2492  */
2493  record_gc_qtexts();
2494 }
2495 
2496 /*
2497  * Release entries corresponding to parameters passed.
2498  */
2499 static void
2500 entry_reset(Oid userid, Oid dbid, uint64 queryid)
2501 {
2502  HASH_SEQ_STATUS hash_seq;
2503  pgssEntry *entry;
2504  FILE *qfile;
2505  long num_entries;
2506  long num_remove = 0;
2507  pgssHashKey key;
2508 
2509  if (!pgss || !pgss_hash)
2510  ereport(ERROR,
2511  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2512  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
2513 
2515  num_entries = hash_get_num_entries(pgss_hash);
2516 
2517  if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2518  {
2519  /* If all the parameters are available, use the fast path. */
2520  memset(&key, 0, sizeof(pgssHashKey));
2521  key.userid = userid;
2522  key.dbid = dbid;
2523  key.queryid = queryid;
2524 
2525  /* Remove the key if it exists, starting with the top-level entry */
2526  key.toplevel = false;
2527  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2528  if (entry) /* found */
2529  num_remove++;
2530 
2531  /* Also remove entries for top level statements */
2532  key.toplevel = true;
2533 
2534  /* Remove the key if exists */
2535  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2536  if (entry) /* found */
2537  num_remove++;
2538  }
2539  else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2540  {
2541  /* Remove entries corresponding to valid parameters. */
2542  hash_seq_init(&hash_seq, pgss_hash);
2543  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2544  {
2545  if ((!userid || entry->key.userid == userid) &&
2546  (!dbid || entry->key.dbid == dbid) &&
2547  (!queryid || entry->key.queryid == queryid))
2548  {
2549  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2550  num_remove++;
2551  }
2552  }
2553  }
2554  else
2555  {
2556  /* Remove all entries. */
2557  hash_seq_init(&hash_seq, pgss_hash);
2558  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2559  {
2560  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2561  num_remove++;
2562  }
2563  }
2564 
2565  /* All entries are removed? */
2566  if (num_entries != num_remove)
2567  goto release_lock;
2568 
2569  /*
2570  * Reset global statistics for pg_stat_statements since all entries are
2571  * removed.
2572  */
2573  {
2574  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2575  TimestampTz stats_reset = GetCurrentTimestamp();
2576 
2577  SpinLockAcquire(&s->mutex);
2578  s->stats.dealloc = 0;
2579  s->stats.stats_reset = stats_reset;
2580  SpinLockRelease(&s->mutex);
2581  }
2582 
2583  /*
2584  * Write new empty query file, perhaps even creating a new one to recover
2585  * if the file was missing.
2586  */
2588  if (qfile == NULL)
2589  {
2590  ereport(LOG,
2592  errmsg("could not create file \"%s\": %m",
2593  PGSS_TEXT_FILE)));
2594  goto done;
2595  }
2596 
2597  /* If ftruncate fails, log it, but it's not a fatal problem */
2598  if (ftruncate(fileno(qfile), 0) != 0)
2599  ereport(LOG,
2601  errmsg("could not truncate file \"%s\": %m",
2602  PGSS_TEXT_FILE)));
2603 
2604  FreeFile(qfile);
2605 
2606 done:
2607  pgss->extent = 0;
2608  /* This counts as a query text garbage collection for our purposes */
2609  record_gc_qtexts();
2610 
2611 release_lock:
2613 }
2614 
2615 /*
2616  * Generate a normalized version of the query string that will be used to
2617  * represent all similar queries.
2618  *
2619  * Note that the normalized representation may well vary depending on
2620  * just which "equivalent" query is used to create the hashtable entry.
2621  * We assume this is OK.
2622  *
2623  * If query_loc > 0, then "query" has been advanced by that much compared to
2624  * the original string start, so we need to translate the provided locations
2625  * to compensate. (This lets us avoid re-scanning statements before the one
2626  * of interest, so it's worth doing.)
2627  *
2628  * *query_len_p contains the input string length, and is updated with
2629  * the result string length on exit. The resulting string might be longer
2630  * or shorter depending on what happens with replacement of constants.
2631  *
2632  * Returns a palloc'd string.
2633  */
2634 static char *
2635 generate_normalized_query(JumbleState *jstate, const char *query,
2636  int query_loc, int *query_len_p)
2637 {
2638  char *norm_query;
2639  int query_len = *query_len_p;
2640  int i,
2641  norm_query_buflen, /* Space allowed for norm_query */
2642  len_to_wrt, /* Length (in bytes) to write */
2643  quer_loc = 0, /* Source query byte location */
2644  n_quer_loc = 0, /* Normalized query byte location */
2645  last_off = 0, /* Offset from start for previous tok */
2646  last_tok_len = 0; /* Length (in bytes) of that tok */
2647 
2648  /*
2649  * Get constants' lengths (core system only gives us locations). Note
2650  * this also ensures the items are sorted by location.
2651  */
2652  fill_in_constant_lengths(jstate, query, query_loc);
2653 
2654  /*
2655  * Allow for $n symbols to be longer than the constants they replace.
2656  * Constants must take at least one byte in text form, while a $n symbol
2657  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2658  * could refine that limit based on the max value of n for the current
2659  * query, but it hardly seems worth any extra effort to do so.
2660  */
2661  norm_query_buflen = query_len + jstate->clocations_count * 10;
2662 
2663  /* Allocate result buffer */
2664  norm_query = palloc(norm_query_buflen + 1);
2665 
2666  for (i = 0; i < jstate->clocations_count; i++)
2667  {
2668  int off, /* Offset from start for cur tok */
2669  tok_len; /* Length (in bytes) of that tok */
2670 
2671  off = jstate->clocations[i].location;
2672  /* Adjust recorded location if we're dealing with partial string */
2673  off -= query_loc;
2674 
2675  tok_len = jstate->clocations[i].length;
2676 
2677  if (tok_len < 0)
2678  continue; /* ignore any duplicates */
2679 
2680  /* Copy next chunk (what precedes the next constant) */
2681  len_to_wrt = off - last_off;
2682  len_to_wrt -= last_tok_len;
2683 
2684  Assert(len_to_wrt >= 0);
2685  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2686  n_quer_loc += len_to_wrt;
2687 
2688  /* And insert a param symbol in place of the constant token */
2689  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2690  i + 1 + jstate->highest_extern_param_id);
2691 
2692  quer_loc = off + tok_len;
2693  last_off = off;
2694  last_tok_len = tok_len;
2695  }
2696 
2697  /*
2698  * We've copied up until the last ignorable constant. Copy over the
2699  * remaining bytes of the original query string.
2700  */
2701  len_to_wrt = query_len - quer_loc;
2702 
2703  Assert(len_to_wrt >= 0);
2704  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2705  n_quer_loc += len_to_wrt;
2706 
2707  Assert(n_quer_loc <= norm_query_buflen);
2708  norm_query[n_quer_loc] = '\0';
2709 
2710  *query_len_p = n_quer_loc;
2711  return norm_query;
2712 }
2713 
2714 /*
2715  * Given a valid SQL string and an array of constant-location records,
2716  * fill in the textual lengths of those constants.
2717  *
2718  * The constants may use any allowed constant syntax, such as float literals,
2719  * bit-strings, single-quoted strings and dollar-quoted strings. This is
2720  * accomplished by using the public API for the core scanner.
2721  *
2722  * It is the caller's job to ensure that the string is a valid SQL statement
2723  * with constants at the indicated locations. Since in practice the string
2724  * has already been parsed, and the locations that the caller provides will
2725  * have originated from within the authoritative parser, this should not be
2726  * a problem.
2727  *
2728  * Duplicate constant pointers are possible, and will have their lengths
2729  * marked as '-1', so that they are later ignored. (Actually, we assume the
2730  * lengths were initialized as -1 to start with, and don't change them here.)
2731  *
2732  * If query_loc > 0, then "query" has been advanced by that much compared to
2733  * the original string start, so we need to translate the provided locations
2734  * to compensate. (This lets us avoid re-scanning statements before the one
2735  * of interest, so it's worth doing.)
2736  *
2737  * N.B. There is an assumption that a '-' character at a Const location begins
2738  * a negative numeric constant. This precludes there ever being another
2739  * reason for a constant to start with a '-'.
2740  */
2741 static void
2742 fill_in_constant_lengths(JumbleState *jstate, const char *query,
2743  int query_loc)
2744 {
2745  LocationLen *locs;
2747  core_yy_extra_type yyextra;
2748  core_YYSTYPE yylval;
2749  YYLTYPE yylloc;
2750  int last_loc = -1;
2751  int i;
2752 
2753  /*
2754  * Sort the records by location so that we can process them in order while
2755  * scanning the query text.
2756  */
2757  if (jstate->clocations_count > 1)
2758  qsort(jstate->clocations, jstate->clocations_count,
2759  sizeof(LocationLen), comp_location);
2760  locs = jstate->clocations;
2761 
2762  /* initialize the flex scanner --- should match raw_parser() */
2763  yyscanner = scanner_init(query,
2764  &yyextra,
2765  &ScanKeywords,
2767 
2768  /* we don't want to re-emit any escape string warnings */
2769  yyextra.escape_string_warning = false;
2770 
2771  /* Search for each constant, in sequence */
2772  for (i = 0; i < jstate->clocations_count; i++)
2773  {
2774  int loc = locs[i].location;
2775  int tok;
2776 
2777  /* Adjust recorded location if we're dealing with partial string */
2778  loc -= query_loc;
2779 
2780  Assert(loc >= 0);
2781 
2782  if (loc <= last_loc)
2783  continue; /* Duplicate constant, ignore */
2784 
2785  /* Lex tokens until we find the desired constant */
2786  for (;;)
2787  {
2788  tok = core_yylex(&yylval, &yylloc, yyscanner);
2789 
2790  /* We should not hit end-of-string, but if we do, behave sanely */
2791  if (tok == 0)
2792  break; /* out of inner for-loop */
2793 
2794  /*
2795  * We should find the token position exactly, but if we somehow
2796  * run past it, work with that.
2797  */
2798  if (yylloc >= loc)
2799  {
2800  if (query[loc] == '-')
2801  {
2802  /*
2803  * It's a negative value - this is the one and only case
2804  * where we replace more than a single token.
2805  *
2806  * Do not compensate for the core system's special-case
2807  * adjustment of location to that of the leading '-'
2808  * operator in the event of a negative constant. It is
2809  * also useful for our purposes to start from the minus
2810  * symbol. In this way, queries like "select * from foo
2811  * where bar = 1" and "select * from foo where bar = -2"
2812  * will have identical normalized query strings.
2813  */
2814  tok = core_yylex(&yylval, &yylloc, yyscanner);
2815  if (tok == 0)
2816  break; /* out of inner for-loop */
2817  }
2818 
2819  /*
2820  * We now rely on the assumption that flex has placed a zero
2821  * byte after the text of the current token in scanbuf.
2822  */
2823  locs[i].length = strlen(yyextra.scanbuf + loc);
2824  break; /* out of inner for-loop */
2825  }
2826  }
2827 
2828  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2829  if (tok == 0)
2830  break;
2831 
2832  last_loc = loc;
2833  }
2834 
2836 }
2837 
2838 /*
2839  * comp_location: comparator for qsorting LocationLen structs by location
2840  */
2841 static int
2842 comp_location(const void *a, const void *b)
2843 {
2844  int l = ((const LocationLen *) a)->location;
2845  int r = ((const LocationLen *) b)->location;
2846 
2847  if (l < r)
2848  return -1;
2849  else if (l > r)
2850  return +1;
2851  else
2852  return 0;
2853 }
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4951
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:617
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1574
static Datum values[MAXATTR]
Definition: bootstrap.c:156
#define CStringGetTextDatum(s)
Definition: builtins.h:85
unsigned int uint32
Definition: c.h:441
#define Min(x, y)
Definition: c.h:986
#define PG_BINARY_R
Definition: c.h:1270
#define MAXALIGN(LEN)
Definition: c.h:757
signed int int32
Definition: c.h:429
#define Max(x, y)
Definition: c.h:980
#define PG_BINARY
Definition: c.h:1268
#define UINT64_FORMAT
Definition: c.h:484
#define PG_BINARY_W
Definition: c.h:1271
#define MemSet(start, val, len)
Definition: c.h:1008
size_t Size
Definition: c.h:540
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1512
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1382
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:780
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1436
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1426
int errcode_for_file_access(void)
Definition: elog.c:716
int errdetail(const char *fmt,...)
Definition: elog.c:1037
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define LOG
Definition: elog.h:25
#define PG_END_TRY()
Definition: elog.h:324
#define PG_TRY()
Definition: elog.h:299
#define DEBUG1
Definition: elog.h:24
#define PG_FINALLY()
Definition: elog.h:316
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
struct pg_encoding enc
Definition: encode.c:562
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:75
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:74
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:72
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:148
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:311
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:73
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:470
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:410
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:69
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:76
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:65
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:80
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2461
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:699
int CloseTransientFile(int fd)
Definition: fd.c:2688
int FreeFile(FILE *file)
Definition: fd.c:2660
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2511
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1683
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:635
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void SetSingleFuncCall(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
bool IsUnderPostmaster
Definition: globals.c:113
Oid MyDatabaseId
Definition: globals.c:89
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9587
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9476
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:9623
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9502
@ PGC_SUSET
Definition: guc.h:75
@ PGC_POSTMASTER
Definition: guc.h:71
@ PGC_SIGHUP
Definition: guc.h:72
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
struct timeval instr_time
Definition: instr_time.h:150
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:202
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:280
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:246
@ INSTRUMENT_ALL
Definition: instrument.h:63
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:55
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:69
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
PGDLLIMPORT const ScanKeywordList ScanKeywords
Assert(fmt[strlen(fmt) - 1] !='\n')
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:596
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1196
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:697
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1800
@ LW_SHARED
Definition: lwlock.h:105
@ LW_EXCLUSIVE
Definition: lwlock.h:104
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
void pfree(void *pointer)
Definition: mcxt.c:1175
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * palloc(Size size)
Definition: mcxt.c:1068
#define MaxAllocHugeSize
Definition: memutils.h:44
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:486
Oid GetUserId(void)
Definition: miscinit.c:492
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1621
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1618
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:59
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:212
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_1
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate)
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
static int exec_nested_level
void _PG_init(void)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
PG_MODULE_MAGIC
static int plan_nested_level
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void entry_reset(Oid userid, Oid dbid, uint64 queryid)
static void pgss_shmem_request(void)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
@ PGSS_NUMKIND
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
static bool pgss_save
static void pgss_shmem_startup(void)
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
#define PGSS_HANDLED_UTILITY(n)
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static char * buf
Definition: pg_test_fsync.c:67
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:279
int duration
Definition: pgbench.c:187
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
planner_hook_type planner_hook
Definition: planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:282
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
#define sprintf
Definition: port.h:227
ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset)
Definition: pwrite.c:27
#define snprintf
Definition: port.h:225
#define qsort(a, b, c, d)
Definition: port.h:495
#define Int64GetDatumFast(X)
Definition: postgres.h:804
#define CStringGetDatum(X)
Definition: postgres.h:622
uintptr_t Datum
Definition: postgres.h:411
#define Float8GetDatumFast(X)
Definition: postgres.h:805
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define BoolGetDatum(X)
Definition: postgres.h:446
#define Int32GetDatum(X)
Definition: postgres.h:523
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * CleanQuerytext(const char *query, int *location, int *len)
Definition: queryjumble.c:62
void EnableQueryId(void)
Definition: queryjumble.c:150
static struct subre * parse(struct vars *, int, int, struct state *, struct state *)
Definition: regcomp.c:673
int slock_t
Definition: s_lock.h:975
#define YYLTYPE
Definition: scanner.h:44
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
void scanner_finish(core_yyscan_t yyscanner)
PGDLLIMPORT const uint16 ScanKeywordTokens[]
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:23
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:341
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
static void error(void)
Definition: sql-dyntest.c:147
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:39
int64 local_blks_written
Definition: instrument.h:33
instr_time blk_write_time
Definition: instrument.h:37
instr_time temp_blk_read_time
Definition: instrument.h:38
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
instr_time blk_read_time
Definition: instrument.h:36
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
int64 jit_emission_count
double jit_emission_time
int64 shared_blks_hit
double jit_optimization_time
double blk_read_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
int64 local_blks_read
double jit_inlining_time
double blk_write_time
uint64 es_processed
Definition: execnodes.h:636
struct JitContext * es_jit
Definition: execnodes.h:678
MemoryContext es_query_cxt
Definition: execnodes.h:632
fmNodePtr resultinfo
Definition: fmgr.h:89
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
WalUsage walusage
Definition: instrument.h:90
BufferUsage bufusage
Definition: instrument.h:89
JitInstrumentation instr
Definition: jit.h:61
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:39
instr_time emission_counter
Definition: jit.h:42
instr_time inlining_counter
Definition: jit.h:36
int highest_extern_param_id
Definition: queryjumble.h:52
LocationLen * clocations
Definition: queryjumble.h:43
int clocations_count
Definition: queryjumble.h:49
Definition: lwlock.h:32
Definition: nodes.h:574
const char * p_sourcetext
Definition: parse_node.h:182
int stmt_location
Definition: plannodes.h:90
int stmt_len
Definition: plannodes.h:91
Node * utilityStmt
Definition: plannodes.h:87
uint64 queryId
Definition: plannodes.h:49
uint64 nprocessed
Definition: cmdtag.h:31
CommandTag commandTag
Definition: cmdtag.h:30
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
int stmt_location
Definition: parsenodes.h:197
uint64 queryId
Definition: parsenodes.h:125
int stmt_len
Definition: parsenodes.h:198
Node * utilityStmt
Definition: parsenodes.h:129
TupleDesc setDesc
Definition: execnodes.h:317
Tuplestorestate * setResult
Definition: execnodes.h:316
uint64 wal_bytes
Definition: instrument.h:53
int64 wal_fpi
Definition: instrument.h:52
int64 wal_records
Definition: instrument.h:51
Definition: guc.h:165
bool escape_string_warning
Definition: scanner.h:88
char * scanbuf
Definition: scanner.h:72
Counters counters
pgssHashKey key
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:273
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:547
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:77
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
#define TimestampTzGetDatum(X)
Definition: timestamp.h:32
#define fstat
Definition: win32_port.h:282
#define ftruncate(a, b)
Definition: win32_port.h:73