PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement planning and execution times as well as resource
5  * usage across a whole database cluster.
6  *
7  * Execution costs are totaled for each distinct source query, and kept in
8  * a shared hashtable. (We track only as many distinct queries as will fit
9  * in the designated amount of shared memory.)
10  *
11  * Starting in Postgres 9.2, this module normalized query entries. As of
12  * Postgres 14, the normalization is done by the core if compute_query_id is
13  * enabled, or optionally by third-party modules.
14  *
15  * To facilitate presenting entries to users, we create "representative" query
16  * strings in which constants are replaced with parameter symbols ($n), to
17  * make it clearer what a normalized entry can represent. To save on shared
18  * memory, and to avoid having to truncate oversized query strings, we store
19  * these strings in a temporary external query-texts file. Offsets into this
20  * file are kept in shared memory.
21  *
22  * Note about locking issues: to create or delete an entry in the shared
23  * hashtable, one must hold pgss->lock exclusively. Modifying any field
24  * in an entry except the counters requires the same. To look up an entry,
25  * one must hold the lock shared. To read or update the counters within
26  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27  * disappear!) and also take the entry's mutex spinlock.
28  * The shared state variable pgss->extent (the next free spot in the external
29  * query-text file) should be accessed only while holding either the
30  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31  * allow reserving file space while holding only shared lock on pgss->lock.
32  * Rewriting the entire external query-text file, eg for garbage collection,
33  * requires holding pgss->lock exclusively; this allows individual entries
34  * in the file to be read or written while holding only shared lock.
35  *
36  *
37  * Copyright (c) 2008-2021, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  * contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <math.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49 
50 #include "access/parallel.h"
51 #include "catalog/pg_authid.h"
52 #include "common/hashfn.h"
53 #include "executor/instrument.h"
54 #include "funcapi.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "optimizer/planner.h"
58 #include "parser/analyze.h"
59 #include "parser/parsetree.h"
60 #include "parser/scanner.h"
61 #include "parser/scansup.h"
62 #include "pgstat.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/lwlock.h"
66 #include "storage/shmem.h"
67 #include "storage/spin.h"
68 #include "tcop/utility.h"
69 #include "utils/acl.h"
70 #include "utils/builtins.h"
71 #include "utils/queryjumble.h"
72 #include "utils/memutils.h"
73 #include "utils/timestamp.h"
74 
76 
77 /* Location of permanent stats file (valid when database is shut down) */
78 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
79 
80 /*
81  * Location of external query text file. We don't keep it in the core
82  * system's stats_temp_directory. The core system can safely use that GUC
83  * setting, because the statistics collector temp file paths are set only once
84  * as part of changing the GUC, but pg_stat_statements has no way of avoiding
85  * race conditions. Besides, we only expect modest, infrequent I/O for query
86  * strings, so placing the file on a faster filesystem is not compelling.
87  */
88 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
89 
90 /* Magic number identifying the stats file format */
91 static const uint32 PGSS_FILE_HEADER = 0x20201227;
92 
93 /* PostgreSQL major version number, changes in which invalidate all entries */
94 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
95 
/*
 * Constants governing the "usage" value kept for each entry.
 *
 * XXX: Should USAGE_EXEC reflect execution time and/or buffer usage?
 */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * IS_STICKY(c): true when the Counters value c has recorded neither a
 * planning nor an execution call yet.
 *
 * The argument is parenthesized so that any expression yielding a Counters
 * struct (for example a dereferenced pointer) expands correctly.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
105 
/*
 * PGSS_HANDLED_UTILITY(n) is true when node n is a utility statement that
 * pg_stat_statements handles, i.e. anything other than the ExecuteStmt,
 * PrepareStmt and DeallocateStmt nodes, which pgss_ProcessUtility and
 * pgss_post_parse_analyze ignore.
 */
#define PGSS_HANDLED_UTILITY(n) \
	(!(IsA(n, ExecuteStmt) || \
	   IsA(n, PrepareStmt) || \
	   IsA(n, DeallocateStmt)))
113 
114 /*
115  * Extension version number, for supporting older extension versions' objects
116  */
117 typedef enum pgssVersion
118 {
125 } pgssVersion;
126 
127 typedef enum pgssStoreKind
128 {
130 
131  /*
132  * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
133  * reference the underlying values in the arrays in the Counters struct,
134  * and this order is required in pg_stat_statements_internal().
135  */
138 
139  PGSS_NUMKIND /* Must be last value of this enum */
140 } pgssStoreKind;
141 
142 /*
143  * Hashtable key that defines the identity of a hashtable entry. We separate
144  * queries by user and by database even if they are otherwise identical.
 * The toplevel flag is part of the key too, so the same queryid is kept in
 * separate entries depending on whether it ran at top level.
145  *
146  * If you add a new field to this struct, make sure to teach pgss_store() to
147  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
148  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
149  *
 * For the same reason, do not reorder or resize the existing fields without
 * checking every place that fills in a key.
150  */
151 typedef struct pgssHashKey
152 {
153  Oid userid; /* user OID */
154  Oid dbid; /* database OID */
155  uint64 queryid; /* query identifier */
156  bool toplevel; /* true if the query was executed at top level */
157 } pgssHashKey;
158 
159 /*
160  * The actual stats counters kept within pgssEntry.
 *
 * Fields declared as arrays of PGSS_NUMKIND elements hold one value per
 * pgssStoreKind and are indexed with PGSS_PLAN and PGSS_EXEC; the other
 * fields are not split by kind. All times are in milliseconds.
 *
 * Per the locking notes in the file header, these counters are read or
 * updated while holding the owning entry's mutex spinlock.
161  */
162 typedef struct Counters
163 {
164  int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
165  double total_time[PGSS_NUMKIND]; /* total planning/execution time,
166  * in msec */
167  double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
168  * msec */
169  double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
170  * msec */
171  double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
172  * msec */
173  double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
174  * planning/execution time in msec */
175  int64 rows; /* total # of retrieved or affected rows */
176  int64 shared_blks_hit; /* # of shared buffer hits */
177  int64 shared_blks_read; /* # of shared disk blocks read */
178  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
179  int64 shared_blks_written; /* # of shared disk blocks written */
180  int64 local_blks_hit; /* # of local buffer hits */
181  int64 local_blks_read; /* # of local disk blocks read */
182  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
183  int64 local_blks_written; /* # of local disk blocks written */
184  int64 temp_blks_read; /* # of temp blocks read */
185  int64 temp_blks_written; /* # of temp blocks written */
186  double blk_read_time; /* time spent reading, in msec */
187  double blk_write_time; /* time spent writing, in msec */
188  double usage; /* usage factor */
189  int64 wal_records; /* # of WAL records generated */
190  int64 wal_fpi; /* # of WAL full page images generated */
191  uint64 wal_bytes; /* total amount of WAL generated in bytes */
192 } Counters;
193 
194 /*
195  * Global statistics for pg_stat_statements
196  */
197 typedef struct pgssGlobalStats
198 {
199  int64 dealloc; /* # of times entries were deallocated */
200  TimestampTz stats_reset; /* timestamp with all stats reset */
202 
203 /*
204  * Statistics per statement
205  *
206  * Note: in event of a failure in garbage collection of the query text file,
207  * we reset query_offset to zero and query_len to -1. This will be seen as
208  * an invalid state by qtext_fetch().
 *
 * Note: the shmem shutdown and startup hooks in this file write and read
 * whole pgssEntry structs with fwrite()/fread(), so the layout of this
 * struct (and of the structs embedded in it) is also the layout of the
 * records in the dump file. NOTE(review): a layout change presumably needs
 * a new PGSS_FILE_HEADER value so old dump files are rejected -- confirm.
209  */
210 typedef struct pgssEntry
211 {
212  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
213  Counters counters; /* the statistics for this query */
214  Size query_offset; /* query text offset in external file */
215  int query_len; /* # of valid bytes in query string, or -1 */
216  int encoding; /* query text encoding */
217  slock_t mutex; /* protects the counters only */
218 } pgssEntry;
219 
220 /*
221  * Global shared state
222  */
223 typedef struct pgssSharedState
224 {
225  LWLock *lock; /* protects hashtable search/modification */
226  double cur_median_usage; /* current median usage in hashtable */
227  Size mean_query_len; /* current mean entry text length */
228  slock_t mutex; /* protects following fields only: */
229  Size extent; /* current extent of query file */
230  int n_writers; /* number of active writers to query file */
231  int gc_count; /* query file garbage collection cycle count */
232  pgssGlobalStats stats; /* global statistics for pgss */
234 
235 /*---- Local variables ----*/
236 
237 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
238 static int exec_nested_level = 0;
239 
240 /* Current nesting depth of planner calls */
241 static int plan_nested_level = 0;
242 
243 /* Saved hook values in case of unload */
252 
253 /* Links to shared memory state */
254 static pgssSharedState *pgss = NULL;
255 static HTAB *pgss_hash = NULL;
256 
257 /*---- GUC variables ----*/
258 
259 typedef enum
260 {
261  PGSS_TRACK_NONE, /* track no statements */
262  PGSS_TRACK_TOP, /* only top level statements */
263  PGSS_TRACK_ALL /* all statements, including nested ones */
265 
/*
 * Name-to-value table for the pg_stat_statements.track enum GUC; it is the
 * options array passed to DefineCustomEnumVariable() in _PG_init(). Each row
 * pairs a setting name with its PGSS_TRACK_* value, and the list ends with a
 * row whose name is NULL.
 */
266 static const struct config_enum_entry track_options[] =
267 {
268  {"none", PGSS_TRACK_NONE, false},
269  {"top", PGSS_TRACK_TOP, false},
270  {"all", PGSS_TRACK_ALL, false},
271  {NULL, 0, false}
272 };
273 
274 static int pgss_max; /* max # statements to track */
275 static int pgss_track; /* tracking level */
276 static bool pgss_track_utility; /* whether to track utility commands */
277 static bool pgss_track_planning; /* whether to track planning duration */
278 static bool pgss_save; /* whether to save stats across shutdown */
279 
280 
/*
 * pgss_enabled(level): should a statement at nesting depth "level" be
 * tracked?
 *
 * Never in a parallel worker.  Otherwise always when pgss_track is
 * PGSS_TRACK_ALL, only at depth zero when it is PGSS_TRACK_TOP, and never
 * for any other setting.  "level" is evaluated at most once, and only in the
 * PGSS_TRACK_TOP case.
 */
#define pgss_enabled(level) \
	(IsParallelWorker() ? false : \
	 (pgss_track == PGSS_TRACK_ALL) ? true : \
	 (pgss_track == PGSS_TRACK_TOP) ? ((level) == 0) : false)
285 
/*
 * record_gc_qtexts(): increment the shared gc_count field, taking the
 * pgss->mutex spinlock around the update.  The shared state is accessed
 * through a volatile-qualified pointer.
 */
#define record_gc_qtexts() \
	do { \
		volatile pgssSharedState *shared_state = \
			(volatile pgssSharedState *) pgss; \
		SpinLockAcquire(&shared_state->mutex); \
		shared_state->gc_count += 1; \
		SpinLockRelease(&shared_state->mutex); \
	} while(0)
293 
294 /*---- Function declarations ----*/
295 
296 void _PG_init(void);
297 void _PG_fini(void);
298 
307 
308 static void pgss_shmem_startup(void);
309 static void pgss_shmem_shutdown(int code, Datum arg);
310 static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
311  JumbleState *jstate);
313  const char *query_string,
314  int cursorOptions,
315  ParamListInfo boundParams);
316 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
317 static void pgss_ExecutorRun(QueryDesc *queryDesc,
318  ScanDirection direction,
319  uint64 count, bool execute_once);
320 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
321 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
322 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
323  bool readOnlyTree,
324  ProcessUtilityContext context, ParamListInfo params,
325  QueryEnvironment *queryEnv,
327 static void pgss_store(const char *query, uint64 queryId,
328  int query_location, int query_len,
329  pgssStoreKind kind,
330  double total_time, uint64 rows,
331  const BufferUsage *bufusage,
332  const WalUsage *walusage,
333  JumbleState *jstate);
335  pgssVersion api_version,
336  bool showtext);
337 static Size pgss_memsize(void);
338 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
339  int encoding, bool sticky);
340 static void entry_dealloc(void);
341 static bool qtext_store(const char *query, int query_len,
342  Size *query_offset, int *gc_count);
343 static char *qtext_load_file(Size *buffer_size);
344 static char *qtext_fetch(Size query_offset, int query_len,
345  char *buffer, Size buffer_size);
346 static bool need_gc_qtexts(void);
347 static void gc_qtexts(void);
348 static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
349 static char *generate_normalized_query(JumbleState *jstate, const char *query,
350  int query_loc, int *query_len_p);
351 static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
352  int query_loc);
353 static int comp_location(const void *a, const void *b);
354 
355 
356 /*
357  * Module load callback
358  */
359 void
360 _PG_init(void)
361 {
362  /*
363  * In order to create our shared memory area, we have to be loaded via
364  * shared_preload_libraries. If not, fall out without hooking into any of
365  * the main system. (We don't throw error here because it seems useful to
366  * allow the pg_stat_statements functions to be created even when the
367  * module isn't active. The functions must protect themselves against
368  * being called then, however.)
369  */
371  return;
372 
373  /*
374  * Inform the postmaster that we want to enable query_id calculation if
375  * compute_query_id is set to auto.
376  */
377  EnableQueryId();
378 
379  /*
380  * Define (or redefine) custom GUC variables.
381  */
382  DefineCustomIntVariable("pg_stat_statements.max",
383  "Sets the maximum number of statements tracked by pg_stat_statements.",
384  NULL,
385  &pgss_max,
386  5000,
387  100,
388  INT_MAX,
390  0,
391  NULL,
392  NULL,
393  NULL);
394 
395  DefineCustomEnumVariable("pg_stat_statements.track",
396  "Selects which statements are tracked by pg_stat_statements.",
397  NULL,
398  &pgss_track,
400  track_options,
401  PGC_SUSET,
402  0,
403  NULL,
404  NULL,
405  NULL);
406 
407  DefineCustomBoolVariable("pg_stat_statements.track_utility",
408  "Selects whether utility commands are tracked by pg_stat_statements.",
409  NULL,
411  true,
412  PGC_SUSET,
413  0,
414  NULL,
415  NULL,
416  NULL);
417 
418  DefineCustomBoolVariable("pg_stat_statements.track_planning",
419  "Selects whether planning duration is tracked by pg_stat_statements.",
420  NULL,
422  false,
423  PGC_SUSET,
424  0,
425  NULL,
426  NULL,
427  NULL);
428 
429  DefineCustomBoolVariable("pg_stat_statements.save",
430  "Save pg_stat_statements statistics across server shutdowns.",
431  NULL,
432  &pgss_save,
433  true,
434  PGC_SIGHUP,
435  0,
436  NULL,
437  NULL,
438  NULL);
439 
440  EmitWarningsOnPlaceholders("pg_stat_statements");
441 
442  /*
443  * Request additional shared resources. (These are no-ops if we're not in
444  * the postmaster process.) We'll allocate or attach to the shared
445  * resources in pgss_shmem_startup().
446  */
448  RequestNamedLWLockTranche("pg_stat_statements", 1);
449 
450  /*
451  * Install hooks.
452  */
469 }
470 
471 /*
472  * Module unload callback
473  */
474 void
475 _PG_fini(void)
476 {
477  /* Uninstall hooks. */
486 }
487 
488 /*
489  * shmem_startup hook: allocate or attach to shared memory,
490  * then load any pre-existing statistics from file.
491  * Also create and load the query-texts file, which is expected to exist
492  * (even if empty) while the module is enabled.
493  */
494 static void
496 {
497  bool found;
498  HASHCTL info;
499  FILE *file = NULL;
500  FILE *qfile = NULL;
501  uint32 header;
502  int32 num;
503  int32 pgver;
504  int32 i;
505  int buffer_size;
506  char *buffer = NULL;
507 
510 
511  /* reset in case this is a restart within the postmaster */
512  pgss = NULL;
513  pgss_hash = NULL;
514 
515  /*
516  * Create or attach to the shared memory state, including hash table
517  */
518  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
519 
520  pgss = ShmemInitStruct("pg_stat_statements",
521  sizeof(pgssSharedState),
522  &found);
523 
524  if (!found)
525  {
526  /* First time through ... */
527  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
530  SpinLockInit(&pgss->mutex);
531  pgss->extent = 0;
532  pgss->n_writers = 0;
533  pgss->gc_count = 0;
534  pgss->stats.dealloc = 0;
536  }
537 
538  info.keysize = sizeof(pgssHashKey);
539  info.entrysize = sizeof(pgssEntry);
540  pgss_hash = ShmemInitHash("pg_stat_statements hash",
542  &info,
544 
545  LWLockRelease(AddinShmemInitLock);
546 
547  /*
548  * If we're in the postmaster (or a standalone backend...), set up a shmem
549  * exit hook to dump the statistics to disk.
550  */
551  if (!IsUnderPostmaster)
553 
554  /*
555  * Done if some other process already completed our initialization.
556  */
557  if (found)
558  return;
559 
560  /*
561  * Note: we don't bother with locks here, because there should be no other
562  * processes running when this code is reached.
563  */
564 
565  /* Unlink query text file possibly left over from crash */
566  unlink(PGSS_TEXT_FILE);
567 
568  /* Allocate new query text temp file */
570  if (qfile == NULL)
571  goto write_error;
572 
573  /*
574  * If we were told not to load old statistics, we're done. (Note we do
575  * not try to unlink any old dump file in this case. This seems a bit
576  * questionable but it's the historical behavior.)
577  */
578  if (!pgss_save)
579  {
580  FreeFile(qfile);
581  return;
582  }
583 
584  /*
585  * Attempt to load old statistics from the dump file.
586  */
588  if (file == NULL)
589  {
590  if (errno != ENOENT)
591  goto read_error;
592  /* No existing persisted stats file, so we're done */
593  FreeFile(qfile);
594  return;
595  }
596 
597  buffer_size = 2048;
598  buffer = (char *) palloc(buffer_size);
599 
600  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
601  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
602  fread(&num, sizeof(int32), 1, file) != 1)
603  goto read_error;
604 
605  if (header != PGSS_FILE_HEADER ||
606  pgver != PGSS_PG_MAJOR_VERSION)
607  goto data_error;
608 
609  for (i = 0; i < num; i++)
610  {
611  pgssEntry temp;
612  pgssEntry *entry;
613  Size query_offset;
614 
615  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
616  goto read_error;
617 
618  /* Encoding is the only field we can easily sanity-check */
619  if (!PG_VALID_BE_ENCODING(temp.encoding))
620  goto data_error;
621 
622  /* Resize buffer as needed */
623  if (temp.query_len >= buffer_size)
624  {
625  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
626  buffer = repalloc(buffer, buffer_size);
627  }
628 
629  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
630  goto read_error;
631 
632  /* Should have a trailing null, but let's make sure */
633  buffer[temp.query_len] = '\0';
634 
635  /* Skip loading "sticky" entries */
636  if (IS_STICKY(temp.counters))
637  continue;
638 
639  /* Store the query text */
640  query_offset = pgss->extent;
641  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
642  goto write_error;
643  pgss->extent += temp.query_len + 1;
644 
645  /* make the hashtable entry (discards old entries if too many) */
646  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
647  temp.encoding,
648  false);
649 
650  /* copy in the actual stats */
651  entry->counters = temp.counters;
652  }
653 
654  /* Read global statistics for pg_stat_statements */
655  if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
656  goto read_error;
657 
658  pfree(buffer);
659  FreeFile(file);
660  FreeFile(qfile);
661 
662  /*
663  * Remove the persisted stats file so it's not included in
664  * backups/replication standbys, etc. A new file will be written on next
665  * shutdown.
666  *
667  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
668  * because we remove that file on startup; it acts inversely to
669  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
670  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
671  * when the server is not running. Leaving the file creates no danger of
672  * a newly restored database having a spurious record of execution costs,
673  * which is what we're really concerned about here.
674  */
675  unlink(PGSS_DUMP_FILE);
676 
677  return;
678 
679 read_error:
680  ereport(LOG,
682  errmsg("could not read file \"%s\": %m",
683  PGSS_DUMP_FILE)));
684  goto fail;
685 data_error:
686  ereport(LOG,
687  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
688  errmsg("ignoring invalid data in file \"%s\"",
689  PGSS_DUMP_FILE)));
690  goto fail;
691 write_error:
692  ereport(LOG,
694  errmsg("could not write file \"%s\": %m",
695  PGSS_TEXT_FILE)));
696 fail:
697  if (buffer)
698  pfree(buffer);
699  if (file)
700  FreeFile(file);
701  if (qfile)
702  FreeFile(qfile);
703  /* If possible, throw away the bogus file; ignore any error */
704  unlink(PGSS_DUMP_FILE);
705 
706  /*
707  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
708  * server is running with pg_stat_statements enabled
709  */
710 }
711 
712 /*
713  * shmem_shutdown hook: Dump statistics into file.
714  *
715  * Note: we don't bother with acquiring lock, because there should be no
716  * other processes running when this is called.
717  */
718 static void
720 {
721  FILE *file;
722  char *qbuffer = NULL;
723  Size qbuffer_size = 0;
724  HASH_SEQ_STATUS hash_seq;
725  int32 num_entries;
726  pgssEntry *entry;
727 
728  /* Don't try to dump during a crash. */
729  if (code)
730  return;
731 
732  /* Safety check ... shouldn't get here unless shmem is set up. */
733  if (!pgss || !pgss_hash)
734  return;
735 
736  /* Don't dump if told not to. */
737  if (!pgss_save)
738  return;
739 
740  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
741  if (file == NULL)
742  goto error;
743 
744  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
745  goto error;
746  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
747  goto error;
748  num_entries = hash_get_num_entries(pgss_hash);
749  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
750  goto error;
751 
752  qbuffer = qtext_load_file(&qbuffer_size);
753  if (qbuffer == NULL)
754  goto error;
755 
756  /*
757  * When serializing to disk, we store query texts immediately after their
758  * entry data. Any orphaned query texts are thereby excluded.
759  */
760  hash_seq_init(&hash_seq, pgss_hash);
761  while ((entry = hash_seq_search(&hash_seq)) != NULL)
762  {
763  int len = entry->query_len;
764  char *qstr = qtext_fetch(entry->query_offset, len,
765  qbuffer, qbuffer_size);
766 
767  if (qstr == NULL)
768  continue; /* Ignore any entries with bogus texts */
769 
770  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
771  fwrite(qstr, 1, len + 1, file) != len + 1)
772  {
773  /* note: we assume hash_seq_term won't change errno */
774  hash_seq_term(&hash_seq);
775  goto error;
776  }
777  }
778 
779  /* Dump global statistics for pg_stat_statements */
780  if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
781  goto error;
782 
783  free(qbuffer);
784  qbuffer = NULL;
785 
786  if (FreeFile(file))
787  {
788  file = NULL;
789  goto error;
790  }
791 
792  /*
793  * Rename file into place, so we atomically replace any old one.
794  */
796 
797  /* Unlink query-texts file; it's not needed while shutdown */
798  unlink(PGSS_TEXT_FILE);
799 
800  return;
801 
802 error:
803  ereport(LOG,
805  errmsg("could not write file \"%s\": %m",
806  PGSS_DUMP_FILE ".tmp")));
807  if (qbuffer)
808  free(qbuffer);
809  if (file)
810  FreeFile(file);
811  unlink(PGSS_DUMP_FILE ".tmp");
812  unlink(PGSS_TEXT_FILE);
813 }
814 
815 /*
816  * Post-parse-analysis hook: mark query with a queryId
817  */
818 static void
820 {
822  prev_post_parse_analyze_hook(pstate, query, jstate);
823 
824  /* Safety check... */
825  if (!pgss || !pgss_hash || !pgss_enabled(exec_nested_level))
826  return;
827 
828  /*
829  * Clear queryId for prepared statements related utility, as those will
830  * inherit from the underlying statement's one (except DEALLOCATE which is
831  * entirely untracked).
832  */
833  if (query->utilityStmt)
834  {
836  query->queryId = UINT64CONST(0);
837  return;
838  }
839 
840  /*
841  * If query jumbling were able to identify any ignorable constants, we
842  * immediately create a hash table entry for the query, so that we can
843  * record the normalized form of the query string. If there were no such
844  * constants, the normalized string would be the same as the query text
845  * anyway, so there's no need for an early entry.
846  */
847  if (jstate && jstate->clocations_count > 0)
848  pgss_store(pstate->p_sourcetext,
849  query->queryId,
850  query->stmt_location,
851  query->stmt_len,
852  PGSS_INVALID,
853  0,
854  0,
855  NULL,
856  NULL,
857  jstate);
858 }
859 
860 /*
861  * Planner hook: forward to regular planner, but measure planning time
862  * if needed.
863  */
864 static PlannedStmt *
866  const char *query_string,
867  int cursorOptions,
868  ParamListInfo boundParams)
869 {
870  PlannedStmt *result;
871 
872  /*
873  * We can't process the query if no query_string is provided, as
874  * pgss_store needs it. We also ignore query without queryid, as it would
875  * be treated as a utility statement, which may not be the case.
876  *
877  * Note that planner_hook can be called from the planner itself, so we
878  * have a specific nesting level for the planner. However, utility
879  * commands containing optimizable statements can also call the planner,
880  * same for regular DML (for instance for underlying foreign key queries).
881  * So testing the planner nesting level only is not enough to detect real
882  * top level planner call.
883  */
885  && pgss_track_planning && query_string
886  && parse->queryId != UINT64CONST(0))
887  {
888  instr_time start;
890  BufferUsage bufusage_start,
891  bufusage;
892  WalUsage walusage_start,
893  walusage;
894 
895  /* We need to track buffer usage as the planner can access them. */
896  bufusage_start = pgBufferUsage;
897 
898  /*
899  * Similarly the planner could write some WAL records in some cases
900  * (e.g. setting a hint bit with those being WAL-logged)
901  */
902  walusage_start = pgWalUsage;
903  INSTR_TIME_SET_CURRENT(start);
904 
906  PG_TRY();
907  {
908  if (prev_planner_hook)
909  result = prev_planner_hook(parse, query_string, cursorOptions,
910  boundParams);
911  else
912  result = standard_planner(parse, query_string, cursorOptions,
913  boundParams);
914  }
915  PG_FINALLY();
916  {
918  }
919  PG_END_TRY();
920 
921  INSTR_TIME_SET_CURRENT(duration);
922  INSTR_TIME_SUBTRACT(duration, start);
923 
924  /* calc differences of buffer counters. */
925  memset(&bufusage, 0, sizeof(BufferUsage));
926  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
927 
928  /* calc differences of WAL counters. */
929  memset(&walusage, 0, sizeof(WalUsage));
930  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
931 
932  pgss_store(query_string,
933  parse->queryId,
934  parse->stmt_location,
935  parse->stmt_len,
936  PGSS_PLAN,
937  INSTR_TIME_GET_MILLISEC(duration),
938  0,
939  &bufusage,
940  &walusage,
941  NULL);
942  }
943  else
944  {
945  if (prev_planner_hook)
946  result = prev_planner_hook(parse, query_string, cursorOptions,
947  boundParams);
948  else
949  result = standard_planner(parse, query_string, cursorOptions,
950  boundParams);
951  }
952 
953  return result;
954 }
955 
956 /*
957  * ExecutorStart hook: start up tracking if needed
958  */
959 static void
960 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
961 {
962  if (prev_ExecutorStart)
963  prev_ExecutorStart(queryDesc, eflags);
964  else
965  standard_ExecutorStart(queryDesc, eflags);
966 
967  /*
968  * If query has queryId zero, don't track it. This prevents double
969  * counting of optimizable statements that are directly contained in
970  * utility statements.
971  */
972  if (pgss_enabled(exec_nested_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
973  {
974  /*
975  * Set up to track total elapsed time in ExecutorRun. Make sure the
976  * space is allocated in the per-query context so it will go away at
977  * ExecutorEnd.
978  */
979  if (queryDesc->totaltime == NULL)
980  {
981  MemoryContext oldcxt;
982 
983  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
984  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
985  MemoryContextSwitchTo(oldcxt);
986  }
987  }
988 }
989 
990 /*
991  * ExecutorRun hook: all we need do is track nesting depth
992  */
993 static void
994 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
995  bool execute_once)
996 {
998  PG_TRY();
999  {
1000  if (prev_ExecutorRun)
1001  prev_ExecutorRun(queryDesc, direction, count, execute_once);
1002  else
1003  standard_ExecutorRun(queryDesc, direction, count, execute_once);
1004  }
1005  PG_FINALLY();
1006  {
1008  }
1009  PG_END_TRY();
1010 }
1011 
1012 /*
1013  * ExecutorFinish hook: all we need do is track nesting depth
1014  */
1015 static void
1017 {
1019  PG_TRY();
1020  {
1021  if (prev_ExecutorFinish)
1022  prev_ExecutorFinish(queryDesc);
1023  else
1024  standard_ExecutorFinish(queryDesc);
1025  }
1026  PG_FINALLY();
1027  {
1029  }
1030  PG_END_TRY();
1031 }
1032 
1033 /*
1034  * ExecutorEnd hook: store results if needed
1035  */
1036 static void
1038 {
1039  uint64 queryId = queryDesc->plannedstmt->queryId;
1040 
1041  if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1043  {
1044  /*
1045  * Make sure stats accumulation is done. (Note: it's okay if several
1046  * levels of hook all do this.)
1047  */
1048  InstrEndLoop(queryDesc->totaltime);
1049 
1050  pgss_store(queryDesc->sourceText,
1051  queryId,
1052  queryDesc->plannedstmt->stmt_location,
1053  queryDesc->plannedstmt->stmt_len,
1054  PGSS_EXEC,
1055  queryDesc->totaltime->total * 1000.0, /* convert to msec */
1056  queryDesc->estate->es_processed,
1057  &queryDesc->totaltime->bufusage,
1058  &queryDesc->totaltime->walusage,
1059  NULL);
1060  }
1061 
1062  if (prev_ExecutorEnd)
1063  prev_ExecutorEnd(queryDesc);
1064  else
1065  standard_ExecutorEnd(queryDesc);
1066 }
1067 
1068 /*
1069  * ProcessUtility hook
1070  */
1071 static void
1072 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1073  bool readOnlyTree,
1074  ProcessUtilityContext context,
1075  ParamListInfo params, QueryEnvironment *queryEnv,
1077 {
1078  Node *parsetree = pstmt->utilityStmt;
1079  uint64 saved_queryId = pstmt->queryId;
1080 
1081  /*
1082  * Force utility statements to get queryId zero. We do this even in cases
1083  * where the statement contains an optimizable statement for which a
1084  * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1085  * cases, runtime control will first go through ProcessUtility and then
1086  * the executor, and we don't want the executor hooks to do anything,
1087  * since we are already measuring the statement's costs at the utility
1088  * level.
1089  *
1090  * Note that this is only done if pg_stat_statements is enabled and
1091  * configured to track utility statements, in the unlikely possibility
1092  * that user configured another extension to handle utility statements
1093  * only.
1094  */
1096  pstmt->queryId = UINT64CONST(0);
1097 
1098  /*
1099  * If it's an EXECUTE statement, we don't track it and don't increment the
1100  * nesting level. This allows the cycles to be charged to the underlying
1101  * PREPARE instead (by the Executor hooks), which is much more useful.
1102  *
1103  * We also don't track execution of PREPARE. If we did, we would get one
1104  * hash table entry for the PREPARE (with hash calculated from the query
1105  * string), and then a different one with the same query string (but hash
1106  * calculated from the query tree) would be used to accumulate costs of
1107  * ensuing EXECUTEs. This would be confusing, and inconsistent with other
1108  * cases where planning time is not included at all.
1109  *
1110  * Likewise, we don't track execution of DEALLOCATE.
1111  */
1113  PGSS_HANDLED_UTILITY(parsetree))
1114  {
1115  instr_time start;
1117  uint64 rows;
1118  BufferUsage bufusage_start,
1119  bufusage;
1120  WalUsage walusage_start,
1121  walusage;
1122 
1123  bufusage_start = pgBufferUsage;
1124  walusage_start = pgWalUsage;
1125  INSTR_TIME_SET_CURRENT(start);
1126 
1128  PG_TRY();
1129  {
1130  if (prev_ProcessUtility)
1131  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1132  context, params, queryEnv,
1133  dest, qc);
1134  else
1135  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1136  context, params, queryEnv,
1137  dest, qc);
1138  }
1139  PG_FINALLY();
1140  {
1142  }
1143  PG_END_TRY();
1144 
1145  INSTR_TIME_SET_CURRENT(duration);
1146  INSTR_TIME_SUBTRACT(duration, start);
1147 
1148  /*
1149  * Track the total number of rows retrieved or affected by the utility
1150  * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1151  * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1152  */
1153  rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1154  qc->commandTag == CMDTAG_FETCH ||
1155  qc->commandTag == CMDTAG_SELECT ||
1156  qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1157  qc->nprocessed : 0;
1158 
1159  /* calc differences of buffer counters. */
1160  memset(&bufusage, 0, sizeof(BufferUsage));
1161  BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1162 
1163  /* calc differences of WAL counters. */
1164  memset(&walusage, 0, sizeof(WalUsage));
1165  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1166 
1167  pgss_store(queryString,
1168  saved_queryId,
1169  pstmt->stmt_location,
1170  pstmt->stmt_len,
1171  PGSS_EXEC,
1172  INSTR_TIME_GET_MILLISEC(duration),
1173  rows,
1174  &bufusage,
1175  &walusage,
1176  NULL);
1177  }
1178  else
1179  {
1180  if (prev_ProcessUtility)
1181  prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1182  context, params, queryEnv,
1183  dest, qc);
1184  else
1185  standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1186  context, params, queryEnv,
1187  dest, qc);
1188  }
1189 }
1190 
1191 /*
1192  * Store some statistics for a statement.
1193  *
1194  * If queryId is 0 then this is a utility statement for which we couldn't
1195  * compute a queryId during parse analysis, and we should compute a suitable
1196  * queryId internally.
1197  *
1198  * If jstate is not NULL then we're trying to create an entry for which
1199  * we have no statistics as yet; we just want to record the normalized
1200  * query string. total_time, rows, bufusage and walusage are ignored in this
1201  * case.
1202  *
1203  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1204  * for the arrays in the Counters field.
1205  */
1206 static void
1207 pgss_store(const char *query, uint64 queryId,
1208  int query_location, int query_len,
1209  pgssStoreKind kind,
1210  double total_time, uint64 rows,
1211  const BufferUsage *bufusage,
1212  const WalUsage *walusage,
1213  JumbleState *jstate)
1214 {
1215  pgssHashKey key;
1216  pgssEntry *entry;
1217  char *norm_query = NULL;
1218  int encoding = GetDatabaseEncoding();
1219 
1220  Assert(query != NULL);
1221 
1222  /* Safety check... */
1223  if (!pgss || !pgss_hash)
1224  return;
1225 
1226  /*
1227  * Nothing to do if compute_query_id isn't enabled and no other module
1228  * computed a query identifier.
1229  */
1230  if (queryId == UINT64CONST(0))
1231  return;
1232 
1233  /*
1234  * Confine our attention to the relevant part of the string, if the query
1235  * is a portion of a multi-statement source string, and update query
1236  * location and length if needed.
1237  */
1238  query = CleanQuerytext(query, &query_location, &query_len);
1239 
1240  /* Set up key for hashtable search */
1241 
1242  /* memset() is required when pgssHashKey is without padding only */
1243  memset(&key, 0, sizeof(pgssHashKey));
1244 
1245  key.userid = GetUserId();
1246  key.dbid = MyDatabaseId;
1247  key.queryid = queryId;
1248  key.toplevel = (exec_nested_level == 0);
1249 
1250  /* Lookup the hash table entry with shared lock. */
1251  LWLockAcquire(pgss->lock, LW_SHARED);
1252 
1253  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1254 
1255  /* Create new entry, if not present */
1256  if (!entry)
1257  {
1258  Size query_offset;
1259  int gc_count;
1260  bool stored;
1261  bool do_gc;
1262 
1263  /*
1264  * Create a new, normalized query string if caller asked. We don't
1265  * need to hold the lock while doing this work. (Note: in any case,
1266  * it's possible that someone else creates a duplicate hashtable entry
1267  * in the interval where we don't hold the lock below. That case is
1268  * handled by entry_alloc.)
1269  */
1270  if (jstate)
1271  {
1272  LWLockRelease(pgss->lock);
1273  norm_query = generate_normalized_query(jstate, query,
1274  query_location,
1275  &query_len);
1276  LWLockAcquire(pgss->lock, LW_SHARED);
1277  }
1278 
1279  /* Append new query text to file with only shared lock held */
1280  stored = qtext_store(norm_query ? norm_query : query, query_len,
1281  &query_offset, &gc_count);
1282 
1283  /*
1284  * Determine whether we need to garbage collect external query texts
1285  * while the shared lock is still held. This micro-optimization
1286  * avoids taking the time to decide this while holding exclusive lock.
1287  */
1288  do_gc = need_gc_qtexts();
1289 
1290  /* Need exclusive lock to make a new hashtable entry - promote */
1291  LWLockRelease(pgss->lock);
1293 
1294  /*
1295  * A garbage collection may have occurred while we weren't holding the
1296  * lock. In the unlikely event that this happens, the query text we
1297  * stored above will have been garbage collected, so write it again.
1298  * This should be infrequent enough that doing it while holding
1299  * exclusive lock isn't a performance problem.
1300  */
1301  if (!stored || pgss->gc_count != gc_count)
1302  stored = qtext_store(norm_query ? norm_query : query, query_len,
1303  &query_offset, NULL);
1304 
1305  /* If we failed to write to the text file, give up */
1306  if (!stored)
1307  goto done;
1308 
1309  /* OK to create a new hashtable entry */
1310  entry = entry_alloc(&key, query_offset, query_len, encoding,
1311  jstate != NULL);
1312 
1313  /* If needed, perform garbage collection while exclusive lock held */
1314  if (do_gc)
1315  gc_qtexts();
1316  }
1317 
1318  /* Increment the counts, except when jstate is not NULL */
1319  if (!jstate)
1320  {
1321  /*
1322  * Grab the spinlock while updating the counters (see comment about
1323  * locking rules at the head of the file)
1324  */
1325  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1326 
1327  Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1328 
1329  SpinLockAcquire(&e->mutex);
1330 
1331  /* "Unstick" entry if it was previously sticky */
1332  if (IS_STICKY(e->counters))
1333  e->counters.usage = USAGE_INIT;
1334 
1335  e->counters.calls[kind] += 1;
1336  e->counters.total_time[kind] += total_time;
1337 
1338  if (e->counters.calls[kind] == 1)
1339  {
1340  e->counters.min_time[kind] = total_time;
1341  e->counters.max_time[kind] = total_time;
1342  e->counters.mean_time[kind] = total_time;
1343  }
1344  else
1345  {
1346  /*
1347  * Welford's method for accurately computing variance. See
1348  * <http://www.johndcook.com/blog/standard_deviation/>
1349  */
1350  double old_mean = e->counters.mean_time[kind];
1351 
1352  e->counters.mean_time[kind] +=
1353  (total_time - old_mean) / e->counters.calls[kind];
1354  e->counters.sum_var_time[kind] +=
1355  (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);
1356 
1357  /* calculate min and max time */
1358  if (e->counters.min_time[kind] > total_time)
1359  e->counters.min_time[kind] = total_time;
1360  if (e->counters.max_time[kind] < total_time)
1361  e->counters.max_time[kind] = total_time;
1362  }
1363  e->counters.rows += rows;
1364  e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1365  e->counters.shared_blks_read += bufusage->shared_blks_read;
1368  e->counters.local_blks_hit += bufusage->local_blks_hit;
1369  e->counters.local_blks_read += bufusage->local_blks_read;
1372  e->counters.temp_blks_read += bufusage->temp_blks_read;
1376  e->counters.usage += USAGE_EXEC(total_time);
1377  e->counters.wal_records += walusage->wal_records;
1378  e->counters.wal_fpi += walusage->wal_fpi;
1379  e->counters.wal_bytes += walusage->wal_bytes;
1380 
1381  SpinLockRelease(&e->mutex);
1382  }
1383 
1384 done:
1385  LWLockRelease(pgss->lock);
1386 
1387  /* We postpone this clean-up until we're out of the lock */
1388  if (norm_query)
1389  pfree(norm_query);
1390 }
1391 
1392 /*
1393  * Reset statement statistics corresponding to userid, dbid, and queryid.
1394  */
1395 Datum
1397 {
1398  Oid userid;
1399  Oid dbid;
1400  uint64 queryid;
1401 
1402  userid = PG_GETARG_OID(0);
1403  dbid = PG_GETARG_OID(1);
1404  queryid = (uint64) PG_GETARG_INT64(2);
1405 
1406  entry_reset(userid, dbid, queryid);
1407 
1408  PG_RETURN_VOID();
1409 }
1410 
1411 /*
1412  * Reset statement statistics.
1413  */
1414 Datum
1416 {
1417  entry_reset(0, 0, 0);
1418 
1419  PG_RETURN_VOID();
1420 }
1421 
/*
 * Number of output arguments (columns) produced for each supported SQL API
 * version of pg_stat_statements().
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33

/* Largest of the per-version counts above */
#define PG_STAT_STATEMENTS_COLS			33
1430 
1431 /*
1432  * Retrieve statement statistics.
1433  *
1434  * The SQL API of this function has changed multiple times, and will likely
1435  * do so again in future. To support the case where a newer version of this
1436  * loadable module is being used with an old SQL declaration of the function,
1437  * we continue to support the older API versions. For 1.2 and later, the
1438  * expected API version is identified by embedding it in the C name of the
1439  * function. Unfortunately we weren't bright enough to do that for 1.1.
1440  */
1441 Datum
1443 {
1444  bool showtext = PG_GETARG_BOOL(0);
1445 
1446  pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1447 
1448  return (Datum) 0;
1449 }
1450 
1451 Datum
1453 {
1454  bool showtext = PG_GETARG_BOOL(0);
1455 
1456  pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1457 
1458  return (Datum) 0;
1459 }
1460 
1461 Datum
1463 {
1464  bool showtext = PG_GETARG_BOOL(0);
1465 
1466  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1467 
1468  return (Datum) 0;
1469 }
1470 
1471 Datum
1473 {
1474  bool showtext = PG_GETARG_BOOL(0);
1475 
1476  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1477 
1478  return (Datum) 0;
1479 }
1480 
1481 /*
1482  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1483  * This can be removed someday, perhaps.
1484  */
1485 Datum
1487 {
1488  /* If it's really API 1.1, we'll figure that out below */
1489  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1490 
1491  return (Datum) 0;
1492 }
1493 
1494 /* Common code for all versions of pg_stat_statements() */
1495 static void
1497  pgssVersion api_version,
1498  bool showtext)
1499 {
1500  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1501  TupleDesc tupdesc;
1502  Tuplestorestate *tupstore;
1503  MemoryContext per_query_ctx;
1504  MemoryContext oldcontext;
1505  Oid userid = GetUserId();
1506  bool is_allowed_role = false;
1507  char *qbuffer = NULL;
1508  Size qbuffer_size = 0;
1509  Size extent = 0;
1510  int gc_count = 0;
1511  HASH_SEQ_STATUS hash_seq;
1512  pgssEntry *entry;
1513 
1514  /* Superusers or members of pg_read_all_stats members are allowed */
1515  is_allowed_role = is_member_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS);
1516 
1517  /* hash table must exist already */
1518  if (!pgss || !pgss_hash)
1519  ereport(ERROR,
1520  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1521  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1522 
1523  /* check to see if caller supports us returning a tuplestore */
1524  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1525  ereport(ERROR,
1526  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1527  errmsg("set-valued function called in context that cannot accept a set")));
1528  if (!(rsinfo->allowedModes & SFRM_Materialize))
1529  ereport(ERROR,
1530  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1531  errmsg("materialize mode required, but it is not allowed in this context")));
1532 
1533  /* Switch into long-lived context to construct returned data structures */
1534  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1535  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1536 
1537  /* Build a tuple descriptor for our result type */
1538  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1539  elog(ERROR, "return type must be a row type");
1540 
1541  /*
1542  * Check we have the expected number of output arguments. Aside from
1543  * being a good safety check, we need a kluge here to detect API version
1544  * 1.1, which was wedged into the code in an ill-considered way.
1545  */
1546  switch (tupdesc->natts)
1547  {
1549  if (api_version != PGSS_V1_0)
1550  elog(ERROR, "incorrect number of output arguments");
1551  break;
1553  /* pg_stat_statements() should have told us 1.0 */
1554  if (api_version != PGSS_V1_0)
1555  elog(ERROR, "incorrect number of output arguments");
1556  api_version = PGSS_V1_1;
1557  break;
1559  if (api_version != PGSS_V1_2)
1560  elog(ERROR, "incorrect number of output arguments");
1561  break;
1563  if (api_version != PGSS_V1_3)
1564  elog(ERROR, "incorrect number of output arguments");
1565  break;
1567  if (api_version != PGSS_V1_8)
1568  elog(ERROR, "incorrect number of output arguments");
1569  break;
1571  if (api_version != PGSS_V1_9)
1572  elog(ERROR, "incorrect number of output arguments");
1573  break;
1574  default:
1575  elog(ERROR, "incorrect number of output arguments");
1576  }
1577 
1578  tupstore = tuplestore_begin_heap(true, false, work_mem);
1579  rsinfo->returnMode = SFRM_Materialize;
1580  rsinfo->setResult = tupstore;
1581  rsinfo->setDesc = tupdesc;
1582 
1583  MemoryContextSwitchTo(oldcontext);
1584 
1585  /*
1586  * We'd like to load the query text file (if needed) while not holding any
1587  * lock on pgss->lock. In the worst case we'll have to do this again
1588  * after we have the lock, but it's unlikely enough to make this a win
1589  * despite occasional duplicated work. We need to reload if anybody
1590  * writes to the file (either a retail qtext_store(), or a garbage
1591  * collection) between this point and where we've gotten shared lock. If
1592  * a qtext_store is actually in progress when we look, we might as well
1593  * skip the speculative load entirely.
1594  */
1595  if (showtext)
1596  {
1597  int n_writers;
1598 
1599  /* Take the mutex so we can examine variables */
1600  {
1601  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1602 
1603  SpinLockAcquire(&s->mutex);
1604  extent = s->extent;
1605  n_writers = s->n_writers;
1606  gc_count = s->gc_count;
1607  SpinLockRelease(&s->mutex);
1608  }
1609 
1610  /* No point in loading file now if there are active writers */
1611  if (n_writers == 0)
1612  qbuffer = qtext_load_file(&qbuffer_size);
1613  }
1614 
1615  /*
1616  * Get shared lock, load or reload the query text file if we must, and
1617  * iterate over the hashtable entries.
1618  *
1619  * With a large hash table, we might be holding the lock rather longer
1620  * than one could wish. However, this only blocks creation of new hash
1621  * table entries, and the larger the hash table the less likely that is to
1622  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1623  * we need to partition the hash table to limit the time spent holding any
1624  * one lock.
1625  */
1626  LWLockAcquire(pgss->lock, LW_SHARED);
1627 
1628  if (showtext)
1629  {
1630  /*
1631  * Here it is safe to examine extent and gc_count without taking the
1632  * mutex. Note that although other processes might change
1633  * pgss->extent just after we look at it, the strings they then write
1634  * into the file cannot yet be referenced in the hashtable, so we
1635  * don't care whether we see them or not.
1636  *
1637  * If qtext_load_file fails, we just press on; we'll return NULL for
1638  * every query text.
1639  */
1640  if (qbuffer == NULL ||
1641  pgss->extent != extent ||
1642  pgss->gc_count != gc_count)
1643  {
1644  if (qbuffer)
1645  free(qbuffer);
1646  qbuffer = qtext_load_file(&qbuffer_size);
1647  }
1648  }
1649 
1650  hash_seq_init(&hash_seq, pgss_hash);
1651  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1652  {
1654  bool nulls[PG_STAT_STATEMENTS_COLS];
1655  int i = 0;
1656  Counters tmp;
1657  double stddev;
1658  int64 queryid = entry->key.queryid;
1659 
1660  memset(values, 0, sizeof(values));
1661  memset(nulls, 0, sizeof(nulls));
1662 
1663  values[i++] = ObjectIdGetDatum(entry->key.userid);
1664  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1665  if (api_version >= PGSS_V1_9)
1666  values[i++] = BoolGetDatum(entry->key.toplevel);
1667 
1668  if (is_allowed_role || entry->key.userid == userid)
1669  {
1670  if (api_version >= PGSS_V1_2)
1671  values[i++] = Int64GetDatumFast(queryid);
1672 
1673  if (showtext)
1674  {
1675  char *qstr = qtext_fetch(entry->query_offset,
1676  entry->query_len,
1677  qbuffer,
1678  qbuffer_size);
1679 
1680  if (qstr)
1681  {
1682  char *enc;
1683 
1684  enc = pg_any_to_server(qstr,
1685  entry->query_len,
1686  entry->encoding);
1687 
1688  values[i++] = CStringGetTextDatum(enc);
1689 
1690  if (enc != qstr)
1691  pfree(enc);
1692  }
1693  else
1694  {
1695  /* Just return a null if we fail to find the text */
1696  nulls[i++] = true;
1697  }
1698  }
1699  else
1700  {
1701  /* Query text not requested */
1702  nulls[i++] = true;
1703  }
1704  }
1705  else
1706  {
1707  /* Don't show queryid */
1708  if (api_version >= PGSS_V1_2)
1709  nulls[i++] = true;
1710 
1711  /*
1712  * Don't show query text, but hint as to the reason for not doing
1713  * so if it was requested
1714  */
1715  if (showtext)
1716  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1717  else
1718  nulls[i++] = true;
1719  }
1720 
1721  /* copy counters to a local variable to keep locking time short */
1722  {
1723  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1724 
1725  SpinLockAcquire(&e->mutex);
1726  tmp = e->counters;
1727  SpinLockRelease(&e->mutex);
1728  }
1729 
1730  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1731  if (IS_STICKY(tmp))
1732  continue;
1733 
1734  /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1735  for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1736  {
1737  if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1738  {
1739  values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1740  values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1741  }
1742 
1743  if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1744  api_version >= PGSS_V1_8)
1745  {
1746  values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1747  values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1748  values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1749 
1750  /*
1751  * Note we are calculating the population variance here, not
1752  * the sample variance, as we have data for the whole
1753  * population, so Bessel's correction is not used, and we
1754  * don't divide by tmp.calls - 1.
1755  */
1756  if (tmp.calls[kind] > 1)
1757  stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1758  else
1759  stddev = 0.0;
1760  values[i++] = Float8GetDatumFast(stddev);
1761  }
1762  }
1763  values[i++] = Int64GetDatumFast(tmp.rows);
1764  values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1765  values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1766  if (api_version >= PGSS_V1_1)
1767  values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1768  values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1769  values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1770  values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1771  if (api_version >= PGSS_V1_1)
1772  values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1773  values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1774  values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1775  values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1776  if (api_version >= PGSS_V1_1)
1777  {
1778  values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1779  values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1780  }
1781  if (api_version >= PGSS_V1_8)
1782  {
1783  char buf[256];
1784  Datum wal_bytes;
1785 
1786  values[i++] = Int64GetDatumFast(tmp.wal_records);
1787  values[i++] = Int64GetDatumFast(tmp.wal_fpi);
1788 
1789  snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1790 
1791  /* Convert to numeric. */
1792  wal_bytes = DirectFunctionCall3(numeric_in,
1793  CStringGetDatum(buf),
1794  ObjectIdGetDatum(0),
1795  Int32GetDatum(-1));
1796  values[i++] = wal_bytes;
1797  }
1798 
1799  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1800  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1801  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1802  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1803  api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1804  api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1805  -1 /* fail if you forget to update this assert */ ));
1806 
1807  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1808  }
1809 
1810  /* clean up and return the tuplestore */
1811  LWLockRelease(pgss->lock);
1812 
1813  if (qbuffer)
1814  free(qbuffer);
1815 
1816  tuplestore_donestoring(tupstore);
1817 }
1818 
1819 /* Number of output arguments (columns) for pg_stat_statements_info */
1820 #define PG_STAT_STATEMENTS_INFO_COLS 2
1821 
1822 /*
1823  * Return statistics of pg_stat_statements.
1824  */
1825 Datum
1827 {
1828  pgssGlobalStats stats;
1829  TupleDesc tupdesc;
1831  bool nulls[PG_STAT_STATEMENTS_INFO_COLS];
1832 
1833  if (!pgss || !pgss_hash)
1834  ereport(ERROR,
1835  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1836  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1837 
1838  /* Build a tuple descriptor for our result type */
1839  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1840  elog(ERROR, "return type must be a row type");
1841 
1842  MemSet(values, 0, sizeof(values));
1843  MemSet(nulls, 0, sizeof(nulls));
1844 
1845  /* Read global statistics for pg_stat_statements */
1846  {
1847  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1848 
1849  SpinLockAcquire(&s->mutex);
1850  stats = s->stats;
1851  SpinLockRelease(&s->mutex);
1852  }
1853 
1854  values[0] = Int64GetDatum(stats.dealloc);
1855  values[1] = TimestampTzGetDatum(stats.stats_reset);
1856 
1857  PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
1858 }
1859 
1860 /*
1861  * Estimate shared memory space needed.
1862  */
1863 static Size
1865 {
1866  Size size;
1867 
1868  size = MAXALIGN(sizeof(pgssSharedState));
1869  size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
1870 
1871  return size;
1872 }
1873 
1874 /*
1875  * Allocate a new hashtable entry.
1876  * caller must hold an exclusive lock on pgss->lock
1877  *
1878  * "query" need not be null-terminated; we rely on query_len instead
1879  *
1880  * If "sticky" is true, make the new entry artificially sticky so that it will
1881  * probably still be there when the query finishes execution. We do this by
1882  * giving it a median usage value rather than the normal value. (Strictly
1883  * speaking, query strings are normalized on a best effort basis, though it
1884  * would be difficult to demonstrate this even under artificial conditions.)
1885  *
1886  * Note: despite needing exclusive lock, it's not an error for the target
1887  * entry to already exist. This is because pgss_store releases and
1888  * reacquires lock after failing to find a match; so someone else could
1889  * have made the entry while we waited to get exclusive lock.
1890  */
1891 static pgssEntry *
1892 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
1893  bool sticky)
1894 {
1895  pgssEntry *entry;
1896  bool found;
1897 
1898  /* Make space if needed */
1899  while (hash_get_num_entries(pgss_hash) >= pgss_max)
1900  entry_dealloc();
1901 
1902  /* Find or create an entry with desired hash code */
1903  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1904 
1905  if (!found)
1906  {
1907  /* New entry, initialize it */
1908 
1909  /* reset the statistics */
1910  memset(&entry->counters, 0, sizeof(Counters));
1911  /* set the appropriate initial usage count */
1912  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1913  /* re-initialize the mutex each time ... we assume no one using it */
1914  SpinLockInit(&entry->mutex);
1915  /* ... and don't forget the query text metadata */
1916  Assert(query_len >= 0);
1917  entry->query_offset = query_offset;
1918  entry->query_len = query_len;
1919  entry->encoding = encoding;
1920  }
1921 
1922  return entry;
1923 }
1924 
1925 /*
1926  * qsort comparator for sorting into increasing usage order
1927  */
1928 static int
1929 entry_cmp(const void *lhs, const void *rhs)
1930 {
1931  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1932  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1933 
1934  if (l_usage < r_usage)
1935  return -1;
1936  else if (l_usage > r_usage)
1937  return +1;
1938  else
1939  return 0;
1940 }
1941 
1942 /*
1943  * Deallocate least-used entries.
1944  *
1945  * Caller must hold an exclusive lock on pgss->lock.
1946  */
1947 static void
1949 {
1950  HASH_SEQ_STATUS hash_seq;
1951  pgssEntry **entries;
1952  pgssEntry *entry;
1953  int nvictims;
1954  int i;
1955  Size tottextlen;
1956  int nvalidtexts;
1957 
1958  /*
1959  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1960  * While we're scanning the table, apply the decay factor to the usage
1961  * values, and update the mean query length.
1962  *
1963  * Note that the mean query length is almost immediately obsolete, since
1964  * we compute it before not after discarding the least-used entries.
1965  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
1966  * making two passes to get a more current result. Likewise, the new
1967  * cur_median_usage includes the entries we're about to zap.
1968  */
1969 
1970  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1971 
1972  i = 0;
1973  tottextlen = 0;
1974  nvalidtexts = 0;
1975 
1976  hash_seq_init(&hash_seq, pgss_hash);
1977  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1978  {
1979  entries[i++] = entry;
1980  /* "Sticky" entries get a different usage decay rate. */
1981  if (IS_STICKY(entry->counters))
1983  else
1985  /* In the mean length computation, ignore dropped texts. */
1986  if (entry->query_len >= 0)
1987  {
1988  tottextlen += entry->query_len + 1;
1989  nvalidtexts++;
1990  }
1991  }
1992 
1993  /* Sort into increasing order by usage */
1994  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1995 
1996  /* Record the (approximate) median usage */
1997  if (i > 0)
1998  pgss->cur_median_usage = entries[i / 2]->counters.usage;
1999  /* Record the mean query length */
2000  if (nvalidtexts > 0)
2001  pgss->mean_query_len = tottextlen / nvalidtexts;
2002  else
2004 
2005  /* Now zap an appropriate fraction of lowest-usage entries */
2006  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2007  nvictims = Min(nvictims, i);
2008 
2009  for (i = 0; i < nvictims; i++)
2010  {
2011  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2012  }
2013 
2014  pfree(entries);
2015 
2016  /* Increment the number of times entries are deallocated */
2017  {
2018  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2019 
2020  SpinLockAcquire(&s->mutex);
2021  s->stats.dealloc += 1;
2022  SpinLockRelease(&s->mutex);
2023  }
2024 }
2025 
2026 /*
2027  * Given a query string (not necessarily null-terminated), allocate a new
2028  * entry in the external query text file and store the string there.
2029  *
2030  * If successful, returns true, and stores the new entry's offset in the file
2031  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2032  * number of garbage collections that have occurred so far.
2033  *
2034  * On failure, returns false.
2035  *
2036  * At least a shared lock on pgss->lock must be held by the caller, so as
2037  * to prevent a concurrent garbage collection. Share-lock-holding callers
2038  * should pass a gc_count pointer to obtain the number of garbage collections,
2039  * so that they can recheck the count after obtaining exclusive lock to
2040  * detect whether a garbage collection occurred (and removed this entry).
2041  */
2042 static bool
2043 qtext_store(const char *query, int query_len,
2044  Size *query_offset, int *gc_count)
2045 {
2046  Size off;
2047  int fd;
2048 
2049  /*
2050  * We use a spinlock to protect extent/n_writers/gc_count, so that
2051  * multiple processes may execute this function concurrently.
2052  */
2053  {
2054  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2055 
2056  SpinLockAcquire(&s->mutex);
2057  off = s->extent;
2058  s->extent += query_len + 1;
2059  s->n_writers++;
2060  if (gc_count)
2061  *gc_count = s->gc_count;
2062  SpinLockRelease(&s->mutex);
2063  }
2064 
2065  *query_offset = off;
2066 
2067  /* Now write the data into the successfully-reserved part of the file */
2068  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2069  if (fd < 0)
2070  goto error;
2071 
2072  if (pg_pwrite(fd, query, query_len, off) != query_len)
2073  goto error;
2074  if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2075  goto error;
2076 
2077  CloseTransientFile(fd);
2078 
2079  /* Mark our write complete */
2080  {
2081  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2082 
2083  SpinLockAcquire(&s->mutex);
2084  s->n_writers--;
2085  SpinLockRelease(&s->mutex);
2086  }
2087 
2088  return true;
2089 
2090 error:
2091  ereport(LOG,
2093  errmsg("could not write file \"%s\": %m",
2094  PGSS_TEXT_FILE)));
2095 
2096  if (fd >= 0)
2097  CloseTransientFile(fd);
2098 
2099  /* Mark our write complete */
2100  {
2101  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2102 
2103  SpinLockAcquire(&s->mutex);
2104  s->n_writers--;
2105  SpinLockRelease(&s->mutex);
2106  }
2107 
2108  return false;
2109 }
2110 
2111 /*
2112  * Read the external query text file into a malloc'd buffer.
2113  *
2114  * Returns NULL (without throwing an error) if unable to read, eg
2115  * file not there or insufficient memory.
2116  *
2117  * On success, the buffer size is also returned into *buffer_size.
2118  *
2119  * This can be called without any lock on pgss->lock, but in that case
2120  * the caller is responsible for verifying that the result is sane.
2121  */
2122 static char *
2123 qtext_load_file(Size *buffer_size)
2124 {
2125  char *buf;
2126  int fd;
2127  struct stat stat;
2128 
2129  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
2130  if (fd < 0)
2131  {
2132  if (errno != ENOENT)
2133  ereport(LOG,
2135  errmsg("could not read file \"%s\": %m",
2136  PGSS_TEXT_FILE)));
2137  return NULL;
2138  }
2139 
2140  /* Get file length */
2141  if (fstat(fd, &stat))
2142  {
2143  ereport(LOG,
2145  errmsg("could not stat file \"%s\": %m",
2146  PGSS_TEXT_FILE)));
2147  CloseTransientFile(fd);
2148  return NULL;
2149  }
2150 
2151  /* Allocate buffer; beware that off_t might be wider than size_t */
2152  if (stat.st_size <= MaxAllocHugeSize)
2153  buf = (char *) malloc(stat.st_size);
2154  else
2155  buf = NULL;
2156  if (buf == NULL)
2157  {
2158  ereport(LOG,
2159  (errcode(ERRCODE_OUT_OF_MEMORY),
2160  errmsg("out of memory"),
2161  errdetail("Could not allocate enough memory to read file \"%s\".",
2162  PGSS_TEXT_FILE)));
2163  CloseTransientFile(fd);
2164  return NULL;
2165  }
2166 
2167  /*
2168  * OK, slurp in the file. If we get a short read and errno doesn't get
2169  * set, the reason is probably that garbage collection truncated the file
2170  * since we did the fstat(), so we don't log a complaint --- but we don't
2171  * return the data, either, since it's most likely corrupt due to
2172  * concurrent writes from garbage collection.
2173  */
2174  errno = 0;
2175  if (read(fd, buf, stat.st_size) != stat.st_size)
2176  {
2177  if (errno)
2178  ereport(LOG,
2180  errmsg("could not read file \"%s\": %m",
2181  PGSS_TEXT_FILE)));
2182  free(buf);
2183  CloseTransientFile(fd);
2184  return NULL;
2185  }
2186 
2187  if (CloseTransientFile(fd) != 0)
2188  ereport(LOG,
2190  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2191 
2192  *buffer_size = stat.st_size;
2193  return buf;
2194 }
2195 
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len describe where the text is expected to live inside
 * buffer (of buffer_size bytes).  We validate the given offset/length, and
 * return NULL if bogus or if buffer itself is NULL.  Otherwise, the result
 * points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The terminator at query_offset + query_len must
	 * lie inside the buffer.  Compare against the space remaining after
	 * query_offset instead of adding the two values, so that a wildly large
	 * offset cannot wrap around and sneak past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2219 
2220 /*
2221  * Do we need to garbage-collect the external query text file?
2222  *
2223  * Caller should hold at least a shared lock on pgss->lock.
2224  */
2225 static bool
2227 {
2228  Size extent;
2229 
2230  /* Read shared extent pointer */
2231  {
2232  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2233 
2234  SpinLockAcquire(&s->mutex);
2235  extent = s->extent;
2236  SpinLockRelease(&s->mutex);
2237  }
2238 
2239  /* Don't proceed if file does not exceed 512 bytes per possible entry */
2240  if (extent < 512 * pgss_max)
2241  return false;
2242 
2243  /*
2244  * Don't proceed if file is less than about 50% bloat. Nothing can or
2245  * should be done in the event of unusually large query texts accounting
2246  * for file's large size. We go to the trouble of maintaining the mean
2247  * query length in order to prevent garbage collection from thrashing
2248  * uselessly.
2249  */
2250  if (extent < pgss->mean_query_len * pgss_max * 2)
2251  return false;
2252 
2253  return true;
2254 }
2255 
2256 /*
2257  * Garbage-collect orphaned query texts in external file.
2258  *
2259  * This won't be called often in the typical case, since it's likely that
2260  * there won't be too much churn, and besides, a similar compaction process
2261  * occurs when serializing to disk at shutdown or as part of resetting.
2262  * Despite this, it seems prudent to plan for the edge case where the file
2263  * becomes unreasonably large, with no other method of compaction likely to
2264  * occur in the foreseeable future.
2265  *
2266  * The caller must hold an exclusive lock on pgss->lock.
2267  *
2268  * At the first sign of trouble we unlink the query text file to get a clean
2269  * slate (although existing statistics are retained), rather than risk
2270  * thrashing by allowing the same problem case to recur indefinitely.
2271  */
2272 static void
2274 {
2275  char *qbuffer;
2276  Size qbuffer_size;
2277  FILE *qfile = NULL;
2278  HASH_SEQ_STATUS hash_seq;
2279  pgssEntry *entry;
2280  Size extent;
2281  int nentries;
2282 
2283  /*
2284  * When called from pgss_store, some other session might have proceeded
2285  * with garbage collection in the no-lock-held interim of lock strength
2286  * escalation. Check once more that this is actually necessary.
2287  */
2288  if (!need_gc_qtexts())
2289  return;
2290 
2291  /*
2292  * Load the old texts file. If we fail (out of memory, for instance),
2293  * invalidate query texts. Hopefully this is rare. It might seem better
2294  * to leave things alone on an OOM failure, but the problem is that the
2295  * file is only going to get bigger; hoping for a future non-OOM result is
2296  * risky and can easily lead to complete denial of service.
2297  */
2298  qbuffer = qtext_load_file(&qbuffer_size);
2299  if (qbuffer == NULL)
2300  goto gc_fail;
2301 
2302  /*
2303  * We overwrite the query texts file in place, so as to reduce the risk of
2304  * an out-of-disk-space failure. Since the file is guaranteed not to get
2305  * larger, this should always work on traditional filesystems; though we
2306  * could still lose on copy-on-write filesystems.
2307  */
2309  if (qfile == NULL)
2310  {
2311  ereport(LOG,
2313  errmsg("could not write file \"%s\": %m",
2314  PGSS_TEXT_FILE)));
2315  goto gc_fail;
2316  }
2317 
2318  extent = 0;
2319  nentries = 0;
2320 
2321  hash_seq_init(&hash_seq, pgss_hash);
2322  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2323  {
2324  int query_len = entry->query_len;
2325  char *qry = qtext_fetch(entry->query_offset,
2326  query_len,
2327  qbuffer,
2328  qbuffer_size);
2329 
2330  if (qry == NULL)
2331  {
2332  /* Trouble ... drop the text */
2333  entry->query_offset = 0;
2334  entry->query_len = -1;
2335  /* entry will not be counted in mean query length computation */
2336  continue;
2337  }
2338 
2339  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2340  {
2341  ereport(LOG,
2343  errmsg("could not write file \"%s\": %m",
2344  PGSS_TEXT_FILE)));
2345  hash_seq_term(&hash_seq);
2346  goto gc_fail;
2347  }
2348 
2349  entry->query_offset = extent;
2350  extent += query_len + 1;
2351  nentries++;
2352  }
2353 
2354  /*
2355  * Truncate away any now-unused space. If this fails for some odd reason,
2356  * we log it, but there's no need to fail.
2357  */
2358  if (ftruncate(fileno(qfile), extent) != 0)
2359  ereport(LOG,
2361  errmsg("could not truncate file \"%s\": %m",
2362  PGSS_TEXT_FILE)));
2363 
2364  if (FreeFile(qfile))
2365  {
2366  ereport(LOG,
2368  errmsg("could not write file \"%s\": %m",
2369  PGSS_TEXT_FILE)));
2370  qfile = NULL;
2371  goto gc_fail;
2372  }
2373 
2374  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2375  pgss->extent, extent);
2376 
2377  /* Reset the shared extent pointer */
2378  pgss->extent = extent;
2379 
2380  /*
2381  * Also update the mean query length, to be sure that need_gc_qtexts()
2382  * won't still think we have a problem.
2383  */
2384  if (nentries > 0)
2385  pgss->mean_query_len = extent / nentries;
2386  else
2388 
2389  free(qbuffer);
2390 
2391  /*
2392  * OK, count a garbage collection cycle. (Note: even though we have
2393  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2394  * other processes may examine gc_count while holding only the mutex.
2395  * Also, we have to advance the count *after* we've rewritten the file,
2396  * else other processes might not realize they read a stale file.)
2397  */
2398  record_gc_qtexts();
2399 
2400  return;
2401 
2402 gc_fail:
2403  /* clean up resources */
2404  if (qfile)
2405  FreeFile(qfile);
2406  if (qbuffer)
2407  free(qbuffer);
2408 
2409  /*
2410  * Since the contents of the external file are now uncertain, mark all
2411  * hashtable entries as having invalid texts.
2412  */
2413  hash_seq_init(&hash_seq, pgss_hash);
2414  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2415  {
2416  entry->query_offset = 0;
2417  entry->query_len = -1;
2418  }
2419 
2420  /*
2421  * Destroy the query text file and create a new, empty one
2422  */
2423  (void) unlink(PGSS_TEXT_FILE);
2425  if (qfile == NULL)
2426  ereport(LOG,
2428  errmsg("could not recreate file \"%s\": %m",
2429  PGSS_TEXT_FILE)));
2430  else
2431  FreeFile(qfile);
2432 
2433  /* Reset the shared extent pointer */
2434  pgss->extent = 0;
2435 
2436  /* Reset mean_query_len to match the new state */
2438 
2439  /*
2440  * Bump the GC count even though we failed.
2441  *
2442  * This is needed to make concurrent readers of file without any lock on
2443  * pgss->lock notice existence of new version of file. Once readers
2444  * subsequently observe a change in GC count with pgss->lock held, that
2445  * forces a safe reopen of file. Writers also require that we bump here,
2446  * of course. (As required by locking protocol, readers and writers don't
2447  * trust earlier file contents until gc_count is found unchanged after
2448  * pgss->lock acquired in shared or exclusive mode respectively.)
2449  */
2450  record_gc_qtexts();
2451 }
2452 
2453 /*
2454  * Release entries corresponding to parameters passed.
2455  */
2456 static void
2458 {
2459  HASH_SEQ_STATUS hash_seq;
2460  pgssEntry *entry;
2461  FILE *qfile;
2462  long num_entries;
2463  long num_remove = 0;
2464  pgssHashKey key;
2465 
2466  if (!pgss || !pgss_hash)
2467  ereport(ERROR,
2468  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2469  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
2470 
2472  num_entries = hash_get_num_entries(pgss_hash);
2473 
2474  if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2475  {
2476  /* If all the parameters are available, use the fast path. */
2477  memset(&key, 0, sizeof(pgssHashKey));
2478  key.userid = userid;
2479  key.dbid = dbid;
2480  key.queryid = queryid;
2481 
2482  /* Remove the key if it exists, starting with the top-level entry */
2483  key.toplevel = false;
2484  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2485  if (entry) /* found */
2486  num_remove++;
2487 
2488  /* Also remove entries for top level statements */
2489  key.toplevel = true;
2490 
2491  /* Remove the key if exists */
2492  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
2493  if (entry) /* found */
2494  num_remove++;
2495  }
2496  else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2497  {
2498  /* Remove entries corresponding to valid parameters. */
2499  hash_seq_init(&hash_seq, pgss_hash);
2500  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2501  {
2502  if ((!userid || entry->key.userid == userid) &&
2503  (!dbid || entry->key.dbid == dbid) &&
2504  (!queryid || entry->key.queryid == queryid))
2505  {
2506  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2507  num_remove++;
2508  }
2509  }
2510  }
2511  else
2512  {
2513  /* Remove all entries. */
2514  hash_seq_init(&hash_seq, pgss_hash);
2515  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2516  {
2517  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2518  num_remove++;
2519  }
2520  }
2521 
2522  /* All entries are removed? */
2523  if (num_entries != num_remove)
2524  goto release_lock;
2525 
2526  /*
2527  * Reset global statistics for pg_stat_statements since all entries are
2528  * removed.
2529  */
2530  {
2531  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2532  TimestampTz stats_reset = GetCurrentTimestamp();
2533 
2534  SpinLockAcquire(&s->mutex);
2535  s->stats.dealloc = 0;
2536  s->stats.stats_reset = stats_reset;
2537  SpinLockRelease(&s->mutex);
2538  }
2539 
2540  /*
2541  * Write new empty query file, perhaps even creating a new one to recover
2542  * if the file was missing.
2543  */
2545  if (qfile == NULL)
2546  {
2547  ereport(LOG,
2549  errmsg("could not create file \"%s\": %m",
2550  PGSS_TEXT_FILE)));
2551  goto done;
2552  }
2553 
2554  /* If ftruncate fails, log it, but it's not a fatal problem */
2555  if (ftruncate(fileno(qfile), 0) != 0)
2556  ereport(LOG,
2558  errmsg("could not truncate file \"%s\": %m",
2559  PGSS_TEXT_FILE)));
2560 
2561  FreeFile(qfile);
2562 
2563 done:
2564  pgss->extent = 0;
2565  /* This counts as a query text garbage collection for our purposes */
2566  record_gc_qtexts();
2567 
2568 release_lock:
2569  LWLockRelease(pgss->lock);
2570 }
2571 
2572 /*
2573  * Generate a normalized version of the query string that will be used to
2574  * represent all similar queries.
2575  *
2576  * Note that the normalized representation may well vary depending on
2577  * just which "equivalent" query is used to create the hashtable entry.
2578  * We assume this is OK.
2579  *
2580  * If query_loc > 0, then "query" has been advanced by that much compared to
2581  * the original string start, so we need to translate the provided locations
2582  * to compensate. (This lets us avoid re-scanning statements before the one
2583  * of interest, so it's worth doing.)
2584  *
2585  * *query_len_p contains the input string length, and is updated with
2586  * the result string length on exit. The resulting string might be longer
2587  * or shorter depending on what happens with replacement of constants.
2588  *
2589  * Returns a palloc'd string.
2590  */
2591 static char *
2592 generate_normalized_query(JumbleState *jstate, const char *query,
2593  int query_loc, int *query_len_p)
2594 {
2595  char *norm_query;
2596  int query_len = *query_len_p;
2597  int i,
2598  norm_query_buflen, /* Space allowed for norm_query */
2599  len_to_wrt, /* Length (in bytes) to write */
2600  quer_loc = 0, /* Source query byte location */
2601  n_quer_loc = 0, /* Normalized query byte location */
2602  last_off = 0, /* Offset from start for previous tok */
2603  last_tok_len = 0; /* Length (in bytes) of that tok */
2604 
2605  /*
2606  * Get constants' lengths (core system only gives us locations). Note
2607  * this also ensures the items are sorted by location.
2608  */
2609  fill_in_constant_lengths(jstate, query, query_loc);
2610 
2611  /*
2612  * Allow for $n symbols to be longer than the constants they replace.
2613  * Constants must take at least one byte in text form, while a $n symbol
2614  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2615  * could refine that limit based on the max value of n for the current
2616  * query, but it hardly seems worth any extra effort to do so.
2617  */
2618  norm_query_buflen = query_len + jstate->clocations_count * 10;
2619 
2620  /* Allocate result buffer */
2621  norm_query = palloc(norm_query_buflen + 1);
2622 
2623  for (i = 0; i < jstate->clocations_count; i++)
2624  {
2625  int off, /* Offset from start for cur tok */
2626  tok_len; /* Length (in bytes) of that tok */
2627 
2628  off = jstate->clocations[i].location;
2629  /* Adjust recorded location if we're dealing with partial string */
2630  off -= query_loc;
2631 
2632  tok_len = jstate->clocations[i].length;
2633 
2634  if (tok_len < 0)
2635  continue; /* ignore any duplicates */
2636 
2637  /* Copy next chunk (what precedes the next constant) */
2638  len_to_wrt = off - last_off;
2639  len_to_wrt -= last_tok_len;
2640 
2641  Assert(len_to_wrt >= 0);
2642  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2643  n_quer_loc += len_to_wrt;
2644 
2645  /* And insert a param symbol in place of the constant token */
2646  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2647  i + 1 + jstate->highest_extern_param_id);
2648 
2649  quer_loc = off + tok_len;
2650  last_off = off;
2651  last_tok_len = tok_len;
2652  }
2653 
2654  /*
2655  * We've copied up until the last ignorable constant. Copy over the
2656  * remaining bytes of the original query string.
2657  */
2658  len_to_wrt = query_len - quer_loc;
2659 
2660  Assert(len_to_wrt >= 0);
2661  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2662  n_quer_loc += len_to_wrt;
2663 
2664  Assert(n_quer_loc <= norm_query_buflen);
2665  norm_query[n_quer_loc] = '\0';
2666 
2667  *query_len_p = n_quer_loc;
2668  return norm_query;
2669 }
2670 
2671 /*
2672  * Given a valid SQL string and an array of constant-location records,
2673  * fill in the textual lengths of those constants.
2674  *
2675  * The constants may use any allowed constant syntax, such as float literals,
2676  * bit-strings, single-quoted strings and dollar-quoted strings. This is
2677  * accomplished by using the public API for the core scanner.
2678  *
2679  * It is the caller's job to ensure that the string is a valid SQL statement
2680  * with constants at the indicated locations. Since in practice the string
2681  * has already been parsed, and the locations that the caller provides will
2682  * have originated from within the authoritative parser, this should not be
2683  * a problem.
2684  *
2685  * Duplicate constant pointers are possible, and will have their lengths
2686  * marked as '-1', so that they are later ignored. (Actually, we assume the
2687  * lengths were initialized as -1 to start with, and don't change them here.)
2688  *
2689  * If query_loc > 0, then "query" has been advanced by that much compared to
2690  * the original string start, so we need to translate the provided locations
2691  * to compensate. (This lets us avoid re-scanning statements before the one
2692  * of interest, so it's worth doing.)
2693  *
2694  * N.B. There is an assumption that a '-' character at a Const location begins
2695  * a negative numeric constant. This precludes there ever being another
2696  * reason for a constant to start with a '-'.
2697  */
2698 static void
2699 fill_in_constant_lengths(JumbleState *jstate, const char *query,
2700  int query_loc)
2701 {
2702  LocationLen *locs;
2704  core_yy_extra_type yyextra;
2705  core_YYSTYPE yylval;
2706  YYLTYPE yylloc;
2707  int last_loc = -1;
2708  int i;
2709 
2710  /*
2711  * Sort the records by location so that we can process them in order while
2712  * scanning the query text.
2713  */
2714  if (jstate->clocations_count > 1)
2715  qsort(jstate->clocations, jstate->clocations_count,
2716  sizeof(LocationLen), comp_location);
2717  locs = jstate->clocations;
2718 
2719  /* initialize the flex scanner --- should match raw_parser() */
2720  yyscanner = scanner_init(query,
2721  &yyextra,
2722  &ScanKeywords,
2724 
2725  /* we don't want to re-emit any escape string warnings */
2726  yyextra.escape_string_warning = false;
2727 
2728  /* Search for each constant, in sequence */
2729  for (i = 0; i < jstate->clocations_count; i++)
2730  {
2731  int loc = locs[i].location;
2732  int tok;
2733 
2734  /* Adjust recorded location if we're dealing with partial string */
2735  loc -= query_loc;
2736 
2737  Assert(loc >= 0);
2738 
2739  if (loc <= last_loc)
2740  continue; /* Duplicate constant, ignore */
2741 
2742  /* Lex tokens until we find the desired constant */
2743  for (;;)
2744  {
2745  tok = core_yylex(&yylval, &yylloc, yyscanner);
2746 
2747  /* We should not hit end-of-string, but if we do, behave sanely */
2748  if (tok == 0)
2749  break; /* out of inner for-loop */
2750 
2751  /*
2752  * We should find the token position exactly, but if we somehow
2753  * run past it, work with that.
2754  */
2755  if (yylloc >= loc)
2756  {
2757  if (query[loc] == '-')
2758  {
2759  /*
2760  * It's a negative value - this is the one and only case
2761  * where we replace more than a single token.
2762  *
2763  * Do not compensate for the core system's special-case
2764  * adjustment of location to that of the leading '-'
2765  * operator in the event of a negative constant. It is
2766  * also useful for our purposes to start from the minus
2767  * symbol. In this way, queries like "select * from foo
2768  * where bar = 1" and "select * from foo where bar = -2"
2769  * will have identical normalized query strings.
2770  */
2771  tok = core_yylex(&yylval, &yylloc, yyscanner);
2772  if (tok == 0)
2773  break; /* out of inner for-loop */
2774  }
2775 
2776  /*
2777  * We now rely on the assumption that flex has placed a zero
2778  * byte after the text of the current token in scanbuf.
2779  */
2780  locs[i].length = strlen(yyextra.scanbuf + loc);
2781  break; /* out of inner for-loop */
2782  }
2783  }
2784 
2785  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2786  if (tok == 0)
2787  break;
2788 
2789  last_loc = loc;
2790  }
2791 
2792  scanner_finish(yyscanner);
2793 }
2794 
2795 /*
2796  * comp_location: comparator for qsorting LocationLen structs by location
2797  */
2798 static int
2799 comp_location(const void *a, const void *b)
2800 {
2801  int l = ((const LocationLen *) a)->location;
2802  int r = ((const LocationLen *) b)->location;
2803 
2804  if (l < r)
2805  return -1;
2806  else if (l > r)
2807  return +1;
2808  else
2809  return 0;
2810 }
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
int slock_t
Definition: s_lock.h:934
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9177
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:69
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:309
void _PG_init(void)
Definition: lwlock.h:31
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:71
#define PG_STAT_STATEMENTS_COLS_V1_3
#define DEBUG1
Definition: elog.h:25
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:207
double total_time[PGSS_NUMKIND]
WalUsage walusage
Definition: instrument.h:88
#define PG_STAT_STATEMENTS_COLS_V1_8
int stmt_location
Definition: parsenodes.h:192
EState * estate
Definition: execdesc.h:48
void * core_yyscan_t
Definition: scanner.h:121
#define HASH_ELEM
Definition: hsearch.h:95
static void error(void)
Definition: sql-dyntest.c:147
#define PG_STAT_STATEMENTS_COLS_V1_0
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1598
#define USAGE_DEALLOC_PERCENT
int64 shared_blks_read
static const uint32 PGSS_PG_MAJOR_VERSION
int64 local_blks_written
Definition: guc.h:164
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:699
Oid GetUserId(void)
Definition: miscinit.c:478
instr_time blk_read_time
Definition: instrument.h:36
static ProcessUtility_hook_type prev_ProcessUtility
struct pgssSharedState pgssSharedState
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
int64 TimestampTz
Definition: timestamp.h:39
WalUsage pgWalUsage
Definition: instrument.c:22
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_dirtied
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_INFO_COLS
static void pgss_shmem_startup(void)
#define SpinLockInit(lock)
Definition: spin.h:60
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:202
PGDLLIMPORT const uint16 ScanKeywordTokens[]
struct timeval instr_time
Definition: instr_time.h:150
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:147
#define Min(x, y)
Definition: c.h:986
int64 wal_fpi
Definition: instrument.h:50
double max_time[PGSS_NUMKIND]
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:274
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static ExecutorRun_hook_type prev_ExecutorRun
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
static ExecutorEnd_hook_type prev_ExecutorEnd
Size entrysize
Definition: hsearch.h:76
#define PGSS_DUMP_FILE
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:76
#define pgss_enabled(level)
Definition: nodes.h:539
static int pgss_track
static int entry_cmp(const void *lhs, const void *rhs)
int errcode(int sqlerrcode)
Definition: elog.c:698
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:468
struct pgssEntry pgssEntry
#define PG_BINARY_W
Definition: c.h:1274
ProcessUtilityContext
Definition: utility.h:20
static ExecutorStart_hook_type prev_ExecutorStart
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:76
#define MemSet(start, val, len)
Definition: c.h:1008
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1382
int64 temp_blks_written
Definition: instrument.h:35
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
Counters counters
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static int exec_nested_level
int64 temp_blks_read
#define PG_STAT_STATEMENTS_COLS_V1_1
int64 shared_blks_dirtied
Definition: instrument.h:28
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Node * utilityStmt
Definition: parsenodes.h:128
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
#define PG_BINARY_R
Definition: c.h:1273
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1271
int stmt_len
Definition: plannodes.h:90
char * scanbuf
Definition: scanner.h:72
ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset)
Definition: pwrite.c:27
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:408
int duration
Definition: pgbench.c:184
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9262
#define MaxAllocHugeSize
Definition: memutils.h:44
signed int int32
Definition: c.h:429
int clocations_count
Definition: queryjumble.h:49
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:244
int64 local_blks_read
Definition: instrument.h:31
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:71
static planner_hook_type prev_planner_hook
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
PGDLLIMPORT const ScanKeywordList ScanKeywords
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
#define ASSUMED_LENGTH_INIT
static bool pgss_track_planning
#define sprintf
Definition: port.h:218
static pgssSharedState * pgss
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:219
#define fstat
Definition: win32_port.h:274
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
void pfree(void *pointer)
Definition: mcxt.c:1169
MemoryContext es_query_cxt
Definition: execnodes.h:600
int64 shared_blks_hit
static int pgss_max
static void entry_dealloc(void)
static HTAB * pgss_hash
int64 local_blks_hit
Definition: instrument.h:30
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
uint64 nprocessed
Definition: cmdtag.h:31
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:72
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:74
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2467
struct pg_encoding enc
Definition: encode.c:516
fmNodePtr resultinfo
Definition: fmgr.h:89
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
#define TimestampTzGetDatum(X)
Definition: timestamp.h:32
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:544
Definition: guc.h:75
void EmitWarningsOnPlaceholders(const char *className)
Definition: guc.c:9290
int highest_extern_param_id
Definition: queryjumble.h:52
#define USAGE_EXEC(duration)
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
int stmt_location
Definition: plannodes.h:89
pgssStoreKind
static char * buf
Definition: pg_test_fsync.c:68
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:21
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
bool IsUnderPostmaster
Definition: globals.c:112
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, JumbleState *jstate)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
int errdetail(const char *fmt,...)
Definition: elog.c:1042
int errcode_for_file_access(void)
Definition: elog.c:721
ScanDirection
Definition: sdir.h:22
#define CStringGetDatum(X)
Definition: postgres.h:622
Node * utilityStmt
Definition: plannodes.h:86
static char * qtext_load_file(Size *buffer_size)
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2417
#define YYLTYPE
Definition: scanner.h:44
unsigned int uint32
Definition: c.h:441
#define IS_STICKY(c)
int64 local_blks_dirtied
Definition: instrument.h:32
static ExecutorFinish_hook_type prev_ExecutorFinish
void _PG_fini(void)
planner_hook_type planner_hook
Definition: planner.c:74
__int64 st_size
Definition: win32_port.h:265
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:625
const char * p_sourcetext
Definition: parse_node.h:181
int64 temp_blks_written
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1697
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
int64 shared_blks_written
#define STICKY_DECREASE_FACTOR
#define USAGE_DECREASE_FACTOR
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:692
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:630
Definition: guc.h:72
struct pgssGlobalStats pgssGlobalStats
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
static Size pgss_memsize(void)
uint64 queryId
Definition: parsenodes.h:124
int CloseTransientFile(int fd)
Definition: fd.c:2644
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:53
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:780
pgssGlobalStats stats
static int comp_location(const void *a, const void *b)
#define SpinLockRelease(lock)
Definition: spin.h:64
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
#define HASH_BLOBS
Definition: hsearch.h:97
int64 local_blks_read
static bool need_gc_qtexts(void)
#define PGSS_HANDLED_UTILITY(n)
#define PG_FINALLY()
Definition: elog.h:330
uintptr_t Datum
Definition: postgres.h:411
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
static void pgss_shmem_shutdown(int code, Datum arg)
static void gc_qtexts(void)
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
double mean_time[PGSS_NUMKIND]
Oid MyDatabaseId
Definition: globals.c:88
PGSSTrackLevel
#define Int64GetDatumFast(X)
Definition: postgres.h:804
static void entry_reset(Oid userid, Oid dbid, uint64 queryid)
Size keysize
Definition: hsearch.h:75
int work_mem
Definition: globals.c:124
BufferUsage bufusage
Definition: instrument.h:87
#define USAGE_INIT
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
static zic_t const max_time
Definition: zic.c:584
#define BoolGetDatum(X)
Definition: postgres.h:446
pgssHashKey key
CommandTag commandTag
Definition: cmdtag.h:30
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
#define ereport(elevel,...)
Definition: elog.h:157
int allowedModes
Definition: execnodes.h:305
#define PG_STAT_STATEMENTS_COLS
#define free(a)
Definition: header.h:65
int64 calls[PGSS_NUMKIND]
#define PG_RETURN_VOID()
Definition: fmgr.h:349
bool is_member_of_role(Oid member, Oid role)
Definition: acl.c:4869
#define Float8GetDatumFast(X)
Definition: postgres.h:805
SetFunctionReturnMode returnMode
Definition: execnodes.h:307
double min_time[PGSS_NUMKIND]
struct Instrumentation * totaltime
Definition: execdesc.h:55
#define Max(x, y)
Definition: c.h:980
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:80
double blk_read_time
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
const char * CleanQuerytext(const char *query, int *location, int *len)
Definition: queryjumble.c:62
static bool pgss_track_utility
#define Assert(condition)
Definition: c.h:804
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:595
int64 local_blks_dirtied
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
uint64 es_processed
Definition: execnodes.h:604
PG_MODULE_MAGIC
instr_time blk_write_time
Definition: instrument.h:37
void EnableQueryId(void)
Definition: queryjumble.c:150
int64 local_blks_hit
size_t Size
Definition: c.h:540
struct Counters Counters
TimestampTz stats_reset
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
double blk_write_time
#define MAXALIGN(LEN)
Definition: c.h:757
#define record_gc_qtexts()
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1436
int32 encoding
Definition: pg_database.h:41
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:233
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:210
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
double sum_var_time[PGSS_NUMKIND]
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1426
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
static int plan_nested_level
Tuplestorestate * setResult
Definition: execnodes.h:310
static shmem_startup_hook_type prev_shmem_startup_hook
int64 wal_records
Definition: instrument.h:49
const char * sourceText
Definition: execdesc.h:38
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
int FreeFile(FILE *file)
Definition: fd.c:2616
static Datum values[MAXATTR]
Definition: bootstrap.c:166
ExprContext * econtext
Definition: execnodes.h:303
LocationLen * clocations
Definition: queryjumble.h:43
#define Int32GetDatum(X)
Definition: postgres.h:523
e
Definition: preproc-init.c:82
int64 local_blks_written
Definition: instrument.h:33
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
TupleDesc setDesc
Definition: execnodes.h:311
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
void * palloc(Size size)
Definition: mcxt.c:1062
uint64 queryId
Definition: plannodes.h:48
int errmsg(const char *fmt,...)
Definition: elog.c:909
Datum pg_stat_statements(PG_FUNCTION_ARGS)
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:65
int64 shared_blks_hit
Definition: instrument.h:26
#define elog(elevel,...)
Definition: elog.h:232
int i
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:276
bool escape_string_warning
Definition: scanner.h:88
static const uint32 PGSS_FILE_HEADER
#define CStringGetTextDatum(s)
Definition: builtins.h:82
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
void * arg
int64 temp_blks_read
Definition: instrument.h:34
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void scanner_finish(core_yyscan_t yyscanner)
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:73
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:341
struct pgssHashKey pgssHashKey
PlannedStmt * plannedstmt
Definition: execdesc.h:37
#define qsort(a, b, c, d)
Definition: port.h:504
uint64 wal_bytes
Definition: instrument.h:51
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
#define PG_TRY()
Definition: elog.h:313
static zic_t const min_time
Definition: zic.c:583
int64 shared_blks_written
Definition: instrument.h:29
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
#define PGSS_TEXT_FILE
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:9151
#define snprintf
Definition: port.h:216
int stmt_len
Definition: parsenodes.h:193
#define UINT64_FORMAT
Definition: c.h:484
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:58
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define PG_END_TRY()
Definition: elog.h:338
#define ASSUMED_MEDIAN_INIT
#define read(a, b, c)
Definition: win32.h:13
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1512
static const struct config_enum_entry track_options[]
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define PG_STAT_STATEMENTS_COLS_V1_2
static bool pgss_save
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static struct subre * parse(struct vars *, int, int, struct state *, struct state *)
Definition: regcomp.c:665
#define PG_STAT_STATEMENTS_COLS_V1_9
#define ftruncate(a, b)
Definition: win32_port.h:65