PostgreSQL Source Code  git master
pg_stat_statements.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * Track statement execution times across a whole database cluster.
5  *
6  * Execution costs are totalled for each distinct source query, and kept in
7  * a shared hashtable. (We track only as many distinct queries as will fit
8  * in the designated amount of shared memory.)
9  *
10  * As of Postgres 9.2, this module normalizes query entries. Normalization
11  * is a process whereby similar queries, typically differing only in their
12  * constants (though the exact rules are somewhat more subtle than that) are
13  * recognized as equivalent, and are tracked as a single entry. This is
14  * particularly useful for non-prepared queries.
15  *
16  * Normalization is implemented by fingerprinting queries, selectively
17  * serializing those fields of each query tree's nodes that are judged to be
18  * essential to the query. This is referred to as a query jumble. This is
19  * distinct from a regular serialization in that various extraneous
20  * information is ignored as irrelevant or not essential to the query, such
21  * as the collations of Vars and, most notably, the values of constants.
22  *
23  * This jumble is acquired at the end of parse analysis of each query, and
24  * a 64-bit hash of it is stored into the query's Query.queryId field.
25  * The server then copies this value around, making it available in plan
26  * tree(s) generated from the query. The executor can then use this value
27  * to blame query costs on the proper queryId.
28  *
29  * To facilitate presenting entries to users, we create "representative" query
30  * strings in which constants are replaced with parameter symbols ($n), to
31  * make it clearer what a normalized entry can represent. To save on shared
32  * memory, and to avoid having to truncate oversized query strings, we store
33  * these strings in a temporary external query-texts file. Offsets into this
34  * file are kept in shared memory.
35  *
36  * Note about locking issues: to create or delete an entry in the shared
37  * hashtable, one must hold pgss->lock exclusively. Modifying any field
38  * in an entry except the counters requires the same. To look up an entry,
39  * one must hold the lock shared. To read or update the counters within
40  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
41  * disappear!) and also take the entry's mutex spinlock.
42  * The shared state variable pgss->extent (the next free spot in the external
43  * query-text file) should be accessed only while holding either the
44  * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
45  * allow reserving file space while holding only shared lock on pgss->lock.
46  * Rewriting the entire external query-text file, eg for garbage collection,
47  * requires holding pgss->lock exclusively; this allows individual entries
48  * in the file to be read or written while holding only shared lock.
49  *
50  *
51  * Copyright (c) 2008-2017, PostgreSQL Global Development Group
52  *
53  * IDENTIFICATION
54  * contrib/pg_stat_statements/pg_stat_statements.c
55  *
56  *-------------------------------------------------------------------------
57  */
58 #include "postgres.h"
59 
60 #include <math.h>
61 #include <sys/stat.h>
62 #include <unistd.h>
63 
64 #include "access/hash.h"
65 #include "catalog/pg_authid.h"
66 #include "executor/instrument.h"
67 #include "funcapi.h"
68 #include "mb/pg_wchar.h"
69 #include "miscadmin.h"
70 #include "parser/analyze.h"
71 #include "parser/parsetree.h"
72 #include "parser/scanner.h"
73 #include "parser/scansup.h"
74 #include "pgstat.h"
75 #include "storage/fd.h"
76 #include "storage/ipc.h"
77 #include "storage/spin.h"
78 #include "tcop/utility.h"
79 #include "utils/builtins.h"
80 #include "utils/memutils.h"
81 
83 
84 /* Location of permanent stats file (valid when database is shut down) */
85 #define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
86 
87 /*
88  * Location of external query text file. We don't keep it in the core
89  * system's stats_temp_directory. The core system can safely use that GUC
90  * setting, because the statistics collector temp file paths are set only once
91  * as part of changing the GUC, but pg_stat_statements has no way of avoiding
92  * race conditions. Besides, we only expect modest, infrequent I/O for query
93  * strings, so placing the file on a faster filesystem is not compelling.
94  */
95 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
96 
97 /* Magic number identifying the stats file format */
98 static const uint32 PGSS_FILE_HEADER = 0x20171004;
99 
100 /* PostgreSQL major version number; any change in it invalidates all entries */
101 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
102 
103 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
104 #define USAGE_EXEC(duration) (1.0)
105 #define USAGE_INIT (1.0) /* including initial planning */
106 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
107 #define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
108 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
109 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
110 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
111 
112 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
113 
114 /*
115  * Extension version number, for supporting older extension versions' objects
116  */
117 typedef enum pgssVersion
118 {
123 } pgssVersion;
124 
125 /*
126  * Hashtable key that defines the identity of a hashtable entry. We separate
127  * queries by user and by database even if they are otherwise identical.
128  *
129  * Right now, this structure contains no padding. If you add any, make sure
130  * to teach pgss_store() to zero the padding bytes. Otherwise, things will
131  * break, because pgss_hash is created using HASH_BLOBS, and thus tag_hash
132  * is used to hash this.
133  */
134 typedef struct pgssHashKey
135 {
136  Oid userid; /* user OID */
137  Oid dbid; /* database OID */
138  uint64 queryid; /* query identifier */
139 } pgssHashKey;
140 
141 /*
142  * The actual stats counters kept within pgssEntry.
143  */
144 typedef struct Counters
145 {
146  int64 calls; /* # of times executed */
147  double total_time; /* total execution time, in msec */
148  double min_time; /* minimum execution time in msec */
149  double max_time; /* maximum execution time in msec */
150  double mean_time; /* mean execution time in msec */
151  double sum_var_time; /* sum of variances in execution time in msec */
152  int64 rows; /* total # of retrieved or affected rows */
153  int64 shared_blks_hit; /* # of shared buffer hits */
154  int64 shared_blks_read; /* # of shared disk blocks read */
155  int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
156  int64 shared_blks_written; /* # of shared disk blocks written */
157  int64 local_blks_hit; /* # of local buffer hits */
158  int64 local_blks_read; /* # of local disk blocks read */
159  int64 local_blks_dirtied; /* # of local disk blocks dirtied */
160  int64 local_blks_written; /* # of local disk blocks written */
161  int64 temp_blks_read; /* # of temp blocks read */
162  int64 temp_blks_written; /* # of temp blocks written */
163  double blk_read_time; /* time spent reading, in msec */
164  double blk_write_time; /* time spent writing, in msec */
165  double usage; /* usage factor */
166 } Counters;
167 
168 /*
169  * Statistics per statement
170  *
171  * Note: in event of a failure in garbage collection of the query text file,
172  * we reset query_offset to zero and query_len to -1. This will be seen as
173  * an invalid state by qtext_fetch().
174  */
175 typedef struct pgssEntry
176 {
177  pgssHashKey key; /* hash key of entry - MUST BE FIRST */
178  Counters counters; /* the statistics for this query */
179  Size query_offset; /* query text offset in external file */
180  int query_len; /* # of valid bytes in query string, or -1 */
181  int encoding; /* query text encoding */
182  slock_t mutex; /* protects the counters only */
183 } pgssEntry;
184 
185 /*
186  * Global shared state
187  */
188 typedef struct pgssSharedState
189 {
190  LWLock *lock; /* protects hashtable search/modification */
191  double cur_median_usage; /* current median usage in hashtable */
192  Size mean_query_len; /* current mean entry text length */
193  slock_t mutex; /* protects following fields only: */
194  Size extent; /* current extent of query file */
195  int n_writers; /* number of active writers to query file */
196  int gc_count; /* query file garbage collection cycle count */
198 
199 /*
200  * Struct for tracking locations/lengths of constants during normalization
201  */
202 typedef struct pgssLocationLen
203 {
204  int location; /* start offset in query text */
205  int length; /* length in bytes, or -1 to ignore */
207 
208 /*
209  * Working state for computing a query jumble and producing a normalized
210  * query string
211  */
212 typedef struct pgssJumbleState
213 {
214  /* Jumble of current query tree */
215  unsigned char *jumble;
216 
217  /* Number of bytes used in jumble[] */
219 
220  /* Array of locations of constants that should be removed */
222 
223  /* Allocated length of clocations array */
225 
226  /* Current number of valid entries in clocations array */
228 
229  /* highest Param id we've seen, in order to start normalization correctly */
232 
233 /*---- Local variables ----*/
234 
235 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
236 static int nested_level = 0;
237 
238 /* Saved hook values in case of unload */
246 
247 /* Links to shared memory state */
248 static pgssSharedState *pgss = NULL;
249 static HTAB *pgss_hash = NULL;
250 
251 /*---- GUC variables ----*/
252 
253 typedef enum
254 {
255  PGSS_TRACK_NONE, /* track no statements */
256  PGSS_TRACK_TOP, /* only top level statements */
257  PGSS_TRACK_ALL /* all statements, including nested ones */
259 
260 static const struct config_enum_entry track_options[] =
261 {
262  {"none", PGSS_TRACK_NONE, false},
263  {"top", PGSS_TRACK_TOP, false},
264  {"all", PGSS_TRACK_ALL, false},
265  {NULL, 0, false}
266 };
267 
268 static int pgss_max; /* max # statements to track */
269 static int pgss_track; /* tracking level */
270 static bool pgss_track_utility; /* whether to track utility commands */
271 static bool pgss_save; /* whether to save stats across shutdown */
272 
273 
/*
 * True when statements should be tracked at the current nesting depth:
 * always under PGSS_TRACK_ALL, and only at nesting level zero under
 * PGSS_TRACK_TOP.
 */
#define pgss_enabled() \
	(pgss_track == PGSS_TRACK_ALL || \
	 (nested_level == 0 && pgss_track == PGSS_TRACK_TOP))
277 
/*
 * Increment the shared gc_count, holding pgss->mutex for the duration of
 * the update.
 */
#define record_gc_qtexts() \
	do { \
		volatile pgssSharedState *shared = (volatile pgssSharedState *) pgss; \
		SpinLockAcquire(&shared->mutex); \
		shared->gc_count += 1; \
		SpinLockRelease(&shared->mutex); \
	} while(0)
285 
286 /*---- Function declarations ----*/
287 
288 void _PG_init(void);
289 void _PG_fini(void);
290 
295 
296 static void pgss_shmem_startup(void);
297 static void pgss_shmem_shutdown(int code, Datum arg);
298 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
299 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
300 static void pgss_ExecutorRun(QueryDesc *queryDesc,
301  ScanDirection direction,
302  uint64 count, bool execute_once);
303 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
304 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
305 static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
306  ProcessUtilityContext context, ParamListInfo params,
307  QueryEnvironment *queryEnv,
308  DestReceiver *dest, char *completionTag);
309 static uint64 pgss_hash_string(const char *str, int len);
310 static void pgss_store(const char *query, uint64 queryId,
311  int query_location, int query_len,
312  double total_time, uint64 rows,
313  const BufferUsage *bufusage,
314  pgssJumbleState *jstate);
316  pgssVersion api_version,
317  bool showtext);
318 static Size pgss_memsize(void);
319 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
320  int encoding, bool sticky);
321 static void entry_dealloc(void);
322 static bool qtext_store(const char *query, int query_len,
323  Size *query_offset, int *gc_count);
324 static char *qtext_load_file(Size *buffer_size);
325 static char *qtext_fetch(Size query_offset, int query_len,
326  char *buffer, Size buffer_size);
327 static bool need_gc_qtexts(void);
328 static void gc_qtexts(void);
329 static void entry_reset(void);
330 static void AppendJumble(pgssJumbleState *jstate,
331  const unsigned char *item, Size size);
332 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
333 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
334 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
335 static void RecordConstLocation(pgssJumbleState *jstate, int location);
336 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
337  int query_loc, int *query_len_p, int encoding);
338 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
339  int query_loc);
340 static int comp_location(const void *a, const void *b);
341 
342 
343 /*
344  * Module load callback
345  */
346 void
347 _PG_init(void)
348 {
349  /*
350  * In order to create our shared memory area, we have to be loaded via
351  * shared_preload_libraries. If not, fall out without hooking into any of
352  * the main system. (We don't throw error here because it seems useful to
353  * allow the pg_stat_statements functions to be created even when the
354  * module isn't active. The functions must protect themselves against
355  * being called then, however.)
356  */
358  return;
359 
360  /*
361  * Define (or redefine) custom GUC variables.
362  */
363  DefineCustomIntVariable("pg_stat_statements.max",
364  "Sets the maximum number of statements tracked by pg_stat_statements.",
365  NULL,
366  &pgss_max,
367  5000,
368  100,
369  INT_MAX,
371  0,
372  NULL,
373  NULL,
374  NULL);
375 
376  DefineCustomEnumVariable("pg_stat_statements.track",
377  "Selects which statements are tracked by pg_stat_statements.",
378  NULL,
379  &pgss_track,
381  track_options,
382  PGC_SUSET,
383  0,
384  NULL,
385  NULL,
386  NULL);
387 
388  DefineCustomBoolVariable("pg_stat_statements.track_utility",
389  "Selects whether utility commands are tracked by pg_stat_statements.",
390  NULL,
392  true,
393  PGC_SUSET,
394  0,
395  NULL,
396  NULL,
397  NULL);
398 
399  DefineCustomBoolVariable("pg_stat_statements.save",
400  "Save pg_stat_statements statistics across server shutdowns.",
401  NULL,
402  &pgss_save,
403  true,
404  PGC_SIGHUP,
405  0,
406  NULL,
407  NULL,
408  NULL);
409 
410  EmitWarningsOnPlaceholders("pg_stat_statements");
411 
412  /*
413  * Request additional shared resources. (These are no-ops if we're not in
414  * the postmaster process.) We'll allocate or attach to the shared
415  * resources in pgss_shmem_startup().
416  */
418  RequestNamedLWLockTranche("pg_stat_statements", 1);
419 
420  /*
421  * Install hooks.
422  */
437 }
438 
439 /*
440  * Module unload callback
441  */
442 void
443 _PG_fini(void)
444 {
445  /* Uninstall hooks. */
453 }
454 
455 /*
456  * shmem_startup hook: allocate or attach to shared memory,
457  * then load any pre-existing statistics from file.
458  * Also create and load the query-texts file, which is expected to exist
459  * (even if empty) while the module is enabled.
460  */
461 static void
463 {
464  bool found;
465  HASHCTL info;
466  FILE *file = NULL;
467  FILE *qfile = NULL;
468  uint32 header;
469  int32 num;
470  int32 pgver;
471  int32 i;
472  int buffer_size;
473  char *buffer = NULL;
474 
477 
478  /* reset in case this is a restart within the postmaster */
479  pgss = NULL;
480  pgss_hash = NULL;
481 
482  /*
483  * Create or attach to the shared memory state, including hash table
484  */
485  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
486 
487  pgss = ShmemInitStruct("pg_stat_statements",
488  sizeof(pgssSharedState),
489  &found);
490 
491  if (!found)
492  {
493  /* First time through ... */
494  pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
497  SpinLockInit(&pgss->mutex);
498  pgss->extent = 0;
499  pgss->n_writers = 0;
500  pgss->gc_count = 0;
501  }
502 
503  memset(&info, 0, sizeof(info));
504  info.keysize = sizeof(pgssHashKey);
505  info.entrysize = sizeof(pgssEntry);
506  pgss_hash = ShmemInitHash("pg_stat_statements hash",
508  &info,
510 
511  LWLockRelease(AddinShmemInitLock);
512 
513  /*
514  * If we're in the postmaster (or a standalone backend...), set up a shmem
515  * exit hook to dump the statistics to disk.
516  */
517  if (!IsUnderPostmaster)
519 
520  /*
521  * Done if some other process already completed our initialization.
522  */
523  if (found)
524  return;
525 
526  /*
527  * Note: we don't bother with locks here, because there should be no other
528  * processes running when this code is reached.
529  */
530 
531  /* Unlink query text file possibly left over from crash */
532  unlink(PGSS_TEXT_FILE);
533 
534  /* Allocate new query text temp file */
536  if (qfile == NULL)
537  goto write_error;
538 
539  /*
540  * If we were told not to load old statistics, we're done. (Note we do
541  * not try to unlink any old dump file in this case. This seems a bit
542  * questionable but it's the historical behavior.)
543  */
544  if (!pgss_save)
545  {
546  FreeFile(qfile);
547  return;
548  }
549 
550  /*
551  * Attempt to load old statistics from the dump file.
552  */
554  if (file == NULL)
555  {
556  if (errno != ENOENT)
557  goto read_error;
558  /* No existing persisted stats file, so we're done */
559  FreeFile(qfile);
560  return;
561  }
562 
563  buffer_size = 2048;
564  buffer = (char *) palloc(buffer_size);
565 
566  if (fread(&header, sizeof(uint32), 1, file) != 1 ||
567  fread(&pgver, sizeof(uint32), 1, file) != 1 ||
568  fread(&num, sizeof(int32), 1, file) != 1)
569  goto read_error;
570 
571  if (header != PGSS_FILE_HEADER ||
572  pgver != PGSS_PG_MAJOR_VERSION)
573  goto data_error;
574 
575  for (i = 0; i < num; i++)
576  {
577  pgssEntry temp;
578  pgssEntry *entry;
579  Size query_offset;
580 
581  if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
582  goto read_error;
583 
584  /* Encoding is the only field we can easily sanity-check */
585  if (!PG_VALID_BE_ENCODING(temp.encoding))
586  goto data_error;
587 
588  /* Resize buffer as needed */
589  if (temp.query_len >= buffer_size)
590  {
591  buffer_size = Max(buffer_size * 2, temp.query_len + 1);
592  buffer = repalloc(buffer, buffer_size);
593  }
594 
595  if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
596  goto read_error;
597 
598  /* Should have a trailing null, but let's make sure */
599  buffer[temp.query_len] = '\0';
600 
601  /* Skip loading "sticky" entries */
602  if (temp.counters.calls == 0)
603  continue;
604 
605  /* Store the query text */
606  query_offset = pgss->extent;
607  if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
608  goto write_error;
609  pgss->extent += temp.query_len + 1;
610 
611  /* make the hashtable entry (discards old entries if too many) */
612  entry = entry_alloc(&temp.key, query_offset, temp.query_len,
613  temp.encoding,
614  false);
615 
616  /* copy in the actual stats */
617  entry->counters = temp.counters;
618  }
619 
620  pfree(buffer);
621  FreeFile(file);
622  FreeFile(qfile);
623 
624  /*
625  * Remove the persisted stats file so it's not included in
626  * backups/replication slaves, etc. A new file will be written on next
627  * shutdown.
628  *
629  * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
630  * because we remove that file on startup; it acts inversely to
631  * PGSS_DUMP_FILE, in that it is only supposed to be around when the
632  * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
633  * when the server is not running. Leaving the file creates no danger of
634  * a newly restored database having a spurious record of execution costs,
635  * which is what we're really concerned about here.
636  */
637  unlink(PGSS_DUMP_FILE);
638 
639  return;
640 
641 read_error:
642  ereport(LOG,
644  errmsg("could not read pg_stat_statement file \"%s\": %m",
645  PGSS_DUMP_FILE)));
646  goto fail;
647 data_error:
648  ereport(LOG,
649  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
650  errmsg("ignoring invalid data in pg_stat_statement file \"%s\"",
651  PGSS_DUMP_FILE)));
652  goto fail;
653 write_error:
654  ereport(LOG,
656  errmsg("could not write pg_stat_statement file \"%s\": %m",
657  PGSS_TEXT_FILE)));
658 fail:
659  if (buffer)
660  pfree(buffer);
661  if (file)
662  FreeFile(file);
663  if (qfile)
664  FreeFile(qfile);
665  /* If possible, throw away the bogus file; ignore any error */
666  unlink(PGSS_DUMP_FILE);
667 
668  /*
669  * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
670  * server is running with pg_stat_statements enabled
671  */
672 }
673 
674 /*
675  * shmem_shutdown hook: Dump statistics into file.
676  *
677  * Note: we don't bother with acquiring lock, because there should be no
678  * other processes running when this is called.
679  */
680 static void
682 {
683  FILE *file;
684  char *qbuffer = NULL;
685  Size qbuffer_size = 0;
686  HASH_SEQ_STATUS hash_seq;
687  int32 num_entries;
688  pgssEntry *entry;
689 
690  /* Don't try to dump during a crash. */
691  if (code)
692  return;
693 
694  /* Safety check ... shouldn't get here unless shmem is set up. */
695  if (!pgss || !pgss_hash)
696  return;
697 
698  /* Don't dump if told not to. */
699  if (!pgss_save)
700  return;
701 
702  file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
703  if (file == NULL)
704  goto error;
705 
706  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
707  goto error;
708  if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
709  goto error;
710  num_entries = hash_get_num_entries(pgss_hash);
711  if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
712  goto error;
713 
714  qbuffer = qtext_load_file(&qbuffer_size);
715  if (qbuffer == NULL)
716  goto error;
717 
718  /*
719  * When serializing to disk, we store query texts immediately after their
720  * entry data. Any orphaned query texts are thereby excluded.
721  */
722  hash_seq_init(&hash_seq, pgss_hash);
723  while ((entry = hash_seq_search(&hash_seq)) != NULL)
724  {
725  int len = entry->query_len;
726  char *qstr = qtext_fetch(entry->query_offset, len,
727  qbuffer, qbuffer_size);
728 
729  if (qstr == NULL)
730  continue; /* Ignore any entries with bogus texts */
731 
732  if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
733  fwrite(qstr, 1, len + 1, file) != len + 1)
734  {
735  /* note: we assume hash_seq_term won't change errno */
736  hash_seq_term(&hash_seq);
737  goto error;
738  }
739  }
740 
741  free(qbuffer);
742  qbuffer = NULL;
743 
744  if (FreeFile(file))
745  {
746  file = NULL;
747  goto error;
748  }
749 
750  /*
751  * Rename file into place, so we atomically replace any old one.
752  */
754 
755  /* Unlink query-texts file; it's not needed once the server is shut down */
756  unlink(PGSS_TEXT_FILE);
757 
758  return;
759 
760 error:
761  ereport(LOG,
763  errmsg("could not write pg_stat_statement file \"%s\": %m",
764  PGSS_DUMP_FILE ".tmp")));
765  if (qbuffer)
766  free(qbuffer);
767  if (file)
768  FreeFile(file);
769  unlink(PGSS_DUMP_FILE ".tmp");
770  unlink(PGSS_TEXT_FILE);
771 }
772 
773 /*
774  * Post-parse-analysis hook: mark query with a queryId
775  */
776 static void
778 {
779  pgssJumbleState jstate;
780 
782  prev_post_parse_analyze_hook(pstate, query);
783 
784  /* Assert we didn't do this already */
785  Assert(query->queryId == UINT64CONST(0));
786 
787  /* Safety check... */
788  if (!pgss || !pgss_hash)
789  return;
790 
791  /*
792  * Utility statements get queryId zero. We do this even in cases where
793  * the statement contains an optimizable statement for which a queryId
794  * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases,
795  * runtime control will first go through ProcessUtility and then the
796  * executor, and we don't want the executor hooks to do anything, since we
797  * are already measuring the statement's costs at the utility level.
798  */
799  if (query->utilityStmt)
800  {
801  query->queryId = UINT64CONST(0);
802  return;
803  }
804 
805  /* Set up workspace for query jumbling */
806  jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
807  jstate.jumble_len = 0;
808  jstate.clocations_buf_size = 32;
809  jstate.clocations = (pgssLocationLen *)
810  palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
811  jstate.clocations_count = 0;
812  jstate.highest_extern_param_id = 0;
813 
814  /* Compute query ID and mark the Query node with it */
815  JumbleQuery(&jstate, query);
816  query->queryId =
818 
819  /*
820  * If we are unlucky enough to get a hash of zero, use 1 instead, to
821  * prevent confusion with the utility-statement case.
822  */
823  if (query->queryId == UINT64CONST(0))
824  query->queryId = UINT64CONST(1);
825 
826  /*
827  * If we were able to identify any ignorable constants, we immediately
828  * create a hash table entry for the query, so that we can record the
829  * normalized form of the query string. If there were no such constants,
830  * the normalized string would be the same as the query text anyway, so
831  * there's no need for an early entry.
832  */
833  if (jstate.clocations_count > 0)
834  pgss_store(pstate->p_sourcetext,
835  query->queryId,
836  query->stmt_location,
837  query->stmt_len,
838  0,
839  0,
840  NULL,
841  &jstate);
842 }
843 
844 /*
845  * ExecutorStart hook: start up tracking if needed
846  */
847 static void
848 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
849 {
850  if (prev_ExecutorStart)
851  prev_ExecutorStart(queryDesc, eflags);
852  else
853  standard_ExecutorStart(queryDesc, eflags);
854 
855  /*
856  * If query has queryId zero, don't track it. This prevents double
857  * counting of optimizable statements that are directly contained in
858  * utility statements.
859  */
860  if (pgss_enabled() && queryDesc->plannedstmt->queryId != UINT64CONST(0))
861  {
862  /*
863  * Set up to track total elapsed time in ExecutorRun. Make sure the
864  * space is allocated in the per-query context so it will go away at
865  * ExecutorEnd.
866  */
867  if (queryDesc->totaltime == NULL)
868  {
869  MemoryContext oldcxt;
870 
871  oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
872  queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
873  MemoryContextSwitchTo(oldcxt);
874  }
875  }
876 }
877 
878 /*
879  * ExecutorRun hook: all we need do is track nesting depth
880  */
881 static void
882 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
883  bool execute_once)
884 {
885  nested_level++;
886  PG_TRY();
887  {
888  if (prev_ExecutorRun)
889  prev_ExecutorRun(queryDesc, direction, count, execute_once);
890  else
891  standard_ExecutorRun(queryDesc, direction, count, execute_once);
892  nested_level--;
893  }
894  PG_CATCH();
895  {
896  nested_level--;
897  PG_RE_THROW();
898  }
899  PG_END_TRY();
900 }
901 
902 /*
903  * ExecutorFinish hook: all we need do is track nesting depth
904  */
905 static void
907 {
908  nested_level++;
909  PG_TRY();
910  {
912  prev_ExecutorFinish(queryDesc);
913  else
914  standard_ExecutorFinish(queryDesc);
915  nested_level--;
916  }
917  PG_CATCH();
918  {
919  nested_level--;
920  PG_RE_THROW();
921  }
922  PG_END_TRY();
923 }
924 
925 /*
926  * ExecutorEnd hook: store results if needed
927  */
928 static void
930 {
931  uint64 queryId = queryDesc->plannedstmt->queryId;
932 
933  if (queryId != UINT64CONST(0) && queryDesc->totaltime && pgss_enabled())
934  {
935  /*
936  * Make sure stats accumulation is done. (Note: it's okay if several
937  * levels of hook all do this.)
938  */
939  InstrEndLoop(queryDesc->totaltime);
940 
941  pgss_store(queryDesc->sourceText,
942  queryId,
943  queryDesc->plannedstmt->stmt_location,
944  queryDesc->plannedstmt->stmt_len,
945  queryDesc->totaltime->total * 1000.0, /* convert to msec */
946  queryDesc->estate->es_processed,
947  &queryDesc->totaltime->bufusage,
948  NULL);
949  }
950 
951  if (prev_ExecutorEnd)
952  prev_ExecutorEnd(queryDesc);
953  else
954  standard_ExecutorEnd(queryDesc);
955 }
956 
957 /*
958  * ProcessUtility hook
959  */
960 static void
961 pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
962  ProcessUtilityContext context,
963  ParamListInfo params, QueryEnvironment *queryEnv,
964  DestReceiver *dest, char *completionTag)
965 {
966  Node *parsetree = pstmt->utilityStmt;
967 
968  /*
969  * If it's an EXECUTE statement, we don't track it and don't increment the
970  * nesting level. This allows the cycles to be charged to the underlying
971  * PREPARE instead (by the Executor hooks), which is much more useful.
972  *
973  * We also don't track execution of PREPARE. If we did, we would get one
974  * hash table entry for the PREPARE (with hash calculated from the query
975  * string), and then a different one with the same query string (but hash
976  * calculated from the query tree) would be used to accumulate costs of
977  * ensuing EXECUTEs. This would be confusing, and inconsistent with other
978  * cases where planning time is not included at all.
979  *
980  * Likewise, we don't track execution of DEALLOCATE.
981  */
982  if (pgss_track_utility && pgss_enabled() &&
983  !IsA(parsetree, ExecuteStmt) &&
984  !IsA(parsetree, PrepareStmt) &&
985  !IsA(parsetree, DeallocateStmt))
986  {
987  instr_time start;
989  uint64 rows;
990  BufferUsage bufusage_start,
991  bufusage;
992 
993  bufusage_start = pgBufferUsage;
994  INSTR_TIME_SET_CURRENT(start);
995 
996  nested_level++;
997  PG_TRY();
998  {
1000  prev_ProcessUtility(pstmt, queryString,
1001  context, params, queryEnv,
1002  dest, completionTag);
1003  else
1004  standard_ProcessUtility(pstmt, queryString,
1005  context, params, queryEnv,
1006  dest, completionTag);
1007  nested_level--;
1008  }
1009  PG_CATCH();
1010  {
1011  nested_level--;
1012  PG_RE_THROW();
1013  }
1014  PG_END_TRY();
1015 
1016  INSTR_TIME_SET_CURRENT(duration);
1017  INSTR_TIME_SUBTRACT(duration, start);
1018 
1019  /* parse command tag to retrieve the number of affected rows. */
1020  if (completionTag &&
1021  strncmp(completionTag, "COPY ", 5) == 0)
1022  rows = pg_strtouint64(completionTag + 5, NULL, 10);
1023  else
1024  rows = 0;
1025 
1026  /* calc differences of buffer counters. */
1027  bufusage.shared_blks_hit =
1029  bufusage.shared_blks_read =
1031  bufusage.shared_blks_dirtied =
1033  bufusage.shared_blks_written =
1035  bufusage.local_blks_hit =
1036  pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
1037  bufusage.local_blks_read =
1039  bufusage.local_blks_dirtied =
1041  bufusage.local_blks_written =
1043  bufusage.temp_blks_read =
1044  pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
1045  bufusage.temp_blks_written =
1048  INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
1050  INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
1051 
1052  pgss_store(queryString,
1053  0, /* signal that it's a utility stmt */
1054  pstmt->stmt_location,
1055  pstmt->stmt_len,
1056  INSTR_TIME_GET_MILLISEC(duration),
1057  rows,
1058  &bufusage,
1059  NULL);
1060  }
1061  else
1062  {
1063  if (prev_ProcessUtility)
1064  prev_ProcessUtility(pstmt, queryString,
1065  context, params, queryEnv,
1066  dest, completionTag);
1067  else
1068  standard_ProcessUtility(pstmt, queryString,
1069  context, params, queryEnv,
1070  dest, completionTag);
1071  }
1072 }
1073 
1074 /*
1075  * Given an arbitrarily long query string, produce a hash for the purposes of
1076  * identifying the query, without normalizing constants. Used when hashing
1077  * utility statements.
1078  */
1079 static uint64
1080 pgss_hash_string(const char *str, int len)
1081 {
1082  return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
1083  len, 0));
1084 }
1085 
1086 /*
1087  * Store some statistics for a statement.
1088  *
1089  * If queryId is 0 then this is a utility statement and we should compute
1090  * a suitable queryId internally.
1091  *
1092  * If jstate is not NULL then we're trying to create an entry for which
1093  * we have no statistics as yet; we just want to record the normalized
1094  * query string. total_time, rows, bufusage are ignored in this case.
1095  */
1096 static void
1097 pgss_store(const char *query, uint64 queryId,
1098  int query_location, int query_len,
1099  double total_time, uint64 rows,
1100  const BufferUsage *bufusage,
1101  pgssJumbleState *jstate)
1102 {
1103  pgssHashKey key;
1104  pgssEntry *entry;
1105  char *norm_query = NULL;
1106  int encoding = GetDatabaseEncoding();
1107 
1108  Assert(query != NULL);
1109 
1110  /* Safety check... */
1111  if (!pgss || !pgss_hash)
1112  return;
1113 
1114  /*
1115  * Confine our attention to the relevant part of the string, if the query
1116  * is a portion of a multi-statement source string.
1117  *
1118  * First apply starting offset, unless it's -1 (unknown).
1119  */
1120  if (query_location >= 0)
1121  {
1122  Assert(query_location <= strlen(query));
1123  query += query_location;
1124  /* Length of 0 (or -1) means "rest of string" */
1125  if (query_len <= 0)
1126  query_len = strlen(query);
1127  else
1128  Assert(query_len <= strlen(query));
1129  }
1130  else
1131  {
1132  /* If query location is unknown, distrust query_len as well */
1133  query_location = 0;
1134  query_len = strlen(query);
1135  }
1136 
1137  /*
1138  * Discard leading and trailing whitespace, too. Use scanner_isspace()
1139  * not libc's isspace(), because we want to match the lexer's behavior.
1140  */
1141  while (query_len > 0 && scanner_isspace(query[0]))
1142  query++, query_location++, query_len--;
1143  while (query_len > 0 && scanner_isspace(query[query_len - 1]))
1144  query_len--;
1145 
1146  /*
1147  * For utility statements, we just hash the query string to get an ID.
1148  */
1149  if (queryId == UINT64CONST(0))
1150  queryId = pgss_hash_string(query, query_len);
1151 
1152  /* Set up key for hashtable search */
1153  key.userid = GetUserId();
1154  key.dbid = MyDatabaseId;
1155  key.queryid = queryId;
1156 
1157  /* Lookup the hash table entry with shared lock. */
1158  LWLockAcquire(pgss->lock, LW_SHARED);
1159 
1160  entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1161 
1162  /* Create new entry, if not present */
1163  if (!entry)
1164  {
1165  Size query_offset;
1166  int gc_count;
1167  bool stored;
1168  bool do_gc;
1169 
1170  /*
1171  * Create a new, normalized query string if caller asked. We don't
1172  * need to hold the lock while doing this work. (Note: in any case,
1173  * it's possible that someone else creates a duplicate hashtable entry
1174  * in the interval where we don't hold the lock below. That case is
1175  * handled by entry_alloc.)
1176  */
1177  if (jstate)
1178  {
1179  LWLockRelease(pgss->lock);
1180  norm_query = generate_normalized_query(jstate, query,
1181  query_location,
1182  &query_len,
1183  encoding);
1184  LWLockAcquire(pgss->lock, LW_SHARED);
1185  }
1186 
1187  /* Append new query text to file with only shared lock held */
1188  stored = qtext_store(norm_query ? norm_query : query, query_len,
1189  &query_offset, &gc_count);
1190 
1191  /*
1192  * Determine whether we need to garbage collect external query texts
1193  * while the shared lock is still held. This micro-optimization
1194  * avoids taking the time to decide this while holding exclusive lock.
1195  */
1196  do_gc = need_gc_qtexts();
1197 
1198  /* Need exclusive lock to make a new hashtable entry - promote */
1199  LWLockRelease(pgss->lock);
1201 
1202  /*
1203  * A garbage collection may have occurred while we weren't holding the
1204  * lock. In the unlikely event that this happens, the query text we
1205  * stored above will have been garbage collected, so write it again.
1206  * This should be infrequent enough that doing it while holding
1207  * exclusive lock isn't a performance problem.
1208  */
1209  if (!stored || pgss->gc_count != gc_count)
1210  stored = qtext_store(norm_query ? norm_query : query, query_len,
1211  &query_offset, NULL);
1212 
1213  /* If we failed to write to the text file, give up */
1214  if (!stored)
1215  goto done;
1216 
1217  /* OK to create a new hashtable entry */
1218  entry = entry_alloc(&key, query_offset, query_len, encoding,
1219  jstate != NULL);
1220 
1221  /* If needed, perform garbage collection while exclusive lock held */
1222  if (do_gc)
1223  gc_qtexts();
1224  }
1225 
1226  /* Increment the counts, except when jstate is not NULL */
1227  if (!jstate)
1228  {
1229  /*
1230  * Grab the spinlock while updating the counters (see comment about
1231  * locking rules at the head of the file)
1232  */
1233  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1234 
1235  SpinLockAcquire(&e->mutex);
1236 
1237  /* "Unstick" entry if it was previously sticky */
1238  if (e->counters.calls == 0)
1239  e->counters.usage = USAGE_INIT;
1240 
1241  e->counters.calls += 1;
1242  e->counters.total_time += total_time;
1243  if (e->counters.calls == 1)
1244  {
1245  e->counters.min_time = total_time;
1246  e->counters.max_time = total_time;
1247  e->counters.mean_time = total_time;
1248  }
1249  else
1250  {
1251  /*
1252  * Welford's method for accurately computing variance. See
1253  * <http://www.johndcook.com/blog/standard_deviation/>
1254  */
1255  double old_mean = e->counters.mean_time;
1256 
1257  e->counters.mean_time +=
1258  (total_time - old_mean) / e->counters.calls;
1259  e->counters.sum_var_time +=
1260  (total_time - old_mean) * (total_time - e->counters.mean_time);
1261 
1262  /* calculate min and max time */
1263  if (e->counters.min_time > total_time)
1264  e->counters.min_time = total_time;
1265  if (e->counters.max_time < total_time)
1266  e->counters.max_time = total_time;
1267  }
1268  e->counters.rows += rows;
1269  e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1270  e->counters.shared_blks_read += bufusage->shared_blks_read;
1273  e->counters.local_blks_hit += bufusage->local_blks_hit;
1274  e->counters.local_blks_read += bufusage->local_blks_read;
1277  e->counters.temp_blks_read += bufusage->temp_blks_read;
1281  e->counters.usage += USAGE_EXEC(total_time);
1282 
1283  SpinLockRelease(&e->mutex);
1284  }
1285 
1286 done:
1287  LWLockRelease(pgss->lock);
1288 
1289  /* We postpone this clean-up until we're out of the lock */
1290  if (norm_query)
1291  pfree(norm_query);
1292 }
1293 
1294 /*
1295  * Reset all statement statistics.
1296  */
1297 Datum
1299 {
1300  if (!pgss || !pgss_hash)
1301  ereport(ERROR,
1302  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1303  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1304  entry_reset();
1305  PG_RETURN_VOID();
1306 }
1307 
/*
 * Number of output arguments (columns) returned by pg_stat_statements()
 * for each supported API version.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS			23	/* maximum of above */
1314 
1315 /*
1316  * Retrieve statement statistics.
1317  *
1318  * The SQL API of this function has changed multiple times, and will likely
1319  * do so again in future. To support the case where a newer version of this
1320  * loadable module is being used with an old SQL declaration of the function,
1321  * we continue to support the older API versions. For 1.2 and later, the
1322  * expected API version is identified by embedding it in the C name of the
1323  * function. Unfortunately we weren't bright enough to do that for 1.1.
1324  */
1325 Datum
1327 {
1328  bool showtext = PG_GETARG_BOOL(0);
1329 
1330  pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1331 
1332  return (Datum) 0;
1333 }
1334 
1335 Datum
1337 {
1338  bool showtext = PG_GETARG_BOOL(0);
1339 
1340  pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1341 
1342  return (Datum) 0;
1343 }
1344 
1345 /*
1346  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1347  * This can be removed someday, perhaps.
1348  */
1349 Datum
1351 {
1352  /* If it's really API 1.1, we'll figure that out below */
1353  pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1354 
1355  return (Datum) 0;
1356 }
1357 
1358 /* Common code for all versions of pg_stat_statements() */
1359 static void
1361  pgssVersion api_version,
1362  bool showtext)
1363 {
1364  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1365  TupleDesc tupdesc;
1366  Tuplestorestate *tupstore;
1367  MemoryContext per_query_ctx;
1368  MemoryContext oldcontext;
1369  Oid userid = GetUserId();
1370  bool is_allowed_role = false;
1371  char *qbuffer = NULL;
1372  Size qbuffer_size = 0;
1373  Size extent = 0;
1374  int gc_count = 0;
1375  HASH_SEQ_STATUS hash_seq;
1376  pgssEntry *entry;
1377 
1378  /* Superusers or members of pg_read_all_stats members are allowed */
1380 
1381  /* hash table must exist already */
1382  if (!pgss || !pgss_hash)
1383  ereport(ERROR,
1384  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1385  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1386 
1387  /* check to see if caller supports us returning a tuplestore */
1388  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1389  ereport(ERROR,
1390  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1391  errmsg("set-valued function called in context that cannot accept a set")));
1392  if (!(rsinfo->allowedModes & SFRM_Materialize))
1393  ereport(ERROR,
1394  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1395  errmsg("materialize mode required, but it is not " \
1396  "allowed in this context")));
1397 
1398  /* Switch into long-lived context to construct returned data structures */
1399  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1400  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1401 
1402  /* Build a tuple descriptor for our result type */
1403  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1404  elog(ERROR, "return type must be a row type");
1405 
1406  /*
1407  * Check we have the expected number of output arguments. Aside from
1408  * being a good safety check, we need a kluge here to detect API version
1409  * 1.1, which was wedged into the code in an ill-considered way.
1410  */
1411  switch (tupdesc->natts)
1412  {
1414  if (api_version != PGSS_V1_0)
1415  elog(ERROR, "incorrect number of output arguments");
1416  break;
1418  /* pg_stat_statements() should have told us 1.0 */
1419  if (api_version != PGSS_V1_0)
1420  elog(ERROR, "incorrect number of output arguments");
1421  api_version = PGSS_V1_1;
1422  break;
1424  if (api_version != PGSS_V1_2)
1425  elog(ERROR, "incorrect number of output arguments");
1426  break;
1428  if (api_version != PGSS_V1_3)
1429  elog(ERROR, "incorrect number of output arguments");
1430  break;
1431  default:
1432  elog(ERROR, "incorrect number of output arguments");
1433  }
1434 
1435  tupstore = tuplestore_begin_heap(true, false, work_mem);
1436  rsinfo->returnMode = SFRM_Materialize;
1437  rsinfo->setResult = tupstore;
1438  rsinfo->setDesc = tupdesc;
1439 
1440  MemoryContextSwitchTo(oldcontext);
1441 
1442  /*
1443  * We'd like to load the query text file (if needed) while not holding any
1444  * lock on pgss->lock. In the worst case we'll have to do this again
1445  * after we have the lock, but it's unlikely enough to make this a win
1446  * despite occasional duplicated work. We need to reload if anybody
1447  * writes to the file (either a retail qtext_store(), or a garbage
1448  * collection) between this point and where we've gotten shared lock. If
1449  * a qtext_store is actually in progress when we look, we might as well
1450  * skip the speculative load entirely.
1451  */
1452  if (showtext)
1453  {
1454  int n_writers;
1455 
1456  /* Take the mutex so we can examine variables */
1457  {
1458  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1459 
1460  SpinLockAcquire(&s->mutex);
1461  extent = s->extent;
1462  n_writers = s->n_writers;
1463  gc_count = s->gc_count;
1464  SpinLockRelease(&s->mutex);
1465  }
1466 
1467  /* No point in loading file now if there are active writers */
1468  if (n_writers == 0)
1469  qbuffer = qtext_load_file(&qbuffer_size);
1470  }
1471 
1472  /*
1473  * Get shared lock, load or reload the query text file if we must, and
1474  * iterate over the hashtable entries.
1475  *
1476  * With a large hash table, we might be holding the lock rather longer
1477  * than one could wish. However, this only blocks creation of new hash
1478  * table entries, and the larger the hash table the less likely that is to
1479  * be needed. So we can hope this is okay. Perhaps someday we'll decide
1480  * we need to partition the hash table to limit the time spent holding any
1481  * one lock.
1482  */
1483  LWLockAcquire(pgss->lock, LW_SHARED);
1484 
1485  if (showtext)
1486  {
1487  /*
1488  * Here it is safe to examine extent and gc_count without taking the
1489  * mutex. Note that although other processes might change
1490  * pgss->extent just after we look at it, the strings they then write
1491  * into the file cannot yet be referenced in the hashtable, so we
1492  * don't care whether we see them or not.
1493  *
1494  * If qtext_load_file fails, we just press on; we'll return NULL for
1495  * every query text.
1496  */
1497  if (qbuffer == NULL ||
1498  pgss->extent != extent ||
1499  pgss->gc_count != gc_count)
1500  {
1501  if (qbuffer)
1502  free(qbuffer);
1503  qbuffer = qtext_load_file(&qbuffer_size);
1504  }
1505  }
1506 
1507  hash_seq_init(&hash_seq, pgss_hash);
1508  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1509  {
1511  bool nulls[PG_STAT_STATEMENTS_COLS];
1512  int i = 0;
1513  Counters tmp;
1514  double stddev;
1515  int64 queryid = entry->key.queryid;
1516 
1517  memset(values, 0, sizeof(values));
1518  memset(nulls, 0, sizeof(nulls));
1519 
1520  values[i++] = ObjectIdGetDatum(entry->key.userid);
1521  values[i++] = ObjectIdGetDatum(entry->key.dbid);
1522 
1523  if (is_allowed_role || entry->key.userid == userid)
1524  {
1525  if (api_version >= PGSS_V1_2)
1526  values[i++] = Int64GetDatumFast(queryid);
1527 
1528  if (showtext)
1529  {
1530  char *qstr = qtext_fetch(entry->query_offset,
1531  entry->query_len,
1532  qbuffer,
1533  qbuffer_size);
1534 
1535  if (qstr)
1536  {
1537  char *enc;
1538 
1539  enc = pg_any_to_server(qstr,
1540  entry->query_len,
1541  entry->encoding);
1542 
1543  values[i++] = CStringGetTextDatum(enc);
1544 
1545  if (enc != qstr)
1546  pfree(enc);
1547  }
1548  else
1549  {
1550  /* Just return a null if we fail to find the text */
1551  nulls[i++] = true;
1552  }
1553  }
1554  else
1555  {
1556  /* Query text not requested */
1557  nulls[i++] = true;
1558  }
1559  }
1560  else
1561  {
1562  /* Don't show queryid */
1563  if (api_version >= PGSS_V1_2)
1564  nulls[i++] = true;
1565 
1566  /*
1567  * Don't show query text, but hint as to the reason for not doing
1568  * so if it was requested
1569  */
1570  if (showtext)
1571  values[i++] = CStringGetTextDatum("<insufficient privilege>");
1572  else
1573  nulls[i++] = true;
1574  }
1575 
1576  /* copy counters to a local variable to keep locking time short */
1577  {
1578  volatile pgssEntry *e = (volatile pgssEntry *) entry;
1579 
1580  SpinLockAcquire(&e->mutex);
1581  tmp = e->counters;
1582  SpinLockRelease(&e->mutex);
1583  }
1584 
1585  /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1586  if (tmp.calls == 0)
1587  continue;
1588 
1589  values[i++] = Int64GetDatumFast(tmp.calls);
1590  values[i++] = Float8GetDatumFast(tmp.total_time);
1591  if (api_version >= PGSS_V1_3)
1592  {
1593  values[i++] = Float8GetDatumFast(tmp.min_time);
1594  values[i++] = Float8GetDatumFast(tmp.max_time);
1595  values[i++] = Float8GetDatumFast(tmp.mean_time);
1596 
1597  /*
1598  * Note we are calculating the population variance here, not the
1599  * sample variance, as we have data for the whole population, so
1600  * Bessel's correction is not used, and we don't divide by
1601  * tmp.calls - 1.
1602  */
1603  if (tmp.calls > 1)
1604  stddev = sqrt(tmp.sum_var_time / tmp.calls);
1605  else
1606  stddev = 0.0;
1607  values[i++] = Float8GetDatumFast(stddev);
1608  }
1609  values[i++] = Int64GetDatumFast(tmp.rows);
1610  values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1611  values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1612  if (api_version >= PGSS_V1_1)
1613  values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1614  values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1615  values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1616  values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1617  if (api_version >= PGSS_V1_1)
1618  values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1619  values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1620  values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1621  values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1622  if (api_version >= PGSS_V1_1)
1623  {
1624  values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1625  values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1626  }
1627 
1628  Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1629  api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1630  api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1631  api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1632  -1 /* fail if you forget to update this assert */ ));
1633 
1634  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1635  }
1636 
1637  /* clean up and return the tuplestore */
1638  LWLockRelease(pgss->lock);
1639 
1640  if (qbuffer)
1641  free(qbuffer);
1642 
1643  tuplestore_donestoring(tupstore);
1644 }
1645 
1646 /*
1647  * Estimate shared memory space needed.
1648  */
1649 static Size
1651 {
1652  Size size;
1653 
1654  size = MAXALIGN(sizeof(pgssSharedState));
1655  size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
1656 
1657  return size;
1658 }
1659 
1660 /*
1661  * Allocate a new hashtable entry.
1662  * caller must hold an exclusive lock on pgss->lock
1663  *
1664  * "query" need not be null-terminated; we rely on query_len instead
1665  *
1666  * If "sticky" is true, make the new entry artificially sticky so that it will
1667  * probably still be there when the query finishes execution. We do this by
1668  * giving it a median usage value rather than the normal value. (Strictly
1669  * speaking, query strings are normalized on a best effort basis, though it
1670  * would be difficult to demonstrate this even under artificial conditions.)
1671  *
1672  * Note: despite needing exclusive lock, it's not an error for the target
1673  * entry to already exist. This is because pgss_store releases and
1674  * reacquires lock after failing to find a match; so someone else could
1675  * have made the entry while we waited to get exclusive lock.
1676  */
1677 static pgssEntry *
1678 entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
1679  bool sticky)
1680 {
1681  pgssEntry *entry;
1682  bool found;
1683 
1684  /* Make space if needed */
1685  while (hash_get_num_entries(pgss_hash) >= pgss_max)
1686  entry_dealloc();
1687 
1688  /* Find or create an entry with desired hash code */
1689  entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1690 
1691  if (!found)
1692  {
1693  /* New entry, initialize it */
1694 
1695  /* reset the statistics */
1696  memset(&entry->counters, 0, sizeof(Counters));
1697  /* set the appropriate initial usage count */
1698  entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1699  /* re-initialize the mutex each time ... we assume no one using it */
1700  SpinLockInit(&entry->mutex);
1701  /* ... and don't forget the query text metadata */
1702  Assert(query_len >= 0);
1703  entry->query_offset = query_offset;
1704  entry->query_len = query_len;
1705  entry->encoding = encoding;
1706  }
1707 
1708  return entry;
1709 }
1710 
1711 /*
1712  * qsort comparator for sorting into increasing usage order
1713  */
1714 static int
1715 entry_cmp(const void *lhs, const void *rhs)
1716 {
1717  double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1718  double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1719 
1720  if (l_usage < r_usage)
1721  return -1;
1722  else if (l_usage > r_usage)
1723  return +1;
1724  else
1725  return 0;
1726 }
1727 
1728 /*
1729  * Deallocate least-used entries.
1730  *
1731  * Caller must hold an exclusive lock on pgss->lock.
1732  */
1733 static void
1735 {
1736  HASH_SEQ_STATUS hash_seq;
1737  pgssEntry **entries;
1738  pgssEntry *entry;
1739  int nvictims;
1740  int i;
1741  Size tottextlen;
1742  int nvalidtexts;
1743 
1744  /*
1745  * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1746  * While we're scanning the table, apply the decay factor to the usage
1747  * values, and update the mean query length.
1748  *
1749  * Note that the mean query length is almost immediately obsolete, since
1750  * we compute it before not after discarding the least-used entries.
1751  * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
1752  * making two passes to get a more current result. Likewise, the new
1753  * cur_median_usage includes the entries we're about to zap.
1754  */
1755 
1756  entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1757 
1758  i = 0;
1759  tottextlen = 0;
1760  nvalidtexts = 0;
1761 
1762  hash_seq_init(&hash_seq, pgss_hash);
1763  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1764  {
1765  entries[i++] = entry;
1766  /* "Sticky" entries get a different usage decay rate. */
1767  if (entry->counters.calls == 0)
1769  else
1771  /* In the mean length computation, ignore dropped texts. */
1772  if (entry->query_len >= 0)
1773  {
1774  tottextlen += entry->query_len + 1;
1775  nvalidtexts++;
1776  }
1777  }
1778 
1779  /* Sort into increasing order by usage */
1780  qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1781 
1782  /* Record the (approximate) median usage */
1783  if (i > 0)
1784  pgss->cur_median_usage = entries[i / 2]->counters.usage;
1785  /* Record the mean query length */
1786  if (nvalidtexts > 0)
1787  pgss->mean_query_len = tottextlen / nvalidtexts;
1788  else
1790 
1791  /* Now zap an appropriate fraction of lowest-usage entries */
1792  nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1793  nvictims = Min(nvictims, i);
1794 
1795  for (i = 0; i < nvictims; i++)
1796  {
1797  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
1798  }
1799 
1800  pfree(entries);
1801 }
1802 
1803 /*
1804  * Given a query string (not necessarily null-terminated), allocate a new
1805  * entry in the external query text file and store the string there.
1806  *
1807  * If successful, returns true, and stores the new entry's offset in the file
1808  * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
1809  * number of garbage collections that have occurred so far.
1810  *
1811  * On failure, returns false.
1812  *
1813  * At least a shared lock on pgss->lock must be held by the caller, so as
1814  * to prevent a concurrent garbage collection. Share-lock-holding callers
1815  * should pass a gc_count pointer to obtain the number of garbage collections,
1816  * so that they can recheck the count after obtaining exclusive lock to
1817  * detect whether a garbage collection occurred (and removed this entry).
1818  */
1819 static bool
1820 qtext_store(const char *query, int query_len,
1821  Size *query_offset, int *gc_count)
1822 {
1823  Size off;
1824  int fd;
1825 
1826  /*
1827  * We use a spinlock to protect extent/n_writers/gc_count, so that
1828  * multiple processes may execute this function concurrently.
1829  */
1830  {
1831  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1832 
1833  SpinLockAcquire(&s->mutex);
1834  off = s->extent;
1835  s->extent += query_len + 1;
1836  s->n_writers++;
1837  if (gc_count)
1838  *gc_count = s->gc_count;
1839  SpinLockRelease(&s->mutex);
1840  }
1841 
1842  *query_offset = off;
1843 
1844  /* Now write the data into the successfully-reserved part of the file */
1845  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
1846  if (fd < 0)
1847  goto error;
1848 
1849  if (lseek(fd, off, SEEK_SET) != off)
1850  goto error;
1851 
1852  if (write(fd, query, query_len) != query_len)
1853  goto error;
1854  if (write(fd, "\0", 1) != 1)
1855  goto error;
1856 
1857  CloseTransientFile(fd);
1858 
1859  /* Mark our write complete */
1860  {
1861  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1862 
1863  SpinLockAcquire(&s->mutex);
1864  s->n_writers--;
1865  SpinLockRelease(&s->mutex);
1866  }
1867 
1868  return true;
1869 
1870 error:
1871  ereport(LOG,
1873  errmsg("could not write pg_stat_statement file \"%s\": %m",
1874  PGSS_TEXT_FILE)));
1875 
1876  if (fd >= 0)
1877  CloseTransientFile(fd);
1878 
1879  /* Mark our write complete */
1880  {
1881  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1882 
1883  SpinLockAcquire(&s->mutex);
1884  s->n_writers--;
1885  SpinLockRelease(&s->mutex);
1886  }
1887 
1888  return false;
1889 }
1890 
1891 /*
1892  * Read the external query text file into a malloc'd buffer.
1893  *
1894  * Returns NULL (without throwing an error) if unable to read, eg
1895  * file not there or insufficient memory.
1896  *
1897  * On success, the buffer size is also returned into *buffer_size.
1898  *
1899  * This can be called without any lock on pgss->lock, but in that case
1900  * the caller is responsible for verifying that the result is sane.
1901  */
1902 static char *
1903 qtext_load_file(Size *buffer_size)
1904 {
1905  char *buf;
1906  int fd;
1907  struct stat stat;
1908 
1909  fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
1910  if (fd < 0)
1911  {
1912  if (errno != ENOENT)
1913  ereport(LOG,
1915  errmsg("could not read pg_stat_statement file \"%s\": %m",
1916  PGSS_TEXT_FILE)));
1917  return NULL;
1918  }
1919 
1920  /* Get file length */
1921  if (fstat(fd, &stat))
1922  {
1923  ereport(LOG,
1925  errmsg("could not stat pg_stat_statement file \"%s\": %m",
1926  PGSS_TEXT_FILE)));
1927  CloseTransientFile(fd);
1928  return NULL;
1929  }
1930 
1931  /* Allocate buffer; beware that off_t might be wider than size_t */
1932  if (stat.st_size <= MaxAllocHugeSize)
1933  buf = (char *) malloc(stat.st_size);
1934  else
1935  buf = NULL;
1936  if (buf == NULL)
1937  {
1938  ereport(LOG,
1939  (errcode(ERRCODE_OUT_OF_MEMORY),
1940  errmsg("out of memory"),
1941  errdetail("Could not allocate enough memory to read pg_stat_statement file \"%s\".",
1942  PGSS_TEXT_FILE)));
1943  CloseTransientFile(fd);
1944  return NULL;
1945  }
1946 
1947  /*
1948  * OK, slurp in the file. If we get a short read and errno doesn't get
1949  * set, the reason is probably that garbage collection truncated the file
1950  * since we did the fstat(), so we don't log a complaint --- but we don't
1951  * return the data, either, since it's most likely corrupt due to
1952  * concurrent writes from garbage collection.
1953  */
1954  errno = 0;
1955  if (read(fd, buf, stat.st_size) != stat.st_size)
1956  {
1957  if (errno)
1958  ereport(LOG,
1960  errmsg("could not read pg_stat_statement file \"%s\": %m",
1961  PGSS_TEXT_FILE)));
1962  free(buf);
1963  CloseTransientFile(fd);
1964  return NULL;
1965  }
1966 
1967  CloseTransientFile(fd);
1968 
1969  *buffer_size = stat.st_size;
1970  return buf;
1971 }
1972 
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset and query_len describe where the text is expected to be
 * within "buffer", which holds buffer_size bytes.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminating null must fit
	 * inside the buffer.  The test is phrased as a subtraction so that a
	 * huge query_offset cannot wrap around and slip past the check, as it
	 * could with "query_offset + query_len >= buffer_size".
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;
	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;
	/* Looks OK */
	return buffer + query_offset;
}
1996 
1997 /*
1998  * Do we need to garbage-collect the external query text file?
1999  *
2000  * Caller should hold at least a shared lock on pgss->lock.
2001  */
2002 static bool
2004 {
2005  Size extent;
2006 
2007  /* Read shared extent pointer */
2008  {
2009  volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
2010 
2011  SpinLockAcquire(&s->mutex);
2012  extent = s->extent;
2013  SpinLockRelease(&s->mutex);
2014  }
2015 
2016  /* Don't proceed if file does not exceed 512 bytes per possible entry */
2017  if (extent < 512 * pgss_max)
2018  return false;
2019 
2020  /*
2021  * Don't proceed if file is less than about 50% bloat. Nothing can or
2022  * should be done in the event of unusually large query texts accounting
2023  * for file's large size. We go to the trouble of maintaining the mean
2024  * query length in order to prevent garbage collection from thrashing
2025  * uselessly.
2026  */
2027  if (extent < pgss->mean_query_len * pgss_max * 2)
2028  return false;
2029 
2030  return true;
2031 }
2032 
2033 /*
2034  * Garbage-collect orphaned query texts in external file.
2035  *
2036  * This won't be called often in the typical case, since it's likely that
2037  * there won't be too much churn, and besides, a similar compaction process
2038  * occurs when serializing to disk at shutdown or as part of resetting.
2039  * Despite this, it seems prudent to plan for the edge case where the file
2040  * becomes unreasonably large, with no other method of compaction likely to
2041  * occur in the foreseeable future.
2042  *
2043  * The caller must hold an exclusive lock on pgss->lock.
2044  *
2045  * At the first sign of trouble we unlink the query text file to get a clean
2046  * slate (although existing statistics are retained), rather than risk
2047  * thrashing by allowing the same problem case to recur indefinitely.
2048  */
2049 static void
2051 {
2052  char *qbuffer;
2053  Size qbuffer_size;
2054  FILE *qfile = NULL;
2055  HASH_SEQ_STATUS hash_seq;
2056  pgssEntry *entry;
2057  Size extent;
2058  int nentries;
2059 
2060  /*
2061  * When called from pgss_store, some other session might have proceeded
2062  * with garbage collection in the no-lock-held interim of lock strength
2063  * escalation. Check once more that this is actually necessary.
2064  */
2065  if (!need_gc_qtexts())
2066  return;
2067 
2068  /*
2069  * Load the old texts file. If we fail (out of memory, for instance),
2070  * invalidate query texts. Hopefully this is rare. It might seem better
2071  * to leave things alone on an OOM failure, but the problem is that the
2072  * file is only going to get bigger; hoping for a future non-OOM result is
2073  * risky and can easily lead to complete denial of service.
2074  */
2075  qbuffer = qtext_load_file(&qbuffer_size);
2076  if (qbuffer == NULL)
2077  goto gc_fail;
2078 
2079  /*
2080  * We overwrite the query texts file in place, so as to reduce the risk of
2081  * an out-of-disk-space failure. Since the file is guaranteed not to get
2082  * larger, this should always work on traditional filesystems; though we
2083  * could still lose on copy-on-write filesystems.
2084  */
2086  if (qfile == NULL)
2087  {
2088  ereport(LOG,
2090  errmsg("could not write pg_stat_statement file \"%s\": %m",
2091  PGSS_TEXT_FILE)));
2092  goto gc_fail;
2093  }
2094 
2095  extent = 0;
2096  nentries = 0;
2097 
2098  hash_seq_init(&hash_seq, pgss_hash);
2099  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2100  {
2101  int query_len = entry->query_len;
2102  char *qry = qtext_fetch(entry->query_offset,
2103  query_len,
2104  qbuffer,
2105  qbuffer_size);
2106 
2107  if (qry == NULL)
2108  {
2109  /* Trouble ... drop the text */
2110  entry->query_offset = 0;
2111  entry->query_len = -1;
2112  /* entry will not be counted in mean query length computation */
2113  continue;
2114  }
2115 
2116  if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2117  {
2118  ereport(LOG,
2120  errmsg("could not write pg_stat_statement file \"%s\": %m",
2121  PGSS_TEXT_FILE)));
2122  hash_seq_term(&hash_seq);
2123  goto gc_fail;
2124  }
2125 
2126  entry->query_offset = extent;
2127  extent += query_len + 1;
2128  nentries++;
2129  }
2130 
2131  /*
2132  * Truncate away any now-unused space. If this fails for some odd reason,
2133  * we log it, but there's no need to fail.
2134  */
2135  if (ftruncate(fileno(qfile), extent) != 0)
2136  ereport(LOG,
2138  errmsg("could not truncate pg_stat_statement file \"%s\": %m",
2139  PGSS_TEXT_FILE)));
2140 
2141  if (FreeFile(qfile))
2142  {
2143  ereport(LOG,
2145  errmsg("could not write pg_stat_statement file \"%s\": %m",
2146  PGSS_TEXT_FILE)));
2147  qfile = NULL;
2148  goto gc_fail;
2149  }
2150 
2151  elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2152  pgss->extent, extent);
2153 
2154  /* Reset the shared extent pointer */
2155  pgss->extent = extent;
2156 
2157  /*
2158  * Also update the mean query length, to be sure that need_gc_qtexts()
2159  * won't still think we have a problem.
2160  */
2161  if (nentries > 0)
2162  pgss->mean_query_len = extent / nentries;
2163  else
2165 
2166  free(qbuffer);
2167 
2168  /*
2169  * OK, count a garbage collection cycle. (Note: even though we have
2170  * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2171  * other processes may examine gc_count while holding only the mutex.
2172  * Also, we have to advance the count *after* we've rewritten the file,
2173  * else other processes might not realize they read a stale file.)
2174  */
2175  record_gc_qtexts();
2176 
2177  return;
2178 
2179 gc_fail:
2180  /* clean up resources */
2181  if (qfile)
2182  FreeFile(qfile);
2183  if (qbuffer)
2184  free(qbuffer);
2185 
2186  /*
2187  * Since the contents of the external file are now uncertain, mark all
2188  * hashtable entries as having invalid texts.
2189  */
2190  hash_seq_init(&hash_seq, pgss_hash);
2191  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2192  {
2193  entry->query_offset = 0;
2194  entry->query_len = -1;
2195  }
2196 
2197  /*
2198  * Destroy the query text file and create a new, empty one
2199  */
2200  (void) unlink(PGSS_TEXT_FILE);
2202  if (qfile == NULL)
2203  ereport(LOG,
2205  errmsg("could not write new pg_stat_statement file \"%s\": %m",
2206  PGSS_TEXT_FILE)));
2207  else
2208  FreeFile(qfile);
2209 
2210  /* Reset the shared extent pointer */
2211  pgss->extent = 0;
2212 
2213  /* Reset mean_query_len to match the new state */
2215 
2216  /*
2217  * Bump the GC count even though we failed.
2218  *
2219  * This is needed to make concurrent readers of file without any lock on
2220  * pgss->lock notice existence of new version of file. Once readers
2221  * subsequently observe a change in GC count with pgss->lock held, that
2222  * forces a safe reopen of file. Writers also require that we bump here,
2223  * of course. (As required by locking protocol, readers and writers don't
2224  * trust earlier file contents until gc_count is found unchanged after
2225  * pgss->lock acquired in shared or exclusive mode respectively.)
2226  */
2227  record_gc_qtexts();
2228 }
2229 
2230 /*
2231  * Release all entries.
2232  */
2233 static void
2235 {
2236  HASH_SEQ_STATUS hash_seq;
2237  pgssEntry *entry;
2238  FILE *qfile;
2239 
2241 
2242  hash_seq_init(&hash_seq, pgss_hash);
2243  while ((entry = hash_seq_search(&hash_seq)) != NULL)
2244  {
2245  hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2246  }
2247 
2248  /*
2249  * Write new empty query file, perhaps even creating a new one to recover
2250  * if the file was missing.
2251  */
2253  if (qfile == NULL)
2254  {
2255  ereport(LOG,
2257  errmsg("could not create pg_stat_statement file \"%s\": %m",
2258  PGSS_TEXT_FILE)));
2259  goto done;
2260  }
2261 
2262  /* If ftruncate fails, log it, but it's not a fatal problem */
2263  if (ftruncate(fileno(qfile), 0) != 0)
2264  ereport(LOG,
2266  errmsg("could not truncate pg_stat_statement file \"%s\": %m",
2267  PGSS_TEXT_FILE)));
2268 
2269  FreeFile(qfile);
2270 
2271 done:
2272  pgss->extent = 0;
2273  /* This counts as a query text garbage collection for our purposes */
2274  record_gc_qtexts();
2275 
2276  LWLockRelease(pgss->lock);
2277 }
2278 
2279 /*
2280  * AppendJumble: Append a value that is substantive in a given query to
2281  * the current jumble.
2282  */
2283 static void
2284 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
2285 {
2286  unsigned char *jumble = jstate->jumble;
2287  Size jumble_len = jstate->jumble_len;
2288 
2289  /*
2290  * Whenever the jumble buffer is full, we hash the current contents and
2291  * reset the buffer to contain just that hash value, thus relying on the
2292  * hash to summarize everything so far.
2293  */
2294  while (size > 0)
2295  {
2296  Size part_size;
2297 
2298  if (jumble_len >= JUMBLE_SIZE)
2299  {
2300  uint64 start_hash;
2301 
2302  start_hash = DatumGetUInt64(hash_any_extended(jumble,
2303  JUMBLE_SIZE, 0));
2304  memcpy(jumble, &start_hash, sizeof(start_hash));
2305  jumble_len = sizeof(start_hash);
2306  }
2307  part_size = Min(size, JUMBLE_SIZE - jumble_len);
2308  memcpy(jumble + jumble_len, item, part_size);
2309  jumble_len += part_size;
2310  item += part_size;
2311  size -= part_size;
2312  }
2313  jstate->jumble_len = jumble_len;
2314 }
2315 
/*
 * Convenience wrappers around AppendJumble.  Both rely on a variable named
 * "jstate" being in scope at the point of use.
 *
 * APP_JUMB appends the raw bytes of an lvalue (sizeof(field) bytes taken
 * from its address); APP_JUMB_STRING appends a C string including its
 * terminating NUL byte.
 */
#define APP_JUMB(field) \
    AppendJumble(jstate, (const unsigned char *) &(field), sizeof(field))
#define APP_JUMB_STRING(cstr) \
    AppendJumble(jstate, (const unsigned char *) (cstr), strlen(cstr) + 1)
2324 
2325 /*
2326  * JumbleQuery: Selectively serialize the query tree, appending significant
2327  * data to the "query jumble" while ignoring nonsignificant data.
2328  *
2329  * Rule of thumb for what to include is that we should ignore anything not
2330  * semantically significant (such as alias names) as well as anything that can
2331  * be deduced from child nodes (else we'd just be double-hashing that piece
2332  * of information).
2333  */
2334 static void
2336 {
2337  Assert(IsA(query, Query));
2338  Assert(query->utilityStmt == NULL);
2339 
2340  APP_JUMB(query->commandType);
2341  /* resultRelation is usually predictable from commandType */
2342  JumbleExpr(jstate, (Node *) query->cteList);
2343  JumbleRangeTable(jstate, query->rtable);
2344  JumbleExpr(jstate, (Node *) query->jointree);
2345  JumbleExpr(jstate, (Node *) query->targetList);
2346  JumbleExpr(jstate, (Node *) query->onConflict);
2347  JumbleExpr(jstate, (Node *) query->returningList);
2348  JumbleExpr(jstate, (Node *) query->groupClause);
2349  JumbleExpr(jstate, (Node *) query->groupingSets);
2350  JumbleExpr(jstate, query->havingQual);
2351  JumbleExpr(jstate, (Node *) query->windowClause);
2352  JumbleExpr(jstate, (Node *) query->distinctClause);
2353  JumbleExpr(jstate, (Node *) query->sortClause);
2354  JumbleExpr(jstate, query->limitOffset);
2355  JumbleExpr(jstate, query->limitCount);
2356  /* we ignore rowMarks */
2357  JumbleExpr(jstate, query->setOperations);
2358 }
2359 
2360 /*
2361  * Jumble a range table
2362  */
2363 static void
2365 {
2366  ListCell *lc;
2367 
2368  foreach(lc, rtable)
2369  {
2371 
2372  APP_JUMB(rte->rtekind);
2373  switch (rte->rtekind)
2374  {
2375  case RTE_RELATION:
2376  APP_JUMB(rte->relid);
2377  JumbleExpr(jstate, (Node *) rte->tablesample);
2378  break;
2379  case RTE_SUBQUERY:
2380  JumbleQuery(jstate, rte->subquery);
2381  break;
2382  case RTE_JOIN:
2383  APP_JUMB(rte->jointype);
2384  break;
2385  case RTE_FUNCTION:
2386  JumbleExpr(jstate, (Node *) rte->functions);
2387  break;
2388  case RTE_TABLEFUNC:
2389  JumbleExpr(jstate, (Node *) rte->tablefunc);
2390  break;
2391  case RTE_VALUES:
2392  JumbleExpr(jstate, (Node *) rte->values_lists);
2393  break;
2394  case RTE_CTE:
2395 
2396  /*
2397  * Depending on the CTE name here isn't ideal, but it's the
2398  * only info we have to identify the referenced WITH item.
2399  */
2400  APP_JUMB_STRING(rte->ctename);
2401  APP_JUMB(rte->ctelevelsup);
2402  break;
2403  case RTE_NAMEDTUPLESTORE:
2404  APP_JUMB_STRING(rte->enrname);
2405  break;
2406  default:
2407  elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
2408  break;
2409  }
2410  }
2411 }
2412 
2413 /*
2414  * Jumble an expression tree
2415  *
2416  * In general this function should handle all the same node types that
2417  * expression_tree_walker() does, and therefore it's coded to be as parallel
2418  * to that function as possible. However, since we are only invoked on
2419  * queries immediately post-parse-analysis, we need not handle node types
2420  * that only appear in planning.
2421  *
2422  * Note: the reason we don't simply use expression_tree_walker() is that the
2423  * point of that function is to support tree walkers that don't care about
2424  * most tree node types, but here we care about all types. We should complain
2425  * about any unrecognized node type.
2426  */
2427 static void
2429 {
2430  ListCell *temp;
2431 
2432  if (node == NULL)
2433  return;
2434 
2435  /* Guard against stack overflow due to overly complex expressions */
2437 
2438  /*
2439  * We always emit the node's NodeTag, then any additional fields that are
2440  * considered significant, and then we recurse to any child nodes.
2441  */
2442  APP_JUMB(node->type);
2443 
2444  switch (nodeTag(node))
2445  {
2446  case T_Var:
2447  {
2448  Var *var = (Var *) node;
2449 
2450  APP_JUMB(var->varno);
2451  APP_JUMB(var->varattno);
2452  APP_JUMB(var->varlevelsup);
2453  }
2454  break;
2455  case T_Const:
2456  {
2457  Const *c = (Const *) node;
2458 
2459  /* We jumble only the constant's type, not its value */
2460  APP_JUMB(c->consttype);
2461  /* Also, record its parse location for query normalization */
2462  RecordConstLocation(jstate, c->location);
2463  }
2464  break;
2465  case T_Param:
2466  {
2467  Param *p = (Param *) node;
2468 
2469  APP_JUMB(p->paramkind);
2470  APP_JUMB(p->paramid);
2471  APP_JUMB(p->paramtype);
2472  /* Also, track the highest external Param id */
2473  if (p->paramkind == PARAM_EXTERN &&
2474  p->paramid > jstate->highest_extern_param_id)
2475  jstate->highest_extern_param_id = p->paramid;
2476  }
2477  break;
2478  case T_Aggref:
2479  {
2480  Aggref *expr = (Aggref *) node;
2481 
2482  APP_JUMB(expr->aggfnoid);
2483  JumbleExpr(jstate, (Node *) expr->aggdirectargs);
2484  JumbleExpr(jstate, (Node *) expr->args);
2485  JumbleExpr(jstate, (Node *) expr->aggorder);
2486  JumbleExpr(jstate, (Node *) expr->aggdistinct);
2487  JumbleExpr(jstate, (Node *) expr->aggfilter);
2488  }
2489  break;
2490  case T_GroupingFunc:
2491  {
2492  GroupingFunc *grpnode = (GroupingFunc *) node;
2493 
2494  JumbleExpr(jstate, (Node *) grpnode->refs);
2495  }
2496  break;
2497  case T_WindowFunc:
2498  {
2499  WindowFunc *expr = (WindowFunc *) node;
2500 
2501  APP_JUMB(expr->winfnoid);
2502  APP_JUMB(expr->winref);
2503  JumbleExpr(jstate, (Node *) expr->args);
2504  JumbleExpr(jstate, (Node *) expr->aggfilter);
2505  }
2506  break;
2507  case T_ArrayRef:
2508  {
2509  ArrayRef *aref = (ArrayRef *) node;
2510 
2511  JumbleExpr(jstate, (Node *) aref->refupperindexpr);
2512  JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
2513  JumbleExpr(jstate, (Node *) aref->refexpr);
2514  JumbleExpr(jstate, (Node *) aref->refassgnexpr);
2515  }
2516  break;
2517  case T_FuncExpr:
2518  {
2519  FuncExpr *expr = (FuncExpr *) node;
2520 
2521  APP_JUMB(expr->funcid);
2522  JumbleExpr(jstate, (Node *) expr->args);
2523  }
2524  break;
2525  case T_NamedArgExpr:
2526  {
2527  NamedArgExpr *nae = (NamedArgExpr *) node;
2528 
2529  APP_JUMB(nae->argnumber);
2530  JumbleExpr(jstate, (Node *) nae->arg);
2531  }
2532  break;
2533  case T_OpExpr:
2534  case T_DistinctExpr: /* struct-equivalent to OpExpr */
2535  case T_NullIfExpr: /* struct-equivalent to OpExpr */
2536  {
2537  OpExpr *expr = (OpExpr *) node;
2538 
2539  APP_JUMB(expr->opno);
2540  JumbleExpr(jstate, (Node *) expr->args);
2541  }
2542  break;
2543  case T_ScalarArrayOpExpr:
2544  {
2545  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
2546 
2547  APP_JUMB(expr->opno);
2548  APP_JUMB(expr->useOr);
2549  JumbleExpr(jstate, (Node *) expr->args);
2550  }
2551  break;
2552  case T_BoolExpr:
2553  {
2554  BoolExpr *expr = (BoolExpr *) node;
2555 
2556  APP_JUMB(expr->boolop);
2557  JumbleExpr(jstate, (Node *) expr->args);
2558  }
2559  break;
2560  case T_SubLink:
2561  {
2562  SubLink *sublink = (SubLink *) node;
2563 
2564  APP_JUMB(sublink->subLinkType);
2565  APP_JUMB(sublink->subLinkId);
2566  JumbleExpr(jstate, (Node *) sublink->testexpr);
2567  JumbleQuery(jstate, castNode(Query, sublink->subselect));
2568  }
2569  break;
2570  case T_FieldSelect:
2571  {
2572  FieldSelect *fs = (FieldSelect *) node;
2573 
2574  APP_JUMB(fs->fieldnum);
2575  JumbleExpr(jstate, (Node *) fs->arg);
2576  }
2577  break;
2578  case T_FieldStore:
2579  {
2580  FieldStore *fstore = (FieldStore *) node;
2581 
2582  JumbleExpr(jstate, (Node *) fstore->arg);
2583  JumbleExpr(jstate, (Node *) fstore->newvals);
2584  }
2585  break;
2586  case T_RelabelType:
2587  {
2588  RelabelType *rt = (RelabelType *) node;
2589 
2590  APP_JUMB(rt->resulttype);
2591  JumbleExpr(jstate, (Node *) rt->arg);
2592  }
2593  break;
2594  case T_CoerceViaIO:
2595  {
2596  CoerceViaIO *cio = (CoerceViaIO *) node;
2597 
2598  APP_JUMB(cio->resulttype);
2599  JumbleExpr(jstate, (Node *) cio->arg);
2600  }
2601  break;
2602  case T_ArrayCoerceExpr:
2603  {
2604  ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
2605 
2606  APP_JUMB(acexpr->resulttype);
2607  JumbleExpr(jstate, (Node *) acexpr->arg);
2608  JumbleExpr(jstate, (Node *) acexpr->elemexpr);
2609  }
2610  break;
2611  case T_ConvertRowtypeExpr:
2612  {
2613  ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
2614 
2615  APP_JUMB(crexpr->resulttype);
2616  JumbleExpr(jstate, (Node *) crexpr->arg);
2617  }
2618  break;
2619  case T_CollateExpr:
2620  {
2621  CollateExpr *ce = (CollateExpr *) node;
2622 
2623  APP_JUMB(ce->collOid);
2624  JumbleExpr(jstate, (Node *) ce->arg);
2625  }
2626  break;
2627  case T_CaseExpr:
2628  {
2629  CaseExpr *caseexpr = (CaseExpr *) node;
2630 
2631  JumbleExpr(jstate, (Node *) caseexpr->arg);
2632  foreach(temp, caseexpr->args)
2633  {
2634  CaseWhen *when = lfirst_node(CaseWhen, temp);
2635 
2636  JumbleExpr(jstate, (Node *) when->expr);
2637  JumbleExpr(jstate, (Node *) when->result);
2638  }
2639  JumbleExpr(jstate, (Node *) caseexpr->defresult);
2640  }
2641  break;
2642  case T_CaseTestExpr:
2643  {
2644  CaseTestExpr *ct = (CaseTestExpr *) node;
2645 
2646  APP_JUMB(ct->typeId);
2647  }
2648  break;
2649  case T_ArrayExpr:
2650  JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
2651  break;
2652  case T_RowExpr:
2653  JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
2654  break;
2655  case T_RowCompareExpr:
2656  {
2657  RowCompareExpr *rcexpr = (RowCompareExpr *) node;
2658 
2659  APP_JUMB(rcexpr->rctype);
2660  JumbleExpr(jstate, (Node *) rcexpr->largs);
2661  JumbleExpr(jstate, (Node *) rcexpr->rargs);
2662  }
2663  break;
2664  case T_CoalesceExpr:
2665  JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
2666  break;
2667  case T_MinMaxExpr:
2668  {
2669  MinMaxExpr *mmexpr = (MinMaxExpr *) node;
2670 
2671  APP_JUMB(mmexpr->op);
2672  JumbleExpr(jstate, (Node *) mmexpr->args);
2673  }
2674  break;
2675  case T_SQLValueFunction:
2676  {
2677  SQLValueFunction *svf = (SQLValueFunction *) node;
2678 
2679  APP_JUMB(svf->op);
2680  /* type is fully determined by op */
2681  APP_JUMB(svf->typmod);
2682  }
2683  break;
2684  case T_XmlExpr:
2685  {
2686  XmlExpr *xexpr = (XmlExpr *) node;
2687 
2688  APP_JUMB(xexpr->op);
2689  JumbleExpr(jstate, (Node *) xexpr->named_args);
2690  JumbleExpr(jstate, (Node *) xexpr->args);
2691  }
2692  break;
2693  case T_NullTest:
2694  {
2695  NullTest *nt = (NullTest *) node;
2696 
2697  APP_JUMB(nt->nulltesttype);
2698  JumbleExpr(jstate, (Node *) nt->arg);
2699  }
2700  break;
2701  case T_BooleanTest:
2702  {
2703  BooleanTest *bt = (BooleanTest *) node;
2704 
2705  APP_JUMB(bt->booltesttype);
2706  JumbleExpr(jstate, (Node *) bt->arg);
2707  }
2708  break;
2709  case T_CoerceToDomain:
2710  {
2711  CoerceToDomain *cd = (CoerceToDomain *) node;
2712 
2713  APP_JUMB(cd->resulttype);
2714  JumbleExpr(jstate, (Node *) cd->arg);
2715  }
2716  break;
2717  case T_CoerceToDomainValue:
2718  {
2719  CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
2720 
2721  APP_JUMB(cdv->typeId);
2722  }
2723  break;
2724  case T_SetToDefault:
2725  {
2726  SetToDefault *sd = (SetToDefault *) node;
2727 
2728  APP_JUMB(sd->typeId);
2729  }
2730  break;
2731  case T_CurrentOfExpr:
2732  {
2733  CurrentOfExpr *ce = (CurrentOfExpr *) node;
2734 
2735  APP_JUMB(ce->cvarno);
2736  if (ce->cursor_name)
2738  APP_JUMB(ce->cursor_param);
2739  }
2740  break;
2741  case T_NextValueExpr:
2742  {
2743  NextValueExpr *nve = (NextValueExpr *) node;
2744 
2745  APP_JUMB(nve->seqid);
2746  APP_JUMB(nve->typeId);
2747  }
2748  break;
2749  case T_InferenceElem:
2750  {
2751  InferenceElem *ie = (InferenceElem *) node;
2752 
2753  APP_JUMB(ie->infercollid);
2754  APP_JUMB(ie->inferopclass);
2755  JumbleExpr(jstate, ie->expr);
2756  }
2757  break;
2758  case T_TargetEntry:
2759  {
2760  TargetEntry *tle = (TargetEntry *) node;
2761 
2762  APP_JUMB(tle->resno);
2763  APP_JUMB(tle->ressortgroupref);
2764  JumbleExpr(jstate, (Node *) tle->expr);
2765  }
2766  break;
2767  case T_RangeTblRef:
2768  {
2769  RangeTblRef *rtr = (RangeTblRef *) node;
2770 
2771  APP_JUMB(rtr->rtindex);
2772  }
2773  break;
2774  case T_JoinExpr:
2775  {
2776  JoinExpr *join = (JoinExpr *) node;
2777 
2778  APP_JUMB(join->jointype);
2779  APP_JUMB(join->isNatural);
2780  APP_JUMB(join->rtindex);
2781  JumbleExpr(jstate, join->larg);
2782  JumbleExpr(jstate, join->rarg);
2783  JumbleExpr(jstate, join->quals);
2784  }
2785  break;
2786  case T_FromExpr:
2787  {
2788  FromExpr *from = (FromExpr *) node;
2789 
2790  JumbleExpr(jstate, (Node *) from->fromlist);
2791  JumbleExpr(jstate, from->quals);
2792  }
2793  break;
2794  case T_OnConflictExpr:
2795  {
2796  OnConflictExpr *conf = (OnConflictExpr *) node;
2797 
2798  APP_JUMB(conf->action);
2799  JumbleExpr(jstate, (Node *) conf->arbiterElems);
2800  JumbleExpr(jstate, conf->arbiterWhere);
2801  JumbleExpr(jstate, (Node *) conf->onConflictSet);
2802  JumbleExpr(jstate, conf->onConflictWhere);
2803  APP_JUMB(conf->constraint);
2804  APP_JUMB(conf->exclRelIndex);
2805  JumbleExpr(jstate, (Node *) conf->exclRelTlist);
2806  }
2807  break;
2808  case T_List:
2809  foreach(temp, (List *) node)
2810  {
2811  JumbleExpr(jstate, (Node *) lfirst(temp));
2812  }
2813  break;
2814  case T_IntList:
2815  foreach(temp, (List *) node)
2816  {
2817  APP_JUMB(lfirst_int(temp));
2818  }
2819  break;
2820  case T_SortGroupClause:
2821  {
2822  SortGroupClause *sgc = (SortGroupClause *) node;
2823 
2824  APP_JUMB(sgc->tleSortGroupRef);
2825  APP_JUMB(sgc->eqop);
2826  APP_JUMB(sgc->sortop);
2827  APP_JUMB(sgc->nulls_first);
2828  }
2829  break;
2830  case T_GroupingSet:
2831  {
2832  GroupingSet *gsnode = (GroupingSet *) node;
2833 
2834  JumbleExpr(jstate, (Node *) gsnode->content);
2835  }
2836  break;
2837  case T_WindowClause:
2838  {
2839  WindowClause *wc = (WindowClause *) node;
2840 
2841  APP_JUMB(wc->winref);
2842  APP_JUMB(wc->frameOptions);
2843  JumbleExpr(jstate, (Node *) wc->partitionClause);
2844  JumbleExpr(jstate, (Node *) wc->orderClause);
2845  JumbleExpr(jstate, wc->startOffset);
2846  JumbleExpr(jstate, wc->endOffset);
2847  }
2848  break;
2849  case T_CommonTableExpr:
2850  {
2851  CommonTableExpr *cte = (CommonTableExpr *) node;
2852 
2853  /* we store the string name because RTE_CTE RTEs need it */
2854  APP_JUMB_STRING(cte->ctename);
2855  JumbleQuery(jstate, castNode(Query, cte->ctequery));
2856  }
2857  break;
2858  case T_SetOperationStmt:
2859  {
2860  SetOperationStmt *setop = (SetOperationStmt *) node;
2861 
2862  APP_JUMB(setop->op);
2863  APP_JUMB(setop->all);
2864  JumbleExpr(jstate, setop->larg);
2865  JumbleExpr(jstate, setop->rarg);
2866  }
2867  break;
2868  case T_RangeTblFunction:
2869  {
2870  RangeTblFunction *rtfunc = (RangeTblFunction *) node;
2871 
2872  JumbleExpr(jstate, rtfunc->funcexpr);
2873  }
2874  break;
2875  case T_TableFunc:
2876  {
2877  TableFunc *tablefunc = (TableFunc *) node;
2878 
2879  JumbleExpr(jstate, tablefunc->docexpr);
2880  JumbleExpr(jstate, tablefunc->rowexpr);
2881  JumbleExpr(jstate, (Node *) tablefunc->colexprs);
2882  }
2883  break;
2884  case T_TableSampleClause:
2885  {
2886  TableSampleClause *tsc = (TableSampleClause *) node;
2887 
2888  APP_JUMB(tsc->tsmhandler);
2889  JumbleExpr(jstate, (Node *) tsc->args);
2890  JumbleExpr(jstate, (Node *) tsc->repeatable);
2891  }
2892  break;
2893  default:
2894  /* Only a warning, since we can stumble along anyway */
2895  elog(WARNING, "unrecognized node type: %d",
2896  (int) nodeTag(node));
2897  break;
2898  }
2899 }
2900 
2901 /*
2902  * Record location of constant within query string of query tree
2903  * that is currently being walked.
2904  */
2905 static void
2906 RecordConstLocation(pgssJumbleState *jstate, int location)
2907 {
2908  /* -1 indicates unknown or undefined location */
2909  if (location >= 0)
2910  {
2911  /* enlarge array if needed */
2912  if (jstate->clocations_count >= jstate->clocations_buf_size)
2913  {
2914  jstate->clocations_buf_size *= 2;
2915  jstate->clocations = (pgssLocationLen *)
2916  repalloc(jstate->clocations,
2917  jstate->clocations_buf_size *
2918  sizeof(pgssLocationLen));
2919  }
2920  jstate->clocations[jstate->clocations_count].location = location;
2921  /* initialize lengths to -1 to simplify fill_in_constant_lengths */
2922  jstate->clocations[jstate->clocations_count].length = -1;
2923  jstate->clocations_count++;
2924  }
2925 }
2926 
2927 /*
2928  * Generate a normalized version of the query string that will be used to
2929  * represent all similar queries.
2930  *
2931  * Note that the normalized representation may well vary depending on
2932  * just which "equivalent" query is used to create the hashtable entry.
2933  * We assume this is OK.
2934  *
2935  * If query_loc > 0, then "query" has been advanced by that much compared to
2936  * the original string start, so we need to translate the provided locations
2937  * to compensate. (This lets us avoid re-scanning statements before the one
2938  * of interest, so it's worth doing.)
2939  *
2940  * *query_len_p contains the input string length, and is updated with
2941  * the result string length on exit. The resulting string might be longer
2942  * or shorter depending on what happens with replacement of constants.
2943  *
2944  * Returns a palloc'd string.
2945  */
2946 static char *
2947 generate_normalized_query(pgssJumbleState *jstate, const char *query,
2948  int query_loc, int *query_len_p, int encoding)
2949 {
2950  char *norm_query;
2951  int query_len = *query_len_p;
2952  int i,
2953  norm_query_buflen, /* Space allowed for norm_query */
2954  len_to_wrt, /* Length (in bytes) to write */
2955  quer_loc = 0, /* Source query byte location */
2956  n_quer_loc = 0, /* Normalized query byte location */
2957  last_off = 0, /* Offset from start for previous tok */
2958  last_tok_len = 0; /* Length (in bytes) of that tok */
2959 
2960  /*
2961  * Get constants' lengths (core system only gives us locations). Note
2962  * this also ensures the items are sorted by location.
2963  */
2964  fill_in_constant_lengths(jstate, query, query_loc);
2965 
2966  /*
2967  * Allow for $n symbols to be longer than the constants they replace.
2968  * Constants must take at least one byte in text form, while a $n symbol
2969  * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2970  * could refine that limit based on the max value of n for the current
2971  * query, but it hardly seems worth any extra effort to do so.
2972  */
2973  norm_query_buflen = query_len + jstate->clocations_count * 10;
2974 
2975  /* Allocate result buffer */
2976  norm_query = palloc(norm_query_buflen + 1);
2977 
2978  for (i = 0; i < jstate->clocations_count; i++)
2979  {
2980  int off, /* Offset from start for cur tok */
2981  tok_len; /* Length (in bytes) of that tok */
2982 
2983  off = jstate->clocations[i].location;
2984  /* Adjust recorded location if we're dealing with partial string */
2985  off -= query_loc;
2986 
2987  tok_len = jstate->clocations[i].length;
2988 
2989  if (tok_len < 0)
2990  continue; /* ignore any duplicates */
2991 
2992  /* Copy next chunk (what precedes the next constant) */
2993  len_to_wrt = off - last_off;
2994  len_to_wrt -= last_tok_len;
2995 
2996  Assert(len_to_wrt >= 0);
2997  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2998  n_quer_loc += len_to_wrt;
2999 
3000  /* And insert a param symbol in place of the constant token */
3001  n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
3002  i + 1 + jstate->highest_extern_param_id);
3003 
3004  quer_loc = off + tok_len;
3005  last_off = off;
3006  last_tok_len = tok_len;
3007  }
3008 
3009  /*
3010  * We've copied up until the last ignorable constant. Copy over the
3011  * remaining bytes of the original query string.
3012  */
3013  len_to_wrt = query_len - quer_loc;
3014 
3015  Assert(len_to_wrt >= 0);
3016  memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
3017  n_quer_loc += len_to_wrt;
3018 
3019  Assert(n_quer_loc <= norm_query_buflen);
3020  norm_query[n_quer_loc] = '\0';
3021 
3022  *query_len_p = n_quer_loc;
3023  return norm_query;
3024 }
3025 
3026 /*
3027  * Given a valid SQL string and an array of constant-location records,
3028  * fill in the textual lengths of those constants.
3029  *
3030  * The constants may use any allowed constant syntax, such as float literals,
3031  * bit-strings, single-quoted strings and dollar-quoted strings. This is
3032  * accomplished by using the public API for the core scanner.
3033  *
3034  * It is the caller's job to ensure that the string is a valid SQL statement
3035  * with constants at the indicated locations. Since in practice the string
3036  * has already been parsed, and the locations that the caller provides will
3037  * have originated from within the authoritative parser, this should not be
3038  * a problem.
3039  *
3040  * Duplicate constant pointers are possible, and will have their lengths
3041  * marked as '-1', so that they are later ignored. (Actually, we assume the
3042  * lengths were initialized as -1 to start with, and don't change them here.)
3043  *
3044  * If query_loc > 0, then "query" has been advanced by that much compared to
3045  * the original string start, so we need to translate the provided locations
3046  * to compensate. (This lets us avoid re-scanning statements before the one
3047  * of interest, so it's worth doing.)
3048  *
3049  * N.B. There is an assumption that a '-' character at a Const location begins
3050  * a negative numeric constant. This precludes there ever being another
3051  * reason for a constant to start with a '-'.
3052  */
3053 static void
3054 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
3055  int query_loc)
3056 {
3057  pgssLocationLen *locs;
3059  core_yy_extra_type yyextra;
3060  core_YYSTYPE yylval;
3061  YYLTYPE yylloc;
3062  int last_loc = -1;
3063  int i;
3064 
3065  /*
3066  * Sort the records by location so that we can process them in order while
3067  * scanning the query text.
3068  */
3069  if (jstate->clocations_count > 1)
3070  qsort(jstate->clocations, jstate->clocations_count,
3071  sizeof(pgssLocationLen), comp_location);
3072  locs = jstate->clocations;
3073 
3074  /* initialize the flex scanner --- should match raw_parser() */
3075  yyscanner = scanner_init(query,
3076  &yyextra,
3077  ScanKeywords,
3078  NumScanKeywords);
3079 
3080  /* we don't want to re-emit any escape string warnings */
3081  yyextra.escape_string_warning = false;
3082 
3083  /* Search for each constant, in sequence */
3084  for (i = 0; i < jstate->clocations_count; i++)
3085  {
3086  int loc = locs[i].location;
3087  int tok;
3088 
3089  /* Adjust recorded location if we're dealing with partial string */
3090  loc -= query_loc;
3091 
3092  Assert(loc >= 0);
3093 
3094  if (loc <= last_loc)
3095  continue; /* Duplicate constant, ignore */
3096 
3097  /* Lex tokens until we find the desired constant */
3098  for (;;)
3099  {
3100  tok = core_yylex(&yylval, &yylloc, yyscanner);
3101 
3102  /* We should not hit end-of-string, but if we do, behave sanely */
3103  if (tok == 0)
3104  break; /* out of inner for-loop */
3105 
3106  /*
3107  * We should find the token position exactly, but if we somehow
3108  * run past it, work with that.
3109  */
3110  if (yylloc >= loc)
3111  {
3112  if (query[loc] == '-')
3113  {
3114  /*
3115  * It's a negative value - this is the one and only case
3116  * where we replace more than a single token.
3117  *
3118  * Do not compensate for the core system's special-case
3119  * adjustment of location to that of the leading '-'
3120  * operator in the event of a negative constant. It is
3121  * also useful for our purposes to start from the minus
3122  * symbol. In this way, queries like "select * from foo
3123  * where bar = 1" and "select * from foo where bar = -2"
3124  * will have identical normalized query strings.
3125  */
3126  tok = core_yylex(&yylval, &yylloc, yyscanner);
3127  if (tok == 0)
3128  break; /* out of inner for-loop */
3129  }
3130 
3131  /*
3132  * We now rely on the assumption that flex has placed a zero
3133  * byte after the text of the current token in scanbuf.
3134  */
3135  locs[i].length = strlen(yyextra.scanbuf + loc);
3136  break; /* out of inner for-loop */
3137  }
3138  }
3139 
3140  /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3141  if (tok == 0)
3142  break;
3143 
3144  last_loc = loc;
3145  }
3146 
3147  scanner_finish(yyscanner);
3148 }
3149 
3150 /*
3151  * comp_location: comparator for qsorting pgssLocationLen structs by location
3152  */
3153 static int
3154 comp_location(const void *a, const void *b)
3155 {
3156  int l = ((const pgssLocationLen *) a)->location;
3157  int r = ((const pgssLocationLen *) b)->location;
3158 
3159  if (l < r)
3160  return -1;
3161  else if (l > r)
3162  return +1;
3163  else
3164  return 0;
3165 }
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
List * aggdistinct
Definition: primnodes.h:303
Node * limitOffset
Definition: parsenodes.h:158
int slock_t
Definition: s_lock.h:912
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:7782
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: executor.h:73
Expr * refassgnexpr
Definition: primnodes.h:410
long local_blks_hit
Definition: instrument.h:25
List * args
Definition: primnodes.h:1074
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:308
void _PG_init(void)
Definition: lwlock.h:32
static uint64 pgss_hash_string(const char *str, int len)
static void JumbleExpr(pgssJumbleState *jstate, Node *node)
#define IsA(nodeptr, _type_)
Definition: nodes.h:563
Node * docexpr
Definition: primnodes.h:84
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:69
long local_blks_dirtied
Definition: instrument.h:27
#define PG_STAT_STATEMENTS_COLS_V1_3
#define DEBUG1
Definition: elog.h:25
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:211
Expr * arg
Definition: primnodes.h:771
Index varlevelsup
Definition: primnodes.h:173
long local_blks_read
Definition: instrument.h:26
int stmt_location
Definition: parsenodes.h:179
List * sortClause
Definition: parsenodes.h:156
EState * estate
Definition: execdesc.h:48
List * content
Definition: parsenodes.h:1266
List * refs
Definition: primnodes.h:343
void * core_yyscan_t
Definition: scanner.h:116
List * args
Definition: primnodes.h:359
#define HASH_ELEM
Definition: hsearch.h:87
List * args
Definition: primnodes.h:457
static void error(void)
Definition: sql-dyntest.c:147
#define PG_STAT_STATEMENTS_COLS_V1_0
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1426
#define USAGE_DEALLOC_PERCENT
int64 shared_blks_read
static const uint32 PGSS_PG_MAJOR_VERSION
FromExpr * jointree
Definition: parsenodes.h:136
int64 local_blks_written
Definition: guc.h:164
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:634
Oid GetUserId(void)
Definition: miscinit.c:284
OnConflictExpr * onConflict
Definition: parsenodes.h:142
instr_time blk_read_time
Definition: instrument.h:31
#define castNode(_type_, nodeptr)
Definition: nodes.h:581
static ProcessUtility_hook_type prev_ProcessUtility
#define write(a, b, c)
Definition: win32.h:14
struct pgssSharedState pgssSharedState
static void AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
int64 shared_blks_dirtied
static void pgss_shmem_startup(void)
#define SpinLockInit(lock)
Definition: spin.h:60
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:199
Oid resulttype
Definition: primnodes.h:815
struct timeval instr_time
Definition: instr_time.h:147
RowCompareType rctype
Definition: primnodes.h:1038
long shared_blks_read
Definition: instrument.h:22
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:149
#define Min(x, y)
Definition: c.h:812
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static ExecutorRun_hook_type prev_ExecutorRun
const int NumScanKeywords
Definition: keywords.c:45
Index tleSortGroupRef
Definition: parsenodes.h:1196
Expr * arg
Definition: primnodes.h:794
List * groupingSets
Definition: parsenodes.h:148
static ExecutorEnd_hook_type prev_ExecutorEnd
Size entrysize
Definition: hsearch.h:73
#define PGSS_DUMP_FILE
ParamKind paramkind
Definition: primnodes.h:244
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:73
Definition: nodes.h:512
static int pgss_track
static int entry_cmp(const void *lhs, const void *rhs)
int errcode(int sqlerrcode)
Definition: elog.c:575
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:468
struct pgssEntry pgssEntry
#define PG_BINARY_W
Definition: c.h:1038
ProcessUtilityContext
Definition: utility.h:19
Instrumentation * InstrAlloc(int n, int instrument_options)
Definition: instrument.c:30
List * args
Definition: primnodes.h:301
static ExecutorStart_hook_type prev_ExecutorStart
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:80
AttrNumber varattno
Definition: primnodes.h:168
Expr * arg
Definition: primnodes.h:742
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1333
#define pgss_enabled()
List * fromlist
Definition: primnodes.h:1478
unsigned char * jumble
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:239
long temp_blks_written
Definition: instrument.h:30
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:904
Counters counters
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
int64 temp_blks_read
static void entry_reset(void)
#define JUMBLE_SIZE
#define PG_STAT_STATEMENTS_COLS_V1_1
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
Index winref
Definition: primnodes.h:361
Node * utilityStmt
Definition: parsenodes.h:118
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
Definition: primnodes.h:163
#define PG_BINARY_R
Definition: c.h:1037
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1035
int stmt_len
Definition: plannodes.h:98
char * scanbuf
Definition: scanner.h:72
List * values_lists
Definition: parsenodes.h:1016
Node * quals
Definition: primnodes.h:1479
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:408
SQLValueFunctionOp op
Definition: primnodes.h:1111
int duration
Definition: pgbench.c:104
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:7867
#define MaxAllocHugeSize
Definition: memutils.h:44
signed int int32
Definition: c.h:294
#define APP_JUMB_STRING(str)
List * windowClause
Definition: parsenodes.h:152
List * targetList
Definition: parsenodes.h:138
static void RecordConstLocation(pgssJumbleState *jstate, int location)
List * arbiterElems
Definition: primnodes.h:1497
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:69
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:114
Node * larg
Definition: primnodes.h:1458
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1722
#define ASSUMED_LENGTH_INIT
Oid consttype
Definition: primnodes.h:192
long shared_blks_written
Definition: instrument.h:24
static pgssSharedState * pgss
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:208
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, char *completionTag)
Definition: utility.c:374
void pfree(void *pointer)
Definition: mcxt.c:936
MemoryContext es_query_cxt
Definition: execnodes.h:472
int64 shared_blks_hit
pgssLocationLen * clocations
static int pgss_max
static void entry_dealloc(void)
List * rtable
Definition: parsenodes.h:135
static HTAB * pgss_hash
List * distinctClause
Definition: parsenodes.h:154
Oid funcid
Definition: primnodes.h:449
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:70
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:72
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2392
static void pgss_post_parse_analyze(ParseState *pstate, Query *query)
struct pg_encoding enc
Definition: encode.c:522
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
TableFunc * tablefunc
Definition: parsenodes.h:1011
#define lfirst_int(lc)
Definition: pg_list.h:107
List * partitionClause
Definition: parsenodes.h:1289
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
BoolExprType boolop
Definition: primnodes.h:562
Definition: guc.h:75
void EmitWarningsOnPlaceholders(const char *className)
Definition: guc.c:7895
Expr * arg
Definition: primnodes.h:1187
#define lfirst_node(type, lc)
Definition: pg_list.h:109
#define USAGE_EXEC(duration)
#define APP_JUMB(item)
Node * rowexpr
Definition: primnodes.h:85
Node * limitCount
Definition: parsenodes.h:159
char * c
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:348
int stmt_location
Definition: plannodes.h:97
NodeTag type
Definition: nodes.h:514
static char * buf
Definition: pg_test_fsync.c:67
const ScanKeyword ScanKeywords[]
Definition: keywords.c:41
static void JumbleQuery(pgssJumbleState *jstate, Query *query)
List * exclRelTlist
Definition: primnodes.h:1506
List * refupperindexpr
Definition: primnodes.h:403
void check_stack_depth(void)
Definition: postgres.c:3150
bool IsUnderPostmaster
Definition: globals.c:101
List * reflowerindexpr
Definition: primnodes.h:405
JoinType jointype
Definition: parsenodes.h:994
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
List * aggorder
Definition: primnodes.h:302
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
int errdetail(const char *fmt,...)
Definition: elog.c:873
Expr * arg
Definition: primnodes.h:1210
int errcode_for_file_access(void)
Definition: elog.c:598
ScanDirection
Definition: sdir.h:22
AttrNumber resno
Definition: primnodes.h:1376
Node * utilityStmt
Definition: plannodes.h:94
static char * qtext_load_file(Size *buffer_size)
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2342
static char * generate_normalized_query(pgssJumbleState *jstate, const char *query, int query_loc, int *query_len_p, int encoding)
long shared_blks_dirtied
Definition: instrument.h:23
fmNodePtr resultinfo
Definition: fmgr.h:81
char * cursor_name
Definition: primnodes.h:1285
#define YYLTYPE
Definition: scanner.h:44
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, char *completionTag)
Definition: utility.h:27
unsigned int uint32
Definition: c.h:306
List * aggdirectargs
Definition: primnodes.h:300
static ExecutorFinish_hook_type prev_ExecutorFinish
void _PG_fini(void)
Oid winfnoid
Definition: primnodes.h:355
Expr * arg
Definition: primnodes.h:814
const char * p_sourcetext
Definition: parse_node.h:172
long temp_blks_read
Definition: instrument.h:29
Expr * elemexpr
Definition: primnodes.h:839
Definition: type.h:82
List * returningList
Definition: parsenodes.h:144
int64 temp_blks_written
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
int64 shared_blks_written
#define STICKY_DECREASE_FACTOR
#define ereport(elevel, rest)
Definition: elog.h:122
char * enrname
Definition: parsenodes.h:1048
#define USAGE_DECREASE_FACTOR
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:607
Definition: nodes.h:292
List * newvals
Definition: primnodes.h:772
Definition: guc.h:72
Definition: nodes.h:147
OnConflictAction action
Definition: primnodes.h:1494
bool isNatural
Definition: primnodes.h:1457
static Size pgss_memsize(void)
uint64 queryId
Definition: parsenodes.h:114
int CloseTransientFile(int fd)
Definition: fd.c:2562
#define WARNING
Definition: elog.h:40
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:51
Index varno
Definition: primnodes.h:166
Definition: nodes.h:146
#define stat(a, b)
Definition: win32_port.h:266
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:730
static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query, int query_loc)
XmlExprOp op
Definition: primnodes.h:1149
static int comp_location(const void *a, const void *b)
Node * startOffset
Definition: parsenodes.h:1292
#define SpinLockRelease(lock)
Definition: spin.h:64
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
#define HASH_BLOBS
Definition: hsearch.h:88
int64 local_blks_read
List * args
Definition: primnodes.h:913
static bool need_gc_qtexts(void)
uint64 pg_strtouint64(const char *str, char **endptr, int base)
Definition: numutils.c:405
int location
Definition: primnodes.h:203
Node * quals
Definition: primnodes.h:1461
BoolTestType booltesttype
Definition: primnodes.h:1211
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
static void pgss_shmem_shutdown(int code, Datum arg)
static void gc_qtexts(void)
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
struct pgssLocationLen pgssLocationLen
double sum_var_time
Oid MyDatabaseId
Definition: globals.c:77
PGSSTrackLevel
#define Int64GetDatumFast(X)
Definition: postgres.h:781
bool scanner_isspace(char ch)
Definition: scansup.c:221
Size keysize
Definition: hsearch.h:72
Oid resulttype
Definition: primnodes.h:795
int work_mem
Definition: globals.c:113
BufferUsage bufusage
Definition: instrument.h:63
#define USAGE_INIT
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
NullTestType nulltesttype
Definition: primnodes.h:1188
#define DEFAULT_ROLE_READ_ALL_STATS
Definition: pg_authid.h:108
pgssHashKey key
Oid aggfnoid
Definition: primnodes.h:294
List * colexprs
Definition: primnodes.h:90
List * named_args
Definition: primnodes.h:1151
int allowedModes
Definition: execnodes.h:269
#define PG_STAT_STATEMENTS_COLS
static char * encoding
Definition: initdb.c:123
#define free(a)
Definition: header.h:65
CmdType commandType
Definition: parsenodes.h:110
List * args
Definition: primnodes.h:1153
#define PG_RETURN_VOID()
Definition: fmgr.h:309
bool is_member_of_role(Oid member, Oid role)
Definition: acl.c:4857
#define Float8GetDatumFast(X)
Definition: postgres.h:782
#define DatumGetUInt64(X)
Definition: postgres.h:640
SetFunctionReturnMode returnMode
Definition: execnodes.h:271
#define PG_CATCH()
Definition: elog.h:293
struct Instrumentation * totaltime
Definition: execdesc.h:55
#define Max(x, y)
Definition: c.h:806
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:84
double blk_read_time
Node * rarg
Definition: primnodes.h:1459
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
Expr * arg
Definition: primnodes.h:478
JoinType jointype
Definition: primnodes.h:1456
static bool pgss_track_utility
#define Assert(condition)
Definition: c.h:680
#define lfirst(lc)
Definition: pg_list.h:106
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:548
List * functions
Definition: parsenodes.h:1005
Expr * aggfilter
Definition: primnodes.h:360
int64 local_blks_dirtied
Expr * expr
Definition: primnodes.h:1375
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
int paramid
Definition: primnodes.h:245
uint64 es_processed
Definition: execnodes.h:478
PG_MODULE_MAGIC
instr_time blk_write_time
Definition: instrument.h:32
int64 local_blks_hit
size_t Size
Definition: c.h:414
struct Counters Counters
Node * endOffset
Definition: parsenodes.h:1293
Expr * arg
Definition: primnodes.h:880
Expr * aggfilter
Definition: primnodes.h:304
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1118
SetOperation op
Definition: parsenodes.h:1580
double blk_write_time
Index ctelevelsup
Definition: parsenodes.h:1022
#define MAXALIGN(LEN)
Definition: c.h:633
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, char *completionTag)
#define record_gc_qtexts()
struct pgssJumbleState pgssJumbleState
#define PG_RE_THROW()
Definition: elog.h:314
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1387
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:203
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query)
Definition: analyze.h:20
List * args
Definition: primnodes.h:563
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:208
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:949
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1377
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
#define nodeTag(nodeptr)
Definition: nodes.h:517
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords)
Tuplestorestate * setResult
Definition: execnodes.h:274
static shmem_startup_hook_type prev_shmem_startup_hook
const char * sourceText
Definition: execdesc.h:38
RTEKind rtekind
Definition: parsenodes.h:951
static core_yyscan_t yyscanner
Definition: pl_scanner.c:210
int FreeFile(FILE *file)
Definition: fd.c:2534
static Datum values[MAXATTR]
Definition: bootstrap.c:164
List * orderClause
Definition: parsenodes.h:1290
Node * arbiterWhere
Definition: primnodes.h:1499
ExprContext * econtext
Definition: execnodes.h:267
char * ctename
Definition: parsenodes.h:1021
List * cteList
Definition: parsenodes.h:133
Node * setOperations
Definition: parsenodes.h:163
e
Definition: preproc-init.c:82
Query * subquery
Definition: parsenodes.h:974
List * groupClause
Definition: parsenodes.h:146
TupleDesc setDesc
Definition: execnodes.h:275
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
void * palloc(Size size)
Definition: mcxt.c:835
uint64 queryId
Definition: plannodes.h:47
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, double total_time, uint64 rows, const BufferUsage *bufusage, pgssJumbleState *jstate)
long shared_blks_hit
Definition: instrument.h:21
Datum pg_stat_statements(PG_FUNCTION_ARGS)
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:69
long local_blks_written
Definition: instrument.h:28
int i
List * onConflictSet
Definition: primnodes.h:1503
bool escape_string_warning
Definition: scanner.h:88
static const uint32 PGSS_FILE_HEADER
Index ressortgroupref
Definition: primnodes.h:1378
#define CStringGetTextDatum(s)
Definition: builtins.h:91
void * arg
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
MinMaxOp op
Definition: primnodes.h:1073
void scanner_finish(core_yyscan_t yyscanner)
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:71
Expr * arg
Definition: primnodes.h:912
Oid opno
Definition: primnodes.h:496
#define elog
Definition: elog.h:219
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:317
Expr * result
Definition: primnodes.h:925
struct pgssHashKey pgssHashKey
PlannedStmt * plannedstmt
Definition: execdesc.h:37
#define qsort(a, b, c, d)
Definition: port.h:408
List * args
Definition: primnodes.h:502
Node * havingQual
Definition: parsenodes.h:150
Expr * defresult
Definition: primnodes.h:914
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:561
Expr * expr
Definition: primnodes.h:924
#define PG_TRY()
Definition: elog.h:284
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
Node * onConflictWhere
Definition: primnodes.h:1504
#define PGSS_TEXT_FILE
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:7756
int rtindex
Definition: primnodes.h:1463
Definition: pg_list.h:45
struct TableSampleClause * tablesample
Definition: parsenodes.h:969
int stmt_len
Definition: parsenodes.h:180
static int nested_level
Oid paramtype
Definition: primnodes.h:246
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:49
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define PG_END_TRY()
Definition: elog.h:300
#define ASSUMED_MEDIAN_INIT
#define read(a, b, c)
Definition: win32.h:13
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1463
Datum hash_any_extended(register const unsigned char *k, register int keylen, uint64 seed)
Definition: hashfunc.c:634
static const struct config_enum_entry track_options[]
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define PG_STAT_STATEMENTS_COLS_V1_2
static bool pgss_save
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
Expr * refexpr
Definition: primnodes.h:408
Definition: nodes.h:148
AttrNumber fieldnum
Definition: primnodes.h:743
#define ftruncate(a, b)
Definition: win32_port.h:60