PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_stat_statements.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_stat_statements.c
4 * Track statement planning and execution times as well as resource
5 * usage across a whole database cluster.
6 *
7 * Execution costs are totaled for each distinct source query, and kept in
8 * a shared hashtable. (We track only as many distinct queries as will fit
9 * in the designated amount of shared memory.)
10 *
11 * Starting in Postgres 9.2, this module normalized query entries. As of
12 * Postgres 14, the normalization is done by the core if compute_query_id is
13 * enabled, or optionally by third-party modules.
14 *
15 * To facilitate presenting entries to users, we create "representative" query
16 * strings in which constants are replaced with parameter symbols ($n), to
17 * make it clearer what a normalized entry can represent. To save on shared
18 * memory, and to avoid having to truncate oversized query strings, we store
19 * these strings in a temporary external query-texts file. Offsets into this
20 * file are kept in shared memory.
21 *
22 * Note about locking issues: to create or delete an entry in the shared
23 * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 * in an entry except the counters requires the same. To look up an entry,
25 * one must hold the lock shared. To read or update the counters within
26 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 * disappear!) and also take the entry's mutex spinlock.
28 * The shared state variable pgss->extent (the next free spot in the external
29 * query-text file) should be accessed only while holding either the
30 * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 * allow reserving file space while holding only shared lock on pgss->lock.
32 * Rewriting the entire external query-text file, eg for garbage collection,
33 * requires holding pgss->lock exclusively; this allows individual entries
34 * in the file to be read or written while holding only shared lock.
35 *
36 *
37 * Copyright (c) 2008-2024, PostgreSQL Global Development Group
38 *
39 * IDENTIFICATION
40 * contrib/pg_stat_statements/pg_stat_statements.c
41 *
42 *-------------------------------------------------------------------------
43 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/parallel.h"
51#include "catalog/pg_authid.h"
52#include "common/int.h"
53#include "executor/instrument.h"
54#include "funcapi.h"
55#include "jit/jit.h"
56#include "mb/pg_wchar.h"
57#include "miscadmin.h"
58#include "nodes/queryjumble.h"
59#include "optimizer/planner.h"
60#include "parser/analyze.h"
61#include "parser/scanner.h"
62#include "pgstat.h"
63#include "storage/fd.h"
64#include "storage/ipc.h"
65#include "storage/lwlock.h"
66#include "storage/shmem.h"
67#include "storage/spin.h"
68#include "tcop/utility.h"
69#include "utils/acl.h"
70#include "utils/builtins.h"
71#include "utils/memutils.h"
72#include "utils/timestamp.h"
73
75
76/* Location of permanent stats file (valid when database is shut down) */
77#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
78
79/*
80 * Location of external query text file.
81 */
82#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
83
84/* Magic number identifying the stats file format */
85static const uint32 PGSS_FILE_HEADER = 0x20220408;
86
87/* PostgreSQL major version number, changes in which invalidate all entries */
88static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
89
90/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
91#define USAGE_EXEC(duration) (1.0)
92#define USAGE_INIT (1.0) /* including initial planning */
93#define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
94#define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
95#define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
96#define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
97#define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
/*
 * An entry is "sticky" while it has never been planned or executed, i.e.
 * both call counters are still zero.  The argument is parenthesized so that
 * any expression yielding a Counters value (for instance *ptr) can be passed.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
99
100/*
101 * Extension version number, for supporting older extension versions' objects
102 */
103typedef enum pgssVersion
104{
115
116typedef enum pgssStoreKind
117{
119
120 /*
121 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
122 * reference the underlying values in the arrays in the Counters struct,
123 * and this order is required in pg_stat_statements_internal().
124 */
128
129#define PGSS_NUMKIND (PGSS_EXEC + 1)
130
131/*
132 * Hashtable key that defines the identity of a hashtable entry. We separate
133 * queries by user and by database even if they are otherwise identical.
134 *
135 * If you add a new field to this struct, make sure to teach pgss_store() to
136 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
137 * created using HASH_BLOBS, and thus tag_hash is used to hash this; it hashes
138 * the key's raw bytes, so uninitialized padding would change the hash.
139 */
140typedef struct pgssHashKey
141{
142 Oid userid; /* user OID */
143 Oid dbid; /* database OID */
144 uint64 queryid; /* query identifier */
145 bool toplevel; /* query executed at top level */
147
148/*
149 * The actual stats counters kept within pgssEntry.
150 */
151typedef struct Counters
152{
153 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
154 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
155 * in msec */
156 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
157 * msec since min/max reset */
158 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
159 * msec since min/max reset */
160 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
161 * msec */
162 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
163 * planning/execution time in msec */
164 int64 rows; /* total # of retrieved or affected rows */
165 int64 shared_blks_hit; /* # of shared buffer hits */
166 int64 shared_blks_read; /* # of shared disk blocks read */
167 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
168 int64 shared_blks_written; /* # of shared disk blocks written */
169 int64 local_blks_hit; /* # of local buffer hits */
170 int64 local_blks_read; /* # of local disk blocks read */
171 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
172 int64 local_blks_written; /* # of local disk blocks written */
173 int64 temp_blks_read; /* # of temp blocks read */
174 int64 temp_blks_written; /* # of temp blocks written */
175 double shared_blk_read_time; /* time spent reading shared blocks,
176 * in msec */
177 double shared_blk_write_time; /* time spent writing shared blocks,
178 * in msec */
179 double local_blk_read_time; /* time spent reading local blocks, in
180 * msec */
181 double local_blk_write_time; /* time spent writing local blocks, in
182 * msec */
183 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
184 double temp_blk_write_time; /* time spent writing temp blocks, in
185 * msec */
186 double usage; /* usage factor */
187 int64 wal_records; /* # of WAL records generated */
188 int64 wal_fpi; /* # of WAL full page images generated */
189 uint64 wal_bytes; /* total amount of WAL generated in bytes */
190 int64 jit_functions; /* total number of JIT functions emitted */
191 double jit_generation_time; /* total time to generate jit code */
192 int64 jit_inlining_count; /* number of times inlining time has been
193 * > 0 */
194 double jit_deform_time; /* total time to deform tuples in jit code */
195 int64 jit_deform_count; /* number of times deform time has been >
196 * 0 */
197
198 double jit_inlining_time; /* total time to inline jit code */
199 int64 jit_optimization_count; /* number of times optimization time
200 * has been > 0 */
201 double jit_optimization_time; /* total time to optimize jit code */
202 int64 jit_emission_count; /* number of times emission time has been
203 * > 0 */
204 double jit_emission_time; /* total time to emit jit code */
205 int64 parallel_workers_to_launch; /* # of parallel workers planned
206 * to be launched */
207 int64 parallel_workers_launched; /* # of parallel workers actually
208 * launched */
210
211/*
212 * Global statistics for pg_stat_statements
213 */
214typedef struct pgssGlobalStats
215{
216 int64 dealloc; /* # of times entries were deallocated */
217 TimestampTz stats_reset; /* timestamp with all stats reset */
219
220/*
221 * Statistics per statement
222 *
223 * Note: in event of a failure in garbage collection of the query text file,
224 * we reset query_offset to zero and query_len to -1. This will be seen as
225 * an invalid state by qtext_fetch().
226 */
227typedef struct pgssEntry
228{
229 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
230 Counters counters; /* the statistics for this query */
231 Size query_offset; /* query text offset in external file */
232 int query_len; /* # of valid bytes in query string, or -1 */
233 int encoding; /* query text encoding */
234 TimestampTz stats_since; /* timestamp of entry allocation */
235 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
236 slock_t mutex; /* protects the counters only */
238
239/*
240 * Global shared state
241 */
242typedef struct pgssSharedState
243{
244 LWLock *lock; /* protects hashtable search/modification */
245 double cur_median_usage; /* current median usage in hashtable */
246 Size mean_query_len; /* current mean entry text length */
247 slock_t mutex; /* protects following fields only: */
248 Size extent; /* current extent of query file */
249 int n_writers; /* number of active writers to query file */
250 int gc_count; /* query file garbage collection cycle count */
251 pgssGlobalStats stats; /* global statistics for pgss */
253
254/*---- Local variables ----*/
255
256/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
257static int nesting_level = 0;
258
259/* Saved hook values */
269
270/* Links to shared memory state */
271static pgssSharedState *pgss = NULL;
272static HTAB *pgss_hash = NULL;
273
274/*---- GUC variables ----*/
275
276typedef enum
277{
278 PGSS_TRACK_NONE, /* track no statements */
279 PGSS_TRACK_TOP, /* only top level statements */
280 PGSS_TRACK_ALL, /* all statements, including nested ones */
282
283static const struct config_enum_entry track_options[] =
284{
285 {"none", PGSS_TRACK_NONE, false},
286 {"top", PGSS_TRACK_TOP, false},
287 {"all", PGSS_TRACK_ALL, false},
288 {NULL, 0, false}
289};
290
291static int pgss_max = 5000; /* max # statements to track */
292static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
293static bool pgss_track_utility = true; /* whether to track utility commands */
294static bool pgss_track_planning = false; /* whether to track planning
295 * duration */
296static bool pgss_save = true; /* whether to save stats across shutdown */
297
298
/*
 * True when a statement running at nesting depth 'level' should be tracked.
 * Parallel workers never track.  Otherwise tracking is on at every depth
 * when pgss_track is PGSS_TRACK_ALL, and only at depth 0 when it is
 * PGSS_TRACK_TOP; any other setting (PGSS_TRACK_NONE) yields false.
 */
299#define pgss_enabled(level) \
300 (!IsParallelWorker() && \
301 (pgss_track == PGSS_TRACK_ALL || \
302 (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
303
/*
 * Increment pgss->gc_count (the query file garbage collection cycle count)
 * while holding the pgss->mutex spinlock.  Wrapped in do { } while(0) so the
 * macro can be used as a single statement.
 */
304#define record_gc_qtexts() \
305 do { \
306 SpinLockAcquire(&pgss->mutex); \
307 pgss->gc_count++; \
308 SpinLockRelease(&pgss->mutex); \
309 } while(0)
310
311/*---- Function declarations ----*/
312
325
326static void pgss_shmem_request(void);
327static void pgss_shmem_startup(void);
328static void pgss_shmem_shutdown(int code, Datum arg);
329static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
330 JumbleState *jstate);
332 const char *query_string,
333 int cursorOptions,
334 ParamListInfo boundParams);
335static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
336static void pgss_ExecutorRun(QueryDesc *queryDesc,
337 ScanDirection direction,
338 uint64 count);
339static void pgss_ExecutorFinish(QueryDesc *queryDesc);
340static void pgss_ExecutorEnd(QueryDesc *queryDesc);
341static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
342 bool readOnlyTree,
344 QueryEnvironment *queryEnv,
346static void pgss_store(const char *query, uint64 queryId,
347 int query_location, int query_len,
348 pgssStoreKind kind,
349 double total_time, uint64 rows,
350 const BufferUsage *bufusage,
351 const WalUsage *walusage,
352 const struct JitInstrumentation *jitusage,
353 JumbleState *jstate,
354 int parallel_workers_to_launch,
355 int parallel_workers_launched);
357 pgssVersion api_version,
358 bool showtext);
359static Size pgss_memsize(void);
360static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
361 int encoding, bool sticky);
362static void entry_dealloc(void);
363static bool qtext_store(const char *query, int query_len,
364 Size *query_offset, int *gc_count);
365static char *qtext_load_file(Size *buffer_size);
366static char *qtext_fetch(Size query_offset, int query_len,
367 char *buffer, Size buffer_size);
368static bool need_gc_qtexts(void);
369static void gc_qtexts(void);
370static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
371static char *generate_normalized_query(JumbleState *jstate, const char *query,
372 int query_loc, int *query_len_p);
373static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
374 int query_loc);
375static int comp_location(const void *a, const void *b);
376
377
378/*
379 * Module load callback
380 */
381void
383{
384 /*
385 * In order to create our shared memory area, we have to be loaded via
386 * shared_preload_libraries. If not, fall out without hooking into any of
387 * the main system. (We don't throw error here because it seems useful to
388 * allow the pg_stat_statements functions to be created even when the
389 * module isn't active. The functions must protect themselves against
390 * being called then, however.)
391 */
393 return;
394
395 /*
396 * Inform the postmaster that we want to enable query_id calculation if
397 * compute_query_id is set to auto.
398 */
400
401 /*
402 * Define (or redefine) custom GUC variables.
403 */
404 DefineCustomIntVariable("pg_stat_statements.max",
405 "Sets the maximum number of statements tracked by pg_stat_statements.",
406 NULL,
407 &pgss_max,
408 5000,
409 100,
410 INT_MAX / 2,
412 0,
413 NULL,
414 NULL,
415 NULL);
416
417 DefineCustomEnumVariable("pg_stat_statements.track",
418 "Selects which statements are tracked by pg_stat_statements.",
419 NULL,
420 &pgss_track,
423 PGC_SUSET,
424 0,
425 NULL,
426 NULL,
427 NULL);
428
429 DefineCustomBoolVariable("pg_stat_statements.track_utility",
430 "Selects whether utility commands are tracked by pg_stat_statements.",
431 NULL,
433 true,
434 PGC_SUSET,
435 0,
436 NULL,
437 NULL,
438 NULL);
439
440 DefineCustomBoolVariable("pg_stat_statements.track_planning",
441 "Selects whether planning duration is tracked by pg_stat_statements.",
442 NULL,
444 false,
445 PGC_SUSET,
446 0,
447 NULL,
448 NULL,
449 NULL);
450
451 DefineCustomBoolVariable("pg_stat_statements.save",
452 "Save pg_stat_statements statistics across server shutdowns.",
453 NULL,
454 &pgss_save,
455 true,
457 0,
458 NULL,
459 NULL,
460 NULL);
461
462 MarkGUCPrefixReserved("pg_stat_statements");
463
464 /*
465 * Install hooks.
466 */
485}
486
487/*
488 * shmem_request hook: request additional shared resources. We'll allocate or
489 * attach to the shared resources in pgss_shmem_startup().
490 */
491static void
493{
496
498 RequestNamedLWLockTranche("pg_stat_statements", 1);
499}
500
501/*
502 * shmem_startup hook: allocate or attach to shared memory,
503 * then load any pre-existing statistics from file.
504 * Also create and load the query-texts file, which is expected to exist
505 * (even if empty) while the module is enabled.
506 */
507static void
509{
510 bool found;
511 HASHCTL info;
512 FILE *file = NULL;
513 FILE *qfile = NULL;
514 uint32 header;
515 int32 num;
516 int32 pgver;
517 int32 i;
518 int buffer_size;
519 char *buffer = NULL;
520
523
524 /* reset in case this is a restart within the postmaster */
525 pgss = NULL;
526 pgss_hash = NULL;
527
528 /*
529 * Create or attach to the shared memory state, including hash table
530 */
531 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
532
533 pgss = ShmemInitStruct("pg_stat_statements",
534 sizeof(pgssSharedState),
535 &found);
536
537 if (!found)
538 {
539 /* First time through ... */
540 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
544 pgss->extent = 0;
545 pgss->n_writers = 0;
546 pgss->gc_count = 0;
547 pgss->stats.dealloc = 0;
549 }
550
551 info.keysize = sizeof(pgssHashKey);
552 info.entrysize = sizeof(pgssEntry);
553 pgss_hash = ShmemInitHash("pg_stat_statements hash",
555 &info,
557
558 LWLockRelease(AddinShmemInitLock);
559
560 /*
561 * If we're in the postmaster (or a standalone backend...), set up a shmem
562 * exit hook to dump the statistics to disk.
563 */
566
567 /*
568 * Done if some other process already completed our initialization.
569 */
570 if (found)
571 return;
572
573 /*
574 * Note: we don't bother with locks here, because there should be no other
575 * processes running when this code is reached.
576 */
577
578 /* Unlink query text file possibly left over from crash */
579 unlink(PGSS_TEXT_FILE);
580
581 /* Allocate new query text temp file */
583 if (qfile == NULL)
584 goto write_error;
585
586 /*
587 * If we were told not to load old statistics, we're done. (Note we do
588 * not try to unlink any old dump file in this case. This seems a bit
589 * questionable but it's the historical behavior.)
590 */
591 if (!pgss_save)
592 {
593 FreeFile(qfile);
594 return;
595 }
596
597 /*
598 * Attempt to load old statistics from the dump file.
599 */
601 if (file == NULL)
602 {
603 if (errno != ENOENT)
604 goto read_error;
605 /* No existing persisted stats file, so we're done */
606 FreeFile(qfile);
607 return;
608 }
609
610 buffer_size = 2048;
611 buffer = (char *) palloc(buffer_size);
612
613 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
614 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
615 fread(&num, sizeof(int32), 1, file) != 1)
616 goto read_error;
617
618 if (header != PGSS_FILE_HEADER ||
619 pgver != PGSS_PG_MAJOR_VERSION)
620 goto data_error;
621
622 for (i = 0; i < num; i++)
623 {
624 pgssEntry temp;
625 pgssEntry *entry;
626 Size query_offset;
627
628 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
629 goto read_error;
630
631 /* Encoding is the only field we can easily sanity-check */
633 goto data_error;
634
635 /* Resize buffer as needed */
636 if (temp.query_len >= buffer_size)
637 {
638 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
639 buffer = repalloc(buffer, buffer_size);
640 }
641
642 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
643 goto read_error;
644
645 /* Should have a trailing null, but let's make sure */
646 buffer[temp.query_len] = '\0';
647
648 /* Skip loading "sticky" entries */
649 if (IS_STICKY(temp.counters))
650 continue;
651
652 /* Store the query text */
653 query_offset = pgss->extent;
654 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
655 goto write_error;
656 pgss->extent += temp.query_len + 1;
657
658 /* make the hashtable entry (discards old entries if too many) */
659 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
660 temp.encoding,
661 false);
662
663 /* copy in the actual stats */
664 entry->counters = temp.counters;
665 entry->stats_since = temp.stats_since;
667 }
668
669 /* Read global statistics for pg_stat_statements */
670 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
671 goto read_error;
672
673 pfree(buffer);
674 FreeFile(file);
675 FreeFile(qfile);
676
677 /*
678 * Remove the persisted stats file so it's not included in
679 * backups/replication standbys, etc. A new file will be written on next
680 * shutdown.
681 *
682 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
683 * because we remove that file on startup; it acts inversely to
684 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
685 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
686 * when the server is not running. Leaving the file creates no danger of
687 * a newly restored database having a spurious record of execution costs,
688 * which is what we're really concerned about here.
689 */
690 unlink(PGSS_DUMP_FILE);
691
692 return;
693
694read_error:
695 ereport(LOG,
697 errmsg("could not read file \"%s\": %m",
699 goto fail;
700data_error:
701 ereport(LOG,
702 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
703 errmsg("ignoring invalid data in file \"%s\"",
705 goto fail;
706write_error:
707 ereport(LOG,
709 errmsg("could not write file \"%s\": %m",
711fail:
712 if (buffer)
713 pfree(buffer);
714 if (file)
715 FreeFile(file);
716 if (qfile)
717 FreeFile(qfile);
718 /* If possible, throw away the bogus file; ignore any error */
719 unlink(PGSS_DUMP_FILE);
720
721 /*
722 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
723 * server is running with pg_stat_statements enabled
724 */
725}
726
727/*
 * shmem_shutdown hook: Dump statistics into file.
 *
 * Writes, in order: the file-format magic number, the PostgreSQL major
 * version, the number of hashtable entries, each entry followed by its
 * query text, and finally the global statistics.  Nothing is written when
 * 'code' is nonzero, when the shared state is not set up, or when pgss_save
 * is off.  On any failure the problem is logged at LOG level and both the
 * temporary dump file (PGSS_DUMP_FILE ".tmp") and the query-texts file are
 * unlinked.
 *
 * Note: we don't bother with acquiring lock, because there should be no
 * other processes running when this is called.
 *
 * NOTE(review): this listing seems to have lost a few lines of the function
 * (its name/parameter line, the statement that assigns 'file', and the
 * rename step announced near the end) -- confirm against the upstream file.
 */
733static void
735{
736 FILE *file;
737 char *qbuffer = NULL;
738 Size qbuffer_size = 0;
739 HASH_SEQ_STATUS hash_seq;
740 int32 num_entries;
741 pgssEntry *entry;
742
743 /* Don't try to dump during a crash. */
744 if (code)
745 return;
746
747 /* Safety check ... shouldn't get here unless shmem is set up. */
748 if (!pgss || !pgss_hash)
749 return;
750
751 /* Don't dump if told not to. */
752 if (!pgss_save)
753 return;
754
756 if (file == NULL)
757 goto error;
758
 /* Header: format magic number, then server major version, then count. */
759 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
760 goto error;
761 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
762 goto error;
 /*
  * NOTE(review): this count is taken before the loop below, which skips
  * entries whose text cannot be fetched; the file may then hold fewer
  * records than the count says -- confirm the reader copes with that.
  */
763 num_entries = hash_get_num_entries(pgss_hash);
764 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
765 goto error;
766
 /* Load the query texts; a NULL result is treated as a failure. */
767 qbuffer = qtext_load_file(&qbuffer_size);
768 if (qbuffer == NULL)
769 goto error;
770
771 /*
772 * When serializing to disk, we store query texts immediately after their
773 * entry data. Any orphaned query texts are thereby excluded.
774 */
775 hash_seq_init(&hash_seq, pgss_hash);
776 while ((entry = hash_seq_search(&hash_seq)) != NULL)
777 {
778 int len = entry->query_len;
779 char *qstr = qtext_fetch(entry->query_offset, len,
780 qbuffer, qbuffer_size);
781
782 if (qstr == NULL)
783 continue; /* Ignore any entries with bogus texts */
784
 /* len + 1 bytes: presumably the text plus its trailing NUL -- confirm. */
785 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
786 fwrite(qstr, 1, len + 1, file) != len + 1)
787 {
788 /* note: we assume hash_seq_term won't change errno */
789 hash_seq_term(&hash_seq);
790 goto error;
791 }
792 }
793
794 /* Dump global statistics for pg_stat_statements */
795 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
796 goto error;
797
 /* Clear the pointer so the error path's free() below is harmless. */
798 free(qbuffer);
799 qbuffer = NULL;
800
 /* A nonzero result from FreeFile() is treated as a write failure. */
801 if (FreeFile(file))
802 {
 /* Clear it so the error path does not call FreeFile() again. */
803 file = NULL;
804 goto error;
805 }
806
807 /*
808 * Rename file into place, so we atomically replace any old one.
809 */
811
812 /* Unlink query-texts file; it's not needed once the server is shut down */
813 unlink(PGSS_TEXT_FILE);
814
815 return;
816
817error:
818 ereport(LOG,
820 errmsg("could not write file \"%s\": %m",
821 PGSS_DUMP_FILE ".tmp")));
822 free(qbuffer);
823 if (file)
824 FreeFile(file);
 /* Remove the partial dump and the query-texts file. */
825 unlink(PGSS_DUMP_FILE ".tmp");
826 unlink(PGSS_TEXT_FILE);
827}
828
829/*
830 * Post-parse-analysis hook: mark query with a queryId
831 */
832static void
834{
836 prev_post_parse_analyze_hook(pstate, query, jstate);
837
838 /* Safety check... */
840 return;
841
842 /*
843 * If it's EXECUTE, clear the queryId so that stats will accumulate for
844 * the underlying PREPARE. But don't do this if we're not tracking
845 * utility statements, to avoid messing up another extension that might be
846 * tracking them.
847 */
848 if (query->utilityStmt)
849 {
851 {
852 query->queryId = UINT64CONST(0);
853 return;
854 }
855 }
856
857 /*
858 * If query jumbling were able to identify any ignorable constants, we
859 * immediately create a hash table entry for the query, so that we can
860 * record the normalized form of the query string. If there were no such
861 * constants, the normalized string would be the same as the query text
862 * anyway, so there's no need for an early entry.
863 */
864 if (jstate && jstate->clocations_count > 0)
865 pgss_store(pstate->p_sourcetext,
866 query->queryId,
867 query->stmt_location,
868 query->stmt_len,
870 0,
871 0,
872 NULL,
873 NULL,
874 NULL,
875 jstate,
876 0,
877 0);
878}
879
880/*
881 * Planner hook: forward to regular planner, but measure planning time
882 * if needed.
883 */
884static PlannedStmt *
886 const char *query_string,
887 int cursorOptions,
888 ParamListInfo boundParams)
889{
890 PlannedStmt *result;
891
892 /*
893 * We can't process the query if no query_string is provided, as
894 * pgss_store needs it. We also ignore query without queryid, as it would
895 * be treated as a utility statement, which may not be the case.
896 */
898 && pgss_track_planning && query_string
899 && parse->queryId != UINT64CONST(0))
900 {
903 BufferUsage bufusage_start,
904 bufusage;
905 WalUsage walusage_start,
906 walusage;
907
908 /* We need to track buffer usage as the planner can access them. */
909 bufusage_start = pgBufferUsage;
910
911 /*
912 * Similarly the planner could write some WAL records in some cases
913 * (e.g. setting a hint bit with those being WAL-logged)
914 */
915 walusage_start = pgWalUsage;
917
919 PG_TRY();
920 {
922 result = prev_planner_hook(parse, query_string, cursorOptions,
923 boundParams);
924 else
925 result = standard_planner(parse, query_string, cursorOptions,
926 boundParams);
927 }
928 PG_FINALLY();
929 {
931 }
932 PG_END_TRY();
933
936
937 /* calc differences of buffer counters. */
938 memset(&bufusage, 0, sizeof(BufferUsage));
939 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
940
941 /* calc differences of WAL counters. */
942 memset(&walusage, 0, sizeof(WalUsage));
943 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
944
945 pgss_store(query_string,
946 parse->queryId,
947 parse->stmt_location,
948 parse->stmt_len,
949 PGSS_PLAN,
951 0,
952 &bufusage,
953 &walusage,
954 NULL,
955 NULL,
956 0,
957 0);
958 }
959 else
960 {
961 /*
962 * Even though we're not tracking plan time for this statement, we
963 * must still increment the nesting level, to ensure that functions
964 * evaluated during planning are not seen as top-level calls.
965 */
967 PG_TRY();
968 {
970 result = prev_planner_hook(parse, query_string, cursorOptions,
971 boundParams);
972 else
973 result = standard_planner(parse, query_string, cursorOptions,
974 boundParams);
975 }
976 PG_FINALLY();
977 {
979 }
980 PG_END_TRY();
981 }
982
983 return result;
984}
985
986/*
987 * ExecutorStart hook: start up tracking if needed
988 */
989static void
990pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
991{
993 prev_ExecutorStart(queryDesc, eflags);
994 else
995 standard_ExecutorStart(queryDesc, eflags);
996
997 /*
998 * If query has queryId zero, don't track it. This prevents double
999 * counting of optimizable statements that are directly contained in
1000 * utility statements.
1001 */
1002 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
1003 {
1004 /*
1005 * Set up to track total elapsed time in ExecutorRun. Make sure the
1006 * space is allocated in the per-query context so it will go away at
1007 * ExecutorEnd.
1008 */
1009 if (queryDesc->totaltime == NULL)
1010 {
1011 MemoryContext oldcxt;
1012
1013 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1014 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1015 MemoryContextSwitchTo(oldcxt);
1016 }
1017 }
1018}
1019
1020/*
 * ExecutorRun hook: all we need do is track nesting depth.
 *
 * nesting_level is raised for the duration of the call, so anything executed
 * from inside it is seen at a deeper level than our caller.  The call itself
 * is forwarded unchanged to the previously installed hook when there is one,
 * otherwise to standard_ExecutorRun().
 */
1023static void
1024pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1025{
 /* Everything run below is one level deeper than our caller. */
1026 nesting_level++;
1027 PG_TRY();
1028 {
1029 if (prev_ExecutorRun)
1030 prev_ExecutorRun(queryDesc, direction, count);
1031 else
1032 standard_ExecutorRun(queryDesc, direction, count);
1033 }
1034 PG_FINALLY();
1035 {
 /* Undo the increment whether the call returned or raised an error. */
1036 nesting_level--;
1037 }
1038 PG_END_TRY();
1039}
1040
1041/*
1042 * ExecutorFinish hook: all we need do is track nesting depth
1043 */
1044static void
1046{
1047 nesting_level++;
1048 PG_TRY();
1049 {
1051 prev_ExecutorFinish(queryDesc);
1052 else
1053 standard_ExecutorFinish(queryDesc);
1054 }
1055 PG_FINALLY();
1056 {
1057 nesting_level--;
1058 }
1059 PG_END_TRY();
1060}
1061
1062/*
1063 * ExecutorEnd hook: store results if needed
1064 */
1065static void
1067{
1068 uint64 queryId = queryDesc->plannedstmt->queryId;
1069
1070 if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1072 {
1073 /*
1074 * Make sure stats accumulation is done. (Note: it's okay if several
1075 * levels of hook all do this.)
1076 */
1077 InstrEndLoop(queryDesc->totaltime);
1078
1079 pgss_store(queryDesc->sourceText,
1080 queryId,
1081 queryDesc->plannedstmt->stmt_location,
1082 queryDesc->plannedstmt->stmt_len,
1083 PGSS_EXEC,
1084 queryDesc->totaltime->total * 1000.0, /* convert to msec */
1085 queryDesc->estate->es_total_processed,
1086 &queryDesc->totaltime->bufusage,
1087 &queryDesc->totaltime->walusage,
1088 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1089 NULL,
1092 }
1093
1094 if (prev_ExecutorEnd)
1095 prev_ExecutorEnd(queryDesc);
1096 else
1097 standard_ExecutorEnd(queryDesc);
1098}
1099
1100/*
1101 * ProcessUtility hook
1102 */
1103static void
1104pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1105 bool readOnlyTree,
1107 ParamListInfo params, QueryEnvironment *queryEnv,
1109{
1110 Node *parsetree = pstmt->utilityStmt;
1111 uint64 saved_queryId = pstmt->queryId;
1112 int saved_stmt_location = pstmt->stmt_location;
1113 int saved_stmt_len = pstmt->stmt_len;
1115
1116 /*
1117 * Force utility statements to get queryId zero. We do this even in cases
1118 * where the statement contains an optimizable statement for which a
1119 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1120 * cases, runtime control will first go through ProcessUtility and then
1121 * the executor, and we don't want the executor hooks to do anything,
1122 * since we are already measuring the statement's costs at the utility
1123 * level.
1124 *
1125 * Note that this is only done if pg_stat_statements is enabled and
1126 * configured to track utility statements, in the unlikely possibility
1127 * that user configured another extension to handle utility statements
1128 * only.
1129 */
1130 if (enabled)
1131 pstmt->queryId = UINT64CONST(0);
1132
1133 /*
1134 * If it's an EXECUTE statement, we don't track it and don't increment the
1135 * nesting level. This allows the cycles to be charged to the underlying
1136 * PREPARE instead (by the Executor hooks), which is much more useful.
1137 *
1138 * We also don't track execution of PREPARE. If we did, we would get one
1139 * hash table entry for the PREPARE (with hash calculated from the query
1140 * string), and then a different one with the same query string (but hash
1141 * calculated from the query tree) would be used to accumulate costs of
1142 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1143 * actually run the planner (only parse+rewrite), its costs are generally
1144 * pretty negligible and it seems okay to just ignore it.
1145 */
1146 if (enabled &&
1147 !IsA(parsetree, ExecuteStmt) &&
1148 !IsA(parsetree, PrepareStmt))
1149 {
1152 uint64 rows;
1153 BufferUsage bufusage_start,
1154 bufusage;
1155 WalUsage walusage_start,
1156 walusage;
1157
1158 bufusage_start = pgBufferUsage;
1159 walusage_start = pgWalUsage;
1161
1162 nesting_level++;
1163 PG_TRY();
1164 {
1166 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1167 context, params, queryEnv,
1168 dest, qc);
1169 else
1170 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1171 context, params, queryEnv,
1172 dest, qc);
1173 }
1174 PG_FINALLY();
1175 {
1176 nesting_level--;
1177 }
1178 PG_END_TRY();
1179
1180 /*
1181 * CAUTION: do not access the *pstmt data structure again below here.
1182 * If it was a ROLLBACK or similar, that data structure may have been
1183 * freed. We must copy everything we still need into local variables,
1184 * which we did above.
1185 *
1186 * For the same reason, we can't risk restoring pstmt->queryId to its
1187 * former value, which'd otherwise be a good idea.
1188 */
1189
1192
1193 /*
1194 * Track the total number of rows retrieved or affected by the utility
1195 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1196 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1197 */
1198 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1199 qc->commandTag == CMDTAG_FETCH ||
1200 qc->commandTag == CMDTAG_SELECT ||
1201 qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1202 qc->nprocessed : 0;
1203
1204 /* calc differences of buffer counters. */
1205 memset(&bufusage, 0, sizeof(BufferUsage));
1206 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1207
1208 /* calc differences of WAL counters. */
1209 memset(&walusage, 0, sizeof(WalUsage));
1210 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1211
1212 pgss_store(queryString,
1213 saved_queryId,
1214 saved_stmt_location,
1215 saved_stmt_len,
1216 PGSS_EXEC,
1218 rows,
1219 &bufusage,
1220 &walusage,
1221 NULL,
1222 NULL,
1223 0,
1224 0);
1225 }
1226 else
1227 {
1228 /*
1229 * Even though we're not tracking execution time for this statement,
1230 * we must still increment the nesting level, to ensure that functions
1231 * evaluated within it are not seen as top-level calls. But don't do
1232 * so for EXECUTE; that way, when control reaches pgss_planner or
1233 * pgss_ExecutorStart, we will treat the costs as top-level if
1234 * appropriate. Likewise, don't bump for PREPARE, so that parse
1235 * analysis will treat the statement as top-level if appropriate.
1236 *
1237 * To be absolutely certain we don't mess up the nesting level,
1238 * evaluate the bump_level condition just once.
1239 */
1240 bool bump_level =
1241 !IsA(parsetree, ExecuteStmt) &&
1242 !IsA(parsetree, PrepareStmt);
1243
1244 if (bump_level)
1245 nesting_level++;
1246 PG_TRY();
1247 {
1249 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1250 context, params, queryEnv,
1251 dest, qc);
1252 else
1253 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1254 context, params, queryEnv,
1255 dest, qc);
1256 }
1257 PG_FINALLY();
1258 {
1259 if (bump_level)
1260 nesting_level--;
1261 }
1262 PG_END_TRY();
1263 }
1264}
1265
1266/*
1267 * Store some statistics for a statement.
1268 *
1269 * If jstate is not NULL then we're trying to create an entry for which
1270 * we have no statistics as yet; we just want to record the normalized
1271 * query string. total_time, rows, bufusage and walusage are ignored in this
1272 * case.
1273 *
1274 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1275 * for the arrays in the Counters field.
1276 */
1277static void
1278pgss_store(const char *query, uint64 queryId,
1279 int query_location, int query_len,
1280 pgssStoreKind kind,
1281 double total_time, uint64 rows,
1282 const BufferUsage *bufusage,
1283 const WalUsage *walusage,
1284 const struct JitInstrumentation *jitusage,
1285 JumbleState *jstate,
1286 int parallel_workers_to_launch,
1287 int parallel_workers_launched)
1288{
1290 pgssEntry *entry;
1291 char *norm_query = NULL;
1293
1294 Assert(query != NULL);
1295
1296 /* Safety check... */
1297 if (!pgss || !pgss_hash)
1298 return;
1299
1300 /*
1301 * Nothing to do if compute_query_id isn't enabled and no other module
1302 * computed a query identifier.
1303 */
1304 if (queryId == UINT64CONST(0))
1305 return;
1306
1307 /*
1308 * Confine our attention to the relevant part of the string, if the query
1309 * is a portion of a multi-statement source string, and update query
1310 * location and length if needed.
1311 */
1312 query = CleanQuerytext(query, &query_location, &query_len);
1313
1314 /* Set up key for hashtable search */
1315
1316 /* clear padding */
1317 memset(&key, 0, sizeof(pgssHashKey));
1318
1319 key.userid = GetUserId();
1320 key.dbid = MyDatabaseId;
1321 key.queryid = queryId;
1322 key.toplevel = (nesting_level == 0);
1323
1324 /* Lookup the hash table entry with shared lock. */
1326
1327 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1328
1329 /* Create new entry, if not present */
1330 if (!entry)
1331 {
1332 Size query_offset;
1333 int gc_count;
1334 bool stored;
1335 bool do_gc;
1336
1337 /*
1338 * Create a new, normalized query string if caller asked. We don't
1339 * need to hold the lock while doing this work. (Note: in any case,
1340 * it's possible that someone else creates a duplicate hashtable entry
1341 * in the interval where we don't hold the lock below. That case is
1342 * handled by entry_alloc.)
1343 */
1344 if (jstate)
1345 {
1347 norm_query = generate_normalized_query(jstate, query,
1348 query_location,
1349 &query_len);
1351 }
1352
1353 /* Append new query text to file with only shared lock held */
1354 stored = qtext_store(norm_query ? norm_query : query, query_len,
1355 &query_offset, &gc_count);
1356
1357 /*
1358 * Determine whether we need to garbage collect external query texts
1359 * while the shared lock is still held. This micro-optimization
1360 * avoids taking the time to decide this while holding exclusive lock.
1361 */
1362 do_gc = need_gc_qtexts();
1363
1364 /* Need exclusive lock to make a new hashtable entry - promote */
1367
1368 /*
1369 * A garbage collection may have occurred while we weren't holding the
1370 * lock. In the unlikely event that this happens, the query text we
1371 * stored above will have been garbage collected, so write it again.
1372 * This should be infrequent enough that doing it while holding
1373 * exclusive lock isn't a performance problem.
1374 */
1375 if (!stored || pgss->gc_count != gc_count)
1376 stored = qtext_store(norm_query ? norm_query : query, query_len,
1377 &query_offset, NULL);
1378
1379 /* If we failed to write to the text file, give up */
1380 if (!stored)
1381 goto done;
1382
1383 /* OK to create a new hashtable entry */
1384 entry = entry_alloc(&key, query_offset, query_len, encoding,
1385 jstate != NULL);
1386
1387 /* If needed, perform garbage collection while exclusive lock held */
1388 if (do_gc)
1389 gc_qtexts();
1390 }
1391
1392 /* Increment the counts, except when jstate is not NULL */
1393 if (!jstate)
1394 {
1395 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1396
1397 /*
1398 * Grab the spinlock while updating the counters (see comment about
1399 * locking rules at the head of the file)
1400 */
1401 SpinLockAcquire(&entry->mutex);
1402
1403 /* "Unstick" entry if it was previously sticky */
1404 if (IS_STICKY(entry->counters))
1405 entry->counters.usage = USAGE_INIT;
1406
1407 entry->counters.calls[kind] += 1;
1408 entry->counters.total_time[kind] += total_time;
1409
1410 if (entry->counters.calls[kind] == 1)
1411 {
1412 entry->counters.min_time[kind] = total_time;
1413 entry->counters.max_time[kind] = total_time;
1414 entry->counters.mean_time[kind] = total_time;
1415 }
1416 else
1417 {
1418 /*
1419 * Welford's method for accurately computing variance. See
1420 * <http://www.johndcook.com/blog/standard_deviation/>
1421 */
1422 double old_mean = entry->counters.mean_time[kind];
1423
1424 entry->counters.mean_time[kind] +=
1425 (total_time - old_mean) / entry->counters.calls[kind];
1426 entry->counters.sum_var_time[kind] +=
1427 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1428
1429 /*
1430 * Calculate min and max time. min = 0 and max = 0 means that the
1431 * min/max statistics were reset
1432 */
1433 if (entry->counters.min_time[kind] == 0
1434 && entry->counters.max_time[kind] == 0)
1435 {
1436 entry->counters.min_time[kind] = total_time;
1437 entry->counters.max_time[kind] = total_time;
1438 }
1439 else
1440 {
1441 if (entry->counters.min_time[kind] > total_time)
1442 entry->counters.min_time[kind] = total_time;
1443 if (entry->counters.max_time[kind] < total_time)
1444 entry->counters.max_time[kind] = total_time;
1445 }
1446 }
1447 entry->counters.rows += rows;
1448 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1449 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1452 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1453 entry->counters.local_blks_read += bufusage->local_blks_read;
1456 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1457 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1464 entry->counters.usage += USAGE_EXEC(total_time);
1465 entry->counters.wal_records += walusage->wal_records;
1466 entry->counters.wal_fpi += walusage->wal_fpi;
1467 entry->counters.wal_bytes += walusage->wal_bytes;
1468 if (jitusage)
1469 {
1470 entry->counters.jit_functions += jitusage->created_functions;
1472
1474 entry->counters.jit_deform_count++;
1476
1480
1484
1488 }
1489
1490 /* parallel worker counters */
1491 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1492 entry->counters.parallel_workers_launched += parallel_workers_launched;
1493
1494 SpinLockRelease(&entry->mutex);
1495 }
1496
1497done:
1499
1500 /* We postpone this clean-up until we're out of the lock */
1501 if (norm_query)
1502 pfree(norm_query);
1503}
1504
1505/*
1506 * Reset statement statistics corresponding to userid, dbid, and queryid.
1507 */
1508Datum
1510{
1511 Oid userid;
1512 Oid dbid;
1513 uint64 queryid;
1514
1515 userid = PG_GETARG_OID(0);
1516 dbid = PG_GETARG_OID(1);
1517 queryid = (uint64) PG_GETARG_INT64(2);
1518
1519 entry_reset(userid, dbid, queryid, false);
1520
1522}
1523
1524Datum
1526{
1527 Oid userid;
1528 Oid dbid;
1529 uint64 queryid;
1530 bool minmax_only;
1531
1532 userid = PG_GETARG_OID(0);
1533 dbid = PG_GETARG_OID(1);
1534 queryid = (uint64) PG_GETARG_INT64(2);
1535 minmax_only = PG_GETARG_BOOL(3);
1536
1537 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1538}
1539
1540/*
1541 * Reset statement statistics.
1542 */
1543Datum
1545{
1546 entry_reset(0, 0, 0, false);
1547
1549}
1550
1551/* Number of output arguments (columns) for various API versions */
1552#define PG_STAT_STATEMENTS_COLS_V1_0 14
1553#define PG_STAT_STATEMENTS_COLS_V1_1 18
1554#define PG_STAT_STATEMENTS_COLS_V1_2 19
1555#define PG_STAT_STATEMENTS_COLS_V1_3 23
1556#define PG_STAT_STATEMENTS_COLS_V1_8 32
1557#define PG_STAT_STATEMENTS_COLS_V1_9 33
1558#define PG_STAT_STATEMENTS_COLS_V1_10 43
1559#define PG_STAT_STATEMENTS_COLS_V1_11 49
1560#define PG_STAT_STATEMENTS_COLS_V1_12 51
1561#define PG_STAT_STATEMENTS_COLS 51 /* maximum of above */
1562
1563/*
1564 * Retrieve statement statistics.
1565 *
1566 * The SQL API of this function has changed multiple times, and will likely
1567 * do so again in future. To support the case where a newer version of this
1568 * loadable module is being used with an old SQL declaration of the function,
1569 * we continue to support the older API versions. For 1.2 and later, the
1570 * expected API version is identified by embedding it in the C name of the
1571 * function. Unfortunately we weren't bright enough to do that for 1.1.
1572 */
1573Datum
1575{
1576 bool showtext = PG_GETARG_BOOL(0);
1577
1578 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1579
1580 return (Datum) 0;
1581}
1582
1583Datum
1585{
1586 bool showtext = PG_GETARG_BOOL(0);
1587
1588 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1589
1590 return (Datum) 0;
1591}
1592
1593Datum
1595{
1596 bool showtext = PG_GETARG_BOOL(0);
1597
1598 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1599
1600 return (Datum) 0;
1601}
1602
1603Datum
1605{
1606 bool showtext = PG_GETARG_BOOL(0);
1607
1608 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1609
1610 return (Datum) 0;
1611}
1612
1613Datum
1615{
1616 bool showtext = PG_GETARG_BOOL(0);
1617
1618 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1619
1620 return (Datum) 0;
1621}
1622
1623Datum
1625{
1626 bool showtext = PG_GETARG_BOOL(0);
1627
1628 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1629
1630 return (Datum) 0;
1631}
1632
1633Datum
1635{
1636 bool showtext = PG_GETARG_BOOL(0);
1637
1638 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1639
1640 return (Datum) 0;
1641}
1642
1643/*
1644 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1645 * This can be removed someday, perhaps.
1646 */
1647Datum
1649{
1650 /* If it's really API 1.1, we'll figure that out below */
1652
1653 return (Datum) 0;
1654}
1655
1656/* Common code for all versions of pg_stat_statements() */
1657static void
1659 pgssVersion api_version,
1660 bool showtext)
1661{
1662 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1663 Oid userid = GetUserId();
1664 bool is_allowed_role = false;
1665 char *qbuffer = NULL;
1666 Size qbuffer_size = 0;
1667 Size extent = 0;
1668 int gc_count = 0;
1669 HASH_SEQ_STATUS hash_seq;
1670 pgssEntry *entry;
1671
1672 /*
1673 * Superusers or roles with the privileges of pg_read_all_stats members
1674 * are allowed
1675 */
1676 is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1677
1678 /* hash table must exist already */
1679 if (!pgss || !pgss_hash)
1680 ereport(ERROR,
1681 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1682 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1683
1684 InitMaterializedSRF(fcinfo, 0);
1685
1686 /*
1687 * Check we have the expected number of output arguments. Aside from
1688 * being a good safety check, we need a kluge here to detect API version
1689 * 1.1, which was wedged into the code in an ill-considered way.
1690 */
1691 switch (rsinfo->setDesc->natts)
1692 {
1694 if (api_version != PGSS_V1_0)
1695 elog(ERROR, "incorrect number of output arguments");
1696 break;
1698 /* pg_stat_statements() should have told us 1.0 */
1699 if (api_version != PGSS_V1_0)
1700 elog(ERROR, "incorrect number of output arguments");
1701 api_version = PGSS_V1_1;
1702 break;
1704 if (api_version != PGSS_V1_2)
1705 elog(ERROR, "incorrect number of output arguments");
1706 break;
1708 if (api_version != PGSS_V1_3)
1709 elog(ERROR, "incorrect number of output arguments");
1710 break;
1712 if (api_version != PGSS_V1_8)
1713 elog(ERROR, "incorrect number of output arguments");
1714 break;
1716 if (api_version != PGSS_V1_9)
1717 elog(ERROR, "incorrect number of output arguments");
1718 break;
1720 if (api_version != PGSS_V1_10)
1721 elog(ERROR, "incorrect number of output arguments");
1722 break;
1724 if (api_version != PGSS_V1_11)
1725 elog(ERROR, "incorrect number of output arguments");
1726 break;
1728 if (api_version != PGSS_V1_12)
1729 elog(ERROR, "incorrect number of output arguments");
1730 break;
1731 default:
1732 elog(ERROR, "incorrect number of output arguments");
1733 }
1734
1735 /*
1736 * We'd like to load the query text file (if needed) while not holding any
1737 * lock on pgss->lock. In the worst case we'll have to do this again
1738 * after we have the lock, but it's unlikely enough to make this a win
1739 * despite occasional duplicated work. We need to reload if anybody
1740 * writes to the file (either a retail qtext_store(), or a garbage
1741 * collection) between this point and where we've gotten shared lock. If
1742 * a qtext_store is actually in progress when we look, we might as well
1743 * skip the speculative load entirely.
1744 */
1745 if (showtext)
1746 {
1747 int n_writers;
1748
1749 /* Take the mutex so we can examine variables */
1751 extent = pgss->extent;
1752 n_writers = pgss->n_writers;
1753 gc_count = pgss->gc_count;
1755
1756 /* No point in loading file now if there are active writers */
1757 if (n_writers == 0)
1758 qbuffer = qtext_load_file(&qbuffer_size);
1759 }
1760
1761 /*
1762 * Get shared lock, load or reload the query text file if we must, and
1763 * iterate over the hashtable entries.
1764 *
1765 * With a large hash table, we might be holding the lock rather longer
1766 * than one could wish. However, this only blocks creation of new hash
1767 * table entries, and the larger the hash table the less likely that is to
1768 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1769 * we need to partition the hash table to limit the time spent holding any
1770 * one lock.
1771 */
1773
1774 if (showtext)
1775 {
1776 /*
1777 * Here it is safe to examine extent and gc_count without taking the
1778 * mutex. Note that although other processes might change
1779 * pgss->extent just after we look at it, the strings they then write
1780 * into the file cannot yet be referenced in the hashtable, so we
1781 * don't care whether we see them or not.
1782 *
1783 * If qtext_load_file fails, we just press on; we'll return NULL for
1784 * every query text.
1785 */
1786 if (qbuffer == NULL ||
1787 pgss->extent != extent ||
1788 pgss->gc_count != gc_count)
1789 {
1790 free(qbuffer);
1791 qbuffer = qtext_load_file(&qbuffer_size);
1792 }
1793 }
1794
1795 hash_seq_init(&hash_seq, pgss_hash);
1796 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1797 {
1799 bool nulls[PG_STAT_STATEMENTS_COLS];
1800 int i = 0;
1801 Counters tmp;
1802 double stddev;
1803 int64 queryid = entry->key.queryid;
1804 TimestampTz stats_since;
1805 TimestampTz minmax_stats_since;
1806
1807 memset(values, 0, sizeof(values));
1808 memset(nulls, 0, sizeof(nulls));
1809
1810 values[i++] = ObjectIdGetDatum(entry->key.userid);
1811 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1812 if (api_version >= PGSS_V1_9)
1813 values[i++] = BoolGetDatum(entry->key.toplevel);
1814
1815 if (is_allowed_role || entry->key.userid == userid)
1816 {
1817 if (api_version >= PGSS_V1_2)
1818 values[i++] = Int64GetDatumFast(queryid);
1819
1820 if (showtext)
1821 {
1822 char *qstr = qtext_fetch(entry->query_offset,
1823 entry->query_len,
1824 qbuffer,
1825 qbuffer_size);
1826
1827 if (qstr)
1828 {
1829 char *enc;
1830
1831 enc = pg_any_to_server(qstr,
1832 entry->query_len,
1833 entry->encoding);
1834
1836
1837 if (enc != qstr)
1838 pfree(enc);
1839 }
1840 else
1841 {
1842 /* Just return a null if we fail to find the text */
1843 nulls[i++] = true;
1844 }
1845 }
1846 else
1847 {
1848 /* Query text not requested */
1849 nulls[i++] = true;
1850 }
1851 }
1852 else
1853 {
1854 /* Don't show queryid */
1855 if (api_version >= PGSS_V1_2)
1856 nulls[i++] = true;
1857
1858 /*
1859 * Don't show query text, but hint as to the reason for not doing
1860 * so if it was requested
1861 */
1862 if (showtext)
1863 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1864 else
1865 nulls[i++] = true;
1866 }
1867
1868 /* copy counters to a local variable to keep locking time short */
1869 SpinLockAcquire(&entry->mutex);
1870 tmp = entry->counters;
1871 SpinLockRelease(&entry->mutex);
1872
1873 /*
1874 * The spinlock is not required when reading these two as they are
1875 * always updated when holding pgss->lock exclusively.
1876 */
1877 stats_since = entry->stats_since;
1878 minmax_stats_since = entry->minmax_stats_since;
1879
1880 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1881 if (IS_STICKY(tmp))
1882 continue;
1883
1884 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1885 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1886 {
1887 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1888 {
1889 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1890 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1891 }
1892
1893 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1894 api_version >= PGSS_V1_8)
1895 {
1896 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1897 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1898 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1899
1900 /*
1901 * Note we are calculating the population variance here, not
1902 * the sample variance, as we have data for the whole
1903 * population, so Bessel's correction is not used, and we
1904 * don't divide by tmp.calls - 1.
1905 */
1906 if (tmp.calls[kind] > 1)
1907 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1908 else
1909 stddev = 0.0;
1910 values[i++] = Float8GetDatumFast(stddev);
1911 }
1912 }
1913 values[i++] = Int64GetDatumFast(tmp.rows);
1916 if (api_version >= PGSS_V1_1)
1921 if (api_version >= PGSS_V1_1)
1926 if (api_version >= PGSS_V1_1)
1927 {
1930 }
1931 if (api_version >= PGSS_V1_11)
1932 {
1935 }
1936 if (api_version >= PGSS_V1_10)
1937 {
1940 }
1941 if (api_version >= PGSS_V1_8)
1942 {
1943 char buf[256];
1944 Datum wal_bytes;
1945
1948
1949 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1950
1951 /* Convert to numeric. */
1952 wal_bytes = DirectFunctionCall3(numeric_in,
1955 Int32GetDatum(-1));
1956 values[i++] = wal_bytes;
1957 }
1958 if (api_version >= PGSS_V1_10)
1959 {
1968 }
1969 if (api_version >= PGSS_V1_11)
1970 {
1973 }
1974 if (api_version >= PGSS_V1_12)
1975 {
1978 }
1979 if (api_version >= PGSS_V1_11)
1980 {
1981 values[i++] = TimestampTzGetDatum(stats_since);
1982 values[i++] = TimestampTzGetDatum(minmax_stats_since);
1983 }
1984
1985 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1986 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1987 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1988 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1989 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1990 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1991 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
1992 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
1993 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
1994 -1 /* fail if you forget to update this assert */ ));
1995
1996 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1997 }
1998
2000
2001 free(qbuffer);
2002}
2003
2004/* Number of output arguments (columns) for pg_stat_statements_info */
2005#define PG_STAT_STATEMENTS_INFO_COLS 2
2006
2007/*
2008 * Return statistics of pg_stat_statements.
2009 */
2010Datum
2012{
2013 pgssGlobalStats stats;
2014 TupleDesc tupdesc;
2016 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2017
2018 if (!pgss || !pgss_hash)
2019 ereport(ERROR,
2020 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2021 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2022
2023 /* Build a tuple descriptor for our result type */
2024 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2025 elog(ERROR, "return type must be a row type");
2026
2027 /* Read global statistics for pg_stat_statements */
2029 stats = pgss->stats;
2031
2032 values[0] = Int64GetDatum(stats.dealloc);
2034
2036}
2037
2038/*
2039 * Estimate shared memory space needed.
2040 */
2041static Size
2043{
2044 Size size;
2045
2046 size = MAXALIGN(sizeof(pgssSharedState));
2048
2049 return size;
2050}
2051
2052/*
2053 * Allocate a new hashtable entry.
2054 * caller must hold an exclusive lock on pgss->lock
2055 *
2056 * "query" need not be null-terminated; we rely on query_len instead
2057 *
2058 * If "sticky" is true, make the new entry artificially sticky so that it will
2059 * probably still be there when the query finishes execution. We do this by
2060 * giving it a median usage value rather than the normal value. (Strictly
2061 * speaking, query strings are normalized on a best effort basis, though it
2062 * would be difficult to demonstrate this even under artificial conditions.)
2063 *
2064 * Note: despite needing exclusive lock, it's not an error for the target
2065 * entry to already exist. This is because pgss_store releases and
2066 * reacquires lock after failing to find a match; so someone else could
2067 * have made the entry while we waited to get exclusive lock.
2068 */
2069static pgssEntry *
2070entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2071 bool sticky)
2072{
2073 pgssEntry *entry;
2074 bool found;
2075
2076 /* Make space if needed */
2078 entry_dealloc();
2079
2080 /* Find or create an entry with desired hash code */
2081 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2082
2083 if (!found)
2084 {
2085 /* New entry, initialize it */
2086
2087 /* reset the statistics */
2088 memset(&entry->counters, 0, sizeof(Counters));
2089 /* set the appropriate initial usage count */
2090 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2091 /* re-initialize the mutex each time ... we assume no one using it */
2092 SpinLockInit(&entry->mutex);
2093 /* ... and don't forget the query text metadata */
2094 Assert(query_len >= 0);
2095 entry->query_offset = query_offset;
2096 entry->query_len = query_len;
2097 entry->encoding = encoding;
2099 entry->minmax_stats_since = entry->stats_since;
2100 }
2101
2102 return entry;
2103}
2104
2105/*
2106 * qsort comparator for sorting into increasing usage order
2107 */
2108static int
2109entry_cmp(const void *lhs, const void *rhs)
2110{
2111 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2112 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2113
2114 if (l_usage < r_usage)
2115 return -1;
2116 else if (l_usage > r_usage)
2117 return +1;
2118 else
2119 return 0;
2120}
2121
2122/*
2123 * Deallocate least-used entries.
2124 *
2125 * Caller must hold an exclusive lock on pgss->lock.
2126 */
2127static void
2129{
2130 HASH_SEQ_STATUS hash_seq;
2131 pgssEntry **entries;
2132 pgssEntry *entry;
2133 int nvictims;
2134 int i;
2135 Size tottextlen;
2136 int nvalidtexts;
2137
2138 /*
2139 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2140 * While we're scanning the table, apply the decay factor to the usage
2141 * values, and update the mean query length.
2142 *
2143 * Note that the mean query length is almost immediately obsolete, since
2144 * we compute it before not after discarding the least-used entries.
2145 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2146 * making two passes to get a more current result. Likewise, the new
2147 * cur_median_usage includes the entries we're about to zap.
2148 */
2149
2150 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2151
2152 i = 0;
2153 tottextlen = 0;
2154 nvalidtexts = 0;
2155
2156 hash_seq_init(&hash_seq, pgss_hash);
2157 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2158 {
2159 entries[i++] = entry;
2160 /* "Sticky" entries get a different usage decay rate. */
2161 if (IS_STICKY(entry->counters))
2163 else
2165 /* In the mean length computation, ignore dropped texts. */
2166 if (entry->query_len >= 0)
2167 {
2168 tottextlen += entry->query_len + 1;
2169 nvalidtexts++;
2170 }
2171 }
2172
2173 /* Sort into increasing order by usage */
2174 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2175
2176 /* Record the (approximate) median usage */
2177 if (i > 0)
2178 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2179 /* Record the mean query length */
2180 if (nvalidtexts > 0)
2181 pgss->mean_query_len = tottextlen / nvalidtexts;
2182 else
2184
2185 /* Now zap an appropriate fraction of lowest-usage entries */
2186 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2187 nvictims = Min(nvictims, i);
2188
2189 for (i = 0; i < nvictims; i++)
2190 {
2191 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2192 }
2193
2194 pfree(entries);
2195
2196 /* Increment the number of times entries are deallocated */
2198 pgss->stats.dealloc += 1;
2200}
2201
2202/*
2203 * Given a query string (not necessarily null-terminated), allocate a new
2204 * entry in the external query text file and store the string there.
2205 *
2206 * If successful, returns true, and stores the new entry's offset in the file
2207 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2208 * number of garbage collections that have occurred so far.
2209 *
2210 * On failure, returns false.
2211 *
2212 * At least a shared lock on pgss->lock must be held by the caller, so as
2213 * to prevent a concurrent garbage collection. Share-lock-holding callers
2214 * should pass a gc_count pointer to obtain the number of garbage collections,
2215 * so that they can recheck the count after obtaining exclusive lock to
2216 * detect whether a garbage collection occurred (and removed this entry).
2217 */
2218static bool
2219qtext_store(const char *query, int query_len,
2220 Size *query_offset, int *gc_count)
2221{
2222 Size off;
2223 int fd;
2224
2225 /*
2226 * We use a spinlock to protect extent/n_writers/gc_count, so that
2227 * multiple processes may execute this function concurrently.
2228 */
2230 off = pgss->extent;
2231 pgss->extent += query_len + 1;
2232 pgss->n_writers++;
2233 if (gc_count)
2234 *gc_count = pgss->gc_count;
2236
2237 *query_offset = off;
2238
2239 /*
2240 * Don't allow the file to grow larger than what qtext_load_file can
2241 * (theoretically) handle. This has been seen to be reachable on 32-bit
2242 * platforms.
2243 */
2244 if (unlikely(query_len >= MaxAllocHugeSize - off))
2245 {
2246 errno = EFBIG; /* not quite right, but it'll do */
2247 fd = -1;
2248 goto error;
2249 }
2250
2251 /* Now write the data into the successfully-reserved part of the file */
2252 fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2253 if (fd < 0)
2254 goto error;
2255
2256 if (pg_pwrite(fd, query, query_len, off) != query_len)
2257 goto error;
2258 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2259 goto error;
2260
2262
2263 /* Mark our write complete */
2265 pgss->n_writers--;
2267
2268 return true;
2269
2270error:
2271 ereport(LOG,
2273 errmsg("could not write file \"%s\": %m",
2274 PGSS_TEXT_FILE)));
2275
2276 if (fd >= 0)
2278
2279 /* Mark our write complete */
2281 pgss->n_writers--;
2283
2284 return false;
2285}
2286
2287/*
2288 * Read the external query text file into a malloc'd buffer.
2289 *
2290 * Returns NULL (without throwing an error) if unable to read, eg
2291 * file not there or insufficient memory.
2292 *
2293 * On success, the buffer size is also returned into *buffer_size.
2294 *
2295 * This can be called without any lock on pgss->lock, but in that case
2296 * the caller is responsible for verifying that the result is sane.
2297 */
2298static char *
2300{
2301 char *buf;
2302 int fd;
2303 struct stat stat;
2304 Size nread;
2305
2307 if (fd < 0)
2308 {
2309 if (errno != ENOENT)
2310 ereport(LOG,
2312 errmsg("could not read file \"%s\": %m",
2313 PGSS_TEXT_FILE)));
2314 return NULL;
2315 }
2316
2317 /* Get file length */
2318 if (fstat(fd, &stat))
2319 {
2320 ereport(LOG,
2322 errmsg("could not stat file \"%s\": %m",
2323 PGSS_TEXT_FILE)));
2325 return NULL;
2326 }
2327
2328 /* Allocate buffer; beware that off_t might be wider than size_t */
2330 buf = (char *) malloc(stat.st_size);
2331 else
2332 buf = NULL;
2333 if (buf == NULL)
2334 {
2335 ereport(LOG,
2336 (errcode(ERRCODE_OUT_OF_MEMORY),
2337 errmsg("out of memory"),
2338 errdetail("Could not allocate enough memory to read file \"%s\".",
2339 PGSS_TEXT_FILE)));
2341 return NULL;
2342 }
2343
2344 /*
2345 * OK, slurp in the file. Windows fails if we try to read more than
2346 * INT_MAX bytes at once, and other platforms might not like that either,
2347 * so read a very large file in 1GB segments.
2348 */
2349 nread = 0;
2350 while (nread < stat.st_size)
2351 {
2352 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2353
2354 /*
2355 * If we get a short read and errno doesn't get set, the reason is
2356 * probably that garbage collection truncated the file since we did
2357 * the fstat(), so we don't log a complaint --- but we don't return
2358 * the data, either, since it's most likely corrupt due to concurrent
2359 * writes from garbage collection.
2360 */
2361 errno = 0;
2362 if (read(fd, buf + nread, toread) != toread)
2363 {
2364 if (errno)
2365 ereport(LOG,
2367 errmsg("could not read file \"%s\": %m",
2368 PGSS_TEXT_FILE)));
2369 free(buf);
2371 return NULL;
2372 }
2373 nread += toread;
2374 }
2375
2376 if (CloseTransientFile(fd) != 0)
2377 ereport(LOG,
2379 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2380
2381 *buffer_size = nread;
2382 return buf;
2383}
2384
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset and query_len say where the text is expected to sit inside
 * "buffer", which holds buffer_size bytes.  The text must be followed by a
 * terminating null byte that also lies inside the buffer.
 *
 * Returns a pointer to the null-terminated string within the buffer, or NULL
 * if the buffer is missing or the offset/length do not pass validation.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/* Bogus length? */
	if (query_len < 0)
		return NULL;

	/*
	 * Bogus offset/length?  The terminator index (query_offset + query_len)
	 * has to be below buffer_size.  Compare the length against the space
	 * remaining after the offset instead of adding the two: the sum is
	 * computed in unsigned Size arithmetic, so a garbage offset near the top
	 * of the range would wrap around and slip past an additive check.
	 */
	if (query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2408
2409/*
2410 * Do we need to garbage-collect the external query text file?
2411 *
2412 * Caller should hold at least a shared lock on pgss->lock.
2413 */
2414static bool
2416{
2417 Size extent;
2418
2419 /* Read shared extent pointer */
2421 extent = pgss->extent;
2423
2424 /*
2425 * Don't proceed if file does not exceed 512 bytes per possible entry.
2426 *
2427 * Here and in the next test, 32-bit machines have overflow hazards if
2428 * pgss_max and/or mean_query_len are large. Force the multiplications
2429 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2430 */
2431 if ((uint64) extent < (uint64) 512 * pgss_max)
2432 return false;
2433
2434 /*
2435 * Don't proceed if file is less than about 50% bloat. Nothing can or
2436 * should be done in the event of unusually large query texts accounting
2437 * for file's large size. We go to the trouble of maintaining the mean
2438 * query length in order to prevent garbage collection from thrashing
2439 * uselessly.
2440 */
2441 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2442 return false;
2443
2444 return true;
2445}
2446
2447/*
2448 * Garbage-collect orphaned query texts in external file.
2449 *
2450 * This won't be called often in the typical case, since it's likely that
2451 * there won't be too much churn, and besides, a similar compaction process
2452 * occurs when serializing to disk at shutdown or as part of resetting.
2453 * Despite this, it seems prudent to plan for the edge case where the file
2454 * becomes unreasonably large, with no other method of compaction likely to
2455 * occur in the foreseeable future.
2456 *
2457 * The caller must hold an exclusive lock on pgss->lock.
2458 *
2459 * At the first sign of trouble we unlink the query text file to get a clean
2460 * slate (although existing statistics are retained), rather than risk
2461 * thrashing by allowing the same problem case to recur indefinitely.
2462 */
2463static void
2465{
2466 char *qbuffer;
2467 Size qbuffer_size;
2468 FILE *qfile = NULL;
2469 HASH_SEQ_STATUS hash_seq;
2470 pgssEntry *entry;
2471 Size extent;
2472 int nentries;
2473
2474 /*
2475 * When called from pgss_store, some other session might have proceeded
2476 * with garbage collection in the no-lock-held interim of lock strength
2477 * escalation. Check once more that this is actually necessary.
2478 */
2479 if (!need_gc_qtexts())
2480 return;
2481
2482 /*
2483 * Load the old texts file. If we fail (out of memory, for instance),
2484 * invalidate query texts. Hopefully this is rare. It might seem better
2485 * to leave things alone on an OOM failure, but the problem is that the
2486 * file is only going to get bigger; hoping for a future non-OOM result is
2487 * risky and can easily lead to complete denial of service.
2488 */
2489 qbuffer = qtext_load_file(&qbuffer_size);
2490 if (qbuffer == NULL)
2491 goto gc_fail;
2492
2493 /*
2494 * We overwrite the query texts file in place, so as to reduce the risk of
2495 * an out-of-disk-space failure. Since the file is guaranteed not to get
2496 * larger, this should always work on traditional filesystems; though we
2497 * could still lose on copy-on-write filesystems.
2498 */
2500 if (qfile == NULL)
2501 {
2502 ereport(LOG,
2504 errmsg("could not write file \"%s\": %m",
2505 PGSS_TEXT_FILE)));
2506 goto gc_fail;
2507 }
2508
2509 extent = 0;
2510 nentries = 0;
2511
2512 hash_seq_init(&hash_seq, pgss_hash);
2513 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2514 {
2515 int query_len = entry->query_len;
2516 char *qry = qtext_fetch(entry->query_offset,
2517 query_len,
2518 qbuffer,
2519 qbuffer_size);
2520
2521 if (qry == NULL)
2522 {
2523 /* Trouble ... drop the text */
2524 entry->query_offset = 0;
2525 entry->query_len = -1;
2526 /* entry will not be counted in mean query length computation */
2527 continue;
2528 }
2529
2530 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2531 {
2532 ereport(LOG,
2534 errmsg("could not write file \"%s\": %m",
2535 PGSS_TEXT_FILE)));
2536 hash_seq_term(&hash_seq);
2537 goto gc_fail;
2538 }
2539
2540 entry->query_offset = extent;
2541 extent += query_len + 1;
2542 nentries++;
2543 }
2544
2545 /*
2546 * Truncate away any now-unused space. If this fails for some odd reason,
2547 * we log it, but there's no need to fail.
2548 */
2549 if (ftruncate(fileno(qfile), extent) != 0)
2550 ereport(LOG,
2552 errmsg("could not truncate file \"%s\": %m",
2553 PGSS_TEXT_FILE)));
2554
2555 if (FreeFile(qfile))
2556 {
2557 ereport(LOG,
2559 errmsg("could not write file \"%s\": %m",
2560 PGSS_TEXT_FILE)));
2561 qfile = NULL;
2562 goto gc_fail;
2563 }
2564
2565 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2566 pgss->extent, extent);
2567
2568 /* Reset the shared extent pointer */
2569 pgss->extent = extent;
2570
2571 /*
2572 * Also update the mean query length, to be sure that need_gc_qtexts()
2573 * won't still think we have a problem.
2574 */
2575 if (nentries > 0)
2576 pgss->mean_query_len = extent / nentries;
2577 else
2579
2580 free(qbuffer);
2581
2582 /*
2583 * OK, count a garbage collection cycle. (Note: even though we have
2584 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2585 * other processes may examine gc_count while holding only the mutex.
2586 * Also, we have to advance the count *after* we've rewritten the file,
2587 * else other processes might not realize they read a stale file.)
2588 */
2590
2591 return;
2592
2593gc_fail:
2594 /* clean up resources */
2595 if (qfile)
2596 FreeFile(qfile);
2597 free(qbuffer);
2598
2599 /*
2600 * Since the contents of the external file are now uncertain, mark all
2601 * hashtable entries as having invalid texts.
2602 */
2603 hash_seq_init(&hash_seq, pgss_hash);
2604 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2605 {
2606 entry->query_offset = 0;
2607 entry->query_len = -1;
2608 }
2609
2610 /*
2611 * Destroy the query text file and create a new, empty one
2612 */
2613 (void) unlink(PGSS_TEXT_FILE);
2615 if (qfile == NULL)
2616 ereport(LOG,
2618 errmsg("could not recreate file \"%s\": %m",
2619 PGSS_TEXT_FILE)));
2620 else
2621 FreeFile(qfile);
2622
2623 /* Reset the shared extent pointer */
2624 pgss->extent = 0;
2625
2626 /* Reset mean_query_len to match the new state */
2628
2629 /*
2630 * Bump the GC count even though we failed.
2631 *
2632 * This is needed to make concurrent readers of file without any lock on
2633 * pgss->lock notice existence of new version of file. Once readers
2634 * subsequently observe a change in GC count with pgss->lock held, that
2635 * forces a safe reopen of file. Writers also require that we bump here,
2636 * of course. (As required by locking protocol, readers and writers don't
2637 * trust earlier file contents until gc_count is found unchanged after
2638 * pgss->lock acquired in shared or exclusive mode respectively.)
2639 */
2641}
2642
/*
 * SINGLE_ENTRY_RESET: reset or remove one hashtable entry.
 *
 * Does nothing when "e" is NULL.  Otherwise, if minmax_only is set, the
 * entry's min/max times are zeroed for every kind and minmax_stats_since is
 * stamped with stats_reset; if it is not set, the entry is removed from
 * pgss_hash and counted in num_remove.
 *
 * The expansion site must have minmax_only, stats_reset and num_remove in
 * scope.  "e" is evaluated more than once, so pass a plain variable.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon forms exactly one statement, even when it is the unbraced body
 * of an if that has an else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2661
2662/*
2663 * Reset entries corresponding to parameters passed.
2664 */
2665static TimestampTz
2666entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
2667{
2668 HASH_SEQ_STATUS hash_seq;
2669 pgssEntry *entry;
2670 FILE *qfile;
2671 long num_entries;
2672 long num_remove = 0;
2674 TimestampTz stats_reset;
2675
2676 if (!pgss || !pgss_hash)
2677 ereport(ERROR,
2678 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2679 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2680
2682 num_entries = hash_get_num_entries(pgss_hash);
2683
2684 stats_reset = GetCurrentTimestamp();
2685
2686 if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2687 {
2688 /* If all the parameters are available, use the fast path. */
2689 memset(&key, 0, sizeof(pgssHashKey));
2690 key.userid = userid;
2691 key.dbid = dbid;
2692 key.queryid = queryid;
2693
2694 /*
2695 * Reset the entry if it exists, starting with the non-top-level
2696 * entry.
2697 */
2698 key.toplevel = false;
2699 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2700
2701 SINGLE_ENTRY_RESET(entry);
2702
2703 /* Also reset the top-level entry if it exists. */
2704 key.toplevel = true;
2705 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2706
2707 SINGLE_ENTRY_RESET(entry);
2708 }
2709 else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2710 {
2711 /* Reset entries corresponding to valid parameters. */
2712 hash_seq_init(&hash_seq, pgss_hash);
2713 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2714 {
2715 if ((!userid || entry->key.userid == userid) &&
2716 (!dbid || entry->key.dbid == dbid) &&
2717 (!queryid || entry->key.queryid == queryid))
2718 {
2719 SINGLE_ENTRY_RESET(entry);
2720 }
2721 }
2722 }
2723 else
2724 {
2725 /* Reset all entries. */
2726 hash_seq_init(&hash_seq, pgss_hash);
2727 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2728 {
2729 SINGLE_ENTRY_RESET(entry);
2730 }
2731 }
2732
2733 /* All entries are removed? */
2734 if (num_entries != num_remove)
2735 goto release_lock;
2736
2737 /*
2738 * Reset global statistics for pg_stat_statements since all entries are
2739 * removed.
2740 */
2742 pgss->stats.dealloc = 0;
2743 pgss->stats.stats_reset = stats_reset;
2745
2746 /*
2747 * Write new empty query file, perhaps even creating a new one to recover
2748 * if the file was missing.
2749 */
2751 if (qfile == NULL)
2752 {
2753 ereport(LOG,
2755 errmsg("could not create file \"%s\": %m",
2756 PGSS_TEXT_FILE)));
2757 goto done;
2758 }
2759
2760 /* If ftruncate fails, log it, but it's not a fatal problem */
2761 if (ftruncate(fileno(qfile), 0) != 0)
2762 ereport(LOG,
2764 errmsg("could not truncate file \"%s\": %m",
2765 PGSS_TEXT_FILE)));
2766
2767 FreeFile(qfile);
2768
2769done:
2770 pgss->extent = 0;
2771 /* This counts as a query text garbage collection for our purposes */
2773
2774release_lock:
2776
2777 return stats_reset;
2778}
2779
2780/*
2781 * Generate a normalized version of the query string that will be used to
2782 * represent all similar queries.
2783 *
2784 * Note that the normalized representation may well vary depending on
2785 * just which "equivalent" query is used to create the hashtable entry.
2786 * We assume this is OK.
2787 *
2788 * If query_loc > 0, then "query" has been advanced by that much compared to
2789 * the original string start, so we need to translate the provided locations
2790 * to compensate. (This lets us avoid re-scanning statements before the one
2791 * of interest, so it's worth doing.)
2792 *
2793 * *query_len_p contains the input string length, and is updated with
2794 * the result string length on exit. The resulting string might be longer
2795 * or shorter depending on what happens with replacement of constants.
2796 *
2797 * Returns a palloc'd string.
2798 */
2799static char *
2800generate_normalized_query(JumbleState *jstate, const char *query,
2801 int query_loc, int *query_len_p)
2802{
2803 char *norm_query;
2804 int query_len = *query_len_p;
2805 int i,
2806 norm_query_buflen, /* Space allowed for norm_query */
2807 len_to_wrt, /* Length (in bytes) to write */
2808 quer_loc = 0, /* Source query byte location */
2809 n_quer_loc = 0, /* Normalized query byte location */
2810 last_off = 0, /* Offset from start for previous tok */
2811 last_tok_len = 0; /* Length (in bytes) of that tok */
2812
2813 /*
2814 * Get constants' lengths (core system only gives us locations). Note
2815 * this also ensures the items are sorted by location.
2816 */
2817 fill_in_constant_lengths(jstate, query, query_loc);
2818
2819 /*
2820 * Allow for $n symbols to be longer than the constants they replace.
2821 * Constants must take at least one byte in text form, while a $n symbol
2822 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2823 * could refine that limit based on the max value of n for the current
2824 * query, but it hardly seems worth any extra effort to do so.
2825 */
2826 norm_query_buflen = query_len + jstate->clocations_count * 10;
2827
2828 /* Allocate result buffer */
2829 norm_query = palloc(norm_query_buflen + 1);
2830
2831 for (i = 0; i < jstate->clocations_count; i++)
2832 {
2833 int off, /* Offset from start for cur tok */
2834 tok_len; /* Length (in bytes) of that tok */
2835
2836 off = jstate->clocations[i].location;
2837 /* Adjust recorded location if we're dealing with partial string */
2838 off -= query_loc;
2839
2840 tok_len = jstate->clocations[i].length;
2841
2842 if (tok_len < 0)
2843 continue; /* ignore any duplicates */
2844
2845 /* Copy next chunk (what precedes the next constant) */
2846 len_to_wrt = off - last_off;
2847 len_to_wrt -= last_tok_len;
2848
2849 Assert(len_to_wrt >= 0);
2850 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2851 n_quer_loc += len_to_wrt;
2852
2853 /* And insert a param symbol in place of the constant token */
2854 n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2855 i + 1 + jstate->highest_extern_param_id);
2856
2857 quer_loc = off + tok_len;
2858 last_off = off;
2859 last_tok_len = tok_len;
2860 }
2861
2862 /*
2863 * We've copied up until the last ignorable constant. Copy over the
2864 * remaining bytes of the original query string.
2865 */
2866 len_to_wrt = query_len - quer_loc;
2867
2868 Assert(len_to_wrt >= 0);
2869 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2870 n_quer_loc += len_to_wrt;
2871
2872 Assert(n_quer_loc <= norm_query_buflen);
2873 norm_query[n_quer_loc] = '\0';
2874
2875 *query_len_p = n_quer_loc;
2876 return norm_query;
2877}
2878
2879/*
2880 * Given a valid SQL string and an array of constant-location records,
2881 * fill in the textual lengths of those constants.
2882 *
2883 * The constants may use any allowed constant syntax, such as float literals,
2884 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2885 * accomplished by using the public API for the core scanner.
2886 *
2887 * It is the caller's job to ensure that the string is a valid SQL statement
2888 * with constants at the indicated locations. Since in practice the string
2889 * has already been parsed, and the locations that the caller provides will
2890 * have originated from within the authoritative parser, this should not be
2891 * a problem.
2892 *
2893 * Duplicate constant pointers are possible, and will have their lengths
2894 * marked as '-1', so that they are later ignored. (Actually, we assume the
2895 * lengths were initialized as -1 to start with, and don't change them here.)
2896 *
2897 * If query_loc > 0, then "query" has been advanced by that much compared to
2898 * the original string start, so we need to translate the provided locations
2899 * to compensate. (This lets us avoid re-scanning statements before the one
2900 * of interest, so it's worth doing.)
2901 *
2902 * N.B. There is an assumption that a '-' character at a Const location begins
2903 * a negative numeric constant. This precludes there ever being another
2904 * reason for a constant to start with a '-'.
2905 */
2906static void
2907fill_in_constant_lengths(JumbleState *jstate, const char *query,
2908 int query_loc)
2909{
2910 LocationLen *locs;
2913 core_YYSTYPE yylval;
2915 int last_loc = -1;
2916 int i;
2917
2918 /*
2919 * Sort the records by location so that we can process them in order while
2920 * scanning the query text.
2921 */
2922 if (jstate->clocations_count > 1)
2923 qsort(jstate->clocations, jstate->clocations_count,
2924 sizeof(LocationLen), comp_location);
2925 locs = jstate->clocations;
2926
2927 /* initialize the flex scanner --- should match raw_parser() */
2928 yyscanner = scanner_init(query,
2929 &yyextra,
2930 &ScanKeywords,
2932
2933 /* we don't want to re-emit any escape string warnings */
2934 yyextra.escape_string_warning = false;
2935
2936 /* Search for each constant, in sequence */
2937 for (i = 0; i < jstate->clocations_count; i++)
2938 {
2939 int loc = locs[i].location;
2940 int tok;
2941
2942 /* Adjust recorded location if we're dealing with partial string */
2943 loc -= query_loc;
2944
2945 Assert(loc >= 0);
2946
2947 if (loc <= last_loc)
2948 continue; /* Duplicate constant, ignore */
2949
2950 /* Lex tokens until we find the desired constant */
2951 for (;;)
2952 {
2953 tok = core_yylex(&yylval, &yylloc, yyscanner);
2954
2955 /* We should not hit end-of-string, but if we do, behave sanely */
2956 if (tok == 0)
2957 break; /* out of inner for-loop */
2958
2959 /*
2960 * We should find the token position exactly, but if we somehow
2961 * run past it, work with that.
2962 */
2963 if (yylloc >= loc)
2964 {
2965 if (query[loc] == '-')
2966 {
2967 /*
2968 * It's a negative value - this is the one and only case
2969 * where we replace more than a single token.
2970 *
2971 * Do not compensate for the core system's special-case
2972 * adjustment of location to that of the leading '-'
2973 * operator in the event of a negative constant. It is
2974 * also useful for our purposes to start from the minus
2975 * symbol. In this way, queries like "select * from foo
2976 * where bar = 1" and "select * from foo where bar = -2"
2977 * will have identical normalized query strings.
2978 */
2979 tok = core_yylex(&yylval, &yylloc, yyscanner);
2980 if (tok == 0)
2981 break; /* out of inner for-loop */
2982 }
2983
2984 /*
2985 * We now rely on the assumption that flex has placed a zero
2986 * byte after the text of the current token in scanbuf.
2987 */
2988 locs[i].length = strlen(yyextra.scanbuf + loc);
2989 break; /* out of inner for-loop */
2990 }
2991 }
2992
2993 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2994 if (tok == 0)
2995 break;
2996
2997 last_loc = loc;
2998 }
2999
3001}
3002
3003/*
3004 * comp_location: comparator for qsorting LocationLen structs by location
3005 */
3006static int
3007comp_location(const void *a, const void *b)
3008{
3009 int l = ((const LocationLen *) a)->location;
3010 int r = ((const LocationLen *) b)->location;
3011
3012 return pg_cmp_s32(l, r);
3013}
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5268
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:637
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define Min(x, y)
Definition: c.h:958
#define PG_BINARY_R
Definition: c.h:1229
#define MAXALIGN(LEN)
Definition: c.h:765
#define Max(x, y)
Definition: c.h:952
#define Assert(condition)
Definition: c.h:812
int64_t int64
Definition: c.h:482
#define PG_BINARY
Definition: c.h:1227
#define UINT64_FORMAT
Definition: c.h:504
int32_t int32
Definition: c.h:481
uint64_t uint64
Definition: c.h:486
#define unlikely(x)
Definition: c.h:330
uint32_t uint32
Definition: c.h:485
#define PG_BINARY_W
Definition: c.h:1230
#define UINT64CONST(x)
Definition: c.h:500
size_t Size
Definition: c.h:559
enc
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1514
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:783
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define PG_TRY(...)
Definition: elog.h:371
#define PG_END_TRY(...)
Definition: elog.h:396
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_FINALLY(...)
Definition: elog.h:388
#define ereport(elevel,...)
Definition: elog.h:149
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:70
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:69
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:67
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:138
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:68
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: execMain.c:304
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:472
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:412
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:85
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: executor.h:79
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:75
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:89
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:781
int CloseTransientFile(int fd)
Definition: fd.c:2831
int FreeFile(FILE *file)
Definition: fd.c:2803
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2605
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2655
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:645
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
bool IsUnderPostmaster
Definition: globals.c:119
Oid MyDatabaseId
Definition: globals.c:93
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5243
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5132
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5279
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5158
@ PGC_SUSET
Definition: guc.h:74
@ PGC_POSTMASTER
Definition: guc.h:70
@ PGC_SIGHUP
Definition: guc.h:71
return str start
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:191
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
@ INSTRUMENT_ALL
Definition: instrument.h:65
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:646
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:57
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:73
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int i
Definition: isn.c:72
PGDLLIMPORT const ScanKeywordList ScanKeywords
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:573
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:670
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
#define MaxAllocHugeSize
Definition: memutils.h:45
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:522
Oid GetUserId(void)
Definition: miscinit.c:517
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1837
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1834
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:59
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
void _PG_init(void)
static void gc_qtexts(void)
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
PG_MODULE_MAGIC
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static char * buf
Definition: pg_test_fsync.c:72
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281
static int duration
Definition: pgbench.c:174
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
planner_hook_type planner_hook
Definition: planner.c:72
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:297
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
#define sprintf
Definition: port.h:240
#define pg_pwrite
Definition: port.h:226
#define snprintf
Definition: port.h:238
#define qsort(a, b, c, d)
Definition: port.h:447
#define Int64GetDatumFast(X)
Definition: postgres.h:554
uintptr_t Datum
Definition: postgres.h:64
#define Float8GetDatumFast(X)
Definition: postgres.h:556
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
tree context
Definition: radixtree.h:1837
MemoryContextSwitchTo(old_ctx)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:717
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition: scan.l:1263
#define yylloc
Definition: scan.l:1136
void scanner_finish(core_yyscan_t yyscanner)
Definition: scan.l:1305
#define yyextra
Definition: scan.l:1132
const uint16 ScanKeywordTokens[]
Definition: scan.l:81
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:25
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:327
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
static pg_noinline void Size size
Definition: slab.c:607
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
static void error(void)
Definition: sql-dyntest.c:147
instr_time local_blk_read_time
Definition: instrument.h:38
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
int64 local_blks_written
Definition: instrument.h:33
instr_time temp_blk_read_time
Definition: instrument.h:40
instr_time local_blk_write_time
Definition: instrument.h:39
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
int64 jit_deform_count
double jit_emission_time
int64 shared_blks_hit
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
double jit_deform_time
int64 parallel_workers_to_launch
int64 local_blks_read
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition: execnodes.h:711
struct JitContext * es_jit
Definition: execnodes.h:729
uint64 es_total_processed
Definition: execnodes.h:681
MemoryContext es_query_cxt
Definition: execnodes.h:675
int es_parallel_workers_launched
Definition: execnodes.h:713
fmNodePtr resultinfo
Definition: fmgr.h:89
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
WalUsage walusage
Definition: instrument.h:92
BufferUsage bufusage
Definition: instrument.h:91
JitInstrumentation instr
Definition: jit.h:62
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:42
instr_time deform_counter
Definition: jit.h:36
instr_time emission_counter
Definition: jit.h:45
instr_time inlining_counter
Definition: jit.h:39
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: lwlock.h:42
Definition: nodes.h:129
const char * p_sourcetext
Definition: parse_node.h:209
ParseLoc stmt_len
Definition: plannodes.h:99
ParseLoc stmt_location
Definition: plannodes.h:98
Node * utilityStmt
Definition: plannodes.h:95
uint64 queryId
Definition: plannodes.h:54
uint64 nprocessed
Definition: cmdtag.h:32
CommandTag commandTag
Definition: cmdtag.h:31
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
Node * utilityStmt
Definition: parsenodes.h:136
ParseLoc stmt_location
Definition: parsenodes.h:240
TupleDesc setDesc
Definition: execnodes.h:343
Tuplestorestate * setResult
Definition: execnodes.h:342
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
Definition: guc.h:170
Counters counters
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:273
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:543
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:70
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
#define fstat
Definition: win32_port.h:283
#define ftruncate(a, b)
Definition: win32_port.h:82