PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * pg_stat_statements.c
 *		Track statement planning and execution times as well as resource
 *		usage across a whole database cluster.
 *
 * Execution costs are totaled for each distinct source query, and kept in
 * a shared hashtable.  (We track only as many distinct queries as will fit
 * in the designated amount of shared memory.)
 *
 * Starting in Postgres 9.2, this module normalized query entries.  As of
 * Postgres 14, the normalization is done by the core if compute_query_id is
 * enabled, or optionally by third-party modules.
 *
 * To facilitate presenting entries to users, we create "representative" query
 * strings in which constants are replaced with parameter symbols ($n), to
 * make it clearer what a normalized entry can represent.  To save on shared
 * memory, and to avoid having to truncate oversized query strings, we store
 * these strings in a temporary external query-texts file.  Offsets into this
 * file are kept in shared memory.
 *
 * Note about locking issues: to create or delete an entry in the shared
 * hashtable, one must hold pgss->lock exclusively.  Modifying any field
 * in an entry except the counters requires the same.  To look up an entry,
 * one must hold the lock shared.  To read or update the counters within
 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
 * disappear!) and also take the entry's mutex spinlock.
 * The shared state variable pgss->extent (the next free spot in the external
 * query-text file) should be accessed only while holding either the
 * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
 * allow reserving file space while holding only shared lock on pgss->lock.
 * Rewriting the entire external query-text file, eg for garbage collection,
 * requires holding pgss->lock exclusively; this allows individual entries
 * in the file to be read or written while holding only shared lock.
 *
 *
 * Copyright (c) 2008-2026, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pg_stat_statements/pg_stat_statements.c
 *
 *-------------------------------------------------------------------------
 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/htup_details.h"
51#include "access/parallel.h"
52#include "catalog/pg_authid.h"
53#include "common/int.h"
54#include "executor/instrument.h"
55#include "funcapi.h"
56#include "jit/jit.h"
57#include "mb/pg_wchar.h"
58#include "miscadmin.h"
59#include "nodes/queryjumble.h"
60#include "optimizer/planner.h"
61#include "parser/analyze.h"
62#include "parser/scanner.h"
63#include "pgstat.h"
64#include "storage/fd.h"
65#include "storage/ipc.h"
66#include "storage/lwlock.h"
67#include "storage/shmem.h"
68#include "storage/spin.h"
69#include "tcop/utility.h"
70#include "utils/acl.h"
71#include "utils/builtins.h"
72#include "utils/memutils.h"
73#include "utils/timestamp.h"
74#include "utils/tuplestore.h"
75
77 .name = "pg_stat_statements",
78 .version = PG_VERSION
79);
80
81/* Location of permanent stats file (valid when database is shut down) */
82#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
83
84/*
85 * Location of external query text file.
86 */
87#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
88
89/* Magic number identifying the stats file format */
90static const uint32 PGSS_FILE_HEADER = 0x20250731;
91
92/* PostgreSQL major version number, changes in which invalidate all entries */
94
/*
 * Usage-accounting tunables.
 *
 * XXX: Should USAGE_EXEC reflect execution time and/or buffer usage?
 */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * IS_STICKY(c)
 *		True when the planning and execution call counts of the counters
 *		struct "c" sum to zero.
 *
 * The argument is parenthesized so that any expression yielding the struct
 * (for example a dereferenced pointer) expands with the intended precedence.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
104
105/*
106 * Extension version number, for supporting older extension versions' objects
107 */
121
122typedef enum pgssStoreKind
123{
125
126 /*
127 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
128 * reference the underlying values in the arrays in the Counters struct,
129 * and this order is required in pg_stat_statements_internal().
130 */
134
135#define PGSS_NUMKIND (PGSS_EXEC + 1)
136
137/*
138 * Hashtable key that defines the identity of a hashtable entry. We separate
139 * queries by user and by database even if they are otherwise identical.
140 *
141 * If you add a new key to this struct, make sure to teach pgss_store() to
142 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
143 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
144 */
145typedef struct pgssHashKey
146{
147 Oid userid; /* user OID */
148 Oid dbid; /* database OID */
149 int64 queryid; /* query identifier */
150 bool toplevel; /* query executed at top level */
152
153/*
154 * The actual stats counters kept within pgssEntry.
155 */
156typedef struct Counters
157{
158 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
159 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
160 * in msec */
161 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
162 * msec since min/max reset */
163 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
164 * msec since min/max reset */
165 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
166 * msec */
167 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
168 * planning/execution time in msec */
169 int64 rows; /* total # of retrieved or affected rows */
170 int64 shared_blks_hit; /* # of shared buffer hits */
171 int64 shared_blks_read; /* # of shared disk blocks read */
172 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
173 int64 shared_blks_written; /* # of shared disk blocks written */
174 int64 local_blks_hit; /* # of local buffer hits */
175 int64 local_blks_read; /* # of local disk blocks read */
176 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
177 int64 local_blks_written; /* # of local disk blocks written */
178 int64 temp_blks_read; /* # of temp blocks read */
179 int64 temp_blks_written; /* # of temp blocks written */
180 double shared_blk_read_time; /* time spent reading shared blocks,
181 * in msec */
182 double shared_blk_write_time; /* time spent writing shared blocks,
183 * in msec */
184 double local_blk_read_time; /* time spent reading local blocks, in
185 * msec */
186 double local_blk_write_time; /* time spent writing local blocks, in
187 * msec */
188 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
189 double temp_blk_write_time; /* time spent writing temp blocks, in
190 * msec */
191 double usage; /* usage factor */
192 int64 wal_records; /* # of WAL records generated */
193 int64 wal_fpi; /* # of WAL full page images generated */
194 uint64 wal_bytes; /* total amount of WAL generated in bytes */
195 int64 wal_buffers_full; /* # of times the WAL buffers became full */
196 int64 jit_functions; /* total number of JIT functions emitted */
197 double jit_generation_time; /* total time to generate jit code */
198 int64 jit_inlining_count; /* number of times inlining time has been
199 * > 0 */
200 double jit_deform_time; /* total time to deform tuples in jit code */
201 int64 jit_deform_count; /* number of times deform time has been >
202 * 0 */
203
204 double jit_inlining_time; /* total time to inline jit code */
205 int64 jit_optimization_count; /* number of times optimization time
206 * has been > 0 */
207 double jit_optimization_time; /* total time to optimize jit code */
208 int64 jit_emission_count; /* number of times emission time has been
209 * > 0 */
210 double jit_emission_time; /* total time to emit jit code */
211 int64 parallel_workers_to_launch; /* # of parallel workers planned
212 * to be launched */
213 int64 parallel_workers_launched; /* # of parallel workers actually
214 * launched */
215 int64 generic_plan_calls; /* number of calls using a generic plan */
216 int64 custom_plan_calls; /* number of calls using a custom plan */
218
219/*
220 * Global statistics for pg_stat_statements
221 */
222typedef struct pgssGlobalStats
223{
224 int64 dealloc; /* # of times entries were deallocated */
225 TimestampTz stats_reset; /* timestamp with all stats reset */
227
228/*
229 * Statistics per statement
230 *
231 * Note: in event of a failure in garbage collection of the query text file,
232 * we reset query_offset to zero and query_len to -1. This will be seen as
233 * an invalid state by qtext_fetch().
234 */
235typedef struct pgssEntry
236{
237 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
238 Counters counters; /* the statistics for this query */
239 Size query_offset; /* query text offset in external file */
240 int query_len; /* # of valid bytes in query string, or -1 */
241 int encoding; /* query text encoding */
242 TimestampTz stats_since; /* timestamp of entry allocation */
243 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
244 slock_t mutex; /* protects the counters only */
246
247/*
248 * Global shared state
249 */
250typedef struct pgssSharedState
251{
252 LWLock *lock; /* protects hashtable search/modification */
253 double cur_median_usage; /* current median usage in hashtable */
254 Size mean_query_len; /* current mean entry text length */
255 slock_t mutex; /* protects following fields only: */
256 Size extent; /* current extent of query file */
257 int n_writers; /* number of active writers to query file */
258 int gc_count; /* query file garbage collection cycle count */
259 pgssGlobalStats stats; /* global statistics for pgss */
261
262/*---- Local variables ----*/
263
264/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
265static int nesting_level = 0;
266
267/* Saved hook values */
277
278/* Links to shared memory state */
281
282/*---- GUC variables ----*/
283
284typedef enum
285{
286 PGSS_TRACK_NONE, /* track no statements */
287 PGSS_TRACK_TOP, /* only top level statements */
288 PGSS_TRACK_ALL, /* all statements, including nested ones */
290
291static const struct config_enum_entry track_options[] =
292{
293 {"none", PGSS_TRACK_NONE, false},
294 {"top", PGSS_TRACK_TOP, false},
295 {"all", PGSS_TRACK_ALL, false},
296 {NULL, 0, false}
297};
298
299static int pgss_max = 5000; /* max # statements to track */
300static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
301static bool pgss_track_utility = true; /* whether to track utility commands */
302static bool pgss_track_planning = false; /* whether to track planning
303 * duration */
304static bool pgss_save = true; /* whether to save stats across shutdown */
305
/*
 * pgss_enabled(level)
 *		Evaluates to true when IsParallelWorker() is false and either
 *		pgss_track is PGSS_TRACK_TOP with "level" equal to zero, or
 *		pgss_track is PGSS_TRACK_ALL.  "level" is evaluated at most once,
 *		and only in the PGSS_TRACK_TOP case.
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	 ((pgss_track == PGSS_TRACK_TOP && (level) == 0) || \
	  pgss_track == PGSS_TRACK_ALL))
310
/*
 * record_gc_qtexts()
 *		Advance pgss->gc_count by one while holding the pgss->mutex
 *		spinlock.  Wrapped in do/while(0) so it behaves as one statement.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count += 1; \
		SpinLockRelease(&pgss->mutex); \
	} while (0)
317
318/*---- Function declarations ----*/
319
333
334static void pgss_shmem_request(void);
335static void pgss_shmem_startup(void);
336static void pgss_shmem_shutdown(int code, Datum arg);
337static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
339static PlannedStmt *pgss_planner(Query *parse,
340 const char *query_string,
341 int cursorOptions,
342 ParamListInfo boundParams,
343 ExplainState *es);
344static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
345static void pgss_ExecutorRun(QueryDesc *queryDesc,
346 ScanDirection direction,
347 uint64 count);
348static void pgss_ExecutorFinish(QueryDesc *queryDesc);
349static void pgss_ExecutorEnd(QueryDesc *queryDesc);
350static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
351 bool readOnlyTree,
352 ProcessUtilityContext context, ParamListInfo params,
353 QueryEnvironment *queryEnv,
354 DestReceiver *dest, QueryCompletion *qc);
355static void pgss_store(const char *query, int64 queryId,
356 int query_location, int query_len,
357 pgssStoreKind kind,
358 double total_time, uint64 rows,
359 const BufferUsage *bufusage,
360 const WalUsage *walusage,
361 const struct JitInstrumentation *jitusage,
363 int parallel_workers_to_launch,
364 int parallel_workers_launched,
365 PlannedStmtOrigin planOrigin);
367 pgssVersion api_version,
368 bool showtext);
369static Size pgss_memsize(void);
370static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
371 int encoding, bool sticky);
372static void entry_dealloc(void);
373static bool qtext_store(const char *query, int query_len,
374 Size *query_offset, int *gc_count);
375static char *qtext_load_file(Size *buffer_size);
376static char *qtext_fetch(Size query_offset, int query_len,
377 char *buffer, Size buffer_size);
378static bool need_gc_qtexts(void);
379static void gc_qtexts(void);
380static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
381static char *generate_normalized_query(JumbleState *jstate, const char *query,
382 int query_loc, int *query_len_p);
383static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
384 int query_loc);
385static int comp_location(const void *a, const void *b);
386
387
388/*
389 * Module load callback
390 */
391void
393{
394 /*
395 * In order to create our shared memory area, we have to be loaded via
396 * shared_preload_libraries. If not, fall out without hooking into any of
397 * the main system. (We don't throw error here because it seems useful to
398 * allow the pg_stat_statements functions to be created even when the
399 * module isn't active. The functions must protect themselves against
400 * being called then, however.)
401 */
403 return;
404
405 /*
406 * Inform the postmaster that we want to enable query_id calculation if
407 * compute_query_id is set to auto.
408 */
410
411 /*
412 * Define (or redefine) custom GUC variables.
413 */
414 DefineCustomIntVariable("pg_stat_statements.max",
415 "Sets the maximum number of statements tracked by pg_stat_statements.",
416 NULL,
417 &pgss_max,
418 5000,
419 100,
420 INT_MAX / 2,
422 0,
423 NULL,
424 NULL,
425 NULL);
426
427 DefineCustomEnumVariable("pg_stat_statements.track",
428 "Selects which statements are tracked by pg_stat_statements.",
429 NULL,
430 &pgss_track,
433 PGC_SUSET,
434 0,
435 NULL,
436 NULL,
437 NULL);
438
439 DefineCustomBoolVariable("pg_stat_statements.track_utility",
440 "Selects whether utility commands are tracked by pg_stat_statements.",
441 NULL,
443 true,
444 PGC_SUSET,
445 0,
446 NULL,
447 NULL,
448 NULL);
449
450 DefineCustomBoolVariable("pg_stat_statements.track_planning",
451 "Selects whether planning duration is tracked by pg_stat_statements.",
452 NULL,
454 false,
455 PGC_SUSET,
456 0,
457 NULL,
458 NULL,
459 NULL);
460
461 DefineCustomBoolVariable("pg_stat_statements.save",
462 "Save pg_stat_statements statistics across server shutdowns.",
463 NULL,
464 &pgss_save,
465 true,
467 0,
468 NULL,
469 NULL,
470 NULL);
471
472 MarkGUCPrefixReserved("pg_stat_statements");
473
474 /*
475 * Install hooks.
476 */
495}
496
497/*
498 * shmem_request hook: request additional shared resources. We'll allocate or
499 * attach to the shared resources in pgss_shmem_startup().
500 */
501static void
510
511/*
512 * shmem_startup hook: allocate or attach to shared memory,
513 * then load any pre-existing statistics from file.
514 * Also create and load the query-texts file, which is expected to exist
515 * (even if empty) while the module is enabled.
516 */
517static void
519{
520 bool found;
521 HASHCTL info;
522 FILE *file = NULL;
523 FILE *qfile = NULL;
524 uint32 header;
525 int32 num;
526 int32 pgver;
527 int32 i;
528 int buffer_size;
529 char *buffer = NULL;
530
533
534 /* reset in case this is a restart within the postmaster */
535 pgss = NULL;
536 pgss_hash = NULL;
537
538 /*
539 * Create or attach to the shared memory state, including hash table
540 */
542
543 pgss = ShmemInitStruct("pg_stat_statements",
544 sizeof(pgssSharedState),
545 &found);
546
547 if (!found)
548 {
549 /* First time through ... */
550 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
554 pgss->extent = 0;
555 pgss->n_writers = 0;
556 pgss->gc_count = 0;
557 pgss->stats.dealloc = 0;
559 }
560
561 info.keysize = sizeof(pgssHashKey);
562 info.entrysize = sizeof(pgssEntry);
563 pgss_hash = ShmemInitHash("pg_stat_statements hash",
565 &info,
567
569
570 /*
571 * If we're in the postmaster (or a standalone backend...), set up a shmem
572 * exit hook to dump the statistics to disk.
573 */
576
577 /*
578 * Done if some other process already completed our initialization.
579 */
580 if (found)
581 return;
582
583 /*
584 * Note: we don't bother with locks here, because there should be no other
585 * processes running when this code is reached.
586 */
587
588 /* Unlink query text file possibly left over from crash */
590
591 /* Allocate new query text temp file */
593 if (qfile == NULL)
594 goto write_error;
595
596 /*
597 * If we were told not to load old statistics, we're done. (Note we do
598 * not try to unlink any old dump file in this case. This seems a bit
599 * questionable but it's the historical behavior.)
600 */
601 if (!pgss_save)
602 {
604 return;
605 }
606
607 /*
608 * Attempt to load old statistics from the dump file.
609 */
611 if (file == NULL)
612 {
613 if (errno != ENOENT)
614 goto read_error;
615 /* No existing persisted stats file, so we're done */
617 return;
618 }
619
620 buffer_size = 2048;
621 buffer = (char *) palloc(buffer_size);
622
623 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
624 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
625 fread(&num, sizeof(int32), 1, file) != 1)
626 goto read_error;
627
628 if (header != PGSS_FILE_HEADER ||
630 goto data_error;
631
632 for (i = 0; i < num; i++)
633 {
635 pgssEntry *entry;
636 Size query_offset;
637
638 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
639 goto read_error;
640
641 /* Encoding is the only field we can easily sanity-check */
642 if (!PG_VALID_BE_ENCODING(temp.encoding))
643 goto data_error;
644
645 /* Resize buffer as needed */
646 if (temp.query_len >= buffer_size)
647 {
648 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
649 buffer = repalloc(buffer, buffer_size);
650 }
651
652 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
653 goto read_error;
654
655 /* Should have a trailing null, but let's make sure */
656 buffer[temp.query_len] = '\0';
657
658 /* Skip loading "sticky" entries */
659 if (IS_STICKY(temp.counters))
660 continue;
661
662 /* Store the query text */
663 query_offset = pgss->extent;
664 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
665 goto write_error;
666 pgss->extent += temp.query_len + 1;
667
668 /* make the hashtable entry (discards old entries if too many) */
669 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
670 temp.encoding,
671 false);
672
673 /* copy in the actual stats */
674 entry->counters = temp.counters;
675 entry->stats_since = temp.stats_since;
676 entry->minmax_stats_since = temp.minmax_stats_since;
677 }
678
679 /* Read global statistics for pg_stat_statements */
680 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
681 goto read_error;
682
683 pfree(buffer);
684 FreeFile(file);
686
687 /*
688 * Remove the persisted stats file so it's not included in
689 * backups/replication standbys, etc. A new file will be written on next
690 * shutdown.
691 *
692 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
693 * because we remove that file on startup; it acts inversely to
694 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
695 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
696 * when the server is not running. Leaving the file creates no danger of
697 * a newly restored database having a spurious record of execution costs,
698 * which is what we're really concerned about here.
699 */
701
702 return;
703
705 ereport(LOG,
707 errmsg("could not read file \"%s\": %m",
709 goto fail;
711 ereport(LOG,
713 errmsg("ignoring invalid data in file \"%s\"",
715 goto fail;
717 ereport(LOG,
719 errmsg("could not write file \"%s\": %m",
721fail:
722 if (buffer)
723 pfree(buffer);
724 if (file)
725 FreeFile(file);
726 if (qfile)
728 /* If possible, throw away the bogus file; ignore any error */
730
731 /*
732 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
733 * server is running with pg_stat_statements enabled
734 */
735}
736
737/*
738 * shmem_shutdown hook: Dump statistics into file.
739 *
740 * Note: we don't bother with acquiring lock, because there should be no
741 * other processes running when this is called.
742 */
743static void
745{
746 FILE *file;
747 char *qbuffer = NULL;
748 Size qbuffer_size = 0;
750 int32 num_entries;
751 pgssEntry *entry;
752
753 /* Don't try to dump during a crash. */
754 if (code)
755 return;
756
757 /* Safety check ... shouldn't get here unless shmem is set up. */
758 if (!pgss || !pgss_hash)
759 return;
760
761 /* Don't dump if told not to. */
762 if (!pgss_save)
763 return;
764
766 if (file == NULL)
767 goto error;
768
769 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
770 goto error;
771 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
772 goto error;
773 num_entries = hash_get_num_entries(pgss_hash);
774 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
775 goto error;
776
778 if (qbuffer == NULL)
779 goto error;
780
781 /*
782 * When serializing to disk, we store query texts immediately after their
783 * entry data. Any orphaned query texts are thereby excluded.
784 */
786 while ((entry = hash_seq_search(&hash_seq)) != NULL)
787 {
788 int len = entry->query_len;
789 char *qstr = qtext_fetch(entry->query_offset, len,
791
792 if (qstr == NULL)
793 continue; /* Ignore any entries with bogus texts */
794
795 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
796 fwrite(qstr, 1, len + 1, file) != len + 1)
797 {
798 /* note: we assume hash_seq_term won't change errno */
800 goto error;
801 }
802 }
803
804 /* Dump global statistics for pg_stat_statements */
805 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
806 goto error;
807
808 free(qbuffer);
809 qbuffer = NULL;
810
811 if (FreeFile(file))
812 {
813 file = NULL;
814 goto error;
815 }
816
817 /*
818 * Rename file into place, so we atomically replace any old one.
819 */
821
822 /* Unlink query-texts file; it's not needed while shutdown */
824
825 return;
826
827error:
828 ereport(LOG,
830 errmsg("could not write file \"%s\": %m",
831 PGSS_DUMP_FILE ".tmp")));
832 free(qbuffer);
833 if (file)
834 FreeFile(file);
835 unlink(PGSS_DUMP_FILE ".tmp");
837}
838
839/*
840 * Post-parse-analysis hook: mark query with a queryId
841 */
842static void
844{
846 prev_post_parse_analyze_hook(pstate, query, jstate);
847
848 /* Safety check... */
850 return;
851
852 /*
853 * If it's EXECUTE, clear the queryId so that stats will accumulate for
854 * the underlying PREPARE. But don't do this if we're not tracking
855 * utility statements, to avoid messing up another extension that might be
856 * tracking them.
857 */
858 if (query->utilityStmt)
859 {
861 {
862 query->queryId = INT64CONST(0);
863 return;
864 }
865 }
866
867 /*
868 * If query jumbling were able to identify any ignorable constants, we
869 * immediately create a hash table entry for the query, so that we can
870 * record the normalized form of the query string. If there were no such
871 * constants, the normalized string would be the same as the query text
872 * anyway, so there's no need for an early entry.
873 */
874 if (jstate && jstate->clocations_count > 0)
875 pgss_store(pstate->p_sourcetext,
876 query->queryId,
877 query->stmt_location,
878 query->stmt_len,
880 0,
881 0,
882 NULL,
883 NULL,
884 NULL,
885 jstate,
886 0,
887 0,
889}
890
891/*
892 * Planner hook: forward to regular planner, but measure planning time
893 * if needed.
894 */
895static PlannedStmt *
897 const char *query_string,
898 int cursorOptions,
899 ParamListInfo boundParams,
900 ExplainState *es)
901{
902 PlannedStmt *result;
903
904 /*
905 * We can't process the query if no query_string is provided, as
906 * pgss_store needs it. We also ignore query without queryid, as it would
907 * be treated as a utility statement, which may not be the case.
908 */
910 && pgss_track_planning && query_string
911 && parse->queryId != INT64CONST(0))
912 {
915 BufferUsage bufusage_start,
916 bufusage;
917 WalUsage walusage_start,
918 walusage;
919
920 /* We need to track buffer usage as the planner can access them. */
921 bufusage_start = pgBufferUsage;
922
923 /*
924 * Similarly the planner could write some WAL records in some cases
925 * (e.g. setting a hint bit with those being WAL-logged)
926 */
927 walusage_start = pgWalUsage;
929
931 PG_TRY();
932 {
934 result = prev_planner_hook(parse, query_string, cursorOptions,
935 boundParams, es);
936 else
937 result = standard_planner(parse, query_string, cursorOptions,
938 boundParams, es);
939 }
940 PG_FINALLY();
941 {
943 }
944 PG_END_TRY();
945
948
949 /* calc differences of buffer counters. */
950 memset(&bufusage, 0, sizeof(BufferUsage));
951 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
952
953 /* calc differences of WAL counters. */
954 memset(&walusage, 0, sizeof(WalUsage));
955 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
956
957 pgss_store(query_string,
958 parse->queryId,
959 parse->stmt_location,
960 parse->stmt_len,
961 PGSS_PLAN,
963 0,
964 &bufusage,
965 &walusage,
966 NULL,
967 NULL,
968 0,
969 0,
970 result->planOrigin);
971 }
972 else
973 {
974 /*
975 * Even though we're not tracking plan time for this statement, we
976 * must still increment the nesting level, to ensure that functions
977 * evaluated during planning are not seen as top-level calls.
978 */
980 PG_TRY();
981 {
983 result = prev_planner_hook(parse, query_string, cursorOptions,
984 boundParams, es);
985 else
986 result = standard_planner(parse, query_string, cursorOptions,
987 boundParams, es);
988 }
989 PG_FINALLY();
990 {
992 }
993 PG_END_TRY();
994 }
995
996 return result;
997}
998
999/*
1000 * ExecutorStart hook: start up tracking if needed
1001 */
1002static void
1003pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
1004{
1006 prev_ExecutorStart(queryDesc, eflags);
1007 else
1008 standard_ExecutorStart(queryDesc, eflags);
1009
1010 /*
1011 * If query has queryId zero, don't track it. This prevents double
1012 * counting of optimizable statements that are directly contained in
1013 * utility statements.
1014 */
1015 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1016 {
1017 /*
1018 * Set up to track total elapsed time in ExecutorRun. Make sure the
1019 * space is allocated in the per-query context so it will go away at
1020 * ExecutorEnd.
1021 */
1022 if (queryDesc->totaltime == NULL)
1023 {
1025
1027 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1029 }
1030 }
1031}
1032
1033/*
1034 * ExecutorRun hook: all we need do is track nesting depth
1035 */
1036static void
1037pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1038{
1039 nesting_level++;
1040 PG_TRY();
1041 {
1042 if (prev_ExecutorRun)
1043 prev_ExecutorRun(queryDesc, direction, count);
1044 else
1045 standard_ExecutorRun(queryDesc, direction, count);
1046 }
1047 PG_FINALLY();
1048 {
1049 nesting_level--;
1050 }
1051 PG_END_TRY();
1052}
1053
1054/*
1055 * ExecutorFinish hook: all we need do is track nesting depth
1056 */
1057static void
1059{
1060 nesting_level++;
1061 PG_TRY();
1062 {
1064 prev_ExecutorFinish(queryDesc);
1065 else
1066 standard_ExecutorFinish(queryDesc);
1067 }
1068 PG_FINALLY();
1069 {
1070 nesting_level--;
1071 }
1072 PG_END_TRY();
1073}
1074
1075/*
1076 * ExecutorEnd hook: store results if needed
1077 */
1078static void
1080{
1081 int64 queryId = queryDesc->plannedstmt->queryId;
1082
1083 if (queryId != INT64CONST(0) && queryDesc->totaltime &&
1085 {
1086 /*
1087 * Make sure stats accumulation is done. (Note: it's okay if several
1088 * levels of hook all do this.)
1089 */
1090 InstrEndLoop(queryDesc->totaltime);
1091
1092 pgss_store(queryDesc->sourceText,
1093 queryId,
1094 queryDesc->plannedstmt->stmt_location,
1095 queryDesc->plannedstmt->stmt_len,
1096 PGSS_EXEC,
1098 queryDesc->estate->es_total_processed,
1099 &queryDesc->totaltime->bufusage,
1100 &queryDesc->totaltime->walusage,
1101 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1102 NULL,
1105 queryDesc->plannedstmt->planOrigin);
1106 }
1107
1108 if (prev_ExecutorEnd)
1109 prev_ExecutorEnd(queryDesc);
1110 else
1111 standard_ExecutorEnd(queryDesc);
1112}
1113
1114/*
1115 * ProcessUtility hook
1116 */
1117static void
1118pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1119 bool readOnlyTree,
1120 ProcessUtilityContext context,
1121 ParamListInfo params, QueryEnvironment *queryEnv,
1122 DestReceiver *dest, QueryCompletion *qc)
1123{
1124 Node *parsetree = pstmt->utilityStmt;
1125 int64 saved_queryId = pstmt->queryId;
1127 int saved_stmt_len = pstmt->stmt_len;
1129
1130 /*
1131 * Force utility statements to get queryId zero. We do this even in cases
1132 * where the statement contains an optimizable statement for which a
1133 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1134 * cases, runtime control will first go through ProcessUtility and then
1135 * the executor, and we don't want the executor hooks to do anything,
1136 * since we are already measuring the statement's costs at the utility
1137 * level.
1138 *
1139 * Note that this is only done if pg_stat_statements is enabled and
1140 * configured to track utility statements, in the unlikely possibility
1141 * that user configured another extension to handle utility statements
1142 * only.
1143 */
1144 if (enabled)
1145 pstmt->queryId = INT64CONST(0);
1146
1147 /*
1148 * If it's an EXECUTE statement, we don't track it and don't increment the
1149 * nesting level. This allows the cycles to be charged to the underlying
1150 * PREPARE instead (by the Executor hooks), which is much more useful.
1151 *
1152 * We also don't track execution of PREPARE. If we did, we would get one
1153 * hash table entry for the PREPARE (with hash calculated from the query
1154 * string), and then a different one with the same query string (but hash
1155 * calculated from the query tree) would be used to accumulate costs of
1156 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1157 * actually run the planner (only parse+rewrite), its costs are generally
1158 * pretty negligible and it seems okay to just ignore it.
1159 */
1160 if (enabled &&
1161 !IsA(parsetree, ExecuteStmt) &&
1162 !IsA(parsetree, PrepareStmt))
1163 {
1166 uint64 rows;
1167 BufferUsage bufusage_start,
1168 bufusage;
1169 WalUsage walusage_start,
1170 walusage;
1171
1172 bufusage_start = pgBufferUsage;
1173 walusage_start = pgWalUsage;
1175
1176 nesting_level++;
1177 PG_TRY();
1178 {
1180 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1181 context, params, queryEnv,
1182 dest, qc);
1183 else
1184 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1185 context, params, queryEnv,
1186 dest, qc);
1187 }
1188 PG_FINALLY();
1189 {
1190 nesting_level--;
1191 }
1192 PG_END_TRY();
1193
1194 /*
1195 * CAUTION: do not access the *pstmt data structure again below here.
1196 * If it was a ROLLBACK or similar, that data structure may have been
1197 * freed. We must copy everything we still need into local variables,
1198 * which we did above.
1199 *
1200 * For the same reason, we can't risk restoring pstmt->queryId to its
1201 * former value, which'd otherwise be a good idea.
1202 */
1203
1206
1207 /*
1208 * Track the total number of rows retrieved or affected by the utility
1209 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1210 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1211 */
1212 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1213 qc->commandTag == CMDTAG_FETCH ||
1214 qc->commandTag == CMDTAG_SELECT ||
1216 qc->nprocessed : 0;
1217
1218 /* calc differences of buffer counters. */
1219 memset(&bufusage, 0, sizeof(BufferUsage));
1220 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1221
1222 /* calc differences of WAL counters. */
1223 memset(&walusage, 0, sizeof(WalUsage));
1224 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1225
1226 pgss_store(queryString,
1230 PGSS_EXEC,
1232 rows,
1233 &bufusage,
1234 &walusage,
1235 NULL,
1236 NULL,
1237 0,
1238 0,
1239 pstmt->planOrigin);
1240 }
1241 else
1242 {
1243 /*
1244 * Even though we're not tracking execution time for this statement,
1245 * we must still increment the nesting level, to ensure that functions
1246 * evaluated within it are not seen as top-level calls. But don't do
1247 * so for EXECUTE; that way, when control reaches pgss_planner or
1248 * pgss_ExecutorStart, we will treat the costs as top-level if
1249 * appropriate. Likewise, don't bump for PREPARE, so that parse
1250 * analysis will treat the statement as top-level if appropriate.
1251 *
1252 * To be absolutely certain we don't mess up the nesting level,
1253 * evaluate the bump_level condition just once.
1254 */
1255 bool bump_level =
1256 !IsA(parsetree, ExecuteStmt) &&
1257 !IsA(parsetree, PrepareStmt);
1258
1259 if (bump_level)
1260 nesting_level++;
1261 PG_TRY();
1262 {
1264 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1265 context, params, queryEnv,
1266 dest, qc);
1267 else
1268 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1269 context, params, queryEnv,
1270 dest, qc);
1271 }
1272 PG_FINALLY();
1273 {
1274 if (bump_level)
1275 nesting_level--;
1276 }
1277 PG_END_TRY();
1278 }
1279}
1280
1281/*
1282 * Store some statistics for a statement.
1283 *
1284 * If jstate is not NULL then we're trying to create an entry for which
1285 * we have no statistics as yet; we just want to record the normalized
1286 * query string. total_time, rows, bufusage and walusage are ignored in this
1287 * case.
1288 *
1289 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1290 * for the arrays in the Counters field.
1291 */
1292static void
1293pgss_store(const char *query, int64 queryId,
1294 int query_location, int query_len,
1295 pgssStoreKind kind,
1296 double total_time, uint64 rows,
1297 const BufferUsage *bufusage,
1298 const WalUsage *walusage,
1299 const struct JitInstrumentation *jitusage,
1301 int parallel_workers_to_launch,
1302 int parallel_workers_launched,
1303 PlannedStmtOrigin planOrigin)
1304{
1305 pgssHashKey key;
1306 pgssEntry *entry;
1307 char *norm_query = NULL;
1309
1310 Assert(query != NULL);
1311
1312 /* Safety check... */
1313 if (!pgss || !pgss_hash)
1314 return;
1315
1316 /*
1317 * Nothing to do if compute_query_id isn't enabled and no other module
1318 * computed a query identifier.
1319 */
1320 if (queryId == INT64CONST(0))
1321 return;
1322
1323 /*
1324 * Confine our attention to the relevant part of the string, if the query
1325 * is a portion of a multi-statement source string, and update query
1326 * location and length if needed.
1327 */
1328 query = CleanQuerytext(query, &query_location, &query_len);
1329
1330 /* Set up key for hashtable search */
1331
1332 /* clear padding */
1333 memset(&key, 0, sizeof(pgssHashKey));
1334
1335 key.userid = GetUserId();
1336 key.dbid = MyDatabaseId;
1337 key.queryid = queryId;
1338 key.toplevel = (nesting_level == 0);
1339
1340 /* Lookup the hash table entry with shared lock. */
1342
1343 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1344
1345 /* Create new entry, if not present */
1346 if (!entry)
1347 {
1348 Size query_offset;
1349 int gc_count;
1350 bool stored;
1351 bool do_gc;
1352
1353 /*
1354 * Create a new, normalized query string if caller asked. We don't
1355 * need to hold the lock while doing this work. (Note: in any case,
1356 * it's possible that someone else creates a duplicate hashtable entry
1357 * in the interval where we don't hold the lock below. That case is
1358 * handled by entry_alloc.)
1359 */
1360 if (jstate)
1361 {
1365 &query_len);
1367 }
1368
1369 /* Append new query text to file with only shared lock held */
1370 stored = qtext_store(norm_query ? norm_query : query, query_len,
1371 &query_offset, &gc_count);
1372
1373 /*
1374 * Determine whether we need to garbage collect external query texts
1375 * while the shared lock is still held. This micro-optimization
1376 * avoids taking the time to decide this while holding exclusive lock.
1377 */
1379
1380 /* Need exclusive lock to make a new hashtable entry - promote */
1383
1384 /*
1385 * A garbage collection may have occurred while we weren't holding the
1386 * lock. In the unlikely event that this happens, the query text we
1387 * stored above will have been garbage collected, so write it again.
1388 * This should be infrequent enough that doing it while holding
1389 * exclusive lock isn't a performance problem.
1390 */
1391 if (!stored || pgss->gc_count != gc_count)
1392 stored = qtext_store(norm_query ? norm_query : query, query_len,
1393 &query_offset, NULL);
1394
1395 /* If we failed to write to the text file, give up */
1396 if (!stored)
1397 goto done;
1398
1399 /* OK to create a new hashtable entry */
1400 entry = entry_alloc(&key, query_offset, query_len, encoding,
1401 jstate != NULL);
1402
1403 /* If needed, perform garbage collection while exclusive lock held */
1404 if (do_gc)
1405 gc_qtexts();
1406 }
1407
1408 /* Increment the counts, except when jstate is not NULL */
1409 if (!jstate)
1410 {
1411 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1412
1413 /*
1414 * Grab the spinlock while updating the counters (see comment about
1415 * locking rules at the head of the file)
1416 */
1417 SpinLockAcquire(&entry->mutex);
1418
1419 /* "Unstick" entry if it was previously sticky */
1420 if (IS_STICKY(entry->counters))
1421 entry->counters.usage = USAGE_INIT;
1422
1423 entry->counters.calls[kind] += 1;
1424 entry->counters.total_time[kind] += total_time;
1425
1426 if (entry->counters.calls[kind] == 1)
1427 {
1428 entry->counters.min_time[kind] = total_time;
1429 entry->counters.max_time[kind] = total_time;
1430 entry->counters.mean_time[kind] = total_time;
1431 }
1432 else
1433 {
1434 /*
1435 * Welford's method for accurately computing variance. See
1436 * <http://www.johndcook.com/blog/standard_deviation/>
1437 */
1438 double old_mean = entry->counters.mean_time[kind];
1439
1440 entry->counters.mean_time[kind] +=
1441 (total_time - old_mean) / entry->counters.calls[kind];
1442 entry->counters.sum_var_time[kind] +=
1443 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1444
1445 /*
1446 * Calculate min and max time. min = 0 and max = 0 means that the
1447 * min/max statistics were reset
1448 */
1449 if (entry->counters.min_time[kind] == 0
1450 && entry->counters.max_time[kind] == 0)
1451 {
1452 entry->counters.min_time[kind] = total_time;
1453 entry->counters.max_time[kind] = total_time;
1454 }
1455 else
1456 {
1457 if (entry->counters.min_time[kind] > total_time)
1458 entry->counters.min_time[kind] = total_time;
1459 if (entry->counters.max_time[kind] < total_time)
1460 entry->counters.max_time[kind] = total_time;
1461 }
1462 }
1463 entry->counters.rows += rows;
1464 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1465 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1468 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1469 entry->counters.local_blks_read += bufusage->local_blks_read;
1472 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1473 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1480 entry->counters.usage += USAGE_EXEC(total_time);
1481 entry->counters.wal_records += walusage->wal_records;
1482 entry->counters.wal_fpi += walusage->wal_fpi;
1483 entry->counters.wal_bytes += walusage->wal_bytes;
1484 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1485 if (jitusage)
1486 {
1487 entry->counters.jit_functions += jitusage->created_functions;
1488 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1489
1490 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1491 entry->counters.jit_deform_count++;
1492 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1493
1494 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1496 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1497
1498 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1500 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1501
1502 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1504 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1505 }
1506
1507 /* parallel worker counters */
1508 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1509 entry->counters.parallel_workers_launched += parallel_workers_launched;
1510
1511 /* plan cache counters */
1512 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1514 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1515 entry->counters.custom_plan_calls++;
1516
1517 SpinLockRelease(&entry->mutex);
1518 }
1519
1520done:
1522
1523 /* We postpone this clean-up until we're out of the lock */
1524 if (norm_query)
1526}
1527
1528/*
1529 * Reset statement statistics corresponding to userid, dbid, and queryid.
1530 */
1531Datum
1533{
1534 Oid userid;
1535 Oid dbid;
1536 int64 queryid;
1537
1538 userid = PG_GETARG_OID(0);
1539 dbid = PG_GETARG_OID(1);
1540 queryid = PG_GETARG_INT64(2);
1541
1542 entry_reset(userid, dbid, queryid, false);
1543
1545}
1546
1547Datum
1549{
1550 Oid userid;
1551 Oid dbid;
1552 int64 queryid;
1553 bool minmax_only;
1554
1555 userid = PG_GETARG_OID(0);
1556 dbid = PG_GETARG_OID(1);
1557 queryid = PG_GETARG_INT64(2);
1559
1560 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1561}
1562
1563/*
1564 * Reset statement statistics.
1565 */
1566Datum
1568{
1569 entry_reset(0, 0, 0, false);
1570
1572}
1573
1574/* Number of output arguments (columns) for various API versions */
/*
 * Each PG_STAT_STATEMENTS_COLS_Vx_y is the number of result columns emitted
 * for API version x.y.  The common retrieval code asserts, for every row it
 * builds, that the number of columns filled in equals the constant matching
 * the API version in use, so adding or removing an output column for a
 * version means updating that version's constant here as well.
 *
 * PG_STAT_STATEMENTS_COLS dimensions the per-row "nulls" array in the common
 * retrieval code, so it must stay equal to the largest per-version value.
 */
1575#define PG_STAT_STATEMENTS_COLS_V1_0 14
1576#define PG_STAT_STATEMENTS_COLS_V1_1 18
1577#define PG_STAT_STATEMENTS_COLS_V1_2 19
1578#define PG_STAT_STATEMENTS_COLS_V1_3 23
1579#define PG_STAT_STATEMENTS_COLS_V1_8 32
1580#define PG_STAT_STATEMENTS_COLS_V1_9 33
1581#define PG_STAT_STATEMENTS_COLS_V1_10 43
1582#define PG_STAT_STATEMENTS_COLS_V1_11 49
1583#define PG_STAT_STATEMENTS_COLS_V1_12 52
1584#define PG_STAT_STATEMENTS_COLS_V1_13 54
1585#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1586
1587/*
1588 * Retrieve statement statistics.
1589 *
1590 * The SQL API of this function has changed multiple times, and will likely
1591 * do so again in future. To support the case where a newer version of this
1592 * loadable module is being used with an old SQL declaration of the function,
1593 * we continue to support the older API versions. For 1.2 and later, the
1594 * expected API version is identified by embedding it in the C name of the
1595 * function. Unfortunately we weren't bright enough to do that for 1.1.
1596 */
1597Datum
1599{
1600 bool showtext = PG_GETARG_BOOL(0);
1601
1603
1604 return (Datum) 0;
1605}
1606
1607Datum
1609{
1610 bool showtext = PG_GETARG_BOOL(0);
1611
1613
1614 return (Datum) 0;
1615}
1616
1617Datum
1619{
1620 bool showtext = PG_GETARG_BOOL(0);
1621
1623
1624 return (Datum) 0;
1625}
1626
1627Datum
1629{
1630 bool showtext = PG_GETARG_BOOL(0);
1631
1633
1634 return (Datum) 0;
1635}
1636
1637Datum
1639{
1640 bool showtext = PG_GETARG_BOOL(0);
1641
1643
1644 return (Datum) 0;
1645}
1646
1647Datum
1649{
1650 bool showtext = PG_GETARG_BOOL(0);
1651
1653
1654 return (Datum) 0;
1655}
1656
1657Datum
1659{
1660 bool showtext = PG_GETARG_BOOL(0);
1661
1663
1664 return (Datum) 0;
1665}
1666
1667Datum
1669{
1670 bool showtext = PG_GETARG_BOOL(0);
1671
1673
1674 return (Datum) 0;
1675}
1676
1677/*
1678 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1679 * This can be removed someday, perhaps.
1680 */
1681Datum
1683{
1684 /* If it's really API 1.1, we'll figure that out below */
1686
1687 return (Datum) 0;
1688}
1689
1690/* Common code for all versions of pg_stat_statements() */
1691static void
1693 pgssVersion api_version,
1694 bool showtext)
1695{
1697 Oid userid = GetUserId();
1698 bool is_allowed_role = false;
1699 char *qbuffer = NULL;
1700 Size qbuffer_size = 0;
1701 Size extent = 0;
1702 int gc_count = 0;
1704 pgssEntry *entry;
1705
1706 /*
1707 * Superusers or roles with the privileges of pg_read_all_stats members
1708 * are allowed
1709 */
1711
1712 /* hash table must exist already */
1713 if (!pgss || !pgss_hash)
1714 ereport(ERROR,
1716 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1717
1718 InitMaterializedSRF(fcinfo, 0);
1719
1720 /*
1721 * Check we have the expected number of output arguments. Aside from
1722 * being a good safety check, we need a kluge here to detect API version
1723 * 1.1, which was wedged into the code in an ill-considered way.
1724 */
1725 switch (rsinfo->setDesc->natts)
1726 {
1728 if (api_version != PGSS_V1_0)
1729 elog(ERROR, "incorrect number of output arguments");
1730 break;
1732 /* pg_stat_statements() should have told us 1.0 */
1733 if (api_version != PGSS_V1_0)
1734 elog(ERROR, "incorrect number of output arguments");
1735 api_version = PGSS_V1_1;
1736 break;
1738 if (api_version != PGSS_V1_2)
1739 elog(ERROR, "incorrect number of output arguments");
1740 break;
1742 if (api_version != PGSS_V1_3)
1743 elog(ERROR, "incorrect number of output arguments");
1744 break;
1746 if (api_version != PGSS_V1_8)
1747 elog(ERROR, "incorrect number of output arguments");
1748 break;
1750 if (api_version != PGSS_V1_9)
1751 elog(ERROR, "incorrect number of output arguments");
1752 break;
1754 if (api_version != PGSS_V1_10)
1755 elog(ERROR, "incorrect number of output arguments");
1756 break;
1758 if (api_version != PGSS_V1_11)
1759 elog(ERROR, "incorrect number of output arguments");
1760 break;
1762 if (api_version != PGSS_V1_12)
1763 elog(ERROR, "incorrect number of output arguments");
1764 break;
1766 if (api_version != PGSS_V1_13)
1767 elog(ERROR, "incorrect number of output arguments");
1768 break;
1769 default:
1770 elog(ERROR, "incorrect number of output arguments");
1771 }
1772
1773 /*
1774 * We'd like to load the query text file (if needed) while not holding any
1775 * lock on pgss->lock. In the worst case we'll have to do this again
1776 * after we have the lock, but it's unlikely enough to make this a win
1777 * despite occasional duplicated work. We need to reload if anybody
1778 * writes to the file (either a retail qtext_store(), or a garbage
1779 * collection) between this point and where we've gotten shared lock. If
1780 * a qtext_store is actually in progress when we look, we might as well
1781 * skip the speculative load entirely.
1782 */
1783 if (showtext)
1784 {
1785 int n_writers;
1786
1787 /* Take the mutex so we can examine variables */
1789 extent = pgss->extent;
1790 n_writers = pgss->n_writers;
1791 gc_count = pgss->gc_count;
1793
1794 /* No point in loading file now if there are active writers */
1795 if (n_writers == 0)
1797 }
1798
1799 /*
1800 * Get shared lock, load or reload the query text file if we must, and
1801 * iterate over the hashtable entries.
1802 *
1803 * With a large hash table, we might be holding the lock rather longer
1804 * than one could wish. However, this only blocks creation of new hash
1805 * table entries, and the larger the hash table the less likely that is to
1806 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1807 * we need to partition the hash table to limit the time spent holding any
1808 * one lock.
1809 */
1811
1812 if (showtext)
1813 {
1814 /*
1815 * Here it is safe to examine extent and gc_count without taking the
1816 * mutex. Note that although other processes might change
1817 * pgss->extent just after we look at it, the strings they then write
1818 * into the file cannot yet be referenced in the hashtable, so we
1819 * don't care whether we see them or not.
1820 *
1821 * If qtext_load_file fails, we just press on; we'll return NULL for
1822 * every query text.
1823 */
1824 if (qbuffer == NULL ||
1825 pgss->extent != extent ||
1826 pgss->gc_count != gc_count)
1827 {
1828 free(qbuffer);
1830 }
1831 }
1832
1834 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1835 {
1837 bool nulls[PG_STAT_STATEMENTS_COLS];
1838 int i = 0;
1839 Counters tmp;
1840 double stddev;
1841 int64 queryid = entry->key.queryid;
1842 TimestampTz stats_since;
1843 TimestampTz minmax_stats_since;
1844
1845 memset(values, 0, sizeof(values));
1846 memset(nulls, 0, sizeof(nulls));
1847
1848 values[i++] = ObjectIdGetDatum(entry->key.userid);
1849 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1850 if (api_version >= PGSS_V1_9)
1851 values[i++] = BoolGetDatum(entry->key.toplevel);
1852
1853 if (is_allowed_role || entry->key.userid == userid)
1854 {
1855 if (api_version >= PGSS_V1_2)
1856 values[i++] = Int64GetDatumFast(queryid);
1857
1858 if (showtext)
1859 {
1860 char *qstr = qtext_fetch(entry->query_offset,
1861 entry->query_len,
1862 qbuffer,
1863 qbuffer_size);
1864
1865 if (qstr)
1866 {
1867 char *enc;
1868
1870 entry->query_len,
1871 entry->encoding);
1872
1874
1875 if (enc != qstr)
1876 pfree(enc);
1877 }
1878 else
1879 {
1880 /* Just return a null if we fail to find the text */
1881 nulls[i++] = true;
1882 }
1883 }
1884 else
1885 {
1886 /* Query text not requested */
1887 nulls[i++] = true;
1888 }
1889 }
1890 else
1891 {
1892 /* Don't show queryid */
1893 if (api_version >= PGSS_V1_2)
1894 nulls[i++] = true;
1895
1896 /*
1897 * Don't show query text, but hint as to the reason for not doing
1898 * so if it was requested
1899 */
1900 if (showtext)
1901 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1902 else
1903 nulls[i++] = true;
1904 }
1905
1906 /* copy counters to a local variable to keep locking time short */
1907 SpinLockAcquire(&entry->mutex);
1908 tmp = entry->counters;
1909 SpinLockRelease(&entry->mutex);
1910
1911 /*
1912 * The spinlock is not required when reading these two as they are
1913 * always updated when holding pgss->lock exclusively.
1914 */
1915 stats_since = entry->stats_since;
1916 minmax_stats_since = entry->minmax_stats_since;
1917
1918 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1919 if (IS_STICKY(tmp))
1920 continue;
1921
1922 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1923 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1924 {
1925 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1926 {
1927 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1928 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1929 }
1930
1931 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1932 api_version >= PGSS_V1_8)
1933 {
1934 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1935 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1936 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1937
1938 /*
1939 * Note we are calculating the population variance here, not
1940 * the sample variance, as we have data for the whole
1941 * population, so Bessel's correction is not used, and we
1942 * don't divide by tmp.calls - 1.
1943 */
1944 if (tmp.calls[kind] > 1)
1945 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1946 else
1947 stddev = 0.0;
1948 values[i++] = Float8GetDatumFast(stddev);
1949 }
1950 }
1951 values[i++] = Int64GetDatumFast(tmp.rows);
1954 if (api_version >= PGSS_V1_1)
1959 if (api_version >= PGSS_V1_1)
1964 if (api_version >= PGSS_V1_1)
1965 {
1968 }
1969 if (api_version >= PGSS_V1_11)
1970 {
1973 }
1974 if (api_version >= PGSS_V1_10)
1975 {
1978 }
1979 if (api_version >= PGSS_V1_8)
1980 {
1981 char buf[256];
1982 Datum wal_bytes;
1983
1986
1987 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1988
1989 /* Convert to numeric. */
1990 wal_bytes = DirectFunctionCall3(numeric_in,
1993 Int32GetDatum(-1));
1994 values[i++] = wal_bytes;
1995 }
1996 if (api_version >= PGSS_V1_12)
1997 {
1999 }
2000 if (api_version >= PGSS_V1_10)
2001 {
2010 }
2011 if (api_version >= PGSS_V1_11)
2012 {
2015 }
2016 if (api_version >= PGSS_V1_12)
2017 {
2020 }
2021 if (api_version >= PGSS_V1_13)
2022 {
2025 }
2026 if (api_version >= PGSS_V1_11)
2027 {
2028 values[i++] = TimestampTzGetDatum(stats_since);
2029 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2030 }
2031
2032 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2033 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2034 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2035 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2036 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2037 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2038 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2039 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2040 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2041 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2042 -1 /* fail if you forget to update this assert */ ));
2043
2044 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2045 }
2046
2048
2049 free(qbuffer);
2050}
2051
2052/* Number of output arguments (columns) for pg_stat_statements_info */
2053#define PG_STAT_STATEMENTS_INFO_COLS 2
2054
2055/*
2056 * Return statistics of pg_stat_statements.
2057 */
2058Datum
2060{
2061 pgssGlobalStats stats;
2062 TupleDesc tupdesc;
2064 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2065
2066 if (!pgss || !pgss_hash)
2067 ereport(ERROR,
2069 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2070
2071 /* Build a tuple descriptor for our result type */
2072 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2073 elog(ERROR, "return type must be a row type");
2074
2075 /* Read global statistics for pg_stat_statements */
2077 stats = pgss->stats;
2079
2080 values[0] = Int64GetDatum(stats.dealloc);
2082
2084}
2085
2086/*
2087 * Estimate shared memory space needed.
2088 */
2089static Size
2091{
2092 Size size;
2093
2094 size = MAXALIGN(sizeof(pgssSharedState));
2095 size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2096
2097 return size;
2098}
2099
2100/*
2101 * Allocate a new hashtable entry.
2102 * caller must hold an exclusive lock on pgss->lock
2103 *
2104 * "query" need not be null-terminated; we rely on query_len instead
2105 *
2106 * If "sticky" is true, make the new entry artificially sticky so that it will
2107 * probably still be there when the query finishes execution. We do this by
2108 * giving it a median usage value rather than the normal value. (Strictly
2109 * speaking, query strings are normalized on a best effort basis, though it
2110 * would be difficult to demonstrate this even under artificial conditions.)
2111 *
2112 * Note: despite needing exclusive lock, it's not an error for the target
2113 * entry to already exist. This is because pgss_store releases and
2114 * reacquires lock after failing to find a match; so someone else could
2115 * have made the entry while we waited to get exclusive lock.
2116 */
2117static pgssEntry *
2118entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2119 bool sticky)
2120{
2121 pgssEntry *entry;
2122 bool found;
2123
2124 /* Make space if needed */
2126 entry_dealloc();
2127
2128 /* Find or create an entry with desired hash code */
2129 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2130
2131 if (!found)
2132 {
2133 /* New entry, initialize it */
2134
2135 /* reset the statistics */
2136 memset(&entry->counters, 0, sizeof(Counters));
2137 /* set the appropriate initial usage count */
2139 /* re-initialize the mutex each time ... we assume no one using it */
2140 SpinLockInit(&entry->mutex);
2141 /* ... and don't forget the query text metadata */
2142 Assert(query_len >= 0);
2143 entry->query_offset = query_offset;
2144 entry->query_len = query_len;
2145 entry->encoding = encoding;
2147 entry->minmax_stats_since = entry->stats_since;
2148 }
2149
2150 return entry;
2151}
2152
2153/*
2154 * qsort comparator for sorting into increasing usage order
2155 */
2156static int
2157entry_cmp(const void *lhs, const void *rhs)
2158{
2159 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2160 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2161
2162 if (l_usage < r_usage)
2163 return -1;
2164 else if (l_usage > r_usage)
2165 return +1;
2166 else
2167 return 0;
2168}
2169
2170/*
2171 * Deallocate least-used entries.
2172 *
2173 * Caller must hold an exclusive lock on pgss->lock.
2174 */
2175static void
2177{
2179 pgssEntry **entries;
2180 pgssEntry *entry;
2181 int nvictims;
2182 int i;
2184 int nvalidtexts;
2185
2186 /*
2187 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2188 * While we're scanning the table, apply the decay factor to the usage
2189 * values, and update the mean query length.
2190 *
2191 * Note that the mean query length is almost immediately obsolete, since
2192 * we compute it before not after discarding the least-used entries.
2193 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2194 * making two passes to get a more current result. Likewise, the new
2195 * cur_median_usage includes the entries we're about to zap.
2196 */
2197
2198 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2199
2200 i = 0;
2201 tottextlen = 0;
2202 nvalidtexts = 0;
2203
2205 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2206 {
2207 entries[i++] = entry;
2208 /* "Sticky" entries get a different usage decay rate. */
2209 if (IS_STICKY(entry->counters))
2211 else
2213 /* In the mean length computation, ignore dropped texts. */
2214 if (entry->query_len >= 0)
2215 {
2216 tottextlen += entry->query_len + 1;
2217 nvalidtexts++;
2218 }
2219 }
2220
2221 /* Sort into increasing order by usage */
2222 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2223
2224 /* Record the (approximate) median usage */
2225 if (i > 0)
2226 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2227 /* Record the mean query length */
2228 if (nvalidtexts > 0)
2230 else
2232
2233 /* Now zap an appropriate fraction of lowest-usage entries */
2234 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2235 nvictims = Min(nvictims, i);
2236
2237 for (i = 0; i < nvictims; i++)
2238 {
2239 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2240 }
2241
2242 pfree(entries);
2243
2244 /* Increment the number of times entries are deallocated */
2246 pgss->stats.dealloc += 1;
2248}
2249
2250/*
2251 * Given a query string (not necessarily null-terminated), allocate a new
2252 * entry in the external query text file and store the string there.
2253 *
2254 * If successful, returns true, and stores the new entry's offset in the file
2255 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2256 * number of garbage collections that have occurred so far.
2257 *
2258 * On failure, returns false.
2259 *
2260 * At least a shared lock on pgss->lock must be held by the caller, so as
2261 * to prevent a concurrent garbage collection. Share-lock-holding callers
2262 * should pass a gc_count pointer to obtain the number of garbage collections,
2263 * so that they can recheck the count after obtaining exclusive lock to
2264 * detect whether a garbage collection occurred (and removed this entry).
2265 */
2266static bool
2267qtext_store(const char *query, int query_len,
2268 Size *query_offset, int *gc_count)
2269{
2270 Size off;
2271 int fd;
2272
2273 /*
2274 * We use a spinlock to protect extent/n_writers/gc_count, so that
2275 * multiple processes may execute this function concurrently.
2276 */
2278 off = pgss->extent;
2279 pgss->extent += query_len + 1;
2280 pgss->n_writers++;
2281 if (gc_count)
2282 *gc_count = pgss->gc_count;
2284
2285 *query_offset = off;
2286
2287 /*
2288 * Don't allow the file to grow larger than what qtext_load_file can
2289 * (theoretically) handle. This has been seen to be reachable on 32-bit
2290 * platforms.
2291 */
2292 if (unlikely(query_len >= MaxAllocHugeSize - off))
2293 {
2294 errno = EFBIG; /* not quite right, but it'll do */
2295 fd = -1;
2296 goto error;
2297 }
2298
2299 /* Now write the data into the successfully-reserved part of the file */
2301 if (fd < 0)
2302 goto error;
2303
2304 if (pg_pwrite(fd, query, query_len, off) != query_len)
2305 goto error;
2306 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2307 goto error;
2308
2310
2311 /* Mark our write complete */
2313 pgss->n_writers--;
2315
2316 return true;
2317
2318error:
2319 ereport(LOG,
2321 errmsg("could not write file \"%s\": %m",
2322 PGSS_TEXT_FILE)));
2323
2324 if (fd >= 0)
2326
2327 /* Mark our write complete */
2329 pgss->n_writers--;
2331
2332 return false;
2333}
2334
2335/*
2336 * Read the external query text file into a malloc'd buffer.
2337 *
2338 * Returns NULL (without throwing an error) if unable to read, eg
2339 * file not there or insufficient memory.
2340 *
2341 * On success, the buffer size is also returned into *buffer_size.
2342 *
2343 * This can be called without any lock on pgss->lock, but in that case
2344 * the caller is responsible for verifying that the result is sane.
2345 */
2346static char *
2348{
2349 char *buf;
2350 int fd;
2351 struct stat stat;
2352 Size nread;
2353
2355 if (fd < 0)
2356 {
2357 if (errno != ENOENT)
2358 ereport(LOG,
2360 errmsg("could not read file \"%s\": %m",
2361 PGSS_TEXT_FILE)));
2362 return NULL;
2363 }
2364
2365 /* Get file length */
2366 if (fstat(fd, &stat))
2367 {
2368 ereport(LOG,
2370 errmsg("could not stat file \"%s\": %m",
2371 PGSS_TEXT_FILE)));
2373 return NULL;
2374 }
2375
2376 /* Allocate buffer; beware that off_t might be wider than size_t */
2378 buf = (char *) malloc(stat.st_size);
2379 else
2380 buf = NULL;
2381 if (buf == NULL)
2382 {
2383 ereport(LOG,
2385 errmsg("out of memory"),
2386 errdetail("Could not allocate enough memory to read file \"%s\".",
2387 PGSS_TEXT_FILE)));
2389 return NULL;
2390 }
2391
2392 /*
2393 * OK, slurp in the file. Windows fails if we try to read more than
2394 * INT_MAX bytes at once, and other platforms might not like that either,
2395 * so read a very large file in 1GB segments.
2396 */
2397 nread = 0;
2398 while (nread < stat.st_size)
2399 {
2400 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2401
2402 /*
2403 * If we get a short read and errno doesn't get set, the reason is
2404 * probably that garbage collection truncated the file since we did
2405 * the fstat(), so we don't log a complaint --- but we don't return
2406 * the data, either, since it's most likely corrupt due to concurrent
2407 * writes from garbage collection.
2408 */
2409 errno = 0;
2410 if (read(fd, buf + nread, toread) != toread)
2411 {
2412 if (errno)
2413 ereport(LOG,
2415 errmsg("could not read file \"%s\": %m",
2416 PGSS_TEXT_FILE)));
2417 free(buf);
2419 return NULL;
2420 }
2421 nread += toread;
2422 }
2423
2424 if (CloseTransientFile(fd) != 0)
2425 ereport(LOG,
2427 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2428
2429 *buffer_size = nread;
2430 return buf;
2431}
2432
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len describe where the text is expected to live inside
 * buffer, which holds buffer_size bytes.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminator must fit inside the
	 * buffer.  Compare against the space remaining after query_offset rather
	 * than forming query_offset + query_len, so that a garbage offset close
	 * to the maximum Size value cannot wrap around and slip past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;
	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;
	/* Looks OK */
	return buffer + query_offset;
}
2456
2457/*
2458 * Do we need to garbage-collect the external query text file?
2459 *
2460 * Caller should hold at least a shared lock on pgss->lock.
2461 */
2462static bool
2464{
2465 Size extent;
2466
2467 /* Read shared extent pointer */
2469 extent = pgss->extent;
2471
2472 /*
2473 * Don't proceed if file does not exceed 512 bytes per possible entry.
2474 *
2475 * Here and in the next test, 32-bit machines have overflow hazards if
2476 * pgss_max and/or mean_query_len are large. Force the multiplications
2477 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2478 */
2479 if ((uint64) extent < (uint64) 512 * pgss_max)
2480 return false;
2481
2482 /*
2483 * Don't proceed if file is less than about 50% bloat. Nothing can or
2484 * should be done in the event of unusually large query texts accounting
2485 * for file's large size. We go to the trouble of maintaining the mean
2486 * query length in order to prevent garbage collection from thrashing
2487 * uselessly.
2488 */
2489 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2490 return false;
2491
2492 return true;
2493}
2494
2495/*
2496 * Garbage-collect orphaned query texts in external file.
2497 *
2498 * This won't be called often in the typical case, since it's likely that
2499 * there won't be too much churn, and besides, a similar compaction process
2500 * occurs when serializing to disk at shutdown or as part of resetting.
2501 * Despite this, it seems prudent to plan for the edge case where the file
2502 * becomes unreasonably large, with no other method of compaction likely to
2503 * occur in the foreseeable future.
2504 *
2505 * The caller must hold an exclusive lock on pgss->lock.
2506 *
2507 * At the first sign of trouble we unlink the query text file to get a clean
2508 * slate (although existing statistics are retained), rather than risk
2509 * thrashing by allowing the same problem case to recur indefinitely.
2510 */
2511static void
2513{
2514 char *qbuffer;
2516 FILE *qfile = NULL;
2518 pgssEntry *entry;
2519 Size extent;
2520 int nentries;
2521
2522 /*
2523 * When called from pgss_store, some other session might have proceeded
2524 * with garbage collection in the no-lock-held interim of lock strength
2525 * escalation. Check once more that this is actually necessary.
2526 */
2527 if (!need_gc_qtexts())
2528 return;
2529
2530 /*
2531 * Load the old texts file. If we fail (out of memory, for instance),
2532 * invalidate query texts. Hopefully this is rare. It might seem better
2533 * to leave things alone on an OOM failure, but the problem is that the
2534 * file is only going to get bigger; hoping for a future non-OOM result is
2535 * risky and can easily lead to complete denial of service.
2536 */
2538 if (qbuffer == NULL)
2539 goto gc_fail;
2540
2541 /*
2542 * We overwrite the query texts file in place, so as to reduce the risk of
2543 * an out-of-disk-space failure. Since the file is guaranteed not to get
2544 * larger, this should always work on traditional filesystems; though we
2545 * could still lose on copy-on-write filesystems.
2546 */
2548 if (qfile == NULL)
2549 {
2550 ereport(LOG,
2552 errmsg("could not write file \"%s\": %m",
2553 PGSS_TEXT_FILE)));
2554 goto gc_fail;
2555 }
2556
2557 extent = 0;
2558 nentries = 0;
2559
2561 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2562 {
2563 int query_len = entry->query_len;
2564 char *qry = qtext_fetch(entry->query_offset,
2565 query_len,
2566 qbuffer,
2567 qbuffer_size);
2568
2569 if (qry == NULL)
2570 {
2571 /* Trouble ... drop the text */
2572 entry->query_offset = 0;
2573 entry->query_len = -1;
2574 /* entry will not be counted in mean query length computation */
2575 continue;
2576 }
2577
2578 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2579 {
2580 ereport(LOG,
2582 errmsg("could not write file \"%s\": %m",
2583 PGSS_TEXT_FILE)));
2585 goto gc_fail;
2586 }
2587
2588 entry->query_offset = extent;
2589 extent += query_len + 1;
2590 nentries++;
2591 }
2592
2593 /*
2594 * Truncate away any now-unused space. If this fails for some odd reason,
2595 * we log it, but there's no need to fail.
2596 */
2597 if (ftruncate(fileno(qfile), extent) != 0)
2598 ereport(LOG,
2600 errmsg("could not truncate file \"%s\": %m",
2601 PGSS_TEXT_FILE)));
2602
2603 if (FreeFile(qfile))
2604 {
2605 ereport(LOG,
2607 errmsg("could not write file \"%s\": %m",
2608 PGSS_TEXT_FILE)));
2609 qfile = NULL;
2610 goto gc_fail;
2611 }
2612
2613 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2614 pgss->extent, extent);
2615
2616 /* Reset the shared extent pointer */
2617 pgss->extent = extent;
2618
2619 /*
2620 * Also update the mean query length, to be sure that need_gc_qtexts()
2621 * won't still think we have a problem.
2622 */
2623 if (nentries > 0)
2624 pgss->mean_query_len = extent / nentries;
2625 else
2627
2628 free(qbuffer);
2629
2630 /*
2631 * OK, count a garbage collection cycle. (Note: even though we have
2632 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2633 * other processes may examine gc_count while holding only the mutex.
2634 * Also, we have to advance the count *after* we've rewritten the file,
2635 * else other processes might not realize they read a stale file.)
2636 */
2638
2639 return;
2640
2641gc_fail:
2642 /* clean up resources */
2643 if (qfile)
2644 FreeFile(qfile);
2645 free(qbuffer);
2646
2647 /*
2648 * Since the contents of the external file are now uncertain, mark all
2649 * hashtable entries as having invalid texts.
2650 */
2652 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2653 {
2654 entry->query_offset = 0;
2655 entry->query_len = -1;
2656 }
2657
2658 /*
2659 * Destroy the query text file and create a new, empty one
2660 */
2663 if (qfile == NULL)
2664 ereport(LOG,
2666 errmsg("could not recreate file \"%s\": %m",
2667 PGSS_TEXT_FILE)));
2668 else
2669 FreeFile(qfile);
2670
2671 /* Reset the shared extent pointer */
2672 pgss->extent = 0;
2673
2674 /* Reset mean_query_len to match the new state */
2676
2677 /*
2678 * Bump the GC count even though we failed.
2679 *
2680 * This is needed to make concurrent readers of file without any lock on
2681 * pgss->lock notice existence of new version of file. Once readers
2682 * subsequently observe a change in GC count with pgss->lock held, that
2683 * forces a safe reopen of file. Writers also require that we bump here,
2684 * of course. (As required by locking protocol, readers and writers don't
2685 * trust earlier file contents until gc_count is found unchanged after
2686 * pgss->lock acquired in shared or exclusive mode respectively.)
2687 */
2689}
2690
/*
 * SINGLE_ENTRY_RESET: reset or remove one hashtable entry on behalf of
 * entry_reset().
 *
 * Does nothing if "e" is NULL.  Otherwise, when minmax_only is set, only the
 * min/max timing statistics of the entry are cleared and its
 * minmax_stats_since is set to stats_reset; if not, the entry is removed from
 * pgss_hash and num_remove is incremented.
 *
 * The macro refers to minmax_only, stats_reset, num_remove and pgss_hash by
 * name, so those must be in scope at the point of use.  "e" is evaluated more
 * than once, so it must be free of side effects.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon behaves as a single statement in any context.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) \
	{ \
		if (minmax_only) \
		{ \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2709
2710/*
2711 * Reset entries corresponding to parameters passed.
2712 */
2713static TimestampTz
2714entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2715{
2717 pgssEntry *entry;
2718 FILE *qfile;
2719 int64 num_entries;
2720 int64 num_remove = 0;
2721 pgssHashKey key;
2722 TimestampTz stats_reset;
2723
2724 if (!pgss || !pgss_hash)
2725 ereport(ERROR,
2727 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2728
2730 num_entries = hash_get_num_entries(pgss_hash);
2731
2732 stats_reset = GetCurrentTimestamp();
2733
2734 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2735 {
2736 /* If all the parameters are available, use the fast path. */
2737 memset(&key, 0, sizeof(pgssHashKey));
2738 key.userid = userid;
2739 key.dbid = dbid;
2740 key.queryid = queryid;
2741
2742 /*
2743 * Reset the entry if it exists, starting with the non-top-level
2744 * entry.
2745 */
2746 key.toplevel = false;
2747 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2748
2749 SINGLE_ENTRY_RESET(entry);
2750
2751 /* Also reset the top-level entry if it exists. */
2752 key.toplevel = true;
2753 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2754
2755 SINGLE_ENTRY_RESET(entry);
2756 }
2757 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2758 {
2759 /* Reset entries corresponding to valid parameters. */
2761 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2762 {
2763 if ((!userid || entry->key.userid == userid) &&
2764 (!dbid || entry->key.dbid == dbid) &&
2765 (!queryid || entry->key.queryid == queryid))
2766 {
2767 SINGLE_ENTRY_RESET(entry);
2768 }
2769 }
2770 }
2771 else
2772 {
2773 /* Reset all entries. */
2775 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2776 {
2777 SINGLE_ENTRY_RESET(entry);
2778 }
2779 }
2780
2781 /* All entries are removed? */
2782 if (num_entries != num_remove)
2783 goto release_lock;
2784
2785 /*
2786 * Reset global statistics for pg_stat_statements since all entries are
2787 * removed.
2788 */
2790 pgss->stats.dealloc = 0;
2791 pgss->stats.stats_reset = stats_reset;
2793
2794 /*
2795 * Write new empty query file, perhaps even creating a new one to recover
2796 * if the file was missing.
2797 */
2799 if (qfile == NULL)
2800 {
2801 ereport(LOG,
2803 errmsg("could not create file \"%s\": %m",
2804 PGSS_TEXT_FILE)));
2805 goto done;
2806 }
2807
2808 /* If ftruncate fails, log it, but it's not a fatal problem */
2809 if (ftruncate(fileno(qfile), 0) != 0)
2810 ereport(LOG,
2812 errmsg("could not truncate file \"%s\": %m",
2813 PGSS_TEXT_FILE)));
2814
2815 FreeFile(qfile);
2816
2817done:
2818 pgss->extent = 0;
2819 /* This counts as a query text garbage collection for our purposes */
2821
2824
2825 return stats_reset;
2826}
2827
2828/*
2829 * Generate a normalized version of the query string that will be used to
2830 * represent all similar queries.
2831 *
2832 * Note that the normalized representation may well vary depending on
2833 * just which "equivalent" query is used to create the hashtable entry.
2834 * We assume this is OK.
2835 *
2836 * If query_loc > 0, then "query" has been advanced by that much compared to
2837 * the original string start, so we need to translate the provided locations
2838 * to compensate. (This lets us avoid re-scanning statements before the one
2839 * of interest, so it's worth doing.)
2840 *
2841 * *query_len_p contains the input string length, and is updated with
2842 * the result string length on exit. The resulting string might be longer
2843 * or shorter depending on what happens with replacement of constants.
2844 *
2845 * Returns a palloc'd string.
2846 */
2847static char *
2849 int query_loc, int *query_len_p)
2850{
2851 char *norm_query;
2852 int query_len = *query_len_p;
2853 int norm_query_buflen, /* Space allowed for norm_query */
2854 len_to_wrt, /* Length (in bytes) to write */
2855 quer_loc = 0, /* Source query byte location */
2856 n_quer_loc = 0, /* Normalized query byte location */
2857 last_off = 0, /* Offset from start for previous tok */
2858 last_tok_len = 0; /* Length (in bytes) of that tok */
2859 int num_constants_replaced = 0;
2860
2861 /*
2862 * Get constants' lengths (core system only gives us locations). Note
2863 * this also ensures the items are sorted by location.
2864 */
2866
2867 /*
2868 * Allow for $n symbols to be longer than the constants they replace.
2869 * Constants must take at least one byte in text form, while a $n symbol
2870 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2871 * could refine that limit based on the max value of n for the current
2872 * query, but it hardly seems worth any extra effort to do so.
2873 */
2874 norm_query_buflen = query_len + jstate->clocations_count * 10;
2875
2876 /* Allocate result buffer */
2878
2879 for (int i = 0; i < jstate->clocations_count; i++)
2880 {
2881 int off, /* Offset from start for cur tok */
2882 tok_len; /* Length (in bytes) of that tok */
2883
2884 /*
2885 * If we have an external param at this location, but no lists are
2886 * being squashed across the query, then we skip here; this will make
2887 * us print the characters found in the original query that represent
2888 * the parameter in the next iteration (or after the loop is done),
2889 * which is a bit odd but seems to work okay in most cases.
2890 */
2891 if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
2892 continue;
2893
2894 off = jstate->clocations[i].location;
2895
2896 /* Adjust recorded location if we're dealing with partial string */
2897 off -= query_loc;
2898
2899 tok_len = jstate->clocations[i].length;
2900
2901 if (tok_len < 0)
2902 continue; /* ignore any duplicates */
2903
2904 /* Copy next chunk (what precedes the next constant) */
2905 len_to_wrt = off - last_off;
2907 Assert(len_to_wrt >= 0);
2910
2911 /*
2912 * And insert a param symbol in place of the constant token; and, if
2913 * we have a squashable list, insert a placeholder comment starting
2914 * from the list's second value.
2915 */
2917 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2918 jstate->clocations[i].squashed ? " /*, ... */" : "");
2920
2921 /* move forward */
2922 quer_loc = off + tok_len;
2923 last_off = off;
2925 }
2926
2927 /*
2928 * We've copied up until the last ignorable constant. Copy over the
2929 * remaining bytes of the original query string.
2930 */
2931 len_to_wrt = query_len - quer_loc;
2932
2933 Assert(len_to_wrt >= 0);
2936
2938 norm_query[n_quer_loc] = '\0';
2939
2941 return norm_query;
2942}
2943
2944/*
2945 * Given a valid SQL string and an array of constant-location records,
2946 * fill in the textual lengths of those constants.
2947 *
2948 * The constants may use any allowed constant syntax, such as float literals,
2949 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2950 * accomplished by using the public API for the core scanner.
2951 *
2952 * It is the caller's job to ensure that the string is a valid SQL statement
2953 * with constants at the indicated locations. Since in practice the string
2954 * has already been parsed, and the locations that the caller provides will
2955 * have originated from within the authoritative parser, this should not be
2956 * a problem.
2957 *
2958 * Multiple constants can have the same location. We reset lengths of those
2959 * past the first to -1 so that they can later be ignored.
2960 *
2961 * If query_loc > 0, then "query" has been advanced by that much compared to
2962 * the original string start, so we need to translate the provided locations
2963 * to compensate. (This lets us avoid re-scanning statements before the one
2964 * of interest, so it's worth doing.)
2965 *
2966 * N.B. There is an assumption that a '-' character at a Const location begins
2967 * a negative numeric constant. This precludes there ever being another
2968 * reason for a constant to start with a '-'.
2969 */
2970static void
2972 int query_loc)
2973{
2975 core_yyscan_t yyscanner;
2979
2980 /*
2981 * Sort the records by location so that we can process them in order while
2982 * scanning the query text.
2983 */
2984 if (jstate->clocations_count > 1)
2985 qsort(jstate->clocations, jstate->clocations_count,
2986 sizeof(LocationLen), comp_location);
2987 locs = jstate->clocations;
2988
2989 /* initialize the flex scanner --- should match raw_parser() */
2990 yyscanner = scanner_init(query,
2991 &yyextra,
2992 &ScanKeywords,
2994
2995 /* Search for each constant, in sequence */
2996 for (int i = 0; i < jstate->clocations_count; i++)
2997 {
2998 int loc;
2999 int tok;
3000
3001 /* Ignore constants after the first one in the same location */
3002 if (i > 0 && locs[i].location == locs[i - 1].location)
3003 {
3004 locs[i].length = -1;
3005 continue;
3006 }
3007
3008 if (locs[i].squashed)
3009 continue; /* squashable list, ignore */
3010
3011 /* Adjust recorded location if we're dealing with partial string */
3012 loc = locs[i].location - query_loc;
3013 Assert(loc >= 0);
3014
3015 /*
3016 * We have a valid location for a constant that's not a dupe. Lex
3017 * tokens until we find the desired constant.
3018 */
3019 for (;;)
3020 {
3021 tok = core_yylex(&yylval, &yylloc, yyscanner);
3022
3023 /* We should not hit end-of-string, but if we do, behave sanely */
3024 if (tok == 0)
3025 break; /* out of inner for-loop */
3026
3027 /*
3028 * We should find the token position exactly, but if we somehow
3029 * run past it, work with that.
3030 */
3031 if (yylloc >= loc)
3032 {
3033 if (query[loc] == '-')
3034 {
3035 /*
3036 * It's a negative value - this is the one and only case
3037 * where we replace more than a single token.
3038 *
3039 * Do not compensate for the core system's special-case
3040 * adjustment of location to that of the leading '-'
3041 * operator in the event of a negative constant. It is
3042 * also useful for our purposes to start from the minus
3043 * symbol. In this way, queries like "select * from foo
3044 * where bar = 1" and "select * from foo where bar = -2"
3045 * will have identical normalized query strings.
3046 */
3047 tok = core_yylex(&yylval, &yylloc, yyscanner);
3048 if (tok == 0)
3049 break; /* out of inner for-loop */
3050 }
3051
3052 /*
3053 * We now rely on the assumption that flex has placed a zero
3054 * byte after the text of the current token in scanbuf.
3055 */
3056 locs[i].length = strlen(yyextra.scanbuf + loc);
3057 break; /* out of inner for-loop */
3058 }
3059 }
3060
3061 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3062 if (tok == 0)
3063 break;
3064 }
3065
3066 scanner_finish(yyscanner);
3067}
3068
3069/*
3070 * comp_location: comparator for qsorting LocationLen structs by location
3071 */
3072static int
3073comp_location(const void *a, const void *b)
3074{
3075 int l = ((const LocationLen *) a)->location;
3076 int r = ((const LocationLen *) b)->location;
3077
3078 return pg_cmp_s32(l, r);
3079}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5314
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1636
static Datum values[MAXATTR]
Definition bootstrap.c:188
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define INT64CONST(x)
Definition c.h:632
#define Min(x, y)
Definition c.h:1093
#define PG_BINARY_R
Definition c.h:1378
#define MAXALIGN(LEN)
Definition c.h:898
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
#define PG_BINARY
Definition c.h:1376
#define UINT64_FORMAT
Definition c.h:637
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
#define unlikely(x)
Definition c.h:432
uint32_t uint32
Definition c.h:618
#define PG_BINARY_W
Definition c.h:1379
size_t Size
Definition c.h:691
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
Size hash_estimate_size(int64 num_entries, Size entrysize)
Definition dynahash.c:783
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1509
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1336
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:372
#define PG_END_TRY(...)
Definition elog.h:397
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define PG_FINALLY(...)
Definition elog.h:389
#define ereport(elevel,...)
Definition elog.h:150
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:73
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:72
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:70
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:143
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:71
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:309
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:477
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:417
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition executor.h:87
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition executor.h:81
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition executor.h:77
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition executor.h:91
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
int CloseTransientFile(int fd)
Definition fd.c:2855
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_MODULE_MAGIC_EXT(...)
Definition fmgr.h:540
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:120
Oid MyDatabaseId
Definition globals.c:94
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5146
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5043
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5180
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5067
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1037
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:177
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:189
void InstrEndLoop(Instrumentation *instr)
Definition instrument.c:144
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition instrument.c:31
WalUsage pgWalUsage
Definition instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
@ INSTRUMENT_ALL
Definition instrument.h:67
static int pg_cmp_s32(int32 a, int32 b)
Definition int.h:713
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
void(* shmem_startup_hook_type)(void)
Definition ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition ipci.c:60
void RequestAddinShmemSpace(Size size)
Definition ipci.c:76
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
PGDLLIMPORT const ScanKeywordList ScanKeywords
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition lwlock.c:567
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition lwlock.c:650
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
int GetDatabaseEncoding(void)
Definition mbutils.c:1389
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
#define MaxAllocHugeSize
Definition memutils.h:45
void(* shmem_request_hook_type)(void)
Definition miscadmin.h:534
Oid GetUserId(void)
Definition miscinit.c:470
shmem_request_hook_type shmem_request_hook
Definition miscinit.c:1790
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1787
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:68
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:281
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:333
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.h:28
PlannedStmtOrigin
Definition plannodes.h:36
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:37
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:41
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:40
#define sprintf
Definition port.h:262
#define pg_pwrite
Definition port.h:248
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
#define Int64GetDatumFast(X)
Definition postgres.h:525
#define Float8GetDatumFast(X)
Definition postgres.h:527
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:370
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition scan.l:1233
#define yylloc
Definition scan.l:1106
void scanner_finish(core_yyscan_t yyscanner)
Definition scan.l:1273
#define yyextra
Definition scan.l:1102
const uint16 ScanKeywordTokens[]
Definition scan.l:80
void * core_yyscan_t
Definition scanner.h:118
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition sdir.h:25
Size add_size(Size s1, Size s2)
Definition shmem.c:485
HTAB * ShmemInitHash(const char *name, int64 init_size, int64 max_size, HASHCTL *infoP, int hash_flags)
Definition shmem.c:326
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:381
#define free(a)
#define malloc(a)
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition execnodes.h:758
struct JitContext * es_jit
Definition execnodes.h:776
uint64 es_total_processed
Definition execnodes.h:728
MemoryContext es_query_cxt
Definition execnodes.h:722
int es_parallel_workers_launched
Definition execnodes.h:760
Size keysize
Definition hsearch.h:75
Size entrysize
Definition hsearch.h:76
WalUsage walusage
Definition instrument.h:94
instr_time total
Definition instrument.h:87
BufferUsage bufusage
Definition instrument.h:93
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:210
int64 queryId
Definition plannodes.h:69
ParseLoc stmt_len
Definition plannodes.h:169
PlannedStmtOrigin planOrigin
Definition plannodes.h:75
ParseLoc stmt_location
Definition plannodes.h:167
Node * utilityStmt
Definition plannodes.h:151
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:48
PlannedStmt * plannedstmt
Definition execdesc.h:37
struct Instrumentation * totaltime
Definition execdesc.h:55
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:255
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
Definition guc.h:174
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:548
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:72
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.h:71
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
const char * name
#define fstat
Definition win32_port.h:73