PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_stat_statements.c
4 * Track statement planning and execution times as well as resource
5 * usage across a whole database cluster.
6 *
7 * Execution costs are totaled for each distinct source query, and kept in
8 * a shared hashtable. (We track only as many distinct queries as will fit
9 * in the designated amount of shared memory.)
10 *
11 * Starting in Postgres 9.2, this module normalized query entries. As of
12 * Postgres 14, the normalization is done by the core if compute_query_id is
13 * enabled, or optionally by third-party modules.
14 *
15 * To facilitate presenting entries to users, we create "representative" query
16 * strings in which constants are replaced with parameter symbols ($n), to
17 * make it clearer what a normalized entry can represent. To save on shared
18 * memory, and to avoid having to truncate oversized query strings, we store
19 * these strings in a temporary external query-texts file. Offsets into this
20 * file are kept in shared memory.
21 *
22 * Note about locking issues: to create or delete an entry in the shared
23 * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 * in an entry except the counters requires the same. To look up an entry,
25 * one must hold the lock shared. To read or update the counters within
26 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 * disappear!) and also take the entry's mutex spinlock.
28 * The shared state variable pgss->extent (the next free spot in the external
29 * query-text file) should be accessed only while holding either the
30 * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 * allow reserving file space while holding only shared lock on pgss->lock.
32 * Rewriting the entire external query-text file, eg for garbage collection,
33 * requires holding pgss->lock exclusively; this allows individual entries
34 * in the file to be read or written while holding only shared lock.
35 *
36 *
37 * Copyright (c) 2008-2026, PostgreSQL Global Development Group
38 *
39 * IDENTIFICATION
40 * contrib/pg_stat_statements/pg_stat_statements.c
41 *
42 *-------------------------------------------------------------------------
43 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/htup_details.h"
51#include "access/parallel.h"
52#include "catalog/pg_authid.h"
53#include "executor/instrument.h"
54#include "funcapi.h"
55#include "jit/jit.h"
56#include "mb/pg_wchar.h"
57#include "miscadmin.h"
58#include "nodes/queryjumble.h"
59#include "optimizer/planner.h"
60#include "parser/analyze.h"
61#include "pgstat.h"
62#include "storage/fd.h"
63#include "storage/ipc.h"
64#include "storage/lwlock.h"
65#include "storage/shmem.h"
66#include "storage/spin.h"
67#include "tcop/utility.h"
68#include "utils/acl.h"
69#include "utils/builtins.h"
70#include "utils/memutils.h"
71#include "utils/timestamp.h"
72#include "utils/tuplestore.h"
73
75 .name = "pg_stat_statements",
76 .version = PG_VERSION
77);
78
79/* Location of permanent stats file (valid when database is shut down) */
80#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81
82/*
83 * Location of external query text file.
84 */
85#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86
87/* Magic number identifying the stats file format */
88static const uint32 PGSS_FILE_HEADER = 0x20250731;
89
90/* PostgreSQL major version number, changes in which invalidate all entries */
92
/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * An entry is "sticky" while it has recorded neither a planning call nor an
 * execution call.  The argument is a counters struct (not a pointer to one);
 * it is parenthesized so that any lvalue expression, e.g. *ptr, expands
 * correctly.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102
103/*
104 * Extension version number, for supporting older extension versions' objects
105 */
119
120typedef enum pgssStoreKind
121{
123
124 /*
125 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
126 * reference the underlying values in the arrays in the Counters struct,
127 * and this order is required in pg_stat_statements_internal().
128 */
132
133#define PGSS_NUMKIND (PGSS_EXEC + 1)
134
135/*
136 * Hashtable key that defines the identity of a hashtable entry. We separate
137 * queries by user and by database even if they are otherwise identical.
138 *
139 * If you add a new key to this struct, make sure to teach pgss_store() to
140 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
141 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
142 */
143typedef struct pgssHashKey
144{
145 Oid userid; /* user OID */
146 Oid dbid; /* database OID */
147 int64 queryid; /* query identifier */
148 bool toplevel; /* query executed at top level */
150
151/*
152 * The actual stats counters kept within pgssEntry.
153 */
154typedef struct Counters
155{
156 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
157 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
158 * in msec */
159 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
160 * msec since min/max reset */
161 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
162 * msec since min/max reset */
163 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
164 * msec */
165 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
166 * planning/execution time in msec */
167 int64 rows; /* total # of retrieved or affected rows */
168 int64 shared_blks_hit; /* # of shared buffer hits */
169 int64 shared_blks_read; /* # of shared disk blocks read */
170 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
171 int64 shared_blks_written; /* # of shared disk blocks written */
172 int64 local_blks_hit; /* # of local buffer hits */
173 int64 local_blks_read; /* # of local disk blocks read */
174 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
175 int64 local_blks_written; /* # of local disk blocks written */
176 int64 temp_blks_read; /* # of temp blocks read */
177 int64 temp_blks_written; /* # of temp blocks written */
178 double shared_blk_read_time; /* time spent reading shared blocks,
179 * in msec */
180 double shared_blk_write_time; /* time spent writing shared blocks,
181 * in msec */
182 double local_blk_read_time; /* time spent reading local blocks, in
183 * msec */
184 double local_blk_write_time; /* time spent writing local blocks, in
185 * msec */
186 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
187 double temp_blk_write_time; /* time spent writing temp blocks, in
188 * msec */
189 double usage; /* usage factor */
190 int64 wal_records; /* # of WAL records generated */
191 int64 wal_fpi; /* # of WAL full page images generated */
192 uint64 wal_bytes; /* total amount of WAL generated in bytes */
193 int64 wal_buffers_full; /* # of times the WAL buffers became full */
194 int64 jit_functions; /* total number of JIT functions emitted */
195 double jit_generation_time; /* total time to generate jit code */
196 int64 jit_inlining_count; /* number of times inlining time has been
197 * > 0 */
198 double jit_deform_time; /* total time to deform tuples in jit code */
199 int64 jit_deform_count; /* number of times deform time has been >
200 * 0 */
201
202 double jit_inlining_time; /* total time to inline jit code */
203 int64 jit_optimization_count; /* number of times optimization time
204 * has been > 0 */
205 double jit_optimization_time; /* total time to optimize jit code */
206 int64 jit_emission_count; /* number of times emission time has been
207 * > 0 */
208 double jit_emission_time; /* total time to emit jit code */
209 int64 parallel_workers_to_launch; /* # of parallel workers planned
210 * to be launched */
211 int64 parallel_workers_launched; /* # of parallel workers actually
212 * launched */
213 int64 generic_plan_calls; /* number of calls using a generic plan */
214 int64 custom_plan_calls; /* number of calls using a custom plan */
216
217/*
218 * Global statistics for pg_stat_statements
219 */
220typedef struct pgssGlobalStats
221{
222 int64 dealloc; /* # of times entries were deallocated */
223 TimestampTz stats_reset; /* timestamp with all stats reset */
225
226/*
227 * Statistics per statement
228 *
229 * Note: in event of a failure in garbage collection of the query text file,
230 * we reset query_offset to zero and query_len to -1. This will be seen as
231 * an invalid state by qtext_fetch().
232 */
233typedef struct pgssEntry
234{
235 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
236 Counters counters; /* the statistics for this query */
237 Size query_offset; /* query text offset in external file */
238 int query_len; /* # of valid bytes in query string, or -1 */
239 int encoding; /* query text encoding */
240 TimestampTz stats_since; /* timestamp of entry allocation */
241 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
242 slock_t mutex; /* protects the counters only */
244
245/*
246 * Global shared state
247 */
248typedef struct pgssSharedState
249{
250 LWLockPadded lock; /* protects hashtable search/modification */
251 double cur_median_usage; /* current median usage in hashtable */
252 Size mean_query_len; /* current mean entry text length */
253 slock_t mutex; /* protects following fields only: */
254 Size extent; /* current extent of query file */
255 int n_writers; /* number of active writers to query file */
256 int gc_count; /* query file garbage collection cycle count */
257 pgssGlobalStats stats; /* global statistics for pgss */
259
260/* Links to shared memory state */
263
264static void pgss_shmem_request(void *arg);
265static void pgss_shmem_init(void *arg);
266
271
272/*---- Local variables ----*/
273
274/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
275static int nesting_level = 0;
276
277/* Saved hook values */
285
286/*---- GUC variables ----*/
287
288typedef enum
289{
290 PGSS_TRACK_NONE, /* track no statements */
291 PGSS_TRACK_TOP, /* only top level statements */
292 PGSS_TRACK_ALL, /* all statements, including nested ones */
294
295static const struct config_enum_entry track_options[] =
296{
297 {"none", PGSS_TRACK_NONE, false},
298 {"top", PGSS_TRACK_TOP, false},
299 {"all", PGSS_TRACK_ALL, false},
300 {NULL, 0, false}
301};
302
303static int pgss_max = 5000; /* max # statements to track */
304static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
305static bool pgss_track_utility = true; /* whether to track utility commands */
306static bool pgss_track_planning = false; /* whether to track planning
307 * duration */
308static bool pgss_save = true; /* whether to save stats across shutdown */
309
/*
 * Decide whether statements at the given nesting level are tracked.
 *
 * Always false when IsParallelWorker() is true.  Otherwise the answer
 * follows pgss_track: PGSS_TRACK_TOP tracks only level 0, PGSS_TRACK_ALL
 * tracks every level, and any other setting tracks nothing.
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	 (pgss_track == PGSS_TRACK_TOP ? \
	  (level) == 0 : \
	  pgss_track == PGSS_TRACK_ALL))
314
/*
 * Bump the query-text garbage collection cycle counter (pgss->gc_count),
 * holding the pgss->mutex spinlock across the update.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count += 1; \
		SpinLockRelease(&pgss->mutex); \
	} while (0)
321
322/*---- Function declarations ----*/
323
337
338static void pgss_shmem_shutdown(int code, Datum arg);
339static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
340 const JumbleState *jstate);
341static PlannedStmt *pgss_planner(Query *parse,
342 const char *query_string,
343 int cursorOptions,
344 ParamListInfo boundParams,
345 ExplainState *es);
346static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
347static void pgss_ExecutorRun(QueryDesc *queryDesc,
348 ScanDirection direction,
349 uint64 count);
350static void pgss_ExecutorFinish(QueryDesc *queryDesc);
351static void pgss_ExecutorEnd(QueryDesc *queryDesc);
352static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
353 bool readOnlyTree,
354 ProcessUtilityContext context, ParamListInfo params,
355 QueryEnvironment *queryEnv,
356 DestReceiver *dest, QueryCompletion *qc);
357static void pgss_store(const char *query, int64 queryId,
358 int query_location, int query_len,
359 pgssStoreKind kind,
360 double total_time, uint64 rows,
361 const BufferUsage *bufusage,
362 const WalUsage *walusage,
363 const struct JitInstrumentation *jitusage,
364 const JumbleState *jstate,
365 int parallel_workers_to_launch,
366 int parallel_workers_launched,
367 PlannedStmtOrigin planOrigin);
369 pgssVersion api_version,
370 bool showtext);
371static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
372 int encoding, bool sticky);
373static void entry_dealloc(void);
374static bool qtext_store(const char *query, int query_len,
375 Size *query_offset, int *gc_count);
376static char *qtext_load_file(Size *buffer_size);
377static char *qtext_fetch(Size query_offset, int query_len,
378 char *buffer, Size buffer_size);
379static bool need_gc_qtexts(void);
380static void gc_qtexts(void);
381static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
383 const char *query,
384 int query_loc, int *query_len_p);
385
386/*
387 * Module load callback
388 */
389void
391{
392 /*
393 * In order to create our shared memory area, we have to be loaded via
394 * shared_preload_libraries. If not, fall out without hooking into any of
395 * the main system. (We don't throw error here because it seems useful to
396 * allow the pg_stat_statements functions to be created even when the
397 * module isn't active. The functions must protect themselves against
398 * being called then, however.)
399 */
401 return;
402
403 /*
404 * Inform the postmaster that we want to enable query_id calculation if
405 * compute_query_id is set to auto.
406 */
408
409 /*
410 * Define (or redefine) custom GUC variables.
411 */
412 DefineCustomIntVariable("pg_stat_statements.max",
413 "Sets the maximum number of statements tracked by pg_stat_statements.",
414 NULL,
415 &pgss_max,
416 5000,
417 100,
418 INT_MAX / 2,
420 0,
421 NULL,
422 NULL,
423 NULL);
424
425 DefineCustomEnumVariable("pg_stat_statements.track",
426 "Selects which statements are tracked by pg_stat_statements.",
427 NULL,
428 &pgss_track,
431 PGC_SUSET,
432 0,
433 NULL,
434 NULL,
435 NULL);
436
437 DefineCustomBoolVariable("pg_stat_statements.track_utility",
438 "Selects whether utility commands are tracked by pg_stat_statements.",
439 NULL,
441 true,
442 PGC_SUSET,
443 0,
444 NULL,
445 NULL,
446 NULL);
447
448 DefineCustomBoolVariable("pg_stat_statements.track_planning",
449 "Selects whether planning duration is tracked by pg_stat_statements.",
450 NULL,
452 false,
453 PGC_SUSET,
454 0,
455 NULL,
456 NULL,
457 NULL);
458
459 DefineCustomBoolVariable("pg_stat_statements.save",
460 "Save pg_stat_statements statistics across server shutdowns.",
461 NULL,
462 &pgss_save,
463 true,
465 0,
466 NULL,
467 NULL,
468 NULL);
469
470 MarkGUCPrefixReserved("pg_stat_statements");
471
472 /*
473 * Register our shared memory needs.
474 */
476
477 /*
478 * Install hooks.
479 */
494}
495
496/*
497 * shmem request callback: Request shared memory resources.
498 *
499 * This is called at postmaster startup. Note that the shared memory isn't
500 * allocated here yet, this merely register our needs.
501 *
502 * In EXEC_BACKEND mode, this is also called in each backend, to re-attach to
503 * the shared memory area that was already initialized.
504 */
505static void
507{
508 ShmemRequestHash(.name = "pg_stat_statements hash",
509 .nelems = pgss_max,
510 .hash_info.keysize = sizeof(pgssHashKey),
511 .hash_info.entrysize = sizeof(pgssEntry),
512 .hash_flags = HASH_ELEM | HASH_BLOBS,
513 .ptr = &pgss_hash,
514 );
515 ShmemRequestStruct(.name = "pg_stat_statements",
516 .size = sizeof(pgssSharedState),
517 .ptr = (void **) &pgss,
518 );
519}
520
521/*
522 * shmem init callback: Initialize our shared memory data structures at
523 * postmaster startup.
524 *
525 * Load any pre-existing statistics from file. Also create and load the
526 * query-texts file, which is expected to exist (even if empty) while the
527 * module is enabled.
528 */
529static void
531{
532 int tranche_id;
533 FILE *file = NULL;
534 FILE *qfile = NULL;
535 uint32 header;
536 int32 num;
537 int32 pgver;
538 int32 i;
539 int buffer_size;
540 char *buffer = NULL;
541
542 /*
543 * We already checked that we're loaded from shared_preload_libraries in
544 * _PG_init(), so we should not get here after postmaster startup.
545 */
547
548 /*
549 * Initialize the shmem area with no statistics.
550 */
551 tranche_id = LWLockNewTrancheId("pg_stat_statements");
552 LWLockInitialize(&pgss->lock.lock, tranche_id);
556 pgss->extent = 0;
557 pgss->n_writers = 0;
558 pgss->gc_count = 0;
559 pgss->stats.dealloc = 0;
561
562 /* The hash table must've also been initialized by now */
564
565 /*
566 * Set up a shmem exit hook to dump the statistics to disk on postmaster
567 * (or standalone backend) exit.
568 */
570
571 /*
572 * Load any pre-existing statistics from file.
573 *
574 * Note: we don't bother with locks here, because there should be no other
575 * processes running when this code is reached.
576 */
577
578 /* Unlink query text file possibly left over from crash */
580
581 /* Allocate new query text temp file */
583 if (qfile == NULL)
584 goto write_error;
585
586 /*
587 * If we were told not to load old statistics, we're done. (Note we do
588 * not try to unlink any old dump file in this case. This seems a bit
589 * questionable but it's the historical behavior.)
590 */
591 if (!pgss_save)
592 {
594 return;
595 }
596
597 /*
598 * Attempt to load old statistics from the dump file.
599 */
601 if (file == NULL)
602 {
603 if (errno != ENOENT)
604 goto read_error;
605 /* No existing persisted stats file, so we're done */
607 return;
608 }
609
610 buffer_size = 2048;
611 buffer = (char *) palloc(buffer_size);
612
613 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
614 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
615 fread(&num, sizeof(int32), 1, file) != 1)
616 goto read_error;
617
618 if (header != PGSS_FILE_HEADER ||
620 goto data_error;
621
622 for (i = 0; i < num; i++)
623 {
625 pgssEntry *entry;
626 Size query_offset;
627
628 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
629 goto read_error;
630
631 /* Encoding is the only field we can easily sanity-check */
632 if (!PG_VALID_BE_ENCODING(temp.encoding))
633 goto data_error;
634
635 /* Resize buffer as needed */
636 if (temp.query_len >= buffer_size)
637 {
638 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
639 buffer = repalloc(buffer, buffer_size);
640 }
641
642 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
643 goto read_error;
644
645 /* Should have a trailing null, but let's make sure */
646 buffer[temp.query_len] = '\0';
647
648 /* Skip loading "sticky" entries */
649 if (IS_STICKY(temp.counters))
650 continue;
651
652 /* Store the query text */
653 query_offset = pgss->extent;
654 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
655 goto write_error;
656 pgss->extent += temp.query_len + 1;
657
658 /* make the hashtable entry (discards old entries if too many) */
659 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
660 temp.encoding,
661 false);
662
663 /* copy in the actual stats */
664 entry->counters = temp.counters;
665 entry->stats_since = temp.stats_since;
666 entry->minmax_stats_since = temp.minmax_stats_since;
667 }
668
669 /* Read global statistics for pg_stat_statements */
670 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
671 goto read_error;
672
673 pfree(buffer);
674 FreeFile(file);
676
677 /*
678 * Remove the persisted stats file so it's not included in
679 * backups/replication standbys, etc. A new file will be written on next
680 * shutdown.
681 *
682 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
683 * because we remove that file on startup; it acts inversely to
684 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
685 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
686 * when the server is not running. Leaving the file creates no danger of
687 * a newly restored database having a spurious record of execution costs,
688 * which is what we're really concerned about here.
689 */
691
692 return;
693
695 ereport(LOG,
697 errmsg("could not read file \"%s\": %m",
699 goto fail;
701 ereport(LOG,
703 errmsg("ignoring invalid data in file \"%s\"",
705 goto fail;
707 ereport(LOG,
709 errmsg("could not write file \"%s\": %m",
711fail:
712 if (buffer)
713 pfree(buffer);
714 if (file)
715 FreeFile(file);
716 if (qfile)
718 /* If possible, throw away the bogus file; ignore any error */
720
721 /*
722 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
723 * server is running with pg_stat_statements enabled
724 */
725}
726
727/*
728 * shmem_shutdown hook: Dump statistics into file.
729 *
730 * Note: we don't bother with acquiring lock, because there should be no
731 * other processes running when this is called.
732 */
733static void
735{
736 FILE *file;
737 char *qbuffer = NULL;
738 Size qbuffer_size = 0;
740 int32 num_entries;
741 pgssEntry *entry;
742
743 /* Don't try to dump during a crash. */
744 if (code)
745 return;
746
747 /* Safety check ... shouldn't get here unless shmem is set up. */
748 if (!pgss || !pgss_hash)
749 return;
750
751 /* Don't dump if told not to. */
752 if (!pgss_save)
753 return;
754
756 if (file == NULL)
757 goto error;
758
759 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
760 goto error;
761 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
762 goto error;
763 num_entries = hash_get_num_entries(pgss_hash);
764 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
765 goto error;
766
768 if (qbuffer == NULL)
769 goto error;
770
771 /*
772 * When serializing to disk, we store query texts immediately after their
773 * entry data. Any orphaned query texts are thereby excluded.
774 */
776 while ((entry = hash_seq_search(&hash_seq)) != NULL)
777 {
778 int len = entry->query_len;
779 char *qstr = qtext_fetch(entry->query_offset, len,
781
782 if (qstr == NULL)
783 continue; /* Ignore any entries with bogus texts */
784
785 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
786 fwrite(qstr, 1, len + 1, file) != len + 1)
787 {
788 /* note: we assume hash_seq_term won't change errno */
790 goto error;
791 }
792 }
793
794 /* Dump global statistics for pg_stat_statements */
795 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
796 goto error;
797
798 pfree(qbuffer);
799 qbuffer = NULL;
800
801 if (FreeFile(file))
802 {
803 file = NULL;
804 goto error;
805 }
806
807 /*
808 * Rename file into place, so we atomically replace any old one.
809 */
811
812 /* Unlink query-texts file; it's not needed while the server is shut down */
814
815 return;
816
817error:
818 ereport(LOG,
820 errmsg("could not write file \"%s\": %m",
821 PGSS_DUMP_FILE ".tmp")));
822 if (qbuffer)
823 pfree(qbuffer);
824 if (file)
825 FreeFile(file);
826 unlink(PGSS_DUMP_FILE ".tmp");
828}
829
830/*
831 * Post-parse-analysis hook: mark query with a queryId
832 */
833static void
835{
837 prev_post_parse_analyze_hook(pstate, query, jstate);
838
839 /* Safety check... */
841 return;
842
843 /*
844 * If it's EXECUTE, clear the queryId so that stats will accumulate for
845 * the underlying PREPARE. But don't do this if we're not tracking
846 * utility statements, to avoid messing up another extension that might be
847 * tracking them.
848 */
849 if (query->utilityStmt)
850 {
852 {
853 query->queryId = INT64CONST(0);
854 return;
855 }
856 }
857
858 /*
859 * If query jumbling were able to identify any ignorable constants, we
860 * immediately create a hash table entry for the query, so that we can
861 * record the normalized form of the query string. If there were no such
862 * constants, the normalized string would be the same as the query text
863 * anyway, so there's no need for an early entry.
864 */
865 if (jstate && jstate->clocations_count > 0)
866 pgss_store(pstate->p_sourcetext,
867 query->queryId,
868 query->stmt_location,
869 query->stmt_len,
871 0,
872 0,
873 NULL,
874 NULL,
875 NULL,
876 jstate,
877 0,
878 0,
880}
881
882/*
883 * Planner hook: forward to regular planner, but measure planning time
884 * if needed.
885 */
886static PlannedStmt *
888 const char *query_string,
889 int cursorOptions,
890 ParamListInfo boundParams,
891 ExplainState *es)
892{
894
895 /*
896 * We can't process the query if no query_string is provided, as
897 * pgss_store needs it. We also ignore queries without a queryId, as they
898 * would be treated as utility statements, which may not be the case.
899 */
901 && pgss_track_planning && query_string
902 && parse->queryId != INT64CONST(0))
903 {
906 BufferUsage bufusage_start,
907 bufusage;
908 WalUsage walusage_start,
909 walusage;
910
911 /* We need to track buffer usage as the planner can access them. */
912 bufusage_start = pgBufferUsage;
913
914 /*
915 * Similarly the planner could write some WAL records in some cases
916 * (e.g. setting a hint bit with those being WAL-logged)
917 */
918 walusage_start = pgWalUsage;
920
922 PG_TRY();
923 {
925 result = prev_planner_hook(parse, query_string, cursorOptions,
926 boundParams, es);
927 else
928 result = standard_planner(parse, query_string, cursorOptions,
929 boundParams, es);
930 }
931 PG_FINALLY();
932 {
934 }
935 PG_END_TRY();
936
939
940 /* calc differences of buffer counters. */
941 memset(&bufusage, 0, sizeof(BufferUsage));
942 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943
944 /* calc differences of WAL counters. */
945 memset(&walusage, 0, sizeof(WalUsage));
946 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947
948 pgss_store(query_string,
949 parse->queryId,
950 parse->stmt_location,
951 parse->stmt_len,
952 PGSS_PLAN,
954 0,
955 &bufusage,
956 &walusage,
957 NULL,
958 NULL,
959 0,
960 0,
961 result->planOrigin);
962 }
963 else
964 {
965 /*
966 * Even though we're not tracking plan time for this statement, we
967 * must still increment the nesting level, to ensure that functions
968 * evaluated during planning are not seen as top-level calls.
969 */
971 PG_TRY();
972 {
974 result = prev_planner_hook(parse, query_string, cursorOptions,
975 boundParams, es);
976 else
977 result = standard_planner(parse, query_string, cursorOptions,
978 boundParams, es);
979 }
980 PG_FINALLY();
981 {
983 }
984 PG_END_TRY();
985 }
986
987 return result;
988}
989
990/*
991 * ExecutorStart hook: start up tracking if needed
992 */
993static void
994pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
995{
996 /*
997 * If query has queryId zero, don't track it. This prevents double
998 * counting of optimizable statements that are directly contained in
999 * utility statements.
1000 */
1001 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1002 {
1003 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1004 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1005 }
1006
1008 prev_ExecutorStart(queryDesc, eflags);
1009 else
1010 standard_ExecutorStart(queryDesc, eflags);
1011}
1012
1013/*
1014 * ExecutorRun hook: all we need do is track nesting depth
1015 */
1016static void
1017pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1018{
1019 nesting_level++;
1020 PG_TRY();
1021 {
1022 if (prev_ExecutorRun)
1023 prev_ExecutorRun(queryDesc, direction, count);
1024 else
1025 standard_ExecutorRun(queryDesc, direction, count);
1026 }
1027 PG_FINALLY();
1028 {
1029 nesting_level--;
1030 }
1031 PG_END_TRY();
1032}
1033
1034/*
1035 * ExecutorFinish hook: all we need do is track nesting depth
1036 */
1037static void
1039{
1040 nesting_level++;
1041 PG_TRY();
1042 {
1044 prev_ExecutorFinish(queryDesc);
1045 else
1046 standard_ExecutorFinish(queryDesc);
1047 }
1048 PG_FINALLY();
1049 {
1050 nesting_level--;
1051 }
1052 PG_END_TRY();
1053}
1054
1055/*
1056 * ExecutorEnd hook: store results if needed
1057 */
1058static void
1060{
1061 int64 queryId = queryDesc->plannedstmt->queryId;
1062
1063 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1065 {
1066 pgss_store(queryDesc->sourceText,
1067 queryId,
1068 queryDesc->plannedstmt->stmt_location,
1069 queryDesc->plannedstmt->stmt_len,
1070 PGSS_EXEC,
1072 queryDesc->estate->es_total_processed,
1073 &queryDesc->query_instr->bufusage,
1074 &queryDesc->query_instr->walusage,
1075 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1076 NULL,
1079 queryDesc->plannedstmt->planOrigin);
1080 }
1081
1082 if (prev_ExecutorEnd)
1083 prev_ExecutorEnd(queryDesc);
1084 else
1085 standard_ExecutorEnd(queryDesc);
1086}
1087
1088/*
1089 * ProcessUtility hook
1090 */
1091static void
1092pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1093 bool readOnlyTree,
1094 ProcessUtilityContext context,
1095 ParamListInfo params, QueryEnvironment *queryEnv,
1096 DestReceiver *dest, QueryCompletion *qc)
1097{
1098 Node *parsetree = pstmt->utilityStmt;
1099 int64 saved_queryId = pstmt->queryId;
1101 int saved_stmt_len = pstmt->stmt_len;
1103
1104 /*
1105 * Force utility statements to get queryId zero. We do this even in cases
1106 * where the statement contains an optimizable statement for which a
1107 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1108 * cases, runtime control will first go through ProcessUtility and then
1109 * the executor, and we don't want the executor hooks to do anything,
1110 * since we are already measuring the statement's costs at the utility
1111 * level.
1112 *
1113 * Note that this is only done if pg_stat_statements is enabled and
1114 * configured to track utility statements, in the unlikely possibility
1115 * that user configured another extension to handle utility statements
1116 * only.
1117 */
1118 if (enabled)
1119 pstmt->queryId = INT64CONST(0);
1120
1121 /*
1122 * If it's an EXECUTE statement, we don't track it and don't increment the
1123 * nesting level. This allows the cycles to be charged to the underlying
1124 * PREPARE instead (by the Executor hooks), which is much more useful.
1125 *
1126 * We also don't track execution of PREPARE. If we did, we would get one
1127 * hash table entry for the PREPARE (with hash calculated from the query
1128 * string), and then a different one with the same query string (but hash
1129 * calculated from the query tree) would be used to accumulate costs of
1130 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1131 * actually run the planner (only parse+rewrite), its costs are generally
1132 * pretty negligible and it seems okay to just ignore it.
1133 */
1134 if (enabled &&
1135 !IsA(parsetree, ExecuteStmt) &&
1136 !IsA(parsetree, PrepareStmt))
1137 {
1140 uint64 rows;
1141 BufferUsage bufusage_start,
1142 bufusage;
1143 WalUsage walusage_start,
1144 walusage;
1145
1146 bufusage_start = pgBufferUsage;
1147 walusage_start = pgWalUsage;
1149
1150 nesting_level++;
1151 PG_TRY();
1152 {
1154 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1155 context, params, queryEnv,
1156 dest, qc);
1157 else
1158 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1159 context, params, queryEnv,
1160 dest, qc);
1161 }
1162 PG_FINALLY();
1163 {
1164 nesting_level--;
1165 }
1166 PG_END_TRY();
1167
1168 /*
1169 * CAUTION: do not access the *pstmt data structure again below here.
1170 * If it was a ROLLBACK or similar, that data structure may have been
1171 * freed. We must copy everything we still need into local variables,
1172 * which we did above.
1173 *
1174 * For the same reason, we can't risk restoring pstmt->queryId to its
1175 * former value, which'd otherwise be a good idea.
1176 */
1177
1180
1181 /*
1182 * Track the total number of rows retrieved or affected by the utility
1183 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1184 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1185 */
1186 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1187 qc->commandTag == CMDTAG_FETCH ||
1188 qc->commandTag == CMDTAG_SELECT ||
1190 qc->nprocessed : 0;
1191
1192 /* calc differences of buffer counters. */
1193 memset(&bufusage, 0, sizeof(BufferUsage));
1194 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1195
1196 /* calc differences of WAL counters. */
1197 memset(&walusage, 0, sizeof(WalUsage));
1198 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1199
1200 pgss_store(queryString,
1204 PGSS_EXEC,
1206 rows,
1207 &bufusage,
1208 &walusage,
1209 NULL,
1210 NULL,
1211 0,
1212 0,
1213 pstmt->planOrigin);
1214 }
1215 else
1216 {
1217 /*
1218 * Even though we're not tracking execution time for this statement,
1219 * we must still increment the nesting level, to ensure that functions
1220 * evaluated within it are not seen as top-level calls. But don't do
1221 * so for EXECUTE; that way, when control reaches pgss_planner or
1222 * pgss_ExecutorStart, we will treat the costs as top-level if
1223 * appropriate. Likewise, don't bump for PREPARE, so that parse
1224 * analysis will treat the statement as top-level if appropriate.
1225 *
1226 * To be absolutely certain we don't mess up the nesting level,
1227 * evaluate the bump_level condition just once.
1228 */
1229 bool bump_level =
1230 !IsA(parsetree, ExecuteStmt) &&
1231 !IsA(parsetree, PrepareStmt);
1232
1233 if (bump_level)
1234 nesting_level++;
1235 PG_TRY();
1236 {
1238 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1239 context, params, queryEnv,
1240 dest, qc);
1241 else
1242 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1243 context, params, queryEnv,
1244 dest, qc);
1245 }
1246 PG_FINALLY();
1247 {
1248 if (bump_level)
1249 nesting_level--;
1250 }
1251 PG_END_TRY();
1252 }
1253}
1254
1255/*
1256 * Store some statistics for a statement.
1257 *
1258 * If jstate is not NULL then we're trying to create an entry for which
1259 * we have no statistics as yet; we just want to record the normalized
1260 * query string. total_time, rows, bufusage and walusage are ignored in this
1261 * case.
1262 *
1263 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1264 * for the arrays in the Counters field.
1265 */
1266static void
1267pgss_store(const char *query, int64 queryId,
1268 int query_location, int query_len,
1269 pgssStoreKind kind,
1270 double total_time, uint64 rows,
1271 const BufferUsage *bufusage,
1272 const WalUsage *walusage,
1273 const struct JitInstrumentation *jitusage,
1274 const JumbleState *jstate,
1275 int parallel_workers_to_launch,
1276 int parallel_workers_launched,
1277 PlannedStmtOrigin planOrigin)
1278{
1279 pgssHashKey key;
1280 pgssEntry *entry;
1281 char *norm_query = NULL;
1283
1284 Assert(query != NULL);
1285
1286 /* Safety check... */
1287 if (!pgss || !pgss_hash)
1288 return;
1289
1290 /*
1291 * Nothing to do if compute_query_id isn't enabled and no other module
1292 * computed a query identifier.
1293 */
1294 if (queryId == INT64CONST(0))
1295 return;
1296
1297 /*
1298 * Confine our attention to the relevant part of the string, if the query
1299 * is a portion of a multi-statement source string, and update query
1300 * location and length if needed.
1301 */
1302 query = CleanQuerytext(query, &query_location, &query_len);
1303
1304 /* Set up key for hashtable search */
1305
1306 /* clear padding */
1307 memset(&key, 0, sizeof(pgssHashKey));
1308
1309 key.userid = GetUserId();
1310 key.dbid = MyDatabaseId;
1311 key.queryid = queryId;
1312 key.toplevel = (nesting_level == 0);
1313
1314 /* Lookup the hash table entry with shared lock. */
1316
1317 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1318
1319 /* Create new entry, if not present */
1320 if (!entry)
1321 {
1322 Size query_offset;
1323 int gc_count;
1324 bool stored;
1325 bool do_gc;
1326
1327 /*
1328 * Create a new, normalized query string if caller asked. We don't
1329 * need to hold the lock while doing this work. (Note: in any case,
1330 * it's possible that someone else creates a duplicate hashtable entry
1331 * in the interval where we don't hold the lock below. That case is
1332 * handled by entry_alloc.)
1333 */
1334 if (jstate)
1335 {
1339 &query_len);
1341 }
1342
1343 /* Append new query text to file with only shared lock held */
1344 stored = qtext_store(norm_query ? norm_query : query, query_len,
1345 &query_offset, &gc_count);
1346
1347 /*
1348 * Determine whether we need to garbage collect external query texts
1349 * while the shared lock is still held. This micro-optimization
1350 * avoids taking the time to decide this while holding exclusive lock.
1351 */
1353
1354 /* Need exclusive lock to make a new hashtable entry - promote */
1357
1358 /*
1359 * A garbage collection may have occurred while we weren't holding the
1360 * lock. In the unlikely event that this happens, the query text we
1361 * stored above will have been garbage collected, so write it again.
1362 * This should be infrequent enough that doing it while holding
1363 * exclusive lock isn't a performance problem.
1364 */
1365 if (!stored || pgss->gc_count != gc_count)
1366 stored = qtext_store(norm_query ? norm_query : query, query_len,
1367 &query_offset, NULL);
1368
1369 /* If we failed to write to the text file, give up */
1370 if (!stored)
1371 goto done;
1372
1373 /* OK to create a new hashtable entry */
1374 entry = entry_alloc(&key, query_offset, query_len, encoding,
1375 jstate != NULL);
1376
1377 /* If needed, perform garbage collection while exclusive lock held */
1378 if (do_gc)
1379 gc_qtexts();
1380 }
1381
1382 /* Increment the counts, except when jstate is not NULL */
1383 if (!jstate)
1384 {
1385 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1386
1387 /*
1388 * Grab the spinlock while updating the counters (see comment about
1389 * locking rules at the head of the file)
1390 */
1391 SpinLockAcquire(&entry->mutex);
1392
1393 /* "Unstick" entry if it was previously sticky */
1394 if (IS_STICKY(entry->counters))
1395 entry->counters.usage = USAGE_INIT;
1396
1397 entry->counters.calls[kind] += 1;
1398 entry->counters.total_time[kind] += total_time;
1399
1400 if (entry->counters.calls[kind] == 1)
1401 {
1402 entry->counters.min_time[kind] = total_time;
1403 entry->counters.max_time[kind] = total_time;
1404 entry->counters.mean_time[kind] = total_time;
1405 }
1406 else
1407 {
1408 /*
1409 * Welford's method for accurately computing variance. See
1410 * <http://www.johndcook.com/blog/standard_deviation/>
1411 */
1412 double old_mean = entry->counters.mean_time[kind];
1413
1414 entry->counters.mean_time[kind] +=
1415 (total_time - old_mean) / entry->counters.calls[kind];
1416 entry->counters.sum_var_time[kind] +=
1417 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1418
1419 /*
1420 * Calculate min and max time. min = 0 and max = 0 means that the
1421 * min/max statistics were reset
1422 */
1423 if (entry->counters.min_time[kind] == 0
1424 && entry->counters.max_time[kind] == 0)
1425 {
1426 entry->counters.min_time[kind] = total_time;
1427 entry->counters.max_time[kind] = total_time;
1428 }
1429 else
1430 {
1431 if (entry->counters.min_time[kind] > total_time)
1432 entry->counters.min_time[kind] = total_time;
1433 if (entry->counters.max_time[kind] < total_time)
1434 entry->counters.max_time[kind] = total_time;
1435 }
1436 }
1437 entry->counters.rows += rows;
1438 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1439 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1442 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1443 entry->counters.local_blks_read += bufusage->local_blks_read;
1446 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1447 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1454 entry->counters.usage += USAGE_EXEC(total_time);
1455 entry->counters.wal_records += walusage->wal_records;
1456 entry->counters.wal_fpi += walusage->wal_fpi;
1457 entry->counters.wal_bytes += walusage->wal_bytes;
1458 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1459 if (jitusage)
1460 {
1461 entry->counters.jit_functions += jitusage->created_functions;
1462 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1463
1464 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1465 entry->counters.jit_deform_count++;
1466 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1467
1468 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1470 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1471
1472 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1474 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1475
1476 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1478 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1479 }
1480
1481 /* parallel worker counters */
1482 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1483 entry->counters.parallel_workers_launched += parallel_workers_launched;
1484
1485 /* plan cache counters */
1486 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1488 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1489 entry->counters.custom_plan_calls++;
1490
1491 SpinLockRelease(&entry->mutex);
1492 }
1493
1494done:
1496
1497 /* We postpone this clean-up until we're out of the lock */
1498 if (norm_query)
1500}
1501
1502/*
1503 * Reset statement statistics corresponding to userid, dbid, and queryid.
1504 */
1505Datum
1507{
1508 Oid userid;
1509 Oid dbid;
1510 int64 queryid;
1511
1512 userid = PG_GETARG_OID(0);
1513 dbid = PG_GETARG_OID(1);
1514 queryid = PG_GETARG_INT64(2);
1515
1516 entry_reset(userid, dbid, queryid, false);
1517
1519}
1520
1521Datum
1523{
1524 Oid userid;
1525 Oid dbid;
1526 int64 queryid;
1527 bool minmax_only;
1528
1529 userid = PG_GETARG_OID(0);
1530 dbid = PG_GETARG_OID(1);
1531 queryid = PG_GETARG_INT64(2);
1533
1534 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1535}
1536
1537/*
1538 * Reset statement statistics.
1539 */
1540Datum
1542{
1543 entry_reset(0, 0, 0, false);
1544
1546}
1547
1548/* Number of output arguments (columns) for various API versions */
1549#define PG_STAT_STATEMENTS_COLS_V1_0 14
1550#define PG_STAT_STATEMENTS_COLS_V1_1 18
1551#define PG_STAT_STATEMENTS_COLS_V1_2 19
1552#define PG_STAT_STATEMENTS_COLS_V1_3 23
1553#define PG_STAT_STATEMENTS_COLS_V1_8 32
1554#define PG_STAT_STATEMENTS_COLS_V1_9 33
1555#define PG_STAT_STATEMENTS_COLS_V1_10 43
1556#define PG_STAT_STATEMENTS_COLS_V1_11 49
1557#define PG_STAT_STATEMENTS_COLS_V1_12 52
1558#define PG_STAT_STATEMENTS_COLS_V1_13 54
1559#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1560
1561/*
1562 * Retrieve statement statistics.
1563 *
1564 * The SQL API of this function has changed multiple times, and will likely
1565 * do so again in future. To support the case where a newer version of this
1566 * loadable module is being used with an old SQL declaration of the function,
1567 * we continue to support the older API versions. For 1.2 and later, the
1568 * expected API version is identified by embedding it in the C name of the
1569 * function. Unfortunately we weren't bright enough to do that for 1.1.
1570 */
1571Datum
1573{
1574 bool showtext = PG_GETARG_BOOL(0);
1575
1577
1578 return (Datum) 0;
1579}
1580
1581Datum
1583{
1584 bool showtext = PG_GETARG_BOOL(0);
1585
1587
1588 return (Datum) 0;
1589}
1590
1591Datum
1593{
1594 bool showtext = PG_GETARG_BOOL(0);
1595
1597
1598 return (Datum) 0;
1599}
1600
1601Datum
1603{
1604 bool showtext = PG_GETARG_BOOL(0);
1605
1607
1608 return (Datum) 0;
1609}
1610
1611Datum
1613{
1614 bool showtext = PG_GETARG_BOOL(0);
1615
1617
1618 return (Datum) 0;
1619}
1620
1621Datum
1623{
1624 bool showtext = PG_GETARG_BOOL(0);
1625
1627
1628 return (Datum) 0;
1629}
1630
1631Datum
1633{
1634 bool showtext = PG_GETARG_BOOL(0);
1635
1637
1638 return (Datum) 0;
1639}
1640
1641Datum
1643{
1644 bool showtext = PG_GETARG_BOOL(0);
1645
1647
1648 return (Datum) 0;
1649}
1650
1651/*
1652 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1653 * This can be removed someday, perhaps.
1654 */
1655Datum
1657{
1658 /* If it's really API 1.1, we'll figure that out below */
1660
1661 return (Datum) 0;
1662}
1663
1664/* Common code for all versions of pg_stat_statements() */
1665static void
1667 pgssVersion api_version,
1668 bool showtext)
1669{
1671 Oid userid = GetUserId();
1672 bool is_allowed_role = false;
1673 char *qbuffer = NULL;
1674 Size qbuffer_size = 0;
1675 Size extent = 0;
1676 int gc_count = 0;
1678 pgssEntry *entry;
1679
1680 /*
1681 * Superusers or roles with the privileges of pg_read_all_stats members
1682 * are allowed
1683 */
1685
1686 /* hash table must exist already */
1687 if (!pgss || !pgss_hash)
1688 ereport(ERROR,
1690 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1691
1692 InitMaterializedSRF(fcinfo, 0);
1693
1694 /*
1695 * Check we have the expected number of output arguments. Aside from
1696 * being a good safety check, we need a kluge here to detect API version
1697 * 1.1, which was wedged into the code in an ill-considered way.
1698 */
1699 switch (rsinfo->setDesc->natts)
1700 {
1702 if (api_version != PGSS_V1_0)
1703 elog(ERROR, "incorrect number of output arguments");
1704 break;
1706 /* pg_stat_statements() should have told us 1.0 */
1707 if (api_version != PGSS_V1_0)
1708 elog(ERROR, "incorrect number of output arguments");
1709 api_version = PGSS_V1_1;
1710 break;
1712 if (api_version != PGSS_V1_2)
1713 elog(ERROR, "incorrect number of output arguments");
1714 break;
1716 if (api_version != PGSS_V1_3)
1717 elog(ERROR, "incorrect number of output arguments");
1718 break;
1720 if (api_version != PGSS_V1_8)
1721 elog(ERROR, "incorrect number of output arguments");
1722 break;
1724 if (api_version != PGSS_V1_9)
1725 elog(ERROR, "incorrect number of output arguments");
1726 break;
1728 if (api_version != PGSS_V1_10)
1729 elog(ERROR, "incorrect number of output arguments");
1730 break;
1732 if (api_version != PGSS_V1_11)
1733 elog(ERROR, "incorrect number of output arguments");
1734 break;
1736 if (api_version != PGSS_V1_12)
1737 elog(ERROR, "incorrect number of output arguments");
1738 break;
1740 if (api_version != PGSS_V1_13)
1741 elog(ERROR, "incorrect number of output arguments");
1742 break;
1743 default:
1744 elog(ERROR, "incorrect number of output arguments");
1745 }
1746
1747 /*
1748 * We'd like to load the query text file (if needed) while not holding any
1749 * lock on pgss->lock. In the worst case we'll have to do this again
1750 * after we have the lock, but it's unlikely enough to make this a win
1751 * despite occasional duplicated work. We need to reload if anybody
1752 * writes to the file (either a retail qtext_store(), or a garbage
1753 * collection) between this point and where we've gotten shared lock. If
1754 * a qtext_store is actually in progress when we look, we might as well
1755 * skip the speculative load entirely.
1756 */
1757 if (showtext)
1758 {
1759 int n_writers;
1760
1761 /* Take the mutex so we can examine variables */
1763 extent = pgss->extent;
1764 n_writers = pgss->n_writers;
1765 gc_count = pgss->gc_count;
1767
1768 /* No point in loading file now if there are active writers */
1769 if (n_writers == 0)
1771 }
1772
1773 /*
1774 * Get shared lock, load or reload the query text file if we must, and
1775 * iterate over the hashtable entries.
1776 *
1777 * With a large hash table, we might be holding the lock rather longer
1778 * than one could wish. However, this only blocks creation of new hash
1779 * table entries, and the larger the hash table the less likely that is to
1780 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1781 * we need to partition the hash table to limit the time spent holding any
1782 * one lock.
1783 */
1785
1786 if (showtext)
1787 {
1788 /*
1789 * Here it is safe to examine extent and gc_count without taking the
1790 * mutex. Note that although other processes might change
1791 * pgss->extent just after we look at it, the strings they then write
1792 * into the file cannot yet be referenced in the hashtable, so we
1793 * don't care whether we see them or not.
1794 *
1795 * If qtext_load_file fails, we just press on; we'll return NULL for
1796 * every query text.
1797 */
1798 if (qbuffer == NULL ||
1799 pgss->extent != extent ||
1800 pgss->gc_count != gc_count)
1801 {
1802 if (qbuffer)
1803 pfree(qbuffer);
1805 }
1806 }
1807
1809 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1810 {
1812 bool nulls[PG_STAT_STATEMENTS_COLS];
1813 int i = 0;
1814 Counters tmp;
1815 double stddev;
1816 int64 queryid = entry->key.queryid;
1817 TimestampTz stats_since;
1818 TimestampTz minmax_stats_since;
1819
1820 memset(values, 0, sizeof(values));
1821 memset(nulls, 0, sizeof(nulls));
1822
1823 values[i++] = ObjectIdGetDatum(entry->key.userid);
1824 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1825 if (api_version >= PGSS_V1_9)
1826 values[i++] = BoolGetDatum(entry->key.toplevel);
1827
1828 if (is_allowed_role || entry->key.userid == userid)
1829 {
1830 if (api_version >= PGSS_V1_2)
1831 values[i++] = Int64GetDatumFast(queryid);
1832
1833 if (showtext)
1834 {
1835 char *qstr = qtext_fetch(entry->query_offset,
1836 entry->query_len,
1837 qbuffer,
1838 qbuffer_size);
1839
1840 if (qstr)
1841 {
1842 char *enc;
1843
1845 entry->query_len,
1846 entry->encoding);
1847
1849
1850 if (enc != qstr)
1851 pfree(enc);
1852 }
1853 else
1854 {
1855 /* Just return a null if we fail to find the text */
1856 nulls[i++] = true;
1857 }
1858 }
1859 else
1860 {
1861 /* Query text not requested */
1862 nulls[i++] = true;
1863 }
1864 }
1865 else
1866 {
1867 /* Don't show queryid */
1868 if (api_version >= PGSS_V1_2)
1869 nulls[i++] = true;
1870
1871 /*
1872 * Don't show query text, but hint as to the reason for not doing
1873 * so if it was requested
1874 */
1875 if (showtext)
1876 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1877 else
1878 nulls[i++] = true;
1879 }
1880
1881 /* copy counters to a local variable to keep locking time short */
1882 SpinLockAcquire(&entry->mutex);
1883 tmp = entry->counters;
1884 SpinLockRelease(&entry->mutex);
1885
1886 /*
1887 * The spinlock is not required when reading these two as they are
1888 * always updated when holding pgss->lock exclusively.
1889 */
1890 stats_since = entry->stats_since;
1891 minmax_stats_since = entry->minmax_stats_since;
1892
1893 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1894 if (IS_STICKY(tmp))
1895 continue;
1896
1897 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1898 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1899 {
1900 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1901 {
1902 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1903 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1904 }
1905
1906 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1907 api_version >= PGSS_V1_8)
1908 {
1909 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1910 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1911 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1912
1913 /*
1914 * Note we are calculating the population variance here, not
1915 * the sample variance, as we have data for the whole
1916 * population, so Bessel's correction is not used, and we
1917 * don't divide by tmp.calls - 1.
1918 */
1919 if (tmp.calls[kind] > 1)
1920 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1921 else
1922 stddev = 0.0;
1923 values[i++] = Float8GetDatumFast(stddev);
1924 }
1925 }
1926 values[i++] = Int64GetDatumFast(tmp.rows);
1929 if (api_version >= PGSS_V1_1)
1934 if (api_version >= PGSS_V1_1)
1939 if (api_version >= PGSS_V1_1)
1940 {
1943 }
1944 if (api_version >= PGSS_V1_11)
1945 {
1948 }
1949 if (api_version >= PGSS_V1_10)
1950 {
1953 }
1954 if (api_version >= PGSS_V1_8)
1955 {
1956 char buf[256];
1957 Datum wal_bytes;
1958
1961
1962 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1963
1964 /* Convert to numeric. */
1965 wal_bytes = DirectFunctionCall3(numeric_in,
1968 Int32GetDatum(-1));
1969 values[i++] = wal_bytes;
1970 }
1971 if (api_version >= PGSS_V1_12)
1972 {
1974 }
1975 if (api_version >= PGSS_V1_10)
1976 {
1985 }
1986 if (api_version >= PGSS_V1_11)
1987 {
1990 }
1991 if (api_version >= PGSS_V1_12)
1992 {
1995 }
1996 if (api_version >= PGSS_V1_13)
1997 {
2000 }
2001 if (api_version >= PGSS_V1_11)
2002 {
2003 values[i++] = TimestampTzGetDatum(stats_since);
2004 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2005 }
2006
2007 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2008 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2009 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2010 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2011 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2012 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2013 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2014 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2015 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2016 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2017 -1 /* fail if you forget to update this assert */ ));
2018
2019 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2020 }
2021
2023
2024 if (qbuffer)
2025 pfree(qbuffer);
2026}
2027
2028/* Number of output arguments (columns) for pg_stat_statements_info */
2029#define PG_STAT_STATEMENTS_INFO_COLS 2
2030
2031/*
2032 * Return statistics of pg_stat_statements.
2033 */
2034Datum
2036{
2037 pgssGlobalStats stats;
2038 TupleDesc tupdesc;
2040 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2041
2042 if (!pgss || !pgss_hash)
2043 ereport(ERROR,
2045 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2046
2047 /* Build a tuple descriptor for our result type */
2048 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2049 elog(ERROR, "return type must be a row type");
2050
2051 /* Read global statistics for pg_stat_statements */
2053 stats = pgss->stats;
2055
2056 values[0] = Int64GetDatum(stats.dealloc);
2058
2060}
2061
2062/*
2063 * Allocate a new hashtable entry.
2064 * caller must hold an exclusive lock on pgss->lock
2065 *
2066 * "query" need not be null-terminated; we rely on query_len instead
2067 *
2068 * If "sticky" is true, make the new entry artificially sticky so that it will
2069 * probably still be there when the query finishes execution. We do this by
2070 * giving it a median usage value rather than the normal value. (Strictly
2071 * speaking, query strings are normalized on a best effort basis, though it
2072 * would be difficult to demonstrate this even under artificial conditions.)
2073 *
2074 * Note: despite needing exclusive lock, it's not an error for the target
2075 * entry to already exist. This is because pgss_store releases and
2076 * reacquires lock after failing to find a match; so someone else could
2077 * have made the entry while we waited to get exclusive lock.
2078 */
2079static pgssEntry *
2080entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2081 bool sticky)
2082{
2083 pgssEntry *entry;
2084 bool found;
2085
2086 /* Make space if needed */
2088 entry_dealloc();
2089
2090 /* Find or create an entry with desired hash code */
2091 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2092
2093 if (!found)
2094 {
2095 /* New entry, initialize it */
2096
2097 /* reset the statistics */
2098 memset(&entry->counters, 0, sizeof(Counters));
2099 /* set the appropriate initial usage count */
2101 /* re-initialize the mutex each time ... we assume no one using it */
2102 SpinLockInit(&entry->mutex);
2103 /* ... and don't forget the query text metadata */
2104 Assert(query_len >= 0);
2105 entry->query_offset = query_offset;
2106 entry->query_len = query_len;
2107 entry->encoding = encoding;
2109 entry->minmax_stats_since = entry->stats_since;
2110 }
2111
2112 return entry;
2113}
2114
2115/*
2116 * qsort comparator for sorting into increasing usage order
2117 */
2118static int
2119entry_cmp(const void *lhs, const void *rhs)
2120{
2121 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2122 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2123
2124 if (l_usage < r_usage)
2125 return -1;
2126 else if (l_usage > r_usage)
2127 return +1;
2128 else
2129 return 0;
2130}
2131
2132/*
2133 * Deallocate least-used entries.
2134 *
2135 * Caller must hold an exclusive lock on pgss->lock.
2136 */
2137static void
2139{
2141 pgssEntry **entries;
2142 pgssEntry *entry;
2143 int nvictims;
2144 int i;
2146 int nvalidtexts;
2147
2148 /*
2149 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2150 * While we're scanning the table, apply the decay factor to the usage
2151 * values, and update the mean query length.
2152 *
2153 * Note that the mean query length is almost immediately obsolete, since
2154 * we compute it before not after discarding the least-used entries.
2155 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2156 * making two passes to get a more current result. Likewise, the new
2157 * cur_median_usage includes the entries we're about to zap.
2158 */
2159
2160 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2161
2162 i = 0;
2163 tottextlen = 0;
2164 nvalidtexts = 0;
2165
2167 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2168 {
2169 entries[i++] = entry;
2170 /* "Sticky" entries get a different usage decay rate. */
2171 if (IS_STICKY(entry->counters))
2173 else
2175 /* In the mean length computation, ignore dropped texts. */
2176 if (entry->query_len >= 0)
2177 {
2178 tottextlen += entry->query_len + 1;
2179 nvalidtexts++;
2180 }
2181 }
2182
2183 /* Sort into increasing order by usage */
2184 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2185
2186 /* Record the (approximate) median usage */
2187 if (i > 0)
2188 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2189 /* Record the mean query length */
2190 if (nvalidtexts > 0)
2192 else
2194
2195 /* Now zap an appropriate fraction of lowest-usage entries */
2196 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2197 nvictims = Min(nvictims, i);
2198
2199 for (i = 0; i < nvictims; i++)
2200 {
2201 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2202 }
2203
2204 pfree(entries);
2205
2206 /* Increment the number of times entries are deallocated */
2208 pgss->stats.dealloc += 1;
2210}
2211
2212/*
2213 * Given a query string (not necessarily null-terminated), allocate a new
2214 * entry in the external query text file and store the string there.
2215 *
2216 * If successful, returns true, and stores the new entry's offset in the file
2217 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2218 * number of garbage collections that have occurred so far.
2219 *
2220 * On failure, returns false.
2221 *
2222 * At least a shared lock on pgss->lock must be held by the caller, so as
2223 * to prevent a concurrent garbage collection. Share-lock-holding callers
2224 * should pass a gc_count pointer to obtain the number of garbage collections,
2225 * so that they can recheck the count after obtaining exclusive lock to
2226 * detect whether a garbage collection occurred (and removed this entry).
2227 */
2228static bool
2229qtext_store(const char *query, int query_len,
2230 Size *query_offset, int *gc_count)
2231{
2232 Size off;
2233 int fd;
2234
2235 /*
2236 * We use a spinlock to protect extent/n_writers/gc_count, so that
2237 * multiple processes may execute this function concurrently.
2238 */
2240 off = pgss->extent;
2241 pgss->extent += query_len + 1;
2242 pgss->n_writers++;
2243 if (gc_count)
2244 *gc_count = pgss->gc_count;
2246
2247 *query_offset = off;
2248
2249 /*
2250 * Don't allow the file to grow larger than what qtext_load_file can
2251 * (theoretically) handle. This has been seen to be reachable on 32-bit
2252 * platforms.
2253 */
2254 if (unlikely(query_len >= MaxAllocHugeSize - off))
2255 {
2256 errno = EFBIG; /* not quite right, but it'll do */
2257 fd = -1;
2258 goto error;
2259 }
2260
2261 /* Now write the data into the successfully-reserved part of the file */
2263 if (fd < 0)
2264 goto error;
2265
2266 if (pg_pwrite(fd, query, query_len, off) != query_len)
2267 goto error;
2268 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2269 goto error;
2270
2272
2273 /* Mark our write complete */
2275 pgss->n_writers--;
2277
2278 return true;
2279
2280error:
2281 ereport(LOG,
2283 errmsg("could not write file \"%s\": %m",
2284 PGSS_TEXT_FILE)));
2285
2286 if (fd >= 0)
2288
2289 /* Mark our write complete */
2291 pgss->n_writers--;
2293
2294 return false;
2295}
2296
2297/*
2298 * Read the external query text file into a palloc'd buffer.
2299 *
2300 * Returns NULL (without throwing an error) if unable to read, eg
2301 * file not there or insufficient memory.
2302 *
2303 * On success, the buffer size is also returned into *buffer_size.
2304 *
2305 * This can be called without any lock on pgss->lock, but in that case
2306 * the caller is responsible for verifying that the result is sane.
2307 */
2308static char *
2310{
2311 char *buf;
2312 int fd;
2313 struct stat stat;
2314 Size nread;
2315
2317 if (fd < 0)
2318 {
2319 if (errno != ENOENT)
2320 ereport(LOG,
2322 errmsg("could not read file \"%s\": %m",
2323 PGSS_TEXT_FILE)));
2324 return NULL;
2325 }
2326
2327 /* Get file length */
2328 if (fstat(fd, &stat))
2329 {
2330 ereport(LOG,
2332 errmsg("could not stat file \"%s\": %m",
2333 PGSS_TEXT_FILE)));
2335 return NULL;
2336 }
2337
2338 /* Allocate buffer; beware that off_t might be wider than size_t */
2341 else
2342 buf = NULL;
2343 if (buf == NULL)
2344 {
2345 ereport(LOG,
2347 errmsg("out of memory"),
2348 errdetail("Could not allocate enough memory to read file \"%s\".",
2349 PGSS_TEXT_FILE)));
2351 return NULL;
2352 }
2353
2354 /*
2355 * OK, slurp in the file. Windows fails if we try to read more than
2356 * INT_MAX bytes at once, and other platforms might not like that either,
2357 * so read a very large file in 1GB segments.
2358 */
2359 nread = 0;
2360 while (nread < stat.st_size)
2361 {
2362 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2363
2364 /*
2365 * If we get a short read and errno doesn't get set, the reason is
2366 * probably that garbage collection truncated the file since we did
2367 * the fstat(), so we don't log a complaint --- but we don't return
2368 * the data, either, since it's most likely corrupt due to concurrent
2369 * writes from garbage collection.
2370 */
2371 errno = 0;
2372 if (read(fd, buf + nread, toread) != toread)
2373 {
2374 if (errno)
2375 ereport(LOG,
2377 errmsg("could not read file \"%s\": %m",
2378 PGSS_TEXT_FILE)));
2379 pfree(buf);
2381 return NULL;
2382 }
2383 nread += toread;
2384 }
2385
2386 if (CloseTransientFile(fd) != 0)
2387 ereport(LOG,
2389 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2390
2391 *buffer_size = nread;
2392 return buf;
2393}
2394
/*
 * Locate a query text in the file image previously read into "buffer".
 *
 * query_offset/query_len describe where the text is expected to live within
 * the buffer; buffer_size is the number of valid bytes in the buffer.
 *
 * We validate the given offset/length, and return NULL if they are bogus or
 * if buffer is NULL.  Otherwise, the result points to a null-terminated
 * string within the buffer (no copy is made).
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/* A negative length can never be valid */
	if (query_len < 0)
		return NULL;

	/*
	 * The text plus its trailing null must fit inside the buffer, that is,
	 * query_offset + query_len < buffer_size.  Test this by subtraction
	 * rather than addition: Size is unsigned, so adding a bogus, very large
	 * offset could wrap around and slip past the check.
	 */
	if (query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2418
2419/*
2420 * Do we need to garbage-collect the external query text file?
2421 *
2422 * Caller should hold at least a shared lock on pgss->lock.
2423 */
2424static bool
2426{
2427 Size extent;
2428
2429 /* Read shared extent pointer */
2431 extent = pgss->extent;
2433
2434 /*
2435 * Don't proceed if file does not exceed 512 bytes per possible entry.
2436 *
2437 * Here and in the next test, 32-bit machines have overflow hazards if
2438 * pgss_max and/or mean_query_len are large. Force the multiplications
2439 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2440 */
2441 if ((uint64) extent < (uint64) 512 * pgss_max)
2442 return false;
2443
2444 /*
2445 * Don't proceed if file is less than about 50% bloat. Nothing can or
2446 * should be done in the event of unusually large query texts accounting
2447 * for file's large size. We go to the trouble of maintaining the mean
2448 * query length in order to prevent garbage collection from thrashing
2449 * uselessly.
2450 */
2451 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2452 return false;
2453
2454 return true;
2455}
2456
2457/*
2458 * Garbage-collect orphaned query texts in external file.
2459 *
2460 * This won't be called often in the typical case, since it's likely that
2461 * there won't be too much churn, and besides, a similar compaction process
2462 * occurs when serializing to disk at shutdown or as part of resetting.
2463 * Despite this, it seems prudent to plan for the edge case where the file
2464 * becomes unreasonably large, with no other method of compaction likely to
2465 * occur in the foreseeable future.
2466 *
2467 * The caller must hold an exclusive lock on pgss->lock.
2468 *
2469 * At the first sign of trouble we unlink the query text file to get a clean
2470 * slate (although existing statistics are retained), rather than risk
2471 * thrashing by allowing the same problem case to recur indefinitely.
2472 */
2473static void
2475{
2476 char *qbuffer;
2478 FILE *qfile = NULL;
2480 pgssEntry *entry;
2481 Size extent;
2482 int nentries;
2483
2484 /*
2485 * When called from pgss_store, some other session might have proceeded
2486 * with garbage collection in the no-lock-held interim of lock strength
2487 * escalation. Check once more that this is actually necessary.
2488 */
2489 if (!need_gc_qtexts())
2490 return;
2491
2492 /*
2493 * Load the old texts file. If we fail (out of memory, for instance),
2494 * invalidate query texts. Hopefully this is rare. It might seem better
2495 * to leave things alone on an OOM failure, but the problem is that the
2496 * file is only going to get bigger; hoping for a future non-OOM result is
2497 * risky and can easily lead to complete denial of service.
2498 */
2500 if (qbuffer == NULL)
2501 goto gc_fail;
2502
2503 /*
2504 * We overwrite the query texts file in place, so as to reduce the risk of
2505 * an out-of-disk-space failure. Since the file is guaranteed not to get
2506 * larger, this should always work on traditional filesystems; though we
2507 * could still lose on copy-on-write filesystems.
2508 */
2510 if (qfile == NULL)
2511 {
2512 ereport(LOG,
2514 errmsg("could not write file \"%s\": %m",
2515 PGSS_TEXT_FILE)));
2516 goto gc_fail;
2517 }
2518
2519 extent = 0;
2520 nentries = 0;
2521
2523 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2524 {
2525 int query_len = entry->query_len;
2526 char *qry = qtext_fetch(entry->query_offset,
2527 query_len,
2528 qbuffer,
2529 qbuffer_size);
2530
2531 if (qry == NULL)
2532 {
2533 /* Trouble ... drop the text */
2534 entry->query_offset = 0;
2535 entry->query_len = -1;
2536 /* entry will not be counted in mean query length computation */
2537 continue;
2538 }
2539
2540 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2541 {
2542 ereport(LOG,
2544 errmsg("could not write file \"%s\": %m",
2545 PGSS_TEXT_FILE)));
2547 goto gc_fail;
2548 }
2549
2550 entry->query_offset = extent;
2551 extent += query_len + 1;
2552 nentries++;
2553 }
2554
2555 /*
2556 * Truncate away any now-unused space. If this fails for some odd reason,
2557 * we log it, but there's no need to fail.
2558 */
2559 if (ftruncate(fileno(qfile), extent) != 0)
2560 ereport(LOG,
2562 errmsg("could not truncate file \"%s\": %m",
2563 PGSS_TEXT_FILE)));
2564
2565 if (FreeFile(qfile))
2566 {
2567 ereport(LOG,
2569 errmsg("could not write file \"%s\": %m",
2570 PGSS_TEXT_FILE)));
2571 qfile = NULL;
2572 goto gc_fail;
2573 }
2574
2575 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2576 pgss->extent, extent);
2577
2578 /* Reset the shared extent pointer */
2579 pgss->extent = extent;
2580
2581 /*
2582 * Also update the mean query length, to be sure that need_gc_qtexts()
2583 * won't still think we have a problem.
2584 */
2585 if (nentries > 0)
2586 pgss->mean_query_len = extent / nentries;
2587 else
2589
2590 pfree(qbuffer);
2591
2592 /*
2593 * OK, count a garbage collection cycle. (Note: even though we have
2594 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2595 * other processes may examine gc_count while holding only the mutex.
2596 * Also, we have to advance the count *after* we've rewritten the file,
2597 * else other processes might not realize they read a stale file.)
2598 */
2600
2601 return;
2602
2603gc_fail:
2604 /* clean up resources */
2605 if (qfile)
2606 FreeFile(qfile);
2607 if (qbuffer)
2608 pfree(qbuffer);
2609
2610 /*
2611 * Since the contents of the external file are now uncertain, mark all
2612 * hashtable entries as having invalid texts.
2613 */
2615 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2616 {
2617 entry->query_offset = 0;
2618 entry->query_len = -1;
2619 }
2620
2621 /*
2622 * Destroy the query text file and create a new, empty one
2623 */
2626 if (qfile == NULL)
2627 ereport(LOG,
2629 errmsg("could not recreate file \"%s\": %m",
2630 PGSS_TEXT_FILE)));
2631 else
2632 FreeFile(qfile);
2633
2634 /* Reset the shared extent pointer */
2635 pgss->extent = 0;
2636
2637 /* Reset mean_query_len to match the new state */
2639
2640 /*
2641 * Bump the GC count even though we failed.
2642 *
2643 * This is needed to make concurrent readers of file without any lock on
2644 * pgss->lock notice existence of new version of file. Once readers
2645 * subsequently observe a change in GC count with pgss->lock held, that
2646 * forces a safe reopen of file. Writers also require that we bump here,
2647 * of course. (As required by locking protocol, readers and writers don't
2648 * trust earlier file contents until gc_count is found unchanged after
2649 * pgss->lock acquired in shared or exclusive mode respectively.)
2650 */
2652}
2653
/*
 * Reset one hashtable entry "e"; does nothing if "e" is NULL.
 *
 * If minmax_only is set, only the entry's min/max times are zeroed and its
 * minmax_stats_since is set to stats_reset.  Otherwise the entry is removed
 * from pgss_hash and num_remove is incremented.
 *
 * The expansion uses minmax_only, stats_reset and num_remove from the
 * calling scope.  It is wrapped in do { ... } while (0) so that it behaves
 * as a single statement, including in an unbraced if/else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2672
2673/*
2674 * Reset entries corresponding to parameters passed.
2675 */
2676static TimestampTz
2677entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2678{
2680 pgssEntry *entry;
2681 FILE *qfile;
2682 int64 num_entries;
2683 int64 num_remove = 0;
2684 pgssHashKey key;
2685 TimestampTz stats_reset;
2686
2687 if (!pgss || !pgss_hash)
2688 ereport(ERROR,
2690 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2691
2693 num_entries = hash_get_num_entries(pgss_hash);
2694
2695 stats_reset = GetCurrentTimestamp();
2696
2697 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2698 {
2699 /* If all the parameters are available, use the fast path. */
2700 memset(&key, 0, sizeof(pgssHashKey));
2701 key.userid = userid;
2702 key.dbid = dbid;
2703 key.queryid = queryid;
2704
2705 /*
2706 * Reset the entry if it exists, starting with the non-top-level
2707 * entry.
2708 */
2709 key.toplevel = false;
2710 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2711
2712 SINGLE_ENTRY_RESET(entry);
2713
2714 /* Also reset the top-level entry if it exists. */
2715 key.toplevel = true;
2716 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2717
2718 SINGLE_ENTRY_RESET(entry);
2719 }
2720 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2721 {
2722 /* Reset entries corresponding to valid parameters. */
2724 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2725 {
2726 if ((!userid || entry->key.userid == userid) &&
2727 (!dbid || entry->key.dbid == dbid) &&
2728 (!queryid || entry->key.queryid == queryid))
2729 {
2730 SINGLE_ENTRY_RESET(entry);
2731 }
2732 }
2733 }
2734 else
2735 {
2736 /* Reset all entries. */
2738 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2739 {
2740 SINGLE_ENTRY_RESET(entry);
2741 }
2742 }
2743
2744 /* All entries are removed? */
2745 if (num_entries != num_remove)
2746 goto release_lock;
2747
2748 /*
2749 * Reset global statistics for pg_stat_statements since all entries are
2750 * removed.
2751 */
2753 pgss->stats.dealloc = 0;
2754 pgss->stats.stats_reset = stats_reset;
2756
2757 /*
2758 * Write new empty query file, perhaps even creating a new one to recover
2759 * if the file was missing.
2760 */
2762 if (qfile == NULL)
2763 {
2764 ereport(LOG,
2766 errmsg("could not create file \"%s\": %m",
2767 PGSS_TEXT_FILE)));
2768 goto done;
2769 }
2770
2771 /* If ftruncate fails, log it, but it's not a fatal problem */
2772 if (ftruncate(fileno(qfile), 0) != 0)
2773 ereport(LOG,
2775 errmsg("could not truncate file \"%s\": %m",
2776 PGSS_TEXT_FILE)));
2777
2778 FreeFile(qfile);
2779
2780done:
2781 pgss->extent = 0;
2782 /* This counts as a query text garbage collection for our purposes */
2784
2787
2788 return stats_reset;
2789}
2790
2791/*
2792 * Generate a normalized version of the query string that will be used to
2793 * represent all similar queries.
2794 *
2795 * Note that the normalized representation may well vary depending on
2796 * just which "equivalent" query is used to create the hashtable entry.
2797 * We assume this is OK.
2798 *
2799 * If query_loc > 0, then "query" has been advanced by that much compared to
2800 * the original string start, so we need to translate the provided locations
2801 * to compensate. (This lets us avoid re-scanning statements before the one
2802 * of interest, so it's worth doing.)
2803 *
2804 * *query_len_p contains the input string length, and is updated with
2805 * the result string length on exit. The resulting string might be longer
2806 * or shorter depending on what happens with replacement of constants.
2807 *
2808 * Returns a palloc'd string.
2809 */
2810static char *
2812 int query_loc, int *query_len_p)
2813{
2814 char *norm_query;
2815 int query_len = *query_len_p;
2816 int norm_query_buflen, /* Space allowed for norm_query */
2817 len_to_wrt, /* Length (in bytes) to write */
2818 quer_loc = 0, /* Source query byte location */
2819 n_quer_loc = 0, /* Normalized query byte location */
2820 last_off = 0, /* Offset from start for previous tok */
2821 last_tok_len = 0; /* Length (in bytes) of that tok */
2822 int num_constants_replaced = 0;
2824
2825 /*
2826 * Determine constants' lengths (core system only gives us locations), and
2827 * return a sorted copy of jstate's LocationLen data with lengths filled
2828 * in.
2829 */
2831
2832 /*
2833 * Allow for $n symbols to be longer than the constants they replace.
2834 * Constants must take at least one byte in text form, while a $n symbol
2835 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2836 * could refine that limit based on the max value of n for the current
2837 * query, but it hardly seems worth any extra effort to do so.
2838 */
2839 norm_query_buflen = query_len + jstate->clocations_count * 10;
2840
2841 /* Allocate result buffer */
2843
2844 for (int i = 0; i < jstate->clocations_count; i++)
2845 {
2846 int off, /* Offset from start for cur tok */
2847 tok_len; /* Length (in bytes) of that tok */
2848
2849 /*
2850 * If we have an external param at this location, but no lists are
2851 * being squashed across the query, then we skip here; this will make
2852 * us print the characters found in the original query that represent
2853 * the parameter in the next iteration (or after the loop is done),
2854 * which is a bit odd but seems to work okay in most cases.
2855 */
2856 if (locs[i].extern_param && !jstate->has_squashed_lists)
2857 continue;
2858
2859 off = locs[i].location;
2860
2861 /* Adjust recorded location if we're dealing with partial string */
2862 off -= query_loc;
2863
2864 tok_len = locs[i].length;
2865
2866 if (tok_len < 0)
2867 continue; /* ignore any duplicates */
2868
2869 /* Copy next chunk (what precedes the next constant) */
2870 len_to_wrt = off - last_off;
2872 Assert(len_to_wrt >= 0);
2875
2876 /*
2877 * And insert a param symbol in place of the constant token; and, if
2878 * we have a squashable list, insert a placeholder comment starting
2879 * from the list's second value.
2880 */
2882 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2883 locs[i].squashed ? " /*, ... */" : "");
2885
2886 /* move forward */
2887 quer_loc = off + tok_len;
2888 last_off = off;
2890 }
2891
2892 /* Clean up, if needed */
2893 if (locs)
2894 pfree(locs);
2895
2896 /*
2897 * We've copied up until the last ignorable constant. Copy over the
2898 * remaining bytes of the original query string.
2899 */
2900 len_to_wrt = query_len - quer_loc;
2901
2902 Assert(len_to_wrt >= 0);
2905
2907 norm_query[n_quer_loc] = '\0';
2908
2910 return norm_query;
2911}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5314
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, const JumbleState *jstate)
Definition analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
static Datum values[MAXATTR]
Definition bootstrap.c:190
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define INT64CONST(x)
Definition c.h:630
#define Min(x, y)
Definition c.h:1091
#define PG_BINARY_R
Definition c.h:1376
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
#define PG_BINARY
Definition c.h:1374
#define UINT64_FORMAT
Definition c.h:635
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define PG_BINARY_W
Definition c.h:1377
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1444
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1273
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:374
#define PG_END_TRY(...)
Definition elog.h:399
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define PG_FINALLY(...)
Definition elog.h:391
#define ereport(elevel,...)
Definition elog.h:152
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:73
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:72
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:70
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:143
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:71
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:318
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:486
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:426
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition executor.h:87
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition executor.h:81
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition executor.h:77
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition executor.h:91
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
int CloseTransientFile(int fd)
Definition fd.c:2855
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define MCXT_ALLOC_HUGE
Definition fe_memutils.h:28
#define MCXT_ALLOC_NO_OOM
Definition fe_memutils.h:29
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_MODULE_MAGIC_EXT(...)
Definition fmgr.h:540
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:122
Oid MyDatabaseId
Definition globals.c:96
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5152
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5049
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5186
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5073
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_BLOBS
Definition hsearch.h:92
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:426
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:436
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:451
WalUsage pgWalUsage
Definition instrument.c:27
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:367
BufferUsage pgBufferUsage
Definition instrument.c:25
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:327
@ INSTRUMENT_ALL
Definition instrument.h:68
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
int LWLockNewTrancheId(const char *name)
Definition lwlock.c:562
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
int GetDatabaseEncoding(void)
Definition mbutils.c:1388
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
void * palloc_extended(Size size, int flags)
Definition mcxt.c:1439
#define MaxAllocHugeSize
Definition memutils.h:45
Oid GetUserId(void)
Definition miscinit.c:470
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1788
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static char * errmsg
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:74
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * generate_normalized_query(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
static const ShmemCallbacks pgss_shmem_callbacks
#define PG_STAT_STATEMENTS_COLS_V1_3
#define PGSS_NUMKIND
static bool pgss_save
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
static void pgss_shmem_init(void *arg)
static void pgss_shmem_request(void *arg)
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:134
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:333
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.h:28
PlannedStmtOrigin
Definition plannodes.h:36
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:37
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:41
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:40
#define sprintf
Definition port.h:262
#define pg_pwrite
Definition port.h:248
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
#define Int64GetDatumFast(X)
Definition postgres.h:525
#define Float8GetDatumFast(X)
Definition postgres.h:527
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:370
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
LocationLen * ComputeConstantLengths(const JumbleState *jstate, const char *query, int query_loc)
ScanDirection
Definition sdir.h:25
void RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
Definition shmem.c:874
#define ShmemRequestHash(...)
Definition shmem.h:179
#define ShmemRequestStruct(...)
Definition shmem.h:176
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition execnodes.h:782
struct JitContext * es_jit
Definition execnodes.h:800
uint64 es_total_processed
Definition execnodes.h:752
int es_parallel_workers_launched
Definition execnodes.h:784
WalUsage walusage
Definition instrument.h:90
instr_time total
Definition instrument.h:88
BufferUsage bufusage
Definition instrument.h:89
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:214
int64 queryId
Definition plannodes.h:69
ParseLoc stmt_len
Definition plannodes.h:171
PlannedStmtOrigin planOrigin
Definition plannodes.h:75
ParseLoc stmt_location
Definition plannodes.h:169
Node * utilityStmt
Definition plannodes.h:153
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:50
PlannedStmt * plannedstmt
Definition execdesc.h:37
int query_instr_options
Definition execdesc.h:45
struct Instrumentation * query_instr
Definition execdesc.h:57
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:258
ShmemRequestCallback request_fn
Definition shmem.h:133
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
Definition guc.h:174
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
LWLock lock
Definition lwlock.h:70
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:548
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:72
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.h:71
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
const char * name
#define fstat
Definition win32_port.h:73