PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_stat_statements.c
4 * Track statement planning and execution times as well as resource
5 * usage across a whole database cluster.
6 *
7 * Execution costs are totaled for each distinct source query, and kept in
8 * a shared hashtable. (We track only as many distinct queries as will fit
9 * in the designated amount of shared memory.)
10 *
11 * Starting in Postgres 9.2, this module normalized query entries. As of
12 * Postgres 14, the normalization is done by the core if compute_query_id is
13 * enabled, or optionally by third-party modules.
14 *
15 * To facilitate presenting entries to users, we create "representative" query
16 * strings in which constants are replaced with parameter symbols ($n), to
17 * make it clearer what a normalized entry can represent. To save on shared
18 * memory, and to avoid having to truncate oversized query strings, we store
19 * these strings in a temporary external query-texts file. Offsets into this
20 * file are kept in shared memory.
21 *
22 * Note about locking issues: to create or delete an entry in the shared
23 * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 * in an entry except the counters requires the same. To look up an entry,
25 * one must hold the lock shared. To read or update the counters within
26 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 * disappear!) and also take the entry's mutex spinlock.
28 * The shared state variable pgss->extent (the next free spot in the external
29 * query-text file) should be accessed only while holding either the
30 * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 * allow reserving file space while holding only shared lock on pgss->lock.
32 * Rewriting the entire external query-text file, eg for garbage collection,
33 * requires holding pgss->lock exclusively; this allows individual entries
34 * in the file to be read or written while holding only shared lock.
35 *
36 *
37 * Copyright (c) 2008-2026, PostgreSQL Global Development Group
38 *
39 * IDENTIFICATION
40 * contrib/pg_stat_statements/pg_stat_statements.c
41 *
42 *-------------------------------------------------------------------------
43 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/htup_details.h"
51#include "access/parallel.h"
52#include "catalog/pg_authid.h"
53#include "executor/instrument.h"
54#include "funcapi.h"
55#include "jit/jit.h"
56#include "mb/pg_wchar.h"
57#include "miscadmin.h"
58#include "nodes/queryjumble.h"
59#include "optimizer/planner.h"
60#include "parser/analyze.h"
61#include "pgstat.h"
62#include "storage/fd.h"
63#include "storage/ipc.h"
64#include "storage/lwlock.h"
65#include "storage/shmem.h"
66#include "storage/spin.h"
67#include "tcop/utility.h"
68#include "utils/acl.h"
69#include "utils/builtins.h"
70#include "utils/memutils.h"
71#include "utils/timestamp.h"
72#include "utils/tuplestore.h"
73
75 .name = "pg_stat_statements",
76 .version = PG_VERSION
77);
78
79/* Location of permanent stats file (valid when database is shut down) */
80#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81
82/*
83 * Location of external query text file.
84 */
85#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86
87/* Magic number identifying the stats file format */
88static const uint32 PGSS_FILE_HEADER = 0x20250731;
89
90/* PostgreSQL major version number, changes in which invalidate all entries */
92
/*
 * Usage-accounting constants.
 *
 * USAGE_EXEC() currently ignores its argument and yields a fixed amount
 * per call.
 *
 * XXX: Should USAGE_EXEC reflect execution time and/or buffer usage?
 */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * An entry is "sticky" when it has recorded neither a planning call nor an
 * execution call, i.e. both slots of its calls[] array sum to zero.
 *
 * "c" is a Counters-like struct value.  The argument is parenthesized so
 * that any expression yielding such a struct (for example a dereferenced
 * pointer) expands correctly, not just plain lvalues like "entry->counters".
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102
103/*
104 * Extension version number, for supporting older extension versions' objects
105 */
119
120typedef enum pgssStoreKind
121{
123
124 /*
125 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
126 * reference the underlying values in the arrays in the Counters struct,
127 * and this order is required in pg_stat_statements_internal().
128 */
132
133#define PGSS_NUMKIND (PGSS_EXEC + 1)
134
135/*
136 * Hashtable key that defines the identity of a hashtable entry. We separate
137 * queries by user and by database even if they are otherwise identical.
138 *
139 * If you add a new key to this struct, make sure to teach pgss_store() to
140 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
141 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
142 */
143typedef struct pgssHashKey
144{
145 Oid userid; /* user OID */
146 Oid dbid; /* database OID */
147 int64 queryid; /* query identifier */
148 bool toplevel; /* query executed at top level */
150
151/*
152 * The actual stats counters kept within pgssEntry.
153 */
154typedef struct Counters
155{
156 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
157 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
158 * in msec */
159 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
160 * msec since min/max reset */
161 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
162 * msec since min/max reset */
163 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
164 * msec */
165 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
166 * planning/execution time in msec */
167 int64 rows; /* total # of retrieved or affected rows */
168 int64 shared_blks_hit; /* # of shared buffer hits */
169 int64 shared_blks_read; /* # of shared disk blocks read */
170 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
171 int64 shared_blks_written; /* # of shared disk blocks written */
172 int64 local_blks_hit; /* # of local buffer hits */
173 int64 local_blks_read; /* # of local disk blocks read */
174 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
175 int64 local_blks_written; /* # of local disk blocks written */
176 int64 temp_blks_read; /* # of temp blocks read */
177 int64 temp_blks_written; /* # of temp blocks written */
178 double shared_blk_read_time; /* time spent reading shared blocks,
179 * in msec */
180 double shared_blk_write_time; /* time spent writing shared blocks,
181 * in msec */
182 double local_blk_read_time; /* time spent reading local blocks, in
183 * msec */
184 double local_blk_write_time; /* time spent writing local blocks, in
185 * msec */
186 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
187 double temp_blk_write_time; /* time spent writing temp blocks, in
188 * msec */
189 double usage; /* usage factor */
190 int64 wal_records; /* # of WAL records generated */
191 int64 wal_fpi; /* # of WAL full page images generated */
192 uint64 wal_bytes; /* total amount of WAL generated in bytes */
193 int64 wal_buffers_full; /* # of times the WAL buffers became full */
194 int64 jit_functions; /* total number of JIT functions emitted */
195 double jit_generation_time; /* total time to generate jit code */
196 int64 jit_inlining_count; /* number of times inlining time has been
197 * > 0 */
198 double jit_deform_time; /* total time to deform tuples in jit code */
199 int64 jit_deform_count; /* number of times deform time has been >
200 * 0 */
201
202 double jit_inlining_time; /* total time to inline jit code */
203 int64 jit_optimization_count; /* number of times optimization time
204 * has been > 0 */
205 double jit_optimization_time; /* total time to optimize jit code */
206 int64 jit_emission_count; /* number of times emission time has been
207 * > 0 */
208 double jit_emission_time; /* total time to emit jit code */
209 int64 parallel_workers_to_launch; /* # of parallel workers planned
210 * to be launched */
211 int64 parallel_workers_launched; /* # of parallel workers actually
212 * launched */
213 int64 generic_plan_calls; /* number of calls using a generic plan */
214 int64 custom_plan_calls; /* number of calls using a custom plan */
216
217/*
218 * Global statistics for pg_stat_statements
219 */
220typedef struct pgssGlobalStats
221{
222 int64 dealloc; /* # of times entries were deallocated */
223 TimestampTz stats_reset; /* timestamp with all stats reset */
225
226/*
227 * Statistics per statement
228 *
229 * Note: in event of a failure in garbage collection of the query text file,
230 * we reset query_offset to zero and query_len to -1. This will be seen as
231 * an invalid state by qtext_fetch().
232 */
233typedef struct pgssEntry
234{
235 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
236 Counters counters; /* the statistics for this query */
237 Size query_offset; /* query text offset in external file */
238 int query_len; /* # of valid bytes in query string, or -1 */
239 int encoding; /* query text encoding */
240 TimestampTz stats_since; /* timestamp of entry allocation */
241 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
242 slock_t mutex; /* protects the counters only */
244
245/*
246 * Global shared state
247 */
248typedef struct pgssSharedState
249{
250 LWLockPadded lock; /* protects hashtable search/modification */
251 double cur_median_usage; /* current median usage in hashtable */
252 Size mean_query_len; /* current mean entry text length */
253 slock_t mutex; /* protects following fields only: */
254 Size extent; /* current extent of query file */
255 int n_writers; /* number of active writers to query file */
256 int gc_count; /* query file garbage collection cycle count */
257 pgssGlobalStats stats; /* global statistics for pgss */
259
260/* Links to shared memory state */
263
264static void pgss_shmem_request(void *arg);
265static void pgss_shmem_init(void *arg);
266
271
272/*---- Local variables ----*/
273
274/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
275static int nesting_level = 0;
276
277/* Saved hook values */
285
286/*---- GUC variables ----*/
287
288typedef enum
289{
290 PGSS_TRACK_NONE, /* track no statements */
291 PGSS_TRACK_TOP, /* only top level statements */
292 PGSS_TRACK_ALL, /* all statements, including nested ones */
294
295static const struct config_enum_entry track_options[] =
296{
297 {"none", PGSS_TRACK_NONE, false},
298 {"top", PGSS_TRACK_TOP, false},
299 {"all", PGSS_TRACK_ALL, false},
300 {NULL, 0, false}
301};
302
303static int pgss_max = 5000; /* max # statements to track */
304static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
305static bool pgss_track_utility = true; /* whether to track utility commands */
306static bool pgss_track_planning = false; /* whether to track planning
307 * duration */
308static bool pgss_save = true; /* whether to save stats across shutdown */
309
/*
 * Is tracking enabled at the given nesting level?
 *
 * Never inside a parallel worker.  Otherwise always when pgss_track is
 * PGSS_TRACK_ALL, and only at nesting level zero when it is PGSS_TRACK_TOP.
 * "level" is evaluated at most once, and only in the PGSS_TRACK_TOP case.
 */
#define pgss_enabled(level) \
	(IsParallelWorker() ? false : \
	 (pgss_track == PGSS_TRACK_ALL ? true : \
	  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))

/*
 * Bump the query-text garbage collection cycle counter, holding the
 * pgss->mutex spinlock around the update.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count += 1; \
		SpinLockRelease(&pgss->mutex); \
	} while (0)
321
322/*---- Function declarations ----*/
323
337
338static void pgss_shmem_shutdown(int code, Datum arg);
339static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
340 const JumbleState *jstate);
341static PlannedStmt *pgss_planner(Query *parse,
342 const char *query_string,
343 int cursorOptions,
344 ParamListInfo boundParams,
345 ExplainState *es);
346static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
347static void pgss_ExecutorRun(QueryDesc *queryDesc,
348 ScanDirection direction,
349 uint64 count);
350static void pgss_ExecutorFinish(QueryDesc *queryDesc);
351static void pgss_ExecutorEnd(QueryDesc *queryDesc);
352static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
353 bool readOnlyTree,
354 ProcessUtilityContext context, ParamListInfo params,
355 QueryEnvironment *queryEnv,
356 DestReceiver *dest, QueryCompletion *qc);
357static void pgss_store(const char *query, int64 queryId,
358 int query_location, int query_len,
359 pgssStoreKind kind,
360 double total_time, uint64 rows,
361 const BufferUsage *bufusage,
362 const WalUsage *walusage,
363 const struct JitInstrumentation *jitusage,
364 const JumbleState *jstate,
365 int parallel_workers_to_launch,
366 int parallel_workers_launched,
367 PlannedStmtOrigin planOrigin);
369 pgssVersion api_version,
370 bool showtext);
371static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
372 int encoding, bool sticky);
373static void entry_dealloc(void);
374static bool qtext_store(const char *query, int query_len,
375 Size *query_offset, int *gc_count);
376static char *qtext_load_file(Size *buffer_size);
377static char *qtext_fetch(Size query_offset, int query_len,
378 char *buffer, Size buffer_size);
379static bool need_gc_qtexts(void);
380static void gc_qtexts(void);
381static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
383 const char *query,
384 int query_loc, int *query_len_p);
385
386/*
387 * Module load callback
388 */
389void
391{
392 /*
393 * In order to create our shared memory area, we have to be loaded via
394 * shared_preload_libraries. If not, fall out without hooking into any of
395 * the main system. (We don't throw error here because it seems useful to
396 * allow the pg_stat_statements functions to be created even when the
397 * module isn't active. The functions must protect themselves against
398 * being called then, however.)
399 */
401 return;
402
403 /*
404 * Inform the postmaster that we want to enable query_id calculation if
405 * compute_query_id is set to auto.
406 */
408
409 /*
410 * Define (or redefine) custom GUC variables.
411 */
412 DefineCustomIntVariable("pg_stat_statements.max",
413 "Sets the maximum number of statements tracked by pg_stat_statements.",
414 NULL,
415 &pgss_max,
416 5000,
417 100,
418 INT_MAX / 2,
420 0,
421 NULL,
422 NULL,
423 NULL);
424
425 DefineCustomEnumVariable("pg_stat_statements.track",
426 "Selects which statements are tracked by pg_stat_statements.",
427 NULL,
428 &pgss_track,
431 PGC_SUSET,
432 0,
433 NULL,
434 NULL,
435 NULL);
436
437 DefineCustomBoolVariable("pg_stat_statements.track_utility",
438 "Selects whether utility commands are tracked by pg_stat_statements.",
439 NULL,
441 true,
442 PGC_SUSET,
443 0,
444 NULL,
445 NULL,
446 NULL);
447
448 DefineCustomBoolVariable("pg_stat_statements.track_planning",
449 "Selects whether planning duration is tracked by pg_stat_statements.",
450 NULL,
452 false,
453 PGC_SUSET,
454 0,
455 NULL,
456 NULL,
457 NULL);
458
459 DefineCustomBoolVariable("pg_stat_statements.save",
460 "Save pg_stat_statements statistics across server shutdowns.",
461 NULL,
462 &pgss_save,
463 true,
465 0,
466 NULL,
467 NULL,
468 NULL);
469
470 MarkGUCPrefixReserved("pg_stat_statements");
471
472 /*
473 * Register our shared memory needs.
474 */
476
477 /*
478 * Install hooks.
479 */
494}
495
496/*
497 * shmem request callback: Request shared memory resources.
498 *
499 * This is called at postmaster startup. Note that the shared memory isn't
500 * allocated here yet; this merely registers our needs.
501 *
502 * In EXEC_BACKEND mode, this is also called in each backend, to re-attach to
503 * the shared memory area that was already initialized.
504 */
505static void
507{
508 ShmemRequestHash(.name = "pg_stat_statements hash",
509 .nelems = pgss_max,
510 .hash_info.keysize = sizeof(pgssHashKey),
511 .hash_info.entrysize = sizeof(pgssEntry),
512 .hash_flags = HASH_ELEM | HASH_BLOBS,
513 .ptr = &pgss_hash,
514 );
515 ShmemRequestStruct(.name = "pg_stat_statements",
516 .size = sizeof(pgssSharedState),
517 .ptr = (void **) &pgss,
518 );
519}
520
521/*
522 * shmem init callback: Initialize our shared memory data structures at
523 * postmaster startup.
524 *
525 * Load any pre-existing statistics from file. Also create and load the
526 * query-texts file, which is expected to exist (even if empty) while the
527 * module is enabled.
528 */
529static void
531{
532 int tranche_id;
533 FILE *file = NULL;
534 FILE *qfile = NULL;
535 uint32 header;
536 int32 num;
537 int32 pgver;
538 int32 i;
539 int buffer_size;
540 char *buffer = NULL;
541
542 /*
543 * We already checked that we're loaded from shared_preload_libraries in
544 * _PG_init(), so we should not get here after postmaster startup.
545 */
547
548 /*
549 * Initialize the shmem area with no statistics.
550 */
551 tranche_id = LWLockNewTrancheId("pg_stat_statements");
552 LWLockInitialize(&pgss->lock.lock, tranche_id);
556 pgss->extent = 0;
557 pgss->n_writers = 0;
558 pgss->gc_count = 0;
559 pgss->stats.dealloc = 0;
561
562 /* The hash table must've also been initialized by now */
564
565 /*
566 * Set up a shmem exit hook to dump the statistics to disk on postmaster
567 * (or standalone backend) exit.
568 */
570
571 /*
572 * Load any pre-existing statistics from file.
573 *
574 * Note: we don't bother with locks here, because there should be no other
575 * processes running when this code is reached.
576 */
577
578 /* Unlink query text file possibly left over from crash */
580
581 /* Allocate new query text temp file */
583 if (qfile == NULL)
584 goto write_error;
585
586 /*
587 * If we were told not to load old statistics, we're done. (Note we do
588 * not try to unlink any old dump file in this case. This seems a bit
589 * questionable but it's the historical behavior.)
590 */
591 if (!pgss_save)
592 {
594 return;
595 }
596
597 /*
598 * Attempt to load old statistics from the dump file.
599 */
601 if (file == NULL)
602 {
603 if (errno != ENOENT)
604 goto read_error;
605 /* No existing persisted stats file, so we're done */
607 return;
608 }
609
610 buffer_size = 2048;
611 buffer = (char *) palloc(buffer_size);
612
613 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
614 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
615 fread(&num, sizeof(int32), 1, file) != 1)
616 goto read_error;
617
618 if (header != PGSS_FILE_HEADER ||
620 goto data_error;
621
622 for (i = 0; i < num; i++)
623 {
625 pgssEntry *entry;
626 Size query_offset;
627
628 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
629 goto read_error;
630
631 /* Encoding is the only field we can easily sanity-check */
632 if (!PG_VALID_BE_ENCODING(temp.encoding))
633 goto data_error;
634
635 /* Resize buffer as needed */
636 if (temp.query_len >= buffer_size)
637 {
638 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
639 buffer = repalloc(buffer, buffer_size);
640 }
641
642 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
643 goto read_error;
644
645 /* Should have a trailing null, but let's make sure */
646 buffer[temp.query_len] = '\0';
647
648 /* Skip loading "sticky" entries */
649 if (IS_STICKY(temp.counters))
650 continue;
651
652 /* Store the query text */
653 query_offset = pgss->extent;
654 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
655 goto write_error;
656 pgss->extent += temp.query_len + 1;
657
658 /* make the hashtable entry (discards old entries if too many) */
659 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
660 temp.encoding,
661 false);
662
663 /* copy in the actual stats */
664 entry->counters = temp.counters;
665 entry->stats_since = temp.stats_since;
666 entry->minmax_stats_since = temp.minmax_stats_since;
667 }
668
669 /* Read global statistics for pg_stat_statements */
670 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
671 goto read_error;
672
673 pfree(buffer);
674 FreeFile(file);
676
677 /*
678 * Remove the persisted stats file so it's not included in
679 * backups/replication standbys, etc. A new file will be written on next
680 * shutdown.
681 *
682 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
683 * because we remove that file on startup; it acts inversely to
684 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
685 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
686 * when the server is not running. Leaving the file creates no danger of
687 * a newly restored database having a spurious record of execution costs,
688 * which is what we're really concerned about here.
689 */
691
692 return;
693
695 ereport(LOG,
697 errmsg("could not read file \"%s\": %m",
699 goto fail;
701 ereport(LOG,
703 errmsg("ignoring invalid data in file \"%s\"",
705 goto fail;
707 ereport(LOG,
709 errmsg("could not write file \"%s\": %m",
711fail:
712 if (buffer)
713 pfree(buffer);
714 if (file)
715 FreeFile(file);
716 if (qfile)
718 /* If possible, throw away the bogus file; ignore any error */
720
721 /*
722 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
723 * server is running with pg_stat_statements enabled
724 */
725}
726
727/*
728 * shmem_shutdown hook: Dump statistics into file.
729 *
730 * Note: we don't bother with acquiring lock, because there should be no
731 * other processes running when this is called.
732 */
733static void
735{
736 FILE *file;
737 char *qbuffer = NULL;
738 Size qbuffer_size = 0;
740 int32 num_entries;
741 pgssEntry *entry;
742
743 /* Don't try to dump during a crash. */
744 if (code)
745 return;
746
747 /* Safety check ... shouldn't get here unless shmem is set up. */
748 if (!pgss || !pgss_hash)
749 return;
750
751 /* Don't dump if told not to. */
752 if (!pgss_save)
753 return;
754
756 if (file == NULL)
757 goto error;
758
759 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
760 goto error;
761 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
762 goto error;
763 num_entries = hash_get_num_entries(pgss_hash);
764 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
765 goto error;
766
768 if (qbuffer == NULL)
769 goto error;
770
771 /*
772 * When serializing to disk, we store query texts immediately after their
773 * entry data. Any orphaned query texts are thereby excluded.
774 */
776 while ((entry = hash_seq_search(&hash_seq)) != NULL)
777 {
778 int len = entry->query_len;
779 char *qstr = qtext_fetch(entry->query_offset, len,
781
782 if (qstr == NULL)
783 continue; /* Ignore any entries with bogus texts */
784
785 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
786 fwrite(qstr, 1, len + 1, file) != len + 1)
787 {
788 /* note: we assume hash_seq_term won't change errno */
790 goto error;
791 }
792 }
793
794 /* Dump global statistics for pg_stat_statements */
795 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
796 goto error;
797
798 pfree(qbuffer);
799 qbuffer = NULL;
800
801 if (FreeFile(file))
802 {
803 file = NULL;
804 goto error;
805 }
806
807 /*
808 * Rename file into place, so we atomically replace any old one.
809 */
811
812 /* Unlink query-texts file; it's not needed while shut down */
814
815 return;
816
817error:
818 ereport(LOG,
820 errmsg("could not write file \"%s\": %m",
821 PGSS_DUMP_FILE ".tmp")));
822 if (qbuffer)
823 pfree(qbuffer);
824 if (file)
825 FreeFile(file);
826 unlink(PGSS_DUMP_FILE ".tmp");
828}
829
830/*
831 * Post-parse-analysis hook: mark query with a queryId
832 */
833static void
835{
837 prev_post_parse_analyze_hook(pstate, query, jstate);
838
839 /* Safety check... */
841 return;
842
843 /*
844 * If it's EXECUTE, clear the queryId so that stats will accumulate for
845 * the underlying PREPARE. But don't do this if we're not tracking
846 * utility statements, to avoid messing up another extension that might be
847 * tracking them.
848 */
849 if (query->utilityStmt)
850 {
852 {
853 query->queryId = INT64CONST(0);
854 return;
855 }
856 }
857
858 /*
859 * If query jumbling was able to identify any ignorable constants, we
860 * immediately create a hash table entry for the query, so that we can
861 * record the normalized form of the query string. If there were no such
862 * constants, the normalized string would be the same as the query text
863 * anyway, so there's no need for an early entry.
864 */
865 if (jstate && jstate->clocations_count > 0)
866 pgss_store(pstate->p_sourcetext,
867 query->queryId,
868 query->stmt_location,
869 query->stmt_len,
871 0,
872 0,
873 NULL,
874 NULL,
875 NULL,
876 jstate,
877 0,
878 0,
880}
881
882/*
883 * Planner hook: forward to regular planner, but measure planning time
884 * if needed.
885 */
886static PlannedStmt *
888 const char *query_string,
889 int cursorOptions,
890 ParamListInfo boundParams,
891 ExplainState *es)
892{
894
895 /*
896 * We can't process the query if no query_string is provided, as
897 * pgss_store needs it. We also ignore query without queryid, as it would
898 * be treated as a utility statement, which may not be the case.
899 */
901 && pgss_track_planning && query_string
902 && parse->queryId != INT64CONST(0))
903 {
906 BufferUsage bufusage_start,
907 bufusage;
908 WalUsage walusage_start,
909 walusage;
910
911 /* We need to track buffer usage as the planner can access them. */
912 bufusage_start = pgBufferUsage;
913
914 /*
915 * Similarly the planner could write some WAL records in some cases
916 * (e.g. setting a hint bit with those being WAL-logged)
917 */
918 walusage_start = pgWalUsage;
920
922 PG_TRY();
923 {
925 result = prev_planner_hook(parse, query_string, cursorOptions,
926 boundParams, es);
927 else
928 result = standard_planner(parse, query_string, cursorOptions,
929 boundParams, es);
930 }
931 PG_FINALLY();
932 {
934 }
935 PG_END_TRY();
936
939
940 /* calc differences of buffer counters. */
941 memset(&bufusage, 0, sizeof(BufferUsage));
942 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943
944 /* calc differences of WAL counters. */
945 memset(&walusage, 0, sizeof(WalUsage));
946 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947
948 pgss_store(query_string,
949 parse->queryId,
950 parse->stmt_location,
951 parse->stmt_len,
952 PGSS_PLAN,
954 0,
955 &bufusage,
956 &walusage,
957 NULL,
958 NULL,
959 0,
960 0,
961 result->planOrigin);
962 }
963 else
964 {
965 /*
966 * Even though we're not tracking plan time for this statement, we
967 * must still increment the nesting level, to ensure that functions
968 * evaluated during planning are not seen as top-level calls.
969 */
971 PG_TRY();
972 {
974 result = prev_planner_hook(parse, query_string, cursorOptions,
975 boundParams, es);
976 else
977 result = standard_planner(parse, query_string, cursorOptions,
978 boundParams, es);
979 }
980 PG_FINALLY();
981 {
983 }
984 PG_END_TRY();
985 }
986
987 return result;
988}
989
990/*
991 * ExecutorStart hook: start up tracking if needed
992 */
993static void
994pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
995{
996 /*
997 * If query has queryId zero, don't track it. This prevents double
998 * counting of optimizable statements that are directly contained in
999 * utility statements.
1000 */
1001 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1002 {
1003 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1004 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1005 }
1006
1008 prev_ExecutorStart(queryDesc, eflags);
1009 else
1010 standard_ExecutorStart(queryDesc, eflags);
1011}
1012
1013/*
1014 * ExecutorRun hook: all we need do is track nesting depth
1015 */
1016static void
1017pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1018{
1019 nesting_level++;
1020 PG_TRY();
1021 {
1022 if (prev_ExecutorRun)
1023 prev_ExecutorRun(queryDesc, direction, count);
1024 else
1025 standard_ExecutorRun(queryDesc, direction, count);
1026 }
1027 PG_FINALLY();
1028 {
1029 nesting_level--;
1030 }
1031 PG_END_TRY();
1032}
1033
1034/*
1035 * ExecutorFinish hook: all we need do is track nesting depth
1036 */
1037static void
1039{
1040 nesting_level++;
1041 PG_TRY();
1042 {
1044 prev_ExecutorFinish(queryDesc);
1045 else
1046 standard_ExecutorFinish(queryDesc);
1047 }
1048 PG_FINALLY();
1049 {
1050 nesting_level--;
1051 }
1052 PG_END_TRY();
1053}
1054
1055/*
1056 * ExecutorEnd hook: store results if needed
1057 */
1058static void
1060{
1061 int64 queryId = queryDesc->plannedstmt->queryId;
1062
1063 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1065 {
1066 pgss_store(queryDesc->sourceText,
1067 queryId,
1068 queryDesc->plannedstmt->stmt_location,
1069 queryDesc->plannedstmt->stmt_len,
1070 PGSS_EXEC,
1072 queryDesc->estate->es_total_processed,
1073 &queryDesc->query_instr->bufusage,
1074 &queryDesc->query_instr->walusage,
1075 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1076 NULL,
1079 queryDesc->plannedstmt->planOrigin);
1080 }
1081
1082 if (prev_ExecutorEnd)
1083 prev_ExecutorEnd(queryDesc);
1084 else
1085 standard_ExecutorEnd(queryDesc);
1086}
1087
1088/*
1089 * ProcessUtility hook
1090 */
1091static void
1092pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1093 bool readOnlyTree,
1094 ProcessUtilityContext context,
1095 ParamListInfo params, QueryEnvironment *queryEnv,
1096 DestReceiver *dest, QueryCompletion *qc)
1097{
1098 Node *parsetree = pstmt->utilityStmt;
1099 int64 saved_queryId = pstmt->queryId;
1101 int saved_stmt_len = pstmt->stmt_len;
1104
1105 /*
1106 * Force utility statements to get queryId zero. We do this even in cases
1107 * where the statement contains an optimizable statement for which a
1108 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1109 * cases, runtime control will first go through ProcessUtility and then
1110 * the executor, and we don't want the executor hooks to do anything,
1111 * since we are already measuring the statement's costs at the utility
1112 * level.
1113 *
1114 * Note that this is only done if pg_stat_statements is enabled and
1115 * configured to track utility statements, in the unlikely possibility
1116 * that user configured another extension to handle utility statements
1117 * only.
1118 */
1119 if (enabled)
1120 pstmt->queryId = INT64CONST(0);
1121
1122 /*
1123 * If it's an EXECUTE statement, we don't track it and don't increment the
1124 * nesting level. This allows the cycles to be charged to the underlying
1125 * PREPARE instead (by the Executor hooks), which is much more useful.
1126 *
1127 * We also don't track execution of PREPARE. If we did, we would get one
1128 * hash table entry for the PREPARE (with hash calculated from the query
1129 * string), and then a different one with the same query string (but hash
1130 * calculated from the query tree) would be used to accumulate costs of
1131 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1132 * actually run the planner (only parse+rewrite), its costs are generally
1133 * pretty negligible and it seems okay to just ignore it.
1134 */
1135 if (enabled &&
1136 !IsA(parsetree, ExecuteStmt) &&
1137 !IsA(parsetree, PrepareStmt))
1138 {
1141 uint64 rows;
1142 BufferUsage bufusage_start,
1143 bufusage;
1144 WalUsage walusage_start,
1145 walusage;
1146
1147 bufusage_start = pgBufferUsage;
1148 walusage_start = pgWalUsage;
1150
1151 nesting_level++;
1152 PG_TRY();
1153 {
1155 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1156 context, params, queryEnv,
1157 dest, qc);
1158 else
1159 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1160 context, params, queryEnv,
1161 dest, qc);
1162 }
1163 PG_FINALLY();
1164 {
1165 nesting_level--;
1166 }
1167 PG_END_TRY();
1168
1169 /*
1170 * CAUTION: do not access the *pstmt data structure again below here.
1171 * If it was a ROLLBACK or similar, that data structure may have been
1172 * freed. We must copy everything we still need into local variables,
1173 * which we did above.
1174 *
1175 * For the same reason, we can't risk restoring pstmt->queryId to its
1176 * former value, which'd otherwise be a good idea.
1177 */
1178 pstmt = NULL;
1179
1182
1183 /*
1184 * Track the total number of rows retrieved or affected by the utility
1185 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1186 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1187 */
1188 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1189 qc->commandTag == CMDTAG_FETCH ||
1190 qc->commandTag == CMDTAG_SELECT ||
1192 qc->nprocessed : 0;
1193
1194 /* calc differences of buffer counters. */
1195 memset(&bufusage, 0, sizeof(BufferUsage));
1196 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1197
1198 /* calc differences of WAL counters. */
1199 memset(&walusage, 0, sizeof(WalUsage));
1200 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1201
1202 pgss_store(queryString,
1206 PGSS_EXEC,
1208 rows,
1209 &bufusage,
1210 &walusage,
1211 NULL,
1212 NULL,
1213 0,
1214 0,
1216 }
1217 else
1218 {
1219 /*
1220 * Even though we're not tracking execution time for this statement,
1221 * we must still increment the nesting level, to ensure that functions
1222 * evaluated within it are not seen as top-level calls. But don't do
1223 * so for EXECUTE; that way, when control reaches pgss_planner or
1224 * pgss_ExecutorStart, we will treat the costs as top-level if
1225 * appropriate. Likewise, don't bump for PREPARE, so that parse
1226 * analysis will treat the statement as top-level if appropriate.
1227 *
1228 * To be absolutely certain we don't mess up the nesting level,
1229 * evaluate the bump_level condition just once.
1230 */
1231 bool bump_level =
1232 !IsA(parsetree, ExecuteStmt) &&
1233 !IsA(parsetree, PrepareStmt);
1234
1235 if (bump_level)
1236 nesting_level++;
1237 PG_TRY();
1238 {
1240 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1241 context, params, queryEnv,
1242 dest, qc);
1243 else
1244 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1245 context, params, queryEnv,
1246 dest, qc);
1247 }
1248 PG_FINALLY();
1249 {
1250 if (bump_level)
1251 nesting_level--;
1252 }
1253 PG_END_TRY();
1254 }
1255}
1256
1257/*
1258 * Store some statistics for a statement.
1259 *
1260 * If jstate is not NULL then we're trying to create an entry for which
1261 * we have no statistics as yet; we just want to record the normalized
1262 * query string. total_time, rows, bufusage and walusage are ignored in this
1263 * case.
1264 *
1265 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1266 * for the arrays in the Counters field.
1267 */
1268static void
1269pgss_store(const char *query, int64 queryId,
1270 int query_location, int query_len,
1271 pgssStoreKind kind,
1272 double total_time, uint64 rows,
1273 const BufferUsage *bufusage,
1274 const WalUsage *walusage,
1275 const struct JitInstrumentation *jitusage,
1276 const JumbleState *jstate,
1277 int parallel_workers_to_launch,
1278 int parallel_workers_launched,
1279 PlannedStmtOrigin planOrigin)
1280{
1281 pgssHashKey key;
1282 pgssEntry *entry;
1283 char *norm_query = NULL;
1285
1286 Assert(query != NULL);
1287
1288 /* Safety check... */
1289 if (!pgss || !pgss_hash)
1290 return;
1291
1292 /*
1293 * Nothing to do if compute_query_id isn't enabled and no other module
1294 * computed a query identifier.
1295 */
1296 if (queryId == INT64CONST(0))
1297 return;
1298
1299 /*
1300 * Confine our attention to the relevant part of the string, if the query
1301 * is a portion of a multi-statement source string, and update query
1302 * location and length if needed.
1303 */
1304 query = CleanQuerytext(query, &query_location, &query_len);
1305
1306 /* Set up key for hashtable search */
1307
1308 /* clear padding */
1309 memset(&key, 0, sizeof(pgssHashKey));
1310
1311 key.userid = GetUserId();
1312 key.dbid = MyDatabaseId;
1313 key.queryid = queryId;
1314 key.toplevel = (nesting_level == 0);
1315
1316 /* Lookup the hash table entry with shared lock. */
1318
1319 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1320
1321 /* Create new entry, if not present */
1322 if (!entry)
1323 {
1324 Size query_offset;
1325 int gc_count;
1326 bool stored;
1327 bool do_gc;
1328
1329 /*
1330 * Create a new, normalized query string if caller asked. We don't
1331 * need to hold the lock while doing this work. (Note: in any case,
1332 * it's possible that someone else creates a duplicate hashtable entry
1333 * in the interval where we don't hold the lock below. That case is
1334 * handled by entry_alloc.)
1335 */
1336 if (jstate)
1337 {
1341 &query_len);
1343 }
1344
1345 /* Append new query text to file with only shared lock held */
1346 stored = qtext_store(norm_query ? norm_query : query, query_len,
1347 &query_offset, &gc_count);
1348
1349 /*
1350 * Determine whether we need to garbage collect external query texts
1351 * while the shared lock is still held. This micro-optimization
1352 * avoids taking the time to decide this while holding exclusive lock.
1353 */
1355
1356 /* Need exclusive lock to make a new hashtable entry - promote */
1359
1360 /*
1361 * A garbage collection may have occurred while we weren't holding the
1362 * lock. In the unlikely event that this happens, the query text we
1363 * stored above will have been garbage collected, so write it again.
1364 * This should be infrequent enough that doing it while holding
1365 * exclusive lock isn't a performance problem.
1366 */
1367 if (!stored || pgss->gc_count != gc_count)
1368 stored = qtext_store(norm_query ? norm_query : query, query_len,
1369 &query_offset, NULL);
1370
1371 /* If we failed to write to the text file, give up */
1372 if (!stored)
1373 goto done;
1374
1375 /* OK to create a new hashtable entry */
1376 entry = entry_alloc(&key, query_offset, query_len, encoding,
1377 jstate != NULL);
1378
1379 /* If needed, perform garbage collection while exclusive lock held */
1380 if (do_gc)
1381 gc_qtexts();
1382 }
1383
1384 /* Increment the counts, except when jstate is not NULL */
1385 if (!jstate)
1386 {
1387 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1388
1389 /*
1390 * Grab the spinlock while updating the counters (see comment about
1391 * locking rules at the head of the file)
1392 */
1393 SpinLockAcquire(&entry->mutex);
1394
1395 /* "Unstick" entry if it was previously sticky */
1396 if (IS_STICKY(entry->counters))
1397 entry->counters.usage = USAGE_INIT;
1398
1399 entry->counters.calls[kind] += 1;
1400 entry->counters.total_time[kind] += total_time;
1401
1402 if (entry->counters.calls[kind] == 1)
1403 {
1404 entry->counters.min_time[kind] = total_time;
1405 entry->counters.max_time[kind] = total_time;
1406 entry->counters.mean_time[kind] = total_time;
1407 }
1408 else
1409 {
1410 /*
1411 * Welford's method for accurately computing variance. See
1412 * <http://www.johndcook.com/blog/standard_deviation/>
1413 */
1414 double old_mean = entry->counters.mean_time[kind];
1415
1416 entry->counters.mean_time[kind] +=
1417 (total_time - old_mean) / entry->counters.calls[kind];
1418 entry->counters.sum_var_time[kind] +=
1419 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1420
1421 /*
1422 * Calculate min and max time. min = 0 and max = 0 means that the
1423 * min/max statistics were reset
1424 */
1425 if (entry->counters.min_time[kind] == 0
1426 && entry->counters.max_time[kind] == 0)
1427 {
1428 entry->counters.min_time[kind] = total_time;
1429 entry->counters.max_time[kind] = total_time;
1430 }
1431 else
1432 {
1433 if (entry->counters.min_time[kind] > total_time)
1434 entry->counters.min_time[kind] = total_time;
1435 if (entry->counters.max_time[kind] < total_time)
1436 entry->counters.max_time[kind] = total_time;
1437 }
1438 }
1439 entry->counters.rows += rows;
1440 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1441 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1444 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1445 entry->counters.local_blks_read += bufusage->local_blks_read;
1448 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1449 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1456 entry->counters.usage += USAGE_EXEC(total_time);
1457 entry->counters.wal_records += walusage->wal_records;
1458 entry->counters.wal_fpi += walusage->wal_fpi;
1459 entry->counters.wal_bytes += walusage->wal_bytes;
1460 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1461 if (jitusage)
1462 {
1463 entry->counters.jit_functions += jitusage->created_functions;
1464 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1465
1466 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1467 entry->counters.jit_deform_count++;
1468 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1469
1470 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1472 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1473
1474 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1476 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1477
1478 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1480 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1481 }
1482
1483 /* parallel worker counters */
1484 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1485 entry->counters.parallel_workers_launched += parallel_workers_launched;
1486
1487 /* plan cache counters */
1488 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1490 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1491 entry->counters.custom_plan_calls++;
1492
1493 SpinLockRelease(&entry->mutex);
1494 }
1495
1496done:
1498
1499 /* We postpone this clean-up until we're out of the lock */
1500 if (norm_query)
1502}
1503
1504/*
1505 * Reset statement statistics corresponding to userid, dbid, and queryid.
1506 */
1507Datum
1509{
1510 Oid userid;
1511 Oid dbid;
1512 int64 queryid;
1513
1514 userid = PG_GETARG_OID(0);
1515 dbid = PG_GETARG_OID(1);
1516 queryid = PG_GETARG_INT64(2);
1517
1518 entry_reset(userid, dbid, queryid, false);
1519
1521}
1522
1523Datum
1525{
1526 Oid userid;
1527 Oid dbid;
1528 int64 queryid;
1529 bool minmax_only;
1530
1531 userid = PG_GETARG_OID(0);
1532 dbid = PG_GETARG_OID(1);
1533 queryid = PG_GETARG_INT64(2);
1535
1536 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1537}
1538
1539/*
1540 * Reset statement statistics.
1541 */
1542Datum
1544{
1545 entry_reset(0, 0, 0, false);
1546
1548}
1549
/*
 * Number of output arguments (columns) for various API versions.
 *
 * pg_stat_statements_internal() uses these both to identify the caller's
 * API version from the result tuple descriptor and to size its output
 * arrays, so PG_STAT_STATEMENTS_COLS must stay equal to the largest value.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49
#define PG_STAT_STATEMENTS_COLS_V1_12	52
#define PG_STAT_STATEMENTS_COLS_V1_13	54
#define PG_STAT_STATEMENTS_COLS			54	/* maximum of above */
1562
1563/*
1564 * Retrieve statement statistics.
1565 *
1566 * The SQL API of this function has changed multiple times, and will likely
1567 * do so again in future. To support the case where a newer version of this
1568 * loadable module is being used with an old SQL declaration of the function,
1569 * we continue to support the older API versions. For 1.2 and later, the
1570 * expected API version is identified by embedding it in the C name of the
1571 * function. Unfortunately we weren't bright enough to do that for 1.1.
1572 */
1573Datum
1575{
1576 bool showtext = PG_GETARG_BOOL(0);
1577
1579
1580 return (Datum) 0;
1581}
1582
1583Datum
1585{
1586 bool showtext = PG_GETARG_BOOL(0);
1587
1589
1590 return (Datum) 0;
1591}
1592
1593Datum
1595{
1596 bool showtext = PG_GETARG_BOOL(0);
1597
1599
1600 return (Datum) 0;
1601}
1602
1603Datum
1605{
1606 bool showtext = PG_GETARG_BOOL(0);
1607
1609
1610 return (Datum) 0;
1611}
1612
1613Datum
1615{
1616 bool showtext = PG_GETARG_BOOL(0);
1617
1619
1620 return (Datum) 0;
1621}
1622
1623Datum
1625{
1626 bool showtext = PG_GETARG_BOOL(0);
1627
1629
1630 return (Datum) 0;
1631}
1632
1633Datum
1635{
1636 bool showtext = PG_GETARG_BOOL(0);
1637
1639
1640 return (Datum) 0;
1641}
1642
1643Datum
1645{
1646 bool showtext = PG_GETARG_BOOL(0);
1647
1649
1650 return (Datum) 0;
1651}
1652
1653/*
1654 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1655 * This can be removed someday, perhaps.
1656 */
1657Datum
1659{
1660 /* If it's really API 1.1, we'll figure that out below */
1662
1663 return (Datum) 0;
1664}
1665
1666/* Common code for all versions of pg_stat_statements() */
1667static void
1669 pgssVersion api_version,
1670 bool showtext)
1671{
1673 Oid userid = GetUserId();
1674 bool is_allowed_role = false;
1675 char *qbuffer = NULL;
1676 Size qbuffer_size = 0;
1677 Size extent = 0;
1678 int gc_count = 0;
1680 pgssEntry *entry;
1681
1682 /*
1683 * Superusers or roles with the privileges of pg_read_all_stats members
1684 * are allowed
1685 */
1687
1688 /* hash table must exist already */
1689 if (!pgss || !pgss_hash)
1690 ereport(ERROR,
1692 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1693
1694 InitMaterializedSRF(fcinfo, 0);
1695
1696 /*
1697 * Check we have the expected number of output arguments. Aside from
1698 * being a good safety check, we need a kluge here to detect API version
1699 * 1.1, which was wedged into the code in an ill-considered way.
1700 */
1701 switch (rsinfo->setDesc->natts)
1702 {
1704 if (api_version != PGSS_V1_0)
1705 elog(ERROR, "incorrect number of output arguments");
1706 break;
1708 /* pg_stat_statements() should have told us 1.0 */
1709 if (api_version != PGSS_V1_0)
1710 elog(ERROR, "incorrect number of output arguments");
1711 api_version = PGSS_V1_1;
1712 break;
1714 if (api_version != PGSS_V1_2)
1715 elog(ERROR, "incorrect number of output arguments");
1716 break;
1718 if (api_version != PGSS_V1_3)
1719 elog(ERROR, "incorrect number of output arguments");
1720 break;
1722 if (api_version != PGSS_V1_8)
1723 elog(ERROR, "incorrect number of output arguments");
1724 break;
1726 if (api_version != PGSS_V1_9)
1727 elog(ERROR, "incorrect number of output arguments");
1728 break;
1730 if (api_version != PGSS_V1_10)
1731 elog(ERROR, "incorrect number of output arguments");
1732 break;
1734 if (api_version != PGSS_V1_11)
1735 elog(ERROR, "incorrect number of output arguments");
1736 break;
1738 if (api_version != PGSS_V1_12)
1739 elog(ERROR, "incorrect number of output arguments");
1740 break;
1742 if (api_version != PGSS_V1_13)
1743 elog(ERROR, "incorrect number of output arguments");
1744 break;
1745 default:
1746 elog(ERROR, "incorrect number of output arguments");
1747 }
1748
1749 /*
1750 * We'd like to load the query text file (if needed) while not holding any
1751 * lock on pgss->lock. In the worst case we'll have to do this again
1752 * after we have the lock, but it's unlikely enough to make this a win
1753 * despite occasional duplicated work. We need to reload if anybody
1754 * writes to the file (either a retail qtext_store(), or a garbage
1755 * collection) between this point and where we've gotten shared lock. If
1756 * a qtext_store is actually in progress when we look, we might as well
1757 * skip the speculative load entirely.
1758 */
1759 if (showtext)
1760 {
1761 int n_writers;
1762
1763 /* Take the mutex so we can examine variables */
1765 extent = pgss->extent;
1766 n_writers = pgss->n_writers;
1767 gc_count = pgss->gc_count;
1769
1770 /* No point in loading file now if there are active writers */
1771 if (n_writers == 0)
1773 }
1774
1775 /*
1776 * Get shared lock, load or reload the query text file if we must, and
1777 * iterate over the hashtable entries.
1778 *
1779 * With a large hash table, we might be holding the lock rather longer
1780 * than one could wish. However, this only blocks creation of new hash
1781 * table entries, and the larger the hash table the less likely that is to
1782 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1783 * we need to partition the hash table to limit the time spent holding any
1784 * one lock.
1785 */
1787
1788 if (showtext)
1789 {
1790 /*
1791 * Here it is safe to examine extent and gc_count without taking the
1792 * mutex. Note that although other processes might change
1793 * pgss->extent just after we look at it, the strings they then write
1794 * into the file cannot yet be referenced in the hashtable, so we
1795 * don't care whether we see them or not.
1796 *
1797 * If qtext_load_file fails, we just press on; we'll return NULL for
1798 * every query text.
1799 */
1800 if (qbuffer == NULL ||
1801 pgss->extent != extent ||
1802 pgss->gc_count != gc_count)
1803 {
1804 if (qbuffer)
1805 pfree(qbuffer);
1807 }
1808 }
1809
1811 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1812 {
1814 bool nulls[PG_STAT_STATEMENTS_COLS];
1815 int i = 0;
1816 Counters tmp;
1817 double stddev;
1818 int64 queryid = entry->key.queryid;
1819 TimestampTz stats_since;
1820 TimestampTz minmax_stats_since;
1821
1822 memset(values, 0, sizeof(values));
1823 memset(nulls, 0, sizeof(nulls));
1824
1825 values[i++] = ObjectIdGetDatum(entry->key.userid);
1826 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1827 if (api_version >= PGSS_V1_9)
1828 values[i++] = BoolGetDatum(entry->key.toplevel);
1829
1830 if (is_allowed_role || entry->key.userid == userid)
1831 {
1832 if (api_version >= PGSS_V1_2)
1833 values[i++] = Int64GetDatumFast(queryid);
1834
1835 if (showtext)
1836 {
1837 char *qstr = qtext_fetch(entry->query_offset,
1838 entry->query_len,
1839 qbuffer,
1840 qbuffer_size);
1841
1842 if (qstr)
1843 {
1844 char *enc;
1845
1847 entry->query_len,
1848 entry->encoding);
1849
1851
1852 if (enc != qstr)
1853 pfree(enc);
1854 }
1855 else
1856 {
1857 /* Just return a null if we fail to find the text */
1858 nulls[i++] = true;
1859 }
1860 }
1861 else
1862 {
1863 /* Query text not requested */
1864 nulls[i++] = true;
1865 }
1866 }
1867 else
1868 {
1869 /* Don't show queryid */
1870 if (api_version >= PGSS_V1_2)
1871 nulls[i++] = true;
1872
1873 /*
1874 * Don't show query text, but hint as to the reason for not doing
1875 * so if it was requested
1876 */
1877 if (showtext)
1878 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1879 else
1880 nulls[i++] = true;
1881 }
1882
1883 /* copy counters to a local variable to keep locking time short */
1884 SpinLockAcquire(&entry->mutex);
1885 tmp = entry->counters;
1886 SpinLockRelease(&entry->mutex);
1887
1888 /*
1889 * The spinlock is not required when reading these two as they are
1890 * always updated when holding pgss->lock exclusively.
1891 */
1892 stats_since = entry->stats_since;
1893 minmax_stats_since = entry->minmax_stats_since;
1894
1895 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1896 if (IS_STICKY(tmp))
1897 continue;
1898
1899 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1900 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1901 {
1902 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1903 {
1904 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1905 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1906 }
1907
1908 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1909 api_version >= PGSS_V1_8)
1910 {
1911 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1912 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1913 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1914
1915 /*
1916 * Note we are calculating the population variance here, not
1917 * the sample variance, as we have data for the whole
1918 * population, so Bessel's correction is not used, and we
1919 * don't divide by tmp.calls - 1.
1920 */
1921 if (tmp.calls[kind] > 1)
1922 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1923 else
1924 stddev = 0.0;
1925 values[i++] = Float8GetDatumFast(stddev);
1926 }
1927 }
1928 values[i++] = Int64GetDatumFast(tmp.rows);
1931 if (api_version >= PGSS_V1_1)
1936 if (api_version >= PGSS_V1_1)
1941 if (api_version >= PGSS_V1_1)
1942 {
1945 }
1946 if (api_version >= PGSS_V1_11)
1947 {
1950 }
1951 if (api_version >= PGSS_V1_10)
1952 {
1955 }
1956 if (api_version >= PGSS_V1_8)
1957 {
1958 char buf[256];
1959 Datum wal_bytes;
1960
1963
1964 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1965
1966 /* Convert to numeric. */
1967 wal_bytes = DirectFunctionCall3(numeric_in,
1970 Int32GetDatum(-1));
1971 values[i++] = wal_bytes;
1972 }
1973 if (api_version >= PGSS_V1_12)
1974 {
1976 }
1977 if (api_version >= PGSS_V1_10)
1978 {
1987 }
1988 if (api_version >= PGSS_V1_11)
1989 {
1992 }
1993 if (api_version >= PGSS_V1_12)
1994 {
1997 }
1998 if (api_version >= PGSS_V1_13)
1999 {
2002 }
2003 if (api_version >= PGSS_V1_11)
2004 {
2005 values[i++] = TimestampTzGetDatum(stats_since);
2006 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2007 }
2008
2009 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2010 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2011 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2012 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2013 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2014 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2015 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2016 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2017 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2018 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2019 -1 /* fail if you forget to update this assert */ ));
2020
2021 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2022 }
2023
2025
2026 if (qbuffer)
2027 pfree(qbuffer);
2028}
2029
2030/* Number of output arguments (columns) for pg_stat_statements_info */
2031#define PG_STAT_STATEMENTS_INFO_COLS 2
2032
2033/*
2034 * Return statistics of pg_stat_statements.
2035 */
2036Datum
2038{
2039 pgssGlobalStats stats;
2040 TupleDesc tupdesc;
2042 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2043
2044 if (!pgss || !pgss_hash)
2045 ereport(ERROR,
2047 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2048
2049 /* Build a tuple descriptor for our result type */
2050 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2051 elog(ERROR, "return type must be a row type");
2052
2053 /* Read global statistics for pg_stat_statements */
2055 stats = pgss->stats;
2057
2058 values[0] = Int64GetDatum(stats.dealloc);
2060
2062}
2063
2064/*
2065 * Allocate a new hashtable entry.
2066 * caller must hold an exclusive lock on pgss->lock
2067 *
2068 * "query" need not be null-terminated; we rely on query_len instead
2069 *
2070 * If "sticky" is true, make the new entry artificially sticky so that it will
2071 * probably still be there when the query finishes execution. We do this by
2072 * giving it a median usage value rather than the normal value. (Strictly
2073 * speaking, query strings are normalized on a best effort basis, though it
2074 * would be difficult to demonstrate this even under artificial conditions.)
2075 *
2076 * Note: despite needing exclusive lock, it's not an error for the target
2077 * entry to already exist. This is because pgss_store releases and
2078 * reacquires lock after failing to find a match; so someone else could
2079 * have made the entry while we waited to get exclusive lock.
2080 */
2081static pgssEntry *
2082entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2083 bool sticky)
2084{
2085 pgssEntry *entry;
2086 bool found;
2087
2088 /* Make space if needed */
2090 entry_dealloc();
2091
2092 /* Find or create an entry with desired hash code */
2093 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2094
2095 if (!found)
2096 {
2097 /* New entry, initialize it */
2098
2099 /* reset the statistics */
2100 memset(&entry->counters, 0, sizeof(Counters));
2101 /* set the appropriate initial usage count */
2103 /* re-initialize the mutex each time ... we assume no one using it */
2104 SpinLockInit(&entry->mutex);
2105 /* ... and don't forget the query text metadata */
2106 Assert(query_len >= 0);
2107 entry->query_offset = query_offset;
2108 entry->query_len = query_len;
2109 entry->encoding = encoding;
2111 entry->minmax_stats_since = entry->stats_since;
2112 }
2113
2114 return entry;
2115}
2116
2117/*
2118 * qsort comparator for sorting into increasing usage order
2119 */
2120static int
2121entry_cmp(const void *lhs, const void *rhs)
2122{
2123 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2124 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2125
2126 if (l_usage < r_usage)
2127 return -1;
2128 else if (l_usage > r_usage)
2129 return +1;
2130 else
2131 return 0;
2132}
2133
2134/*
2135 * Deallocate least-used entries.
2136 *
2137 * Caller must hold an exclusive lock on pgss->lock.
2138 */
2139static void
2141{
2143 pgssEntry **entries;
2144 pgssEntry *entry;
2145 int nvictims;
2146 int i;
2148 int nvalidtexts;
2149
2150 /*
2151 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2152 * While we're scanning the table, apply the decay factor to the usage
2153 * values, and update the mean query length.
2154 *
2155 * Note that the mean query length is almost immediately obsolete, since
2156 * we compute it before not after discarding the least-used entries.
2157 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2158 * making two passes to get a more current result. Likewise, the new
2159 * cur_median_usage includes the entries we're about to zap.
2160 */
2161
2162 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2163
2164 i = 0;
2165 tottextlen = 0;
2166 nvalidtexts = 0;
2167
2169 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2170 {
2171 entries[i++] = entry;
2172 /* "Sticky" entries get a different usage decay rate. */
2173 if (IS_STICKY(entry->counters))
2175 else
2177 /* In the mean length computation, ignore dropped texts. */
2178 if (entry->query_len >= 0)
2179 {
2180 tottextlen += entry->query_len + 1;
2181 nvalidtexts++;
2182 }
2183 }
2184
2185 /* Sort into increasing order by usage */
2186 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2187
2188 /* Record the (approximate) median usage */
2189 if (i > 0)
2190 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2191 /* Record the mean query length */
2192 if (nvalidtexts > 0)
2194 else
2196
2197 /* Now zap an appropriate fraction of lowest-usage entries */
2198 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2199 nvictims = Min(nvictims, i);
2200
2201 for (i = 0; i < nvictims; i++)
2202 {
2203 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2204 }
2205
2206 pfree(entries);
2207
2208 /* Increment the number of times entries are deallocated */
2210 pgss->stats.dealloc += 1;
2212}
2213
2214/*
2215 * Given a query string (not necessarily null-terminated), allocate a new
2216 * entry in the external query text file and store the string there.
2217 *
2218 * If successful, returns true, and stores the new entry's offset in the file
2219 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2220 * number of garbage collections that have occurred so far.
2221 *
2222 * On failure, returns false.
2223 *
2224 * At least a shared lock on pgss->lock must be held by the caller, so as
2225 * to prevent a concurrent garbage collection. Share-lock-holding callers
2226 * should pass a gc_count pointer to obtain the number of garbage collections,
2227 * so that they can recheck the count after obtaining exclusive lock to
2228 * detect whether a garbage collection occurred (and removed this entry).
2229 */
2230static bool
2231qtext_store(const char *query, int query_len,
2232 Size *query_offset, int *gc_count)
2233{
2234 Size off;
2235 int fd;
2236
2237 /*
2238 * We use a spinlock to protect extent/n_writers/gc_count, so that
2239 * multiple processes may execute this function concurrently.
2240 */
2242 off = pgss->extent;
2243 pgss->extent += query_len + 1;
2244 pgss->n_writers++;
2245 if (gc_count)
2246 *gc_count = pgss->gc_count;
2248
2249 *query_offset = off;
2250
2251 /*
2252 * Don't allow the file to grow larger than what qtext_load_file can
2253 * (theoretically) handle. This has been seen to be reachable on 32-bit
2254 * platforms.
2255 */
2256 if (unlikely(query_len >= MaxAllocHugeSize - off))
2257 {
2258 errno = EFBIG; /* not quite right, but it'll do */
2259 fd = -1;
2260 goto error;
2261 }
2262
2263 /* Now write the data into the successfully-reserved part of the file */
2265 if (fd < 0)
2266 goto error;
2267
2268 if (pg_pwrite(fd, query, query_len, off) != query_len)
2269 goto error;
2270 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2271 goto error;
2272
2274
2275 /* Mark our write complete */
2277 pgss->n_writers--;
2279
2280 return true;
2281
2282error:
2283 ereport(LOG,
2285 errmsg("could not write file \"%s\": %m",
2286 PGSS_TEXT_FILE)));
2287
2288 if (fd >= 0)
2290
2291 /* Mark our write complete */
2293 pgss->n_writers--;
2295
2296 return false;
2297}
2298
2299/*
2300 * Read the external query text file into a palloc'd buffer.
2301 *
2302 * Returns NULL (without throwing an error) if unable to read, eg
2303 * file not there or insufficient memory.
2304 *
2305 * On success, the buffer size is also returned into *buffer_size.
2306 *
2307 * This can be called without any lock on pgss->lock, but in that case
2308 * the caller is responsible for verifying that the result is sane.
2309 */
2310static char *
2312{
2313 char *buf;
2314 int fd;
2315 struct stat stat;
2316 Size nread;
2317
2319 if (fd < 0)
2320 {
2321 if (errno != ENOENT)
2322 ereport(LOG,
2324 errmsg("could not read file \"%s\": %m",
2325 PGSS_TEXT_FILE)));
2326 return NULL;
2327 }
2328
2329 /* Get file length */
2330 if (fstat(fd, &stat))
2331 {
2332 ereport(LOG,
2334 errmsg("could not stat file \"%s\": %m",
2335 PGSS_TEXT_FILE)));
2337 return NULL;
2338 }
2339
2340 /* Allocate buffer; beware that off_t might be wider than size_t */
2343 else
2344 buf = NULL;
2345 if (buf == NULL)
2346 {
2347 ereport(LOG,
2349 errmsg("out of memory"),
2350 errdetail("Could not allocate enough memory to read file \"%s\".",
2351 PGSS_TEXT_FILE)));
2353 return NULL;
2354 }
2355
2356 /*
2357 * OK, slurp in the file. Windows fails if we try to read more than
2358 * INT_MAX bytes at once, and other platforms might not like that either,
2359 * so read a very large file in 1GB segments.
2360 */
2361 nread = 0;
2362 while (nread < stat.st_size)
2363 {
2364 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2365
2366 /*
2367 * If we get a short read and errno doesn't get set, the reason is
2368 * probably that garbage collection truncated the file since we did
2369 * the fstat(), so we don't log a complaint --- but we don't return
2370 * the data, either, since it's most likely corrupt due to concurrent
2371 * writes from garbage collection.
2372 */
2373 errno = 0;
2374 if (read(fd, buf + nread, toread) != toread)
2375 {
2376 if (errno)
2377 ereport(LOG,
2379 errmsg("could not read file \"%s\": %m",
2380 PGSS_TEXT_FILE)));
2381 pfree(buf);
2383 return NULL;
2384 }
2385 nread += toread;
2386 }
2387
2388 if (CloseTransientFile(fd) != 0)
2389 ereport(LOG,
2391 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2392
2393 *buffer_size = nread;
2394 return buf;
2395}
2396
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset and query_len give the byte offset of the text within the
 * image and its length, not counting the trailing null.  buffer and
 * buffer_size describe the image; buffer may be NULL, which is treated as
 * "the file could not be read".
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text and its terminator must both lie inside
	 * the image.  Compare the length against the space remaining after the
	 * offset instead of forming query_offset + query_len: that sum is
	 * unsigned and would wrap around for a garbage offset near the top of
	 * the Size range, letting an out-of-range offset slip through.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2420
2421/*
2422 * Do we need to garbage-collect the external query text file?
2423 *
2424 * Caller should hold at least a shared lock on pgss->lock.
2425 */
2426static bool
2428{
2429 Size extent;
2430
2431 /* Read shared extent pointer */
2433 extent = pgss->extent;
2435
2436 /*
2437 * Don't proceed if file does not exceed 512 bytes per possible entry.
2438 *
2439 * Here and in the next test, 32-bit machines have overflow hazards if
2440 * pgss_max and/or mean_query_len are large. Force the multiplications
2441 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2442 */
2443 if ((uint64) extent < (uint64) 512 * pgss_max)
2444 return false;
2445
2446 /*
2447 * Don't proceed if file is less than about 50% bloat. Nothing can or
2448 * should be done in the event of unusually large query texts accounting
2449 * for file's large size. We go to the trouble of maintaining the mean
2450 * query length in order to prevent garbage collection from thrashing
2451 * uselessly.
2452 */
2453 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2454 return false;
2455
2456 return true;
2457}
2458
2459/*
2460 * Garbage-collect orphaned query texts in external file.
2461 *
2462 * This won't be called often in the typical case, since it's likely that
2463 * there won't be too much churn, and besides, a similar compaction process
2464 * occurs when serializing to disk at shutdown or as part of resetting.
2465 * Despite this, it seems prudent to plan for the edge case where the file
2466 * becomes unreasonably large, with no other method of compaction likely to
2467 * occur in the foreseeable future.
2468 *
2469 * The caller must hold an exclusive lock on pgss->lock.
2470 *
2471 * At the first sign of trouble we unlink the query text file to get a clean
2472 * slate (although existing statistics are retained), rather than risk
2473 * thrashing by allowing the same problem case to recur indefinitely.
2474 */
2475static void
2477{
2478 char *qbuffer;
2480 FILE *qfile = NULL;
2482 pgssEntry *entry;
2483 Size extent;
2484 int nentries;
2485
2486 /*
2487 * When called from pgss_store, some other session might have proceeded
2488 * with garbage collection in the no-lock-held interim of lock strength
2489 * escalation. Check once more that this is actually necessary.
2490 */
2491 if (!need_gc_qtexts())
2492 return;
2493
2494 /*
2495 * Load the old texts file. If we fail (out of memory, for instance),
2496 * invalidate query texts. Hopefully this is rare. It might seem better
2497 * to leave things alone on an OOM failure, but the problem is that the
2498 * file is only going to get bigger; hoping for a future non-OOM result is
2499 * risky and can easily lead to complete denial of service.
2500 */
2502 if (qbuffer == NULL)
2503 goto gc_fail;
2504
2505 /*
2506 * We overwrite the query texts file in place, so as to reduce the risk of
2507 * an out-of-disk-space failure. Since the file is guaranteed not to get
2508 * larger, this should always work on traditional filesystems; though we
2509 * could still lose on copy-on-write filesystems.
2510 */
2512 if (qfile == NULL)
2513 {
2514 ereport(LOG,
2516 errmsg("could not write file \"%s\": %m",
2517 PGSS_TEXT_FILE)));
2518 goto gc_fail;
2519 }
2520
2521 extent = 0;
2522 nentries = 0;
2523
2525 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2526 {
2527 int query_len = entry->query_len;
2528 char *qry = qtext_fetch(entry->query_offset,
2529 query_len,
2530 qbuffer,
2531 qbuffer_size);
2532
2533 if (qry == NULL)
2534 {
2535 /* Trouble ... drop the text */
2536 entry->query_offset = 0;
2537 entry->query_len = -1;
2538 /* entry will not be counted in mean query length computation */
2539 continue;
2540 }
2541
2542 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2543 {
2544 ereport(LOG,
2546 errmsg("could not write file \"%s\": %m",
2547 PGSS_TEXT_FILE)));
2549 goto gc_fail;
2550 }
2551
2552 entry->query_offset = extent;
2553 extent += query_len + 1;
2554 nentries++;
2555 }
2556
2557 /*
2558 * Truncate away any now-unused space. If this fails for some odd reason,
2559 * we log it, but there's no need to fail.
2560 */
2561 if (ftruncate(fileno(qfile), extent) != 0)
2562 ereport(LOG,
2564 errmsg("could not truncate file \"%s\": %m",
2565 PGSS_TEXT_FILE)));
2566
2567 if (FreeFile(qfile))
2568 {
2569 ereport(LOG,
2571 errmsg("could not write file \"%s\": %m",
2572 PGSS_TEXT_FILE)));
2573 qfile = NULL;
2574 goto gc_fail;
2575 }
2576
2577 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2578 pgss->extent, extent);
2579
2580 /* Reset the shared extent pointer */
2581 pgss->extent = extent;
2582
2583 /*
2584 * Also update the mean query length, to be sure that need_gc_qtexts()
2585 * won't still think we have a problem.
2586 */
2587 if (nentries > 0)
2588 pgss->mean_query_len = extent / nentries;
2589 else
2591
2592 pfree(qbuffer);
2593
2594 /*
2595 * OK, count a garbage collection cycle. (Note: even though we have
2596 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2597 * other processes may examine gc_count while holding only the mutex.
2598 * Also, we have to advance the count *after* we've rewritten the file,
2599 * else other processes might not realize they read a stale file.)
2600 */
2602
2603 return;
2604
2605gc_fail:
2606 /* clean up resources */
2607 if (qfile)
2608 FreeFile(qfile);
2609 if (qbuffer)
2610 pfree(qbuffer);
2611
2612 /*
2613 * Since the contents of the external file are now uncertain, mark all
2614 * hashtable entries as having invalid texts.
2615 */
2617 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2618 {
2619 entry->query_offset = 0;
2620 entry->query_len = -1;
2621 }
2622
2623 /*
2624 * Destroy the query text file and create a new, empty one
2625 */
2628 if (qfile == NULL)
2629 ereport(LOG,
2631 errmsg("could not recreate file \"%s\": %m",
2632 PGSS_TEXT_FILE)));
2633 else
2634 FreeFile(qfile);
2635
2636 /* Reset the shared extent pointer */
2637 pgss->extent = 0;
2638
2639 /* Reset mean_query_len to match the new state */
2641
2642 /*
2643 * Bump the GC count even though we failed.
2644 *
2645 * This is needed to make concurrent readers of file without any lock on
2646 * pgss->lock notice existence of new version of file. Once readers
2647 * subsequently observe a change in GC count with pgss->lock held, that
2648 * forces a safe reopen of file. Writers also require that we bump here,
2649 * of course. (As required by locking protocol, readers and writers don't
2650 * trust earlier file contents until gc_count is found unchanged after
2651 * pgss->lock acquired in shared or exclusive mode respectively.)
2652 */
2654}
2655
/*
 * Reset one hashtable entry "e"; a NULL "e" is silently ignored.
 *
 * This expands in place inside entry_reset() and uses that function's
 * minmax_only, stats_reset and num_remove variables as well as pgss_hash.
 * With minmax_only set, only the min/max timings are zeroed and
 * minmax_stats_since is stamped; otherwise the entry is removed from the
 * hashtable and num_remove is incremented.
 *
 * The body is wrapped in do { } while (0) so that the macro acts as a single
 * statement (safe in an unbraced if/else); "e" is evaluated more than once,
 * so it must not have side effects.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2674
2675/*
2676 * Reset entries corresponding to parameters passed.
2677 */
2678static TimestampTz
2679entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2680{
2682 pgssEntry *entry;
2683 FILE *qfile;
2684 int64 num_entries;
2685 int64 num_remove = 0;
2686 pgssHashKey key;
2687 TimestampTz stats_reset;
2688
2689 if (!pgss || !pgss_hash)
2690 ereport(ERROR,
2692 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2693
2695 num_entries = hash_get_num_entries(pgss_hash);
2696
2697 stats_reset = GetCurrentTimestamp();
2698
2699 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2700 {
2701 /* If all the parameters are available, use the fast path. */
2702 memset(&key, 0, sizeof(pgssHashKey));
2703 key.userid = userid;
2704 key.dbid = dbid;
2705 key.queryid = queryid;
2706
2707 /*
2708 * Reset the entry if it exists, starting with the non-top-level
2709 * entry.
2710 */
2711 key.toplevel = false;
2712 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2713
2714 SINGLE_ENTRY_RESET(entry);
2715
2716 /* Also reset the top-level entry if it exists. */
2717 key.toplevel = true;
2718 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2719
2720 SINGLE_ENTRY_RESET(entry);
2721 }
2722 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2723 {
2724 /* Reset entries corresponding to valid parameters. */
2726 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2727 {
2728 if ((!userid || entry->key.userid == userid) &&
2729 (!dbid || entry->key.dbid == dbid) &&
2730 (!queryid || entry->key.queryid == queryid))
2731 {
2732 SINGLE_ENTRY_RESET(entry);
2733 }
2734 }
2735 }
2736 else
2737 {
2738 /* Reset all entries. */
2740 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2741 {
2742 SINGLE_ENTRY_RESET(entry);
2743 }
2744 }
2745
2746 /* All entries are removed? */
2747 if (num_entries != num_remove)
2748 goto release_lock;
2749
2750 /*
2751 * Reset global statistics for pg_stat_statements since all entries are
2752 * removed.
2753 */
2755 pgss->stats.dealloc = 0;
2756 pgss->stats.stats_reset = stats_reset;
2758
2759 /*
2760 * Write new empty query file, perhaps even creating a new one to recover
2761 * if the file was missing.
2762 */
2764 if (qfile == NULL)
2765 {
2766 ereport(LOG,
2768 errmsg("could not create file \"%s\": %m",
2769 PGSS_TEXT_FILE)));
2770 goto done;
2771 }
2772
2773 /* If ftruncate fails, log it, but it's not a fatal problem */
2774 if (ftruncate(fileno(qfile), 0) != 0)
2775 ereport(LOG,
2777 errmsg("could not truncate file \"%s\": %m",
2778 PGSS_TEXT_FILE)));
2779
2780 FreeFile(qfile);
2781
2782done:
2783 pgss->extent = 0;
2784 /* This counts as a query text garbage collection for our purposes */
2786
2789
2790 return stats_reset;
2791}
2792
2793/*
2794 * Generate a normalized version of the query string that will be used to
2795 * represent all similar queries.
2796 *
2797 * Note that the normalized representation may well vary depending on
2798 * just which "equivalent" query is used to create the hashtable entry.
2799 * We assume this is OK.
2800 *
2801 * If query_loc > 0, then "query" has been advanced by that much compared to
2802 * the original string start, so we need to translate the provided locations
2803 * to compensate. (This lets us avoid re-scanning statements before the one
2804 * of interest, so it's worth doing.)
2805 *
2806 * *query_len_p contains the input string length, and is updated with
2807 * the result string length on exit. The resulting string might be longer
2808 * or shorter depending on what happens with replacement of constants.
2809 *
2810 * Returns a palloc'd string.
2811 */
2812static char *
2814 int query_loc, int *query_len_p)
2815{
2816 char *norm_query;
2817 int query_len = *query_len_p;
2818 int norm_query_buflen, /* Space allowed for norm_query */
2819 len_to_wrt, /* Length (in bytes) to write */
2820 quer_loc = 0, /* Source query byte location */
2821 n_quer_loc = 0, /* Normalized query byte location */
2822 last_off = 0, /* Offset from start for previous tok */
2823 last_tok_len = 0; /* Length (in bytes) of that tok */
2824 int num_constants_replaced = 0;
2826
2827 /*
2828 * Determine constants' lengths (core system only gives us locations), and
2829 * return a sorted copy of jstate's LocationLen data with lengths filled
2830 * in.
2831 */
2833
2834 /*
2835 * Allow for $n symbols to be longer than the constants they replace.
2836 * Constants must take at least one byte in text form, while a $n symbol
2837 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2838 * could refine that limit based on the max value of n for the current
2839 * query, but it hardly seems worth any extra effort to do so.
2840 */
2841 norm_query_buflen = query_len + jstate->clocations_count * 10;
2842
2843 /* Allocate result buffer */
2845
2846 for (int i = 0; i < jstate->clocations_count; i++)
2847 {
2848 int off, /* Offset from start for cur tok */
2849 tok_len; /* Length (in bytes) of that tok */
2850
2851 /*
2852 * If we have an external param at this location, but no lists are
2853 * being squashed across the query, then we skip here; this will make
2854 * us print the characters found in the original query that represent
2855 * the parameter in the next iteration (or after the loop is done),
2856 * which is a bit odd but seems to work okay in most cases.
2857 */
2858 if (locs[i].extern_param && !jstate->has_squashed_lists)
2859 continue;
2860
2861 off = locs[i].location;
2862
2863 /* Adjust recorded location if we're dealing with partial string */
2864 off -= query_loc;
2865
2866 tok_len = locs[i].length;
2867
2868 if (tok_len < 0)
2869 continue; /* ignore any duplicates */
2870
2871 /* Copy next chunk (what precedes the next constant) */
2872 len_to_wrt = off - last_off;
2874 Assert(len_to_wrt >= 0);
2877
2878 /*
2879 * And insert a param symbol in place of the constant token; and, if
2880 * we have a squashable list, insert a placeholder comment starting
2881 * from the list's second value.
2882 */
2884 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2885 locs[i].squashed ? " /*, ... */" : "");
2887
2888 /* move forward */
2889 quer_loc = off + tok_len;
2890 last_off = off;
2892 }
2893
2894 /* Clean up, if needed */
2895 if (locs)
2896 pfree(locs);
2897
2898 /*
2899 * We've copied up until the last ignorable constant. Copy over the
2900 * remaining bytes of the original query string.
2901 */
2902 len_to_wrt = query_len - quer_loc;
2903
2904 Assert(len_to_wrt >= 0);
2907
2909 norm_query[n_quer_loc] = '\0';
2910
2912 return norm_query;
2913}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5314
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, const JumbleState *jstate)
Definition analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1649
static Datum values[MAXATTR]
Definition bootstrap.c:190
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define INT64CONST(x)
Definition c.h:630
#define Min(x, y)
Definition c.h:1091
#define PG_BINARY_R
Definition c.h:1376
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
#define PG_BINARY
Definition c.h:1374
#define UINT64_FORMAT
Definition c.h:635
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define PG_BINARY_W
Definition c.h:1377
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1444
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1273
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
Datum arg
Definition elog.c:1323
int errcode_for_file_access(void)
Definition elog.c:898
int errcode(int sqlerrcode)
Definition elog.c:875
#define LOG
Definition elog.h:32
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:374
#define PG_END_TRY(...)
Definition elog.h:399
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define PG_FINALLY(...)
Definition elog.h:391
#define ereport(elevel,...)
Definition elog.h:152
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:73
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:72
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:70
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:143
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:71
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:318
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:486
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:426
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition executor.h:87
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition executor.h:81
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition executor.h:77
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition executor.h:91
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
int CloseTransientFile(int fd)
Definition fd.c:2855
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define MCXT_ALLOC_HUGE
Definition fe_memutils.h:28
#define MCXT_ALLOC_NO_OOM
Definition fe_memutils.h:29
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_MODULE_MAGIC_EXT(...)
Definition fmgr.h:540
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:692
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:122
Oid MyDatabaseId
Definition globals.c:96
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5152
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5049
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5186
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5073
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_BLOBS
Definition hsearch.h:92
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:434
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:444
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:459
WalUsage pgWalUsage
Definition instrument.c:27
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:367
BufferUsage pgBufferUsage
Definition instrument.c:25
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:327
@ INSTRUMENT_ALL
Definition instrument.h:68
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
int LWLockNewTrancheId(const char *name)
Definition lwlock.c:562
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
int GetDatabaseEncoding(void)
Definition mbutils.c:1389
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1635
void pfree(void *pointer)
Definition mcxt.c:1619
void * palloc(Size size)
Definition mcxt.c:1390
void * palloc_extended(Size size, int flags)
Definition mcxt.c:1442
#define MaxAllocHugeSize
Definition memutils.h:45
Oid GetUserId(void)
Definition miscinit.c:470
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1788
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static char * errmsg
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:74
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * generate_normalized_query(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
static const ShmemCallbacks pgss_shmem_callbacks
#define PG_STAT_STATEMENTS_COLS_V1_3
#define PGSS_NUMKIND
static bool pgss_save
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
static void pgss_shmem_init(void *arg)
static void pgss_shmem_request(void *arg)
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:134
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:351
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.h:28
PlannedStmtOrigin
Definition plannodes.h:36
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:37
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:41
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:40
#define sprintf
Definition port.h:263
#define pg_pwrite
Definition port.h:249
#define snprintf
Definition port.h:261
#define qsort(a, b, c, d)
Definition port.h:496
static Datum Int64GetDatum(int64 X)
Definition postgres.h:426
#define Int64GetDatumFast(X)
Definition postgres.h:538
#define Float8GetDatumFast(X)
Definition postgres.h:540
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:383
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
LocationLen * ComputeConstantLengths(const JumbleState *jstate, const char *query, int query_loc)
ScanDirection
Definition sdir.h:25
void RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
Definition shmem.c:873
#define ShmemRequestHash(...)
Definition shmem.h:179
#define ShmemRequestStruct(...)
Definition shmem.h:176
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition execnodes.h:782
struct JitContext * es_jit
Definition execnodes.h:800
uint64 es_total_processed
Definition execnodes.h:752
int es_parallel_workers_launched
Definition execnodes.h:784
WalUsage walusage
Definition instrument.h:90
instr_time total
Definition instrument.h:88
BufferUsage bufusage
Definition instrument.h:89
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:214
int64 queryId
Definition plannodes.h:69
ParseLoc stmt_len
Definition plannodes.h:171
PlannedStmtOrigin planOrigin
Definition plannodes.h:75
ParseLoc stmt_location
Definition plannodes.h:169
Node * utilityStmt
Definition plannodes.h:153
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:50
PlannedStmt * plannedstmt
Definition execdesc.h:37
int query_instr_options
Definition execdesc.h:45
struct Instrumentation * query_instr
Definition execdesc.h:57
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:258
ShmemRequestCallback request_fn
Definition shmem.h:133
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
Definition guc.h:174
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
LWLock lock
Definition lwlock.h:70
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:548
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:72
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.h:71
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
const char * name
#define fstat
Definition win32_port.h:73