PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c File Reference
#include "postgres.h"
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/htup_details.h"
#include "access/parallel.h"
#include "catalog/pg_authid.h"
#include "common/int.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "optimizer/planner.h"
#include "parser/analyze.h"
#include "parser/scanner.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
Include dependency graph for pg_stat_statements.c:

Go to the source code of this file.

Data Structures

struct  pgssHashKey
 
struct  Counters
 
struct  pgssGlobalStats
 
struct  pgssEntry
 
struct  pgssSharedState
 

Macros

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
 
#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"
 
#define USAGE_EXEC(duration)   (1.0)
 
#define USAGE_INIT   (1.0) /* including initial planning */
 
#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */
 
#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */
 
#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */
 
#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */
 
#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */
 
#define IS_STICKY(c)   ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
#define PGSS_NUMKIND   (PGSS_EXEC + 1)
 
#define pgss_enabled(level)
 
#define record_gc_qtexts()
 
#define PG_STAT_STATEMENTS_COLS_V1_0   14
 
#define PG_STAT_STATEMENTS_COLS_V1_1   18
 
#define PG_STAT_STATEMENTS_COLS_V1_2   19
 
#define PG_STAT_STATEMENTS_COLS_V1_3   23
 
#define PG_STAT_STATEMENTS_COLS_V1_8   32
 
#define PG_STAT_STATEMENTS_COLS_V1_9   33
 
#define PG_STAT_STATEMENTS_COLS_V1_10   43
 
#define PG_STAT_STATEMENTS_COLS_V1_11   49
 
#define PG_STAT_STATEMENTS_COLS_V1_12   52
 
#define PG_STAT_STATEMENTS_COLS_V1_13   54
 
#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */
 
#define PG_STAT_STATEMENTS_INFO_COLS   2
 
#define SINGLE_ENTRY_RESET(e)
 

Typedefs

typedef enum pgssVersion pgssVersion
 
typedef enum pgssStoreKind pgssStoreKind
 
typedef struct pgssHashKey pgssHashKey
 
typedef struct Counters Counters
 
typedef struct pgssGlobalStats pgssGlobalStats
 
typedef struct pgssEntry pgssEntry
 
typedef struct pgssSharedState pgssSharedState
 

Enumerations

enum  pgssVersion {
  PGSS_V1_0 = 0 , PGSS_V1_1 , PGSS_V1_2 , PGSS_V1_3 ,
  PGSS_V1_8 , PGSS_V1_9 , PGSS_V1_10 , PGSS_V1_11 ,
  PGSS_V1_12 , PGSS_V1_13
}
 
enum  pgssStoreKind { PGSS_INVALID = -1 , PGSS_PLAN = 0 , PGSS_EXEC }
 
enum  PGSSTrackLevel { PGSS_TRACK_NONE , PGSS_TRACK_TOP , PGSS_TRACK_ALL }
 

Functions

 PG_MODULE_MAGIC_EXT (.name="pg_stat_statements",.version=PG_VERSION)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_7)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_2)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_3)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_8)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_9)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_10)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_12)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_13)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_info)
 
static void pgss_shmem_request (void)
 
static void pgss_shmem_startup (void)
 
static void pgss_shmem_shutdown (int code, Datum arg)
 
static void pgss_post_parse_analyze (ParseState *pstate, Query *query, JumbleState *jstate)
 
static PlannedStmt * pgss_planner (Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
 
static void pgss_ExecutorStart (QueryDesc *queryDesc, int eflags)
 
static void pgss_ExecutorRun (QueryDesc *queryDesc, ScanDirection direction, uint64 count)
 
static void pgss_ExecutorFinish (QueryDesc *queryDesc)
 
static void pgss_ExecutorEnd (QueryDesc *queryDesc)
 
static void pgss_ProcessUtility (PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
 
static void pgss_store (const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
 
static void pg_stat_statements_internal (FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
 
static Size pgss_memsize (void)
 
static pgssEntry * entry_alloc (pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
 
static void entry_dealloc (void)
 
static bool qtext_store (const char *query, int query_len, Size *query_offset, int *gc_count)
 
static char * qtext_load_file (Size *buffer_size)
 
static char * qtext_fetch (Size query_offset, int query_len, char *buffer, Size buffer_size)
 
static bool need_gc_qtexts (void)
 
static void gc_qtexts (void)
 
static TimestampTz entry_reset (Oid userid, Oid dbid, int64 queryid, bool minmax_only)
 
static char * generate_normalized_query (JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
 
static void fill_in_constant_lengths (JumbleState *jstate, const char *query, int query_loc)
 
static int comp_location (const void *a, const void *b)
 
void _PG_init (void)
 
Datum pg_stat_statements_reset_1_7 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_13 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_12 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_10 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_9 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_8 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_3 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_2 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_info (PG_FUNCTION_ARGS)
 
static int entry_cmp (const void *lhs, const void *rhs)
 

Variables

static const uint32 PGSS_FILE_HEADER = 0x20250731
 
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
 
static int nesting_level = 0
 
static shmem_request_hook_type prev_shmem_request_hook = NULL
 
static shmem_startup_hook_type prev_shmem_startup_hook = NULL
 
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
 
static planner_hook_type prev_planner_hook = NULL
 
static ExecutorStart_hook_type prev_ExecutorStart = NULL
 
static ExecutorRun_hook_type prev_ExecutorRun = NULL
 
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL
 
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL
 
static ProcessUtility_hook_type prev_ProcessUtility = NULL
 
static pgssSharedState * pgss = NULL
 
static HTAB * pgss_hash = NULL
 
static const struct config_enum_entry track_options []
 
static int pgss_max = 5000
 
static int pgss_track = PGSS_TRACK_TOP
 
static bool pgss_track_utility = true
 
static bool pgss_track_planning = false
 
static bool pgss_save = true
 

Macro Definition Documentation

◆ ASSUMED_LENGTH_INIT

#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */

Definition at line 98 of file pg_stat_statements.c.

◆ ASSUMED_MEDIAN_INIT

#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */

Definition at line 97 of file pg_stat_statements.c.

◆ IS_STICKY

#define IS_STICKY (   c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)

Definition at line 102 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS

#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */

Definition at line 1584 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_0

#define PG_STAT_STATEMENTS_COLS_V1_0   14

Definition at line 1574 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_1

#define PG_STAT_STATEMENTS_COLS_V1_1   18

Definition at line 1575 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_10

#define PG_STAT_STATEMENTS_COLS_V1_10   43

Definition at line 1580 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_11

#define PG_STAT_STATEMENTS_COLS_V1_11   49

Definition at line 1581 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_12

#define PG_STAT_STATEMENTS_COLS_V1_12   52

Definition at line 1582 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_13

#define PG_STAT_STATEMENTS_COLS_V1_13   54

Definition at line 1583 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_2

#define PG_STAT_STATEMENTS_COLS_V1_2   19

Definition at line 1576 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_3

#define PG_STAT_STATEMENTS_COLS_V1_3   23

Definition at line 1577 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_8

#define PG_STAT_STATEMENTS_COLS_V1_8   32

Definition at line 1578 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_9

#define PG_STAT_STATEMENTS_COLS_V1_9   33

Definition at line 1579 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_INFO_COLS

#define PG_STAT_STATEMENTS_INFO_COLS   2

Definition at line 2052 of file pg_stat_statements.c.

◆ PGSS_DUMP_FILE

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"

Definition at line 81 of file pg_stat_statements.c.

◆ pgss_enabled

#define pgss_enabled (   level)
Value:
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
(pgss_track == PGSS_TRACK_TOP && (level) == 0)))
#define IsParallelWorker()
Definition parallel.h:60
static int pgss_track
@ PGSS_TRACK_ALL
@ PGSS_TRACK_TOP

Definition at line 305 of file pg_stat_statements.c.

310 { \
312 pgss->gc_count++; \
314 } while(0)
315
316/*---- Function declarations ----*/
317
331
332static void pgss_shmem_request(void);
333static void pgss_shmem_startup(void);
334static void pgss_shmem_shutdown(int code, Datum arg);
335static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
337static PlannedStmt *pgss_planner(Query *parse,
338 const char *query_string,
339 int cursorOptions,
340 ParamListInfo boundParams,
341 ExplainState *es);
342static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
343static void pgss_ExecutorRun(QueryDesc *queryDesc,
344 ScanDirection direction,
345 uint64 count);
346static void pgss_ExecutorFinish(QueryDesc *queryDesc);
347static void pgss_ExecutorEnd(QueryDesc *queryDesc);
348static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
349 bool readOnlyTree,
350 ProcessUtilityContext context, ParamListInfo params,
351 QueryEnvironment *queryEnv,
352 DestReceiver *dest, QueryCompletion *qc);
353static void pgss_store(const char *query, int64 queryId,
354 int query_location, int query_len,
355 pgssStoreKind kind,
356 double total_time, uint64 rows,
357 const BufferUsage *bufusage,
358 const WalUsage *walusage,
359 const struct JitInstrumentation *jitusage,
361 int parallel_workers_to_launch,
362 int parallel_workers_launched,
363 PlannedStmtOrigin planOrigin);
365 pgssVersion api_version,
366 bool showtext);
367static Size pgss_memsize(void);
368static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
369 int encoding, bool sticky);
370static void entry_dealloc(void);
371static bool qtext_store(const char *query, int query_len,
372 Size *query_offset, int *gc_count);
373static char *qtext_load_file(Size *buffer_size);
374static char *qtext_fetch(Size query_offset, int query_len,
375 char *buffer, Size buffer_size);
376static bool need_gc_qtexts(void);
377static void gc_qtexts(void);
378static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
379static char *generate_normalized_query(JumbleState *jstate, const char *query,
380 int query_loc, int *query_len_p);
381static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
382 int query_loc);
383static int comp_location(const void *a, const void *b);
384
385
386/*
387 * Module load callback
388 */
389void
390_PG_init(void)
391{
392 /*
393 * In order to create our shared memory area, we have to be loaded via
394 * shared_preload_libraries. If not, fall out without hooking into any of
395 * the main system. (We don't throw error here because it seems useful to
396 * allow the pg_stat_statements functions to be created even when the
397 * module isn't active. The functions must protect themselves against
398 * being called then, however.)
399 */
401 return;
402
403 /*
404 * Inform the postmaster that we want to enable query_id calculation if
405 * compute_query_id is set to auto.
406 */
408
409 /*
410 * Define (or redefine) custom GUC variables.
411 */
412 DefineCustomIntVariable("pg_stat_statements.max",
413 "Sets the maximum number of statements tracked by pg_stat_statements.",
414 NULL,
415 &pgss_max,
416 5000,
417 100,
418 INT_MAX / 2,
420 0,
421 NULL,
422 NULL,
423 NULL);
424
425 DefineCustomEnumVariable("pg_stat_statements.track",
426 "Selects which statements are tracked by pg_stat_statements.",
427 NULL,
428 &pgss_track,
431 PGC_SUSET,
432 0,
433 NULL,
434 NULL,
435 NULL);
436
437 DefineCustomBoolVariable("pg_stat_statements.track_utility",
438 "Selects whether utility commands are tracked by pg_stat_statements.",
439 NULL,
441 true,
442 PGC_SUSET,
443 0,
444 NULL,
445 NULL,
446 NULL);
447
448 DefineCustomBoolVariable("pg_stat_statements.track_planning",
449 "Selects whether planning duration is tracked by pg_stat_statements.",
450 NULL,
452 false,
453 PGC_SUSET,
454 0,
455 NULL,
456 NULL,
457 NULL);
458
459 DefineCustomBoolVariable("pg_stat_statements.save",
460 "Save pg_stat_statements statistics across server shutdowns.",
461 NULL,
462 &pgss_save,
463 true,
465 0,
466 NULL,
467 NULL,
468 NULL);
469
470 MarkGUCPrefixReserved("pg_stat_statements");
471
472 /*
473 * Install hooks.
474 */
493}
494
495/*
496 * shmem_request hook: request additional shared resources. We'll allocate or
497 * attach to the shared resources in pgss_shmem_startup().
498 */
499static void
501{
504
506 RequestNamedLWLockTranche("pg_stat_statements", 1);
507}
508
509/*
510 * shmem_startup hook: allocate or attach to shared memory,
511 * then load any pre-existing statistics from file.
512 * Also create and load the query-texts file, which is expected to exist
513 * (even if empty) while the module is enabled.
514 */
515static void
517{
518 bool found;
519 HASHCTL info;
520 FILE *file = NULL;
521 FILE *qfile = NULL;
522 uint32 header;
523 int32 num;
524 int32 pgver;
525 int32 i;
526 int buffer_size;
527 char *buffer = NULL;
528
531
532 /* reset in case this is a restart within the postmaster */
533 pgss = NULL;
534 pgss_hash = NULL;
535
536 /*
537 * Create or attach to the shared memory state, including hash table
538 */
540
541 pgss = ShmemInitStruct("pg_stat_statements",
542 sizeof(pgssSharedState),
543 &found);
544
545 if (!found)
546 {
547 /* First time through ... */
548 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
552 pgss->extent = 0;
553 pgss->n_writers = 0;
554 pgss->gc_count = 0;
555 pgss->stats.dealloc = 0;
557 }
558
559 info.keysize = sizeof(pgssHashKey);
560 info.entrysize = sizeof(pgssEntry);
561 pgss_hash = ShmemInitHash("pg_stat_statements hash",
563 &info,
565
567
568 /*
569 * If we're in the postmaster (or a standalone backend...), set up a shmem
570 * exit hook to dump the statistics to disk.
571 */
574
575 /*
576 * Done if some other process already completed our initialization.
577 */
578 if (found)
579 return;
580
581 /*
582 * Note: we don't bother with locks here, because there should be no other
583 * processes running when this code is reached.
584 */
585
586 /* Unlink query text file possibly left over from crash */
588
589 /* Allocate new query text temp file */
591 if (qfile == NULL)
592 goto write_error;
593
594 /*
595 * If we were told not to load old statistics, we're done. (Note we do
596 * not try to unlink any old dump file in this case. This seems a bit
597 * questionable but it's the historical behavior.)
598 */
599 if (!pgss_save)
600 {
602 return;
603 }
604
605 /*
606 * Attempt to load old statistics from the dump file.
607 */
609 if (file == NULL)
610 {
611 if (errno != ENOENT)
612 goto read_error;
613 /* No existing persisted stats file, so we're done */
615 return;
616 }
617
618 buffer_size = 2048;
619 buffer = (char *) palloc(buffer_size);
620
621 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
622 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
623 fread(&num, sizeof(int32), 1, file) != 1)
624 goto read_error;
625
626 if (header != PGSS_FILE_HEADER ||
628 goto data_error;
629
630 for (i = 0; i < num; i++)
631 {
633 pgssEntry *entry;
634 Size query_offset;
635
636 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
637 goto read_error;
638
639 /* Encoding is the only field we can easily sanity-check */
640 if (!PG_VALID_BE_ENCODING(temp.encoding))
641 goto data_error;
642
643 /* Resize buffer as needed */
644 if (temp.query_len >= buffer_size)
645 {
646 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
647 buffer = repalloc(buffer, buffer_size);
648 }
649
650 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
651 goto read_error;
652
653 /* Should have a trailing null, but let's make sure */
654 buffer[temp.query_len] = '\0';
655
656 /* Skip loading "sticky" entries */
657 if (IS_STICKY(temp.counters))
658 continue;
659
660 /* Store the query text */
661 query_offset = pgss->extent;
662 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
663 goto write_error;
664 pgss->extent += temp.query_len + 1;
665
666 /* make the hashtable entry (discards old entries if too many) */
667 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
668 temp.encoding,
669 false);
670
671 /* copy in the actual stats */
672 entry->counters = temp.counters;
673 entry->stats_since = temp.stats_since;
674 entry->minmax_stats_since = temp.minmax_stats_since;
675 }
676
677 /* Read global statistics for pg_stat_statements */
678 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
679 goto read_error;
680
681 pfree(buffer);
682 FreeFile(file);
684
685 /*
686 * Remove the persisted stats file so it's not included in
687 * backups/replication standbys, etc. A new file will be written on next
688 * shutdown.
689 *
690 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
691 * because we remove that file on startup; it acts inversely to
692 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
693 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
694 * when the server is not running. Leaving the file creates no danger of
695 * a newly restored database having a spurious record of execution costs,
696 * which is what we're really concerned about here.
697 */
699
700 return;
701
703 ereport(LOG,
705 errmsg("could not read file \"%s\": %m",
707 goto fail;
709 ereport(LOG,
711 errmsg("ignoring invalid data in file \"%s\"",
713 goto fail;
715 ereport(LOG,
717 errmsg("could not write file \"%s\": %m",
719fail:
720 if (buffer)
721 pfree(buffer);
722 if (file)
723 FreeFile(file);
724 if (qfile)
726 /* If possible, throw away the bogus file; ignore any error */
728
729 /*
730 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
731 * server is running with pg_stat_statements enabled
732 */
733}
734
735/*
736 * shmem_shutdown hook: Dump statistics into file.
737 *
738 * Note: we don't bother with acquiring lock, because there should be no
739 * other processes running when this is called.
740 */
741static void
743{
744 FILE *file;
745 char *qbuffer = NULL;
746 Size qbuffer_size = 0;
748 int32 num_entries;
749 pgssEntry *entry;
750
751 /* Don't try to dump during a crash. */
752 if (code)
753 return;
754
755 /* Safety check ... shouldn't get here unless shmem is set up. */
756 if (!pgss || !pgss_hash)
757 return;
758
759 /* Don't dump if told not to. */
760 if (!pgss_save)
761 return;
762
764 if (file == NULL)
765 goto error;
766
767 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
768 goto error;
769 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
770 goto error;
771 num_entries = hash_get_num_entries(pgss_hash);
772 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
773 goto error;
774
776 if (qbuffer == NULL)
777 goto error;
778
779 /*
780 * When serializing to disk, we store query texts immediately after their
781 * entry data. Any orphaned query texts are thereby excluded.
782 */
784 while ((entry = hash_seq_search(&hash_seq)) != NULL)
785 {
786 int len = entry->query_len;
787 char *qstr = qtext_fetch(entry->query_offset, len,
789
790 if (qstr == NULL)
791 continue; /* Ignore any entries with bogus texts */
792
793 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
794 fwrite(qstr, 1, len + 1, file) != len + 1)
795 {
796 /* note: we assume hash_seq_term won't change errno */
798 goto error;
799 }
800 }
801
802 /* Dump global statistics for pg_stat_statements */
803 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
804 goto error;
805
806 free(qbuffer);
807 qbuffer = NULL;
808
809 if (FreeFile(file))
810 {
811 file = NULL;
812 goto error;
813 }
814
815 /*
816 * Rename file into place, so we atomically replace any old one.
817 */
819
820 /* Unlink query-texts file; it's not needed while shutdown */
822
823 return;
824
825error:
826 ereport(LOG,
828 errmsg("could not write file \"%s\": %m",
829 PGSS_DUMP_FILE ".tmp")));
830 free(qbuffer);
831 if (file)
832 FreeFile(file);
833 unlink(PGSS_DUMP_FILE ".tmp");
835}
836
837/*
838 * Post-parse-analysis hook: mark query with a queryId
839 */
840static void
842{
844 prev_post_parse_analyze_hook(pstate, query, jstate);
845
846 /* Safety check... */
848 return;
849
850 /*
851 * If it's EXECUTE, clear the queryId so that stats will accumulate for
852 * the underlying PREPARE. But don't do this if we're not tracking
853 * utility statements, to avoid messing up another extension that might be
854 * tracking them.
855 */
856 if (query->utilityStmt)
857 {
859 {
860 query->queryId = INT64CONST(0);
861 return;
862 }
863 }
864
865 /*
866 * If query jumbling were able to identify any ignorable constants, we
867 * immediately create a hash table entry for the query, so that we can
868 * record the normalized form of the query string. If there were no such
869 * constants, the normalized string would be the same as the query text
870 * anyway, so there's no need for an early entry.
871 */
872 if (jstate && jstate->clocations_count > 0)
873 pgss_store(pstate->p_sourcetext,
874 query->queryId,
875 query->stmt_location,
876 query->stmt_len,
878 0,
879 0,
880 NULL,
881 NULL,
882 NULL,
883 jstate,
884 0,
885 0,
887}
888
889/*
890 * Planner hook: forward to regular planner, but measure planning time
891 * if needed.
892 */
893static PlannedStmt *
894pgss_planner(Query *parse,
895 const char *query_string,
896 int cursorOptions,
897 ParamListInfo boundParams,
898 ExplainState *es)
899{
900 PlannedStmt *result;
901
902 /*
903 * We can't process the query if no query_string is provided, as
904 * pgss_store needs it. We also ignore query without queryid, as it would
905 * be treated as a utility statement, which may not be the case.
906 */
908 && pgss_track_planning && query_string
909 && parse->queryId != INT64CONST(0))
910 {
913 BufferUsage bufusage_start,
914 bufusage;
915 WalUsage walusage_start,
916 walusage;
917
918 /* We need to track buffer usage as the planner can access them. */
919 bufusage_start = pgBufferUsage;
920
921 /*
922 * Similarly the planner could write some WAL records in some cases
923 * (e.g. setting a hint bit with those being WAL-logged)
924 */
925 walusage_start = pgWalUsage;
927
929 PG_TRY();
930 {
932 result = prev_planner_hook(parse, query_string, cursorOptions,
933 boundParams, es);
934 else
935 result = standard_planner(parse, query_string, cursorOptions,
936 boundParams, es);
937 }
938 PG_FINALLY();
939 {
941 }
942 PG_END_TRY();
943
946
947 /* calc differences of buffer counters. */
948 memset(&bufusage, 0, sizeof(BufferUsage));
949 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
950
951 /* calc differences of WAL counters. */
952 memset(&walusage, 0, sizeof(WalUsage));
953 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
954
955 pgss_store(query_string,
956 parse->queryId,
957 parse->stmt_location,
958 parse->stmt_len,
959 PGSS_PLAN,
961 0,
962 &bufusage,
963 &walusage,
964 NULL,
965 NULL,
966 0,
967 0,
968 result->planOrigin);
969 }
970 else
971 {
972 /*
973 * Even though we're not tracking plan time for this statement, we
974 * must still increment the nesting level, to ensure that functions
975 * evaluated during planning are not seen as top-level calls.
976 */
978 PG_TRY();
979 {
981 result = prev_planner_hook(parse, query_string, cursorOptions,
982 boundParams, es);
983 else
984 result = standard_planner(parse, query_string, cursorOptions,
985 boundParams, es);
986 }
987 PG_FINALLY();
988 {
990 }
991 PG_END_TRY();
992 }
993
994 return result;
995}
996
997/*
998 * ExecutorStart hook: start up tracking if needed
999 */
1000static void
1001pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
1002{
1004 prev_ExecutorStart(queryDesc, eflags);
1005 else
1006 standard_ExecutorStart(queryDesc, eflags);
1007
1008 /*
1009 * If query has queryId zero, don't track it. This prevents double
1010 * counting of optimizable statements that are directly contained in
1011 * utility statements.
1012 */
1013 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1014 {
1015 /*
1016 * Set up to track total elapsed time in ExecutorRun. Make sure the
1017 * space is allocated in the per-query context so it will go away at
1018 * ExecutorEnd.
1019 */
1020 if (queryDesc->totaltime == NULL)
1021 {
1023
1025 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1027 }
1028 }
1029}
1030
1031/*
1032 * ExecutorRun hook: all we need do is track nesting depth
 *
 * nesting_level is raised for the duration of the executor run and lowered
 * again afterwards.  pgss_enabled() compares the level against zero when
 * only top-level statements are tracked, so statements started while this
 * call is in progress are seen at a non-zero level.
 *
 * queryDesc, direction and count are passed through unchanged to whichever
 * ExecutorRun implementation is called below; nothing is returned.
1033 */
1034static void
1035pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1036{
1037 nesting_level++;
1038 PG_TRY();
1039 {
1040 if (prev_ExecutorRun)	/* non-NULL: call it instead of the standard implementation */
1041 prev_ExecutorRun(queryDesc, direction, count);
1042 else
1043 standard_ExecutorRun(queryDesc, direction, count);
1044 }
1045 PG_FINALLY();
1046 {
1047 nesting_level--;	/* in PG_FINALLY so the increment above is undone on the error path too */
1048 }
1049 PG_END_TRY();
1050}
1051
1052/*
1053 * ExecutorFinish hook: all we need do is track nesting depth
1054 */
1055static void
1057{
1058 nesting_level++;
1059 PG_TRY();
1060 {
1062 prev_ExecutorFinish(queryDesc);
1063 else
1064 standard_ExecutorFinish(queryDesc);
1065 }
1066 PG_FINALLY();
1067 {
1068 nesting_level--;
1069 }
1070 PG_END_TRY();
1071}
1072
1073/*
1074 * ExecutorEnd hook: store results if needed
1075 */
1076static void
1077pgss_ExecutorEnd(QueryDesc *queryDesc)
1078{
1079 int64 queryId = queryDesc->plannedstmt->queryId;
1080
1081 if (queryId != INT64CONST(0) && queryDesc->totaltime &&
1083 {
1084 /*
1085 * Make sure stats accumulation is done. (Note: it's okay if several
1086 * levels of hook all do this.)
1087 */
1088 InstrEndLoop(queryDesc->totaltime);
1089
1090 pgss_store(queryDesc->sourceText,
1091 queryId,
1092 queryDesc->plannedstmt->stmt_location,
1093 queryDesc->plannedstmt->stmt_len,
1094 PGSS_EXEC,
1096 queryDesc->estate->es_total_processed,
1097 &queryDesc->totaltime->bufusage,
1098 &queryDesc->totaltime->walusage,
1099 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1100 NULL,
1101 queryDesc->estate->es_parallel_workers_to_launch,
1102 queryDesc->estate->es_parallel_workers_launched,
1103 queryDesc->plannedstmt->planOrigin);
1104 }
1105
1106 if (prev_ExecutorEnd)
1107 prev_ExecutorEnd(queryDesc);
1108 else
1109 standard_ExecutorEnd(queryDesc);
1110}
1111
1112/*
1113 * ProcessUtility hook
1114 */
1115static void
1116pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1117 bool readOnlyTree,
1118 ProcessUtilityContext context,
1119 ParamListInfo params, QueryEnvironment *queryEnv,
1120 DestReceiver *dest, QueryCompletion *qc)
1121{
1122 Node *parsetree = pstmt->utilityStmt;
1123 int64 saved_queryId = pstmt->queryId;
1125 int saved_stmt_len = pstmt->stmt_len;
1127
1128 /*
1129 * Force utility statements to get queryId zero. We do this even in cases
1130 * where the statement contains an optimizable statement for which a
1131 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1132 * cases, runtime control will first go through ProcessUtility and then
1133 * the executor, and we don't want the executor hooks to do anything,
1134 * since we are already measuring the statement's costs at the utility
1135 * level.
1136 *
1137 * Note that this is only done if pg_stat_statements is enabled and
1138 * configured to track utility statements, in the unlikely possibility
1139 * that user configured another extension to handle utility statements
1140 * only.
1141 */
1142 if (enabled)
1143 pstmt->queryId = INT64CONST(0);
1144
1145 /*
1146 * If it's an EXECUTE statement, we don't track it and don't increment the
1147 * nesting level. This allows the cycles to be charged to the underlying
1148 * PREPARE instead (by the Executor hooks), which is much more useful.
1149 *
1150 * We also don't track execution of PREPARE. If we did, we would get one
1151 * hash table entry for the PREPARE (with hash calculated from the query
1152 * string), and then a different one with the same query string (but hash
1153 * calculated from the query tree) would be used to accumulate costs of
1154 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1155 * actually run the planner (only parse+rewrite), its costs are generally
1156 * pretty negligible and it seems okay to just ignore it.
1157 */
1158 if (enabled &&
1159 !IsA(parsetree, ExecuteStmt) &&
1160 !IsA(parsetree, PrepareStmt))
1161 {
1164 uint64 rows;
1165 BufferUsage bufusage_start,
1166 bufusage;
1167 WalUsage walusage_start,
1168 walusage;
1169
1170 bufusage_start = pgBufferUsage;
1171 walusage_start = pgWalUsage;
1173
1174 nesting_level++;
1175 PG_TRY();
1176 {
1178 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1179 context, params, queryEnv,
1180 dest, qc);
1181 else
1182 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1183 context, params, queryEnv,
1184 dest, qc);
1185 }
1186 PG_FINALLY();
1187 {
1188 nesting_level--;
1189 }
1190 PG_END_TRY();
1191
1192 /*
1193 * CAUTION: do not access the *pstmt data structure again below here.
1194 * If it was a ROLLBACK or similar, that data structure may have been
1195 * freed. We must copy everything we still need into local variables,
1196 * which we did above.
1197 *
1198 * For the same reason, we can't risk restoring pstmt->queryId to its
1199 * former value, which'd otherwise be a good idea.
1200 */
1201
1204
1205 /*
1206 * Track the total number of rows retrieved or affected by the utility
1207 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1208 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1209 */
1210 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1211 qc->commandTag == CMDTAG_FETCH ||
1212 qc->commandTag == CMDTAG_SELECT ||
1214 qc->nprocessed : 0;
1215
1216 /* calc differences of buffer counters. */
1217 memset(&bufusage, 0, sizeof(BufferUsage));
1218 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1219
1220 /* calc differences of WAL counters. */
1221 memset(&walusage, 0, sizeof(WalUsage));
1222 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1223
1224 pgss_store(queryString,
1228 PGSS_EXEC,
1230 rows,
1231 &bufusage,
1232 &walusage,
1233 NULL,
1234 NULL,
1235 0,
1236 0,
1237 pstmt->planOrigin);
1238 }
1239 else
1240 {
1241 /*
1242 * Even though we're not tracking execution time for this statement,
1243 * we must still increment the nesting level, to ensure that functions
1244 * evaluated within it are not seen as top-level calls. But don't do
1245 * so for EXECUTE; that way, when control reaches pgss_planner or
1246 * pgss_ExecutorStart, we will treat the costs as top-level if
1247 * appropriate. Likewise, don't bump for PREPARE, so that parse
1248 * analysis will treat the statement as top-level if appropriate.
1249 *
1250 * To be absolutely certain we don't mess up the nesting level,
1251 * evaluate the bump_level condition just once.
1252 */
1253 bool bump_level =
1254 !IsA(parsetree, ExecuteStmt) &&
1255 !IsA(parsetree, PrepareStmt);
1256
1257 if (bump_level)
1258 nesting_level++;
1259 PG_TRY();
1260 {
1262 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1263 context, params, queryEnv,
1264 dest, qc);
1265 else
1266 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1267 context, params, queryEnv,
1268 dest, qc);
1269 }
1270 PG_FINALLY();
1271 {
1272 if (bump_level)
1273 nesting_level--;
1274 }
1275 PG_END_TRY();
1276 }
1277}
1278
1279/*
1280 * Store some statistics for a statement.
1281 *
1282 * If jstate is not NULL then we're trying to create an entry for which
1283 * we have no statistics as yet; we just want to record the normalized
1284 * query string. total_time, rows, bufusage and walusage are ignored in this
1285 * case.
1286 *
1287 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1288 * for the arrays in the Counters field.
1289 */
1290static void
1291pgss_store(const char *query, int64 queryId,
1292 int query_location, int query_len,
1293 pgssStoreKind kind,
1294 double total_time, uint64 rows,
1295 const BufferUsage *bufusage,
1296 const WalUsage *walusage,
1297 const struct JitInstrumentation *jitusage,
1299 int parallel_workers_to_launch,
1300 int parallel_workers_launched,
1301 PlannedStmtOrigin planOrigin)
1302{
1304 pgssEntry *entry;
1305 char *norm_query = NULL;
1307
1308 Assert(query != NULL);
1309
1310 /* Safety check... */
1311 if (!pgss || !pgss_hash)
1312 return;
1313
1314 /*
1315 * Nothing to do if compute_query_id isn't enabled and no other module
1316 * computed a query identifier.
1317 */
1318 if (queryId == INT64CONST(0))
1319 return;
1320
1321 /*
1322 * Confine our attention to the relevant part of the string, if the query
1323 * is a portion of a multi-statement source string, and update query
1324 * location and length if needed.
1325 */
1326 query = CleanQuerytext(query, &query_location, &query_len);
1327
1328 /* Set up key for hashtable search */
1329
1330 /* clear padding */
1331 memset(&key, 0, sizeof(pgssHashKey));
1332
1333 key.userid = GetUserId();
1334 key.dbid = MyDatabaseId;
1335 key.queryid = queryId;
1336 key.toplevel = (nesting_level == 0);
1337
1338 /* Lookup the hash table entry with shared lock. */
1340
1341 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1342
1343 /* Create new entry, if not present */
1344 if (!entry)
1345 {
1346 Size query_offset;
1347 int gc_count;
1348 bool stored;
1349 bool do_gc;
1350
1351 /*
1352 * Create a new, normalized query string if caller asked. We don't
1353 * need to hold the lock while doing this work. (Note: in any case,
1354 * it's possible that someone else creates a duplicate hashtable entry
1355 * in the interval where we don't hold the lock below. That case is
1356 * handled by entry_alloc.)
1357 */
1358 if (jstate)
1359 {
1363 &query_len);
1365 }
1366
1367 /* Append new query text to file with only shared lock held */
1368 stored = qtext_store(norm_query ? norm_query : query, query_len,
1369 &query_offset, &gc_count);
1370
1371 /*
1372 * Determine whether we need to garbage collect external query texts
1373 * while the shared lock is still held. This micro-optimization
1374 * avoids taking the time to decide this while holding exclusive lock.
1375 */
1377
1378 /* Need exclusive lock to make a new hashtable entry - promote */
1381
1382 /*
1383 * A garbage collection may have occurred while we weren't holding the
1384 * lock. In the unlikely event that this happens, the query text we
1385 * stored above will have been garbage collected, so write it again.
1386 * This should be infrequent enough that doing it while holding
1387 * exclusive lock isn't a performance problem.
1388 */
1389 if (!stored || pgss->gc_count != gc_count)
1390 stored = qtext_store(norm_query ? norm_query : query, query_len,
1391 &query_offset, NULL);
1392
1393 /* If we failed to write to the text file, give up */
1394 if (!stored)
1395 goto done;
1396
1397 /* OK to create a new hashtable entry */
1398 entry = entry_alloc(&key, query_offset, query_len, encoding,
1399 jstate != NULL);
1400
1401 /* If needed, perform garbage collection while exclusive lock held */
1402 if (do_gc)
1403 gc_qtexts();
1404 }
1405
1406 /* Increment the counts, except when jstate is not NULL */
1407 if (!jstate)
1408 {
1409 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1410
1411 /*
1412 * Grab the spinlock while updating the counters (see comment about
1413 * locking rules at the head of the file)
1414 */
1415 SpinLockAcquire(&entry->mutex);
1416
1417 /* "Unstick" entry if it was previously sticky */
1418 if (IS_STICKY(entry->counters))
1419 entry->counters.usage = USAGE_INIT;
1420
1421 entry->counters.calls[kind] += 1;
1422 entry->counters.total_time[kind] += total_time;
1423
1424 if (entry->counters.calls[kind] == 1)
1425 {
1426 entry->counters.min_time[kind] = total_time;
1427 entry->counters.max_time[kind] = total_time;
1428 entry->counters.mean_time[kind] = total_time;
1429 }
1430 else
1431 {
1432 /*
1433 * Welford's method for accurately computing variance. See
1434 * <http://www.johndcook.com/blog/standard_deviation/>
1435 */
1436 double old_mean = entry->counters.mean_time[kind];
1437
1438 entry->counters.mean_time[kind] +=
1439 (total_time - old_mean) / entry->counters.calls[kind];
1440 entry->counters.sum_var_time[kind] +=
1441 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1442
1443 /*
1444 * Calculate min and max time. min = 0 and max = 0 means that the
1445 * min/max statistics were reset
1446 */
1447 if (entry->counters.min_time[kind] == 0
1448 && entry->counters.max_time[kind] == 0)
1449 {
1450 entry->counters.min_time[kind] = total_time;
1451 entry->counters.max_time[kind] = total_time;
1452 }
1453 else
1454 {
1455 if (entry->counters.min_time[kind] > total_time)
1456 entry->counters.min_time[kind] = total_time;
1457 if (entry->counters.max_time[kind] < total_time)
1458 entry->counters.max_time[kind] = total_time;
1459 }
1460 }
1461 entry->counters.rows += rows;
1462 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1463 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1466 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1467 entry->counters.local_blks_read += bufusage->local_blks_read;
1470 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1471 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1478 entry->counters.usage += USAGE_EXEC(total_time);
1479 entry->counters.wal_records += walusage->wal_records;
1480 entry->counters.wal_fpi += walusage->wal_fpi;
1481 entry->counters.wal_bytes += walusage->wal_bytes;
1482 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1483 if (jitusage)
1484 {
1485 entry->counters.jit_functions += jitusage->created_functions;
1486 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1487
1488 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1489 entry->counters.jit_deform_count++;
1490 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1491
1492 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1494 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1495
1496 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1498 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1499
1500 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1502 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1503 }
1504
1505 /* parallel worker counters */
1506 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1507 entry->counters.parallel_workers_launched += parallel_workers_launched;
1508
1509 /* plan cache counters */
1510 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1512 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1513 entry->counters.custom_plan_calls++;
1514
1515 SpinLockRelease(&entry->mutex);
1516 }
1517
1518done:
1520
1521 /* We postpone this clean-up until we're out of the lock */
1522 if (norm_query)
1524}
1525
1526/*
1527 * Reset statement statistics corresponding to userid, dbid, and queryid.
1528 */
1529Datum
1531{
1532 Oid userid;
1533 Oid dbid;
1534 int64 queryid;
1535
1536 userid = PG_GETARG_OID(0);
1537 dbid = PG_GETARG_OID(1);
1538 queryid = PG_GETARG_INT64(2);
1539
1540 entry_reset(userid, dbid, queryid, false);
1541
1543}
1544
1545Datum
1547{
1548 Oid userid;
1549 Oid dbid;
1550 int64 queryid;
1551 bool minmax_only;
1552
1553 userid = PG_GETARG_OID(0);
1554 dbid = PG_GETARG_OID(1);
1555 queryid = PG_GETARG_INT64(2);
1557
1558 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1559}
1560
1561/*
1562 * Reset statement statistics.
1563 */
1564Datum
1566{
1567 entry_reset(0, 0, 0, false);
1568
1570}
1571
1572/* Number of output arguments (columns) for various API versions */
1573#define PG_STAT_STATEMENTS_COLS_V1_0 14
1574#define PG_STAT_STATEMENTS_COLS_V1_1 18
1575#define PG_STAT_STATEMENTS_COLS_V1_2 19
1576#define PG_STAT_STATEMENTS_COLS_V1_3 23
1577#define PG_STAT_STATEMENTS_COLS_V1_8 32
1578#define PG_STAT_STATEMENTS_COLS_V1_9 33
1579#define PG_STAT_STATEMENTS_COLS_V1_10 43
1580#define PG_STAT_STATEMENTS_COLS_V1_11 49
1581#define PG_STAT_STATEMENTS_COLS_V1_12 52
1582#define PG_STAT_STATEMENTS_COLS_V1_13 54
1583#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1584
1585/*
1586 * Retrieve statement statistics.
1587 *
1588 * The SQL API of this function has changed multiple times, and will likely
1589 * do so again in future. To support the case where a newer version of this
1590 * loadable module is being used with an old SQL declaration of the function,
1591 * we continue to support the older API versions. For 1.2 and later, the
1592 * expected API version is identified by embedding it in the C name of the
1593 * function. Unfortunately we weren't bright enough to do that for 1.1.
1594 */
1595Datum
1597{
1598 bool showtext = PG_GETARG_BOOL(0);
1599
1601
1602 return (Datum) 0;
1603}
1604
1605Datum
1607{
1608 bool showtext = PG_GETARG_BOOL(0);
1609
1611
1612 return (Datum) 0;
1613}
1614
1615Datum
1617{
1618 bool showtext = PG_GETARG_BOOL(0);
1619
1621
1622 return (Datum) 0;
1623}
1624
1625Datum
1627{
1628 bool showtext = PG_GETARG_BOOL(0);
1629
1631
1632 return (Datum) 0;
1633}
1634
1635Datum
1637{
1638 bool showtext = PG_GETARG_BOOL(0);
1639
1641
1642 return (Datum) 0;
1643}
1644
1645Datum
1647{
1648 bool showtext = PG_GETARG_BOOL(0);
1649
1651
1652 return (Datum) 0;
1653}
1654
1655Datum
1657{
1658 bool showtext = PG_GETARG_BOOL(0);
1659
1661
1662 return (Datum) 0;
1663}
1664
1665Datum
1667{
1668 bool showtext = PG_GETARG_BOOL(0);
1669
1671
1672 return (Datum) 0;
1673}
1674
1675/*
1676 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1677 * This can be removed someday, perhaps.
1678 */
1679Datum
1681{
1682 /* If it's really API 1.1, we'll figure that out below */
1684
1685 return (Datum) 0;
1686}
1687
1688/* Common code for all versions of pg_stat_statements() */
1689static void
1691 pgssVersion api_version,
1692 bool showtext)
1693{
1695 Oid userid = GetUserId();
1696 bool is_allowed_role = false;
1697 char *qbuffer = NULL;
1698 Size qbuffer_size = 0;
1699 Size extent = 0;
1700 int gc_count = 0;
1702 pgssEntry *entry;
1703
1704 /*
1705 * Superusers or roles with the privileges of pg_read_all_stats members
1706 * are allowed
1707 */
1709
1710 /* hash table must exist already */
1711 if (!pgss || !pgss_hash)
1712 ereport(ERROR,
1714 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1715
1716 InitMaterializedSRF(fcinfo, 0);
1717
1718 /*
1719 * Check we have the expected number of output arguments. Aside from
1720 * being a good safety check, we need a kluge here to detect API version
1721 * 1.1, which was wedged into the code in an ill-considered way.
1722 */
1723 switch (rsinfo->setDesc->natts)
1724 {
1726 if (api_version != PGSS_V1_0)
1727 elog(ERROR, "incorrect number of output arguments");
1728 break;
1730 /* pg_stat_statements() should have told us 1.0 */
1731 if (api_version != PGSS_V1_0)
1732 elog(ERROR, "incorrect number of output arguments");
1733 api_version = PGSS_V1_1;
1734 break;
1736 if (api_version != PGSS_V1_2)
1737 elog(ERROR, "incorrect number of output arguments");
1738 break;
1740 if (api_version != PGSS_V1_3)
1741 elog(ERROR, "incorrect number of output arguments");
1742 break;
1744 if (api_version != PGSS_V1_8)
1745 elog(ERROR, "incorrect number of output arguments");
1746 break;
1748 if (api_version != PGSS_V1_9)
1749 elog(ERROR, "incorrect number of output arguments");
1750 break;
1752 if (api_version != PGSS_V1_10)
1753 elog(ERROR, "incorrect number of output arguments");
1754 break;
1756 if (api_version != PGSS_V1_11)
1757 elog(ERROR, "incorrect number of output arguments");
1758 break;
1760 if (api_version != PGSS_V1_12)
1761 elog(ERROR, "incorrect number of output arguments");
1762 break;
1764 if (api_version != PGSS_V1_13)
1765 elog(ERROR, "incorrect number of output arguments");
1766 break;
1767 default:
1768 elog(ERROR, "incorrect number of output arguments");
1769 }
1770
1771 /*
1772 * We'd like to load the query text file (if needed) while not holding any
1773 * lock on pgss->lock. In the worst case we'll have to do this again
1774 * after we have the lock, but it's unlikely enough to make this a win
1775 * despite occasional duplicated work. We need to reload if anybody
1776 * writes to the file (either a retail qtext_store(), or a garbage
1777 * collection) between this point and where we've gotten shared lock. If
1778 * a qtext_store is actually in progress when we look, we might as well
1779 * skip the speculative load entirely.
1780 */
1781 if (showtext)
1782 {
1783 int n_writers;
1784
1785 /* Take the mutex so we can examine variables */
1787 extent = pgss->extent;
1788 n_writers = pgss->n_writers;
1789 gc_count = pgss->gc_count;
1791
1792 /* No point in loading file now if there are active writers */
1793 if (n_writers == 0)
1795 }
1796
1797 /*
1798 * Get shared lock, load or reload the query text file if we must, and
1799 * iterate over the hashtable entries.
1800 *
1801 * With a large hash table, we might be holding the lock rather longer
1802 * than one could wish. However, this only blocks creation of new hash
1803 * table entries, and the larger the hash table the less likely that is to
1804 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1805 * we need to partition the hash table to limit the time spent holding any
1806 * one lock.
1807 */
1809
1810 if (showtext)
1811 {
1812 /*
1813 * Here it is safe to examine extent and gc_count without taking the
1814 * mutex. Note that although other processes might change
1815 * pgss->extent just after we look at it, the strings they then write
1816 * into the file cannot yet be referenced in the hashtable, so we
1817 * don't care whether we see them or not.
1818 *
1819 * If qtext_load_file fails, we just press on; we'll return NULL for
1820 * every query text.
1821 */
1822 if (qbuffer == NULL ||
1823 pgss->extent != extent ||
1824 pgss->gc_count != gc_count)
1825 {
1826 free(qbuffer);
1828 }
1829 }
1830
1832 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1833 {
1835 bool nulls[PG_STAT_STATEMENTS_COLS];
1836 int i = 0;
1837 Counters tmp;
1838 double stddev;
1839 int64 queryid = entry->key.queryid;
1840 TimestampTz stats_since;
1841 TimestampTz minmax_stats_since;
1842
1843 memset(values, 0, sizeof(values));
1844 memset(nulls, 0, sizeof(nulls));
1845
1846 values[i++] = ObjectIdGetDatum(entry->key.userid);
1847 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1848 if (api_version >= PGSS_V1_9)
1849 values[i++] = BoolGetDatum(entry->key.toplevel);
1850
1851 if (is_allowed_role || entry->key.userid == userid)
1852 {
1853 if (api_version >= PGSS_V1_2)
1854 values[i++] = Int64GetDatumFast(queryid);
1855
1856 if (showtext)
1857 {
1858 char *qstr = qtext_fetch(entry->query_offset,
1859 entry->query_len,
1860 qbuffer,
1861 qbuffer_size);
1862
1863 if (qstr)
1864 {
1865 char *enc;
1866
1868 entry->query_len,
1869 entry->encoding);
1870
1872
1873 if (enc != qstr)
1874 pfree(enc);
1875 }
1876 else
1877 {
1878 /* Just return a null if we fail to find the text */
1879 nulls[i++] = true;
1880 }
1881 }
1882 else
1883 {
1884 /* Query text not requested */
1885 nulls[i++] = true;
1886 }
1887 }
1888 else
1889 {
1890 /* Don't show queryid */
1891 if (api_version >= PGSS_V1_2)
1892 nulls[i++] = true;
1893
1894 /*
1895 * Don't show query text, but hint as to the reason for not doing
1896 * so if it was requested
1897 */
1898 if (showtext)
1899 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1900 else
1901 nulls[i++] = true;
1902 }
1903
1904 /* copy counters to a local variable to keep locking time short */
1905 SpinLockAcquire(&entry->mutex);
1906 tmp = entry->counters;
1907 SpinLockRelease(&entry->mutex);
1908
1909 /*
1910 * The spinlock is not required when reading these two as they are
1911 * always updated when holding pgss->lock exclusively.
1912 */
1913 stats_since = entry->stats_since;
1914 minmax_stats_since = entry->minmax_stats_since;
1915
1916 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1917 if (IS_STICKY(tmp))
1918 continue;
1919
1920 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1921 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1922 {
1923 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1924 {
1925 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1926 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1927 }
1928
1929 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1930 api_version >= PGSS_V1_8)
1931 {
1932 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1933 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1934 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1935
1936 /*
1937 * Note we are calculating the population variance here, not
1938 * the sample variance, as we have data for the whole
1939 * population, so Bessel's correction is not used, and we
1940 * don't divide by tmp.calls - 1.
1941 */
1942 if (tmp.calls[kind] > 1)
1943 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1944 else
1945 stddev = 0.0;
1946 values[i++] = Float8GetDatumFast(stddev);
1947 }
1948 }
1949 values[i++] = Int64GetDatumFast(tmp.rows);
1952 if (api_version >= PGSS_V1_1)
1957 if (api_version >= PGSS_V1_1)
1962 if (api_version >= PGSS_V1_1)
1963 {
1966 }
1967 if (api_version >= PGSS_V1_11)
1968 {
1971 }
1972 if (api_version >= PGSS_V1_10)
1973 {
1976 }
1977 if (api_version >= PGSS_V1_8)
1978 {
1979 char buf[256];
1980 Datum wal_bytes;
1981
1984
1985 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1986
1987 /* Convert to numeric. */
1988 wal_bytes = DirectFunctionCall3(numeric_in,
1991 Int32GetDatum(-1));
1992 values[i++] = wal_bytes;
1993 }
1994 if (api_version >= PGSS_V1_12)
1995 {
1997 }
1998 if (api_version >= PGSS_V1_10)
1999 {
2008 }
2009 if (api_version >= PGSS_V1_11)
2010 {
2013 }
2014 if (api_version >= PGSS_V1_12)
2015 {
2018 }
2019 if (api_version >= PGSS_V1_13)
2020 {
2023 }
2024 if (api_version >= PGSS_V1_11)
2025 {
2026 values[i++] = TimestampTzGetDatum(stats_since);
2027 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2028 }
2029
2030 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2031 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2032 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2033 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2034 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2035 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2036 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2037 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2038 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2039 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2040 -1 /* fail if you forget to update this assert */ ));
2041
2042 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2043 }
2044
2046
2047 free(qbuffer);
2048}
2049
2050/* Number of output arguments (columns) for pg_stat_statements_info */
2051#define PG_STAT_STATEMENTS_INFO_COLS 2
2052
2053/*
2054 * Return statistics of pg_stat_statements.
2055 */
2056Datum
2058{
2059 pgssGlobalStats stats;
2060 TupleDesc tupdesc;
2062 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2063
2064 if (!pgss || !pgss_hash)
2065 ereport(ERROR,
2067 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2068
2069 /* Build a tuple descriptor for our result type */
2070 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2071 elog(ERROR, "return type must be a row type");
2072
2073 /* Read global statistics for pg_stat_statements */
2075 stats = pgss->stats;
2077
2078 values[0] = Int64GetDatum(stats.dealloc);
2080
2082}
2083
2084/*
2085 * Estimate shared memory space needed.
2086 */
2087static Size
2088pgss_memsize(void)
2089{
2090 Size size;
2091
2092 size = MAXALIGN(sizeof(pgssSharedState));
2093 size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2094
2095 return size;
2096}
2097
2098/*
2099 * Allocate a new hashtable entry.
2100 * caller must hold an exclusive lock on pgss->lock
2101 *
2102 * "query" need not be null-terminated; we rely on query_len instead
2103 *
2104 * If "sticky" is true, make the new entry artificially sticky so that it will
2105 * probably still be there when the query finishes execution. We do this by
2106 * giving it a median usage value rather than the normal value. (Strictly
2107 * speaking, query strings are normalized on a best effort basis, though it
2108 * would be difficult to demonstrate this even under artificial conditions.)
2109 *
2110 * Note: despite needing exclusive lock, it's not an error for the target
2111 * entry to already exist. This is because pgss_store releases and
2112 * reacquires lock after failing to find a match; so someone else could
2113 * have made the entry while we waited to get exclusive lock.
2114 */
2115static pgssEntry *
2116entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2117 bool sticky)
2118{
2119 pgssEntry *entry;
2120 bool found;
2121
2122 /* Make space if needed */
2124 entry_dealloc();
2125
2126 /* Find or create an entry with desired hash code */
2127 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2128
2129 if (!found)
2130 {
2131 /* New entry, initialize it */
2132
2133 /* reset the statistics */
2134 memset(&entry->counters, 0, sizeof(Counters));
2135 /* set the appropriate initial usage count */
2137 /* re-initialize the mutex each time ... we assume no one using it */
2138 SpinLockInit(&entry->mutex);
2139 /* ... and don't forget the query text metadata */
2140 Assert(query_len >= 0);
2141 entry->query_offset = query_offset;
2142 entry->query_len = query_len;
2143 entry->encoding = encoding;
2145 entry->minmax_stats_since = entry->stats_since;
2146 }
2147
2148 return entry;
2149}
2150
2151/*
2152 * qsort comparator for sorting into increasing usage order
2153 */
2154static int
2155entry_cmp(const void *lhs, const void *rhs)
2156{
2157 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2158 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2159
2160 if (l_usage < r_usage)
2161 return -1;
2162 else if (l_usage > r_usage)
2163 return +1;
2164 else
2165 return 0;
2166}
2167
2168/*
2169 * Deallocate least-used entries.
2170 *
2171 * Caller must hold an exclusive lock on pgss->lock.
2172 */
2173static void
2174entry_dealloc(void)
2175{
2177 pgssEntry **entries;
2178 pgssEntry *entry;
2179 int nvictims;
2180 int i;
2182 int nvalidtexts;
2183
2184 /*
2185 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2186 * While we're scanning the table, apply the decay factor to the usage
2187 * values, and update the mean query length.
2188 *
2189 * Note that the mean query length is almost immediately obsolete, since
2190 * we compute it before not after discarding the least-used entries.
2191 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2192 * making two passes to get a more current result. Likewise, the new
2193 * cur_median_usage includes the entries we're about to zap.
2194 */
2195
2196 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2197
2198 i = 0;
2199 tottextlen = 0;
2200 nvalidtexts = 0;
2201
2203 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2204 {
2205 entries[i++] = entry;
2206 /* "Sticky" entries get a different usage decay rate. */
2207 if (IS_STICKY(entry->counters))
2209 else
2211 /* In the mean length computation, ignore dropped texts. */
2212 if (entry->query_len >= 0)
2213 {
2214 tottextlen += entry->query_len + 1;
2215 nvalidtexts++;
2216 }
2217 }
2218
2219 /* Sort into increasing order by usage */
2220 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2221
2222 /* Record the (approximate) median usage */
2223 if (i > 0)
2224 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2225 /* Record the mean query length */
2226 if (nvalidtexts > 0)
2228 else
2230
2231 /* Now zap an appropriate fraction of lowest-usage entries */
2232 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2233 nvictims = Min(nvictims, i);
2234
2235 for (i = 0; i < nvictims; i++)
2236 {
2237 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2238 }
2239
2240 pfree(entries);
2241
2242 /* Increment the number of times entries are deallocated */
2244 pgss->stats.dealloc += 1;
2246}
2247
2248/*
2249 * Given a query string (not necessarily null-terminated), allocate a new
2250 * entry in the external query text file and store the string there.
2251 *
2252 * If successful, returns true, and stores the new entry's offset in the file
2253 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2254 * number of garbage collections that have occurred so far.
2255 *
2256 * On failure, returns false.
2257 *
2258 * At least a shared lock on pgss->lock must be held by the caller, so as
2259 * to prevent a concurrent garbage collection. Share-lock-holding callers
2260 * should pass a gc_count pointer to obtain the number of garbage collections,
2261 * so that they can recheck the count after obtaining exclusive lock to
2262 * detect whether a garbage collection occurred (and removed this entry).
2263 */
2264static bool
2265qtext_store(const char *query, int query_len,
2266 Size *query_offset, int *gc_count)
2267{
2268 Size off;
2269 int fd;
2270
2271 /*
2272 * We use a spinlock to protect extent/n_writers/gc_count, so that
2273 * multiple processes may execute this function concurrently.
2274 */
2276 off = pgss->extent;
2277 pgss->extent += query_len + 1;
2278 pgss->n_writers++;
2279 if (gc_count)
2280 *gc_count = pgss->gc_count;
2282
2283 *query_offset = off;
2284
2285 /*
2286 * Don't allow the file to grow larger than what qtext_load_file can
2287 * (theoretically) handle. This has been seen to be reachable on 32-bit
2288 * platforms.
2289 */
2290 if (unlikely(query_len >= MaxAllocHugeSize - off))
2291 {
2292 errno = EFBIG; /* not quite right, but it'll do */
2293 fd = -1;
2294 goto error;
2295 }
2296
2297 /* Now write the data into the successfully-reserved part of the file */
2299 if (fd < 0)
2300 goto error;
2301
2302 if (pg_pwrite(fd, query, query_len, off) != query_len)
2303 goto error;
2304 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2305 goto error;
2306
2308
2309 /* Mark our write complete */
2311 pgss->n_writers--;
2313
2314 return true;
2315
2316error:
2317 ereport(LOG,
2319 errmsg("could not write file \"%s\": %m",
2320 PGSS_TEXT_FILE)));
2321
2322 if (fd >= 0)
2324
2325 /* Mark our write complete */
2327 pgss->n_writers--;
2329
2330 return false;
2331}
2332
2333/*
2334 * Read the external query text file into a malloc'd buffer.
2335 *
2336 * Returns NULL (without throwing an error) if unable to read, eg
2337 * file not there or insufficient memory.
2338 *
2339 * On success, the buffer size is also returned into *buffer_size.
2340 *
2341 * This can be called without any lock on pgss->lock, but in that case
2342 * the caller is responsible for verifying that the result is sane.
2343 */
2344static char *
2345qtext_load_file(Size *buffer_size)
2346{
2347 char *buf;
2348 int fd;
2349 struct stat stat;
2350 Size nread;
2351
2353 if (fd < 0)
2354 {
2355 if (errno != ENOENT)
2356 ereport(LOG,
2358 errmsg("could not read file \"%s\": %m",
2359 PGSS_TEXT_FILE)));
2360 return NULL;
2361 }
2362
2363 /* Get file length */
2364 if (fstat(fd, &stat))
2365 {
2366 ereport(LOG,
2368 errmsg("could not stat file \"%s\": %m",
2369 PGSS_TEXT_FILE)));
2371 return NULL;
2372 }
2373
2374 /* Allocate buffer; beware that off_t might be wider than size_t */
2376 buf = (char *) malloc(stat.st_size);
2377 else
2378 buf = NULL;
2379 if (buf == NULL)
2380 {
2381 ereport(LOG,
2383 errmsg("out of memory"),
2384 errdetail("Could not allocate enough memory to read file \"%s\".",
2385 PGSS_TEXT_FILE)));
2387 return NULL;
2388 }
2389
2390 /*
2391 * OK, slurp in the file. Windows fails if we try to read more than
2392 * INT_MAX bytes at once, and other platforms might not like that either,
2393 * so read a very large file in 1GB segments.
2394 */
2395 nread = 0;
2396 while (nread < stat.st_size)
2397 {
2398 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2399
2400 /*
2401 * If we get a short read and errno doesn't get set, the reason is
2402 * probably that garbage collection truncated the file since we did
2403 * the fstat(), so we don't log a complaint --- but we don't return
2404 * the data, either, since it's most likely corrupt due to concurrent
2405 * writes from garbage collection.
2406 */
2407 errno = 0;
2408 if (read(fd, buf + nread, toread) != toread)
2409 {
2410 if (errno)
2411 ereport(LOG,
2413 errmsg("could not read file \"%s\": %m",
2414 PGSS_TEXT_FILE)));
2415 free(buf);
2417 return NULL;
2418 }
2419 nread += toread;
2420 }
2421
2422 if (CloseTransientFile(fd) != 0)
2423 ereport(LOG,
2425 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2426
2427 *buffer_size = nread;
2428 return buf;
2429}
2430
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset and query_len give the position and length (not counting the
 * trailing null) of the wanted text; buffer and buffer_size describe the
 * file image.  buffer may be NULL, meaning the file could not be read.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text and its trailing null must both lie
	 * inside the buffer.  Compare the length against the space remaining
	 * after query_offset instead of forming query_offset + query_len: that
	 * sum is computed in unsigned Size arithmetic and would wrap around for
	 * a garbage offset near the top of the range, letting it pass the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2454
2455/*
2456 * Do we need to garbage-collect the external query text file?
2457 *
2458 * Caller should hold at least a shared lock on pgss->lock.
 *
 * Returns false while pgss->extent is smaller than 512 * pgss_max, or
 * smaller than pgss->mean_query_len * pgss_max * 2; returns true otherwise.
2459 */
2460static bool
2461need_gc_qtexts(void)
2462{
2463 Size extent;
2464
2465 /* Read shared extent pointer */
/*
 * NOTE(review): the embedded listing numbers jump from 2465 to 2467 and from
 * 2467 to 2469, so the statements that surrounded this read appear to be
 * missing from this excerpt -- confirm against the complete file.
 */
2467 extent = pgss->extent;
2469
2470 /*
2471 * Don't proceed if file does not exceed 512 bytes per possible entry.
2472 *
2473 * Here and in the next test, 32-bit machines have overflow hazards if
2474 * pgss_max and/or mean_query_len are large. Force the multiplications
2475 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2476 */
2477 if ((uint64) extent < (uint64) 512 * pgss_max)
2478 return false;
2479
2480 /*
2481 * Don't proceed if file is less than about 50% bloat. Nothing can or
2482 * should be done in the event of unusually large query texts accounting
2483 * for file's large size. We go to the trouble of maintaining the mean
2484 * query length in order to prevent garbage collection from thrashing
2485 * uselessly.
2486 */
/* The first cast makes the whole product be evaluated in uint64. */
2487 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2488 return false;
2489
2490 return true;
2491}
2492
2493/*
2494 * Garbage-collect orphaned query texts in external file.
2495 *
2496 * This won't be called often in the typical case, since it's likely that
2497 * there won't be too much churn, and besides, a similar compaction process
2498 * occurs when serializing to disk at shutdown or as part of resetting.
2499 * Despite this, it seems prudent to plan for the edge case where the file
2500 * becomes unreasonably large, with no other method of compaction likely to
2501 * occur in the foreseeable future.
2502 *
2503 * The caller must hold an exclusive lock on pgss->lock.
2504 *
2505 * At the first sign of trouble we unlink the query text file to get a clean
2506 * slate (although existing statistics are retained), rather than risk
2507 * thrashing by allowing the same problem case to recur indefinitely.
 *
 * Visible flow: re-check need_gc_qtexts(); for every hashtable entry, look
 * up its text with qtext_fetch() and fwrite() it (including the trailing
 * null byte) to qfile, recording the entry's new offset; then ftruncate()
 * the file to the number of bytes written and store the new extent and mean
 * query length in pgss.  Entries whose text cannot be fetched get
 * query_offset = 0 and query_len = -1.  A failed load, open, write or close
 * jumps to gc_fail, which invalidates every entry's text and resets
 * pgss->extent to 0; a failed ftruncate() is only logged.
 *
 * NOTE(review): this excerpt carries embedded listing numbers and skips
 * some of them (e.g. 2513, 2515, 2535, 2545, 2558, 2582, 2624, 2635, 2649,
 * 2659-2660, 2673, 2686).  As a result qbuffer_size and hash_seq are used
 * without a visible declaration, and the statements that load qbuffer, open
 * qfile and start each hash scan are not shown -- confirm against the
 * complete file before relying on this text.
2508 */
2509static void
2510gc_qtexts(void)
2511{
2512 char *qbuffer;
2514 FILE *qfile = NULL;
2516 pgssEntry *entry;
2517 Size extent;
2518 int nentries;
2519
2520 /*
2521 * When called from pgss_store, some other session might have proceeded
2522 * with garbage collection in the no-lock-held interim of lock strength
2523 * escalation. Check once more that this is actually necessary.
2524 */
2525 if (!need_gc_qtexts())
2526 return;
2527
2528 /*
2529 * Load the old texts file. If we fail (out of memory, for instance),
2530 * invalidate query texts. Hopefully this is rare. It might seem better
2531 * to leave things alone on an OOM failure, but the problem is that the
2532 * file is only going to get bigger; hoping for a future non-OOM result is
2533 * risky and can easily lead to complete denial of service.
2534 */
/* NOTE(review): the assignment to qbuffer is not visible here (2535 skipped). */
2536 if (qbuffer == NULL)
2537 goto gc_fail;
2538
2539 /*
2540 * We overwrite the query texts file in place, so as to reduce the risk of
2541 * an out-of-disk-space failure. Since the file is guaranteed not to get
2542 * larger, this should always work on traditional filesystems; though we
2543 * could still lose on copy-on-write filesystems.
2544 */
/* NOTE(review): the statement that opens qfile is not visible (2545 skipped). */
2546 if (qfile == NULL)
2547 {
2548 ereport(LOG,
2550 errmsg("could not write file \"%s\": %m",
2551 PGSS_TEXT_FILE)));
2552 goto gc_fail;
2553 }
2554
/* Running totals: bytes written so far and number of texts kept. */
2555 extent = 0;
2556 nentries = 0;
2557
2559 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2560 {
2561 int query_len = entry->query_len;
2562 char *qry = qtext_fetch(entry->query_offset,
2563 query_len,
2564 qbuffer,
2565 qbuffer_size);
2566
2567 if (qry == NULL)
2568 {
2569 /* Trouble ... drop the text */
2570 entry->query_offset = 0;
2571 entry->query_len = -1;
2572 /* entry will not be counted in mean query length computation */
2573 continue;
2574 }
2575
/* Write query_len + 1 bytes so the text's terminating null is kept. */
2576 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2577 {
2578 ereport(LOG,
2580 errmsg("could not write file \"%s\": %m",
2581 PGSS_TEXT_FILE)));
2583 goto gc_fail;
2584 }
2585
/* The text now starts where the previously written texts ended. */
2586 entry->query_offset = extent;
2587 extent += query_len + 1;
2588 nentries++;
2589 }
2590
2591 /*
2592 * Truncate away any now-unused space. If this fails for some odd reason,
2593 * we log it, but there's no need to fail.
2594 */
2595 if (ftruncate(fileno(qfile), extent) != 0)
2596 ereport(LOG,
2598 errmsg("could not truncate file \"%s\": %m",
2599 PGSS_TEXT_FILE)));
2600
2601 if (FreeFile(qfile))
2602 {
2603 ereport(LOG,
2605 errmsg("could not write file \"%s\": %m",
2606 PGSS_TEXT_FILE)));
/* Clear qfile so the gc_fail cleanup does not call FreeFile() on it again. */
2607 qfile = NULL;
2608 goto gc_fail;
2609 }
2610
2611 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2612 pgss->extent, extent);
2613
2614 /* Reset the shared extent pointer */
2615 pgss->extent = extent;
2616
2617 /*
2618 * Also update the mean query length, to be sure that need_gc_qtexts()
2619 * won't still think we have a problem.
2620 */
2621 if (nentries > 0)
2622 pgss->mean_query_len = extent / nentries;
2623 else
/*
 * NOTE(review): the statement belonging to this else branch is not visible
 * (2624 skipped); as the text stands, the free() below would bind to the
 * else.  Confirm against the complete file.
 */
2625
2626 free(qbuffer);
2627
2628 /*
2629 * OK, count a garbage collection cycle. (Note: even though we have
2630 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2631 * other processes may examine gc_count while holding only the mutex.
2632 * Also, we have to advance the count *after* we've rewritten the file,
2633 * else other processes might not realize they read a stale file.)
2634 */
/* NOTE(review): the statement described above is not visible (2635 skipped). */
2636
2637 return;
2638
2639gc_fail:
2640 /* clean up resources */
2641 if (qfile)
2642 FreeFile(qfile);
2643 free(qbuffer);
2644
2645 /*
2646 * Since the contents of the external file are now uncertain, mark all
2647 * hashtable entries as having invalid texts.
2648 */
2650 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2651 {
2652 entry->query_offset = 0;
2653 entry->query_len = -1;
2654 }
2655
2656 /*
2657 * Destroy the query text file and create a new, empty one
2658 */
/* NOTE(review): the unlink/recreate statements are not visible (2659-2660 skipped). */
2661 if (qfile == NULL)
2662 ereport(LOG,
2664 errmsg("could not recreate file \"%s\": %m",
2665 PGSS_TEXT_FILE)));
2666 else
2667 FreeFile(qfile);
2668
2669 /* Reset the shared extent pointer */
2670 pgss->extent = 0;
2671
2672 /* Reset mean_query_len to match the new state */
2674
2675 /*
2676 * Bump the GC count even though we failed.
2677 *
2678 * This is needed to make concurrent readers of file without any lock on
2679 * pgss->lock notice existence of new version of file. Once readers
2680 * subsequently observe a change in GC count with pgss->lock held, that
2681 * forces a safe reopen of file. Writers also require that we bump here,
2682 * of course. (As required by locking protocol, readers and writers don't
2683 * trust earlier file contents until gc_count is found unchanged after
2684 * pgss->lock acquired in shared or exclusive mode respectively.)
2685 */
2687}
2688
/*
 * SINGLE_ENTRY_RESET(e): reset or remove one hashtable entry.
 *
 * Does nothing when "e" is NULL.  Otherwise, if minmax_only is set, zeroes
 * the entry's min_time/max_time for every kind and stamps
 * minmax_stats_since with stats_reset; if not, removes the entry from
 * pgss_hash and increments num_remove.
 *
 * The macro relies on minmax_only, stats_reset, num_remove and pgss_hash
 * being in scope at the point of use.  "e" is evaluated more than once, so
 * it must not have side effects.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, which keeps it safe inside an
 * unbraced if/else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2707
2708/*
2709 * Reset entries corresponding to parameters passed.
 *
 * userid, dbid and queryid act as filters; a zero value matches any entry.
 * When all three are nonzero, the two possible keys (toplevel false and
 * true) are looked up directly.  When only some are nonzero, or none are,
 * the hashtable is scanned.  Each selected entry is handed to
 * SINGLE_ENTRY_RESET, whose action depends on minmax_only.
 *
 * If the number of removed entries equals the entry count taken before the
 * reset, the global stats (dealloc, stats_reset) are reset as well, qfile is
 * truncated to zero length and pgss->extent is set to 0.
 *
 * Returns the timestamp obtained from GetCurrentTimestamp() before any
 * entry was touched.
 *
 * NOTE(review): this excerpt carries embedded listing numbers and skips
 * some of them (e.g. 2714, 2719, 2724, 2727, 2758, 2772, 2787, 2790, 2796,
 * 2818, 2820-2821).  As a result "key" and "hash_seq" are used without a
 * visible declaration, no locking calls are shown, and the release_lock
 * label targeted below is not visible -- confirm against the complete file.
2710 */
2711static TimestampTz
2712entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2713{
2715 pgssEntry *entry;
2716 FILE *qfile;
2717 int64 num_entries;
2718 int64 num_remove = 0;
2720 TimestampTz stats_reset;
2721
2722 if (!pgss || !pgss_hash)
2723 ereport(ERROR,
2725 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2726
/* Entry count before the reset; compared with num_remove further down. */
2728 num_entries = hash_get_num_entries(pgss_hash);
2729
2730 stats_reset = GetCurrentTimestamp();
2731
2732 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2733 {
2734 /* If all the parameters are available, use the fast path. */
2735 memset(&key, 0, sizeof(pgssHashKey));
2736 key.userid = userid;
2737 key.dbid = dbid;
2738 key.queryid = queryid;
2739
2740 /*
2741 * Reset the entry if it exists, starting with the non-top-level
2742 * entry.
2743 */
2744 key.toplevel = false;
2745 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2746
2747 SINGLE_ENTRY_RESET(entry);
2748
2749 /* Also reset the top-level entry if it exists. */
2750 key.toplevel = true;
2751 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2752
2753 SINGLE_ENTRY_RESET(entry);
2754 }
2755 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2756 {
2757 /* Reset entries corresponding to valid parameters. */
2759 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2760 {
/* A zero filter value places no constraint on that key field. */
2761 if ((!userid || entry->key.userid == userid) &&
2762 (!dbid || entry->key.dbid == dbid) &&
2763 (!queryid || entry->key.queryid == queryid))
2764 {
2765 SINGLE_ENTRY_RESET(entry);
2766 }
2767 }
2768 }
2769 else
2770 {
2771 /* Reset all entries. */
2773 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2774 {
2775 SINGLE_ENTRY_RESET(entry);
2776 }
2777 }
2778
2779 /* All entries are removed? */
/* If not, skip the global-statistics and text-file reset below. */
2780 if (num_entries != num_remove)
2781 goto release_lock;
2782
2783 /*
2784 * Reset global statistics for pg_stat_statements since all entries are
2785 * removed.
2786 */
2788 pgss->stats.dealloc = 0;
2789 pgss->stats.stats_reset = stats_reset;
2791
2792 /*
2793 * Write new empty query file, perhaps even creating a new one to recover
2794 * if the file was missing.
2795 */
/* NOTE(review): the statement that opens qfile is not visible (2796 skipped). */
2797 if (qfile == NULL)
2798 {
2799 ereport(LOG,
2801 errmsg("could not create file \"%s\": %m",
2802 PGSS_TEXT_FILE)));
2803 goto done;
2804 }
2805
2806 /* If ftruncate fails, log it, but it's not a fatal problem */
2807 if (ftruncate(fileno(qfile), 0) != 0)
2808 ereport(LOG,
2810 errmsg("could not truncate file \"%s\": %m",
2811 PGSS_TEXT_FILE)));
2812
2813 FreeFile(qfile);
2814
2815done:
2816 pgss->extent = 0;
2817 /* This counts as a query text garbage collection for our purposes */
2819
2822
2823 return stats_reset;
2824}
2825
2826/*
2827 * Generate a normalized version of the query string that will be used to
2828 * represent all similar queries.
2829 *
2830 * Note that the normalized representation may well vary depending on
2831 * just which "equivalent" query is used to create the hashtable entry.
2832 * We assume this is OK.
2833 *
2834 * If query_loc > 0, then "query" has been advanced by that much compared to
2835 * the original string start, so we need to translate the provided locations
2836 * to compensate. (This lets us avoid re-scanning statements before the one
2837 * of interest, so it's worth doing.)
2838 *
2839 * *query_len_p contains the input string length, and is updated with
2840 * the result string length on exit. The resulting string might be longer
2841 * or shorter depending on what happens with replacement of constants.
2842 *
2843 * Returns a palloc'd string.
 *
 * NOTE(review): this excerpt carries embedded listing numbers and skips
 * some of them (e.g. 2863, 2875, 2904, 2906-2907, 2914, 2917, 2922,
 * 2932-2933, 2935, 2938).  In the visible text norm_query is never
 * allocated, nothing is copied into it, n_quer_loc, last_tok_len and
 * num_constants_replaced are never advanced, and *query_len_p is never
 * written.  Confirm against the complete file before relying on this text.
2844 */
2845static char *
2846generate_normalized_query(JumbleState *jstate, const char *query,
2847 int query_loc, int *query_len_p)
2848{
2849 char *norm_query;
2850 int query_len = *query_len_p;
2851 int norm_query_buflen, /* Space allowed for norm_query */
2852 len_to_wrt, /* Length (in bytes) to write */
2853 quer_loc = 0, /* Source query byte location */
2854 n_quer_loc = 0, /* Normalized query byte location */
2855 last_off = 0, /* Offset from start for previous tok */
2856 last_tok_len = 0; /* Length (in bytes) of that tok */
2857 int num_constants_replaced = 0;
2858
2859 /*
2860 * Get constants' lengths (core system only gives us locations). Note
2861 * this also ensures the items are sorted by location.
2862 */
/* NOTE(review): the call described above is not visible (2863 skipped). */
2864
2865 /*
2866 * Allow for $n symbols to be longer than the constants they replace.
2867 * Constants must take at least one byte in text form, while a $n symbol
2868 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2869 * could refine that limit based on the max value of n for the current
2870 * query, but it hardly seems worth any extra effort to do so.
2871 */
2872 norm_query_buflen = query_len + jstate->clocations_count * 10;
2873
2874 /* Allocate result buffer */
2876
2877 for (int i = 0; i < jstate->clocations_count; i++)
2878 {
2879 int off, /* Offset from start for cur tok */
2880 tok_len; /* Length (in bytes) of that tok */
2881
2882 /*
2883 * If we have an external param at this location, but no lists are
2884 * being squashed across the query, then we skip here; this will make
2885 * us print the characters found in the original query that represent
2886 * the parameter in the next iteration (or after the loop is done),
2887 * which is a bit odd but seems to work okay in most cases.
2888 */
2889 if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
2890 continue;
2891
2892 off = jstate->clocations[i].location;
2893
2894 /* Adjust recorded location if we're dealing with partial string */
2895 off -= query_loc;
2896
2897 tok_len = jstate->clocations[i].length;
2898
2899 if (tok_len < 0)
2900 continue; /* ignore any duplicates */
2901
2902 /* Copy next chunk (what precedes the next constant) */
2903 len_to_wrt = off - last_off;
2905 Assert(len_to_wrt >= 0);
2908
2909 /*
2910 * And insert a param symbol in place of the constant token; and, if
2911 * we have a squashable list, insert a placeholder comment starting
2912 * from the list's second value.
2913 */
/*
 * NOTE(review): the next two lines are the trailing arguments of a call
 * whose opening line is not visible (2914 skipped).
 */
2915 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2916 jstate->clocations[i].squashed ? " /*, ... */" : "");
2918
2919 /* move forward */
2920 quer_loc = off + tok_len;
2921 last_off = off;
2923 }
2924
2925 /*
2926 * We've copied up until the last ignorable constant. Copy over the
2927 * remaining bytes of the original query string.
2928 */
2929 len_to_wrt = query_len - quer_loc;
2930
2931 Assert(len_to_wrt >= 0);
2934
2936 norm_query[n_quer_loc] = '\0';
2937
2939 return norm_query;
2940}
2941
2942/*
2943 * Given a valid SQL string and an array of constant-location records,
2944 * fill in the textual lengths of those constants.
2945 *
2946 * The constants may use any allowed constant syntax, such as float literals,
2947 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2948 * accomplished by using the public API for the core scanner.
2949 *
2950 * It is the caller's job to ensure that the string is a valid SQL statement
2951 * with constants at the indicated locations. Since in practice the string
2952 * has already been parsed, and the locations that the caller provides will
2953 * have originated from within the authoritative parser, this should not be
2954 * a problem.
2955 *
2956 * Multiple constants can have the same location. We reset lengths of those
2957 * past the first to -1 so that they can later be ignored.
2958 *
2959 * If query_loc > 0, then "query" has been advanced by that much compared to
2960 * the original string start, so we need to translate the provided locations
2961 * to compensate. (This lets us avoid re-scanning statements before the one
2962 * of interest, so it's worth doing.)
2963 *
2964 * N.B. There is an assumption that a '-' character at a Const location begins
2965 * a negative numeric constant. This precludes there ever being another
2966 * reason for a constant to start with a '-'.
 *
 * Side effects visible here: jstate->clocations is sorted in place with
 * comp_location when it has more than one element, and the length field of
 * its elements is overwritten.
 *
 * NOTE(review): this excerpt carries embedded listing numbers and skips
 * some of them (2972, 2974-2976, 2991).  As a result locs, yyextra, yylval
 * and yylloc are used without a visible declaration, and the scanner_init()
 * call below lacks its final argument and closing parenthesis -- confirm
 * against the complete file.
2967 */
2968static void
2969fill_in_constant_lengths(JumbleState *jstate, const char *query,
2970 int query_loc)
2971{
2973 core_yyscan_t yyscanner;
2977
2978 /*
2979 * Sort the records by location so that we can process them in order while
2980 * scanning the query text.
2981 */
2982 if (jstate->clocations_count > 1)
2983 qsort(jstate->clocations, jstate->clocations_count,
2984 sizeof(LocationLen), comp_location);
2985 locs = jstate->clocations;
2986
2987 /* initialize the flex scanner --- should match raw_parser() */
2988 yyscanner = scanner_init(query,
2989 &yyextra,
2990 &ScanKeywords,
2992
2993 /* Search for each constant, in sequence */
2994 for (int i = 0; i < jstate->clocations_count; i++)
2995 {
2996 int loc;
2997 int tok;
2998
2999 /* Ignore constants after the first one in the same location */
3000 if (i > 0 && locs[i].location == locs[i - 1].location)
3001 {
3002 locs[i].length = -1;
3003 continue;
3004 }
3005
/* Squashed entries are skipped without touching their length field. */
3006 if (locs[i].squashed)
3007 continue; /* squashable list, ignore */
3008
3009 /* Adjust recorded location if we're dealing with partial string */
3010 loc = locs[i].location - query_loc;
3011 Assert(loc >= 0);
3012
3013 /*
3014 * We have a valid location for a constant that's not a dupe. Lex
3015 * tokens until we find the desired constant.
3016 */
3017 for (;;)
3018 {
3019 tok = core_yylex(&yylval, &yylloc, yyscanner);
3020
3021 /* We should not hit end-of-string, but if we do, behave sanely */
3022 if (tok == 0)
3023 break; /* out of inner for-loop */
3024
3025 /*
3026 * We should find the token position exactly, but if we somehow
3027 * run past it, work with that.
3028 */
3029 if (yylloc >= loc)
3030 {
3031 if (query[loc] == '-')
3032 {
3033 /*
3034 * It's a negative value - this is the one and only case
3035 * where we replace more than a single token.
3036 *
3037 * Do not compensate for the core system's special-case
3038 * adjustment of location to that of the leading '-'
3039 * operator in the event of a negative constant. It is
3040 * also useful for our purposes to start from the minus
3041 * symbol. In this way, queries like "select * from foo
3042 * where bar = 1" and "select * from foo where bar = -2"
3043 * will have identical normalized query strings.
3044 */
/* Lex one more token so the measured text also covers what follows the '-'. */
3045 tok = core_yylex(&yylval, &yylloc, yyscanner);
3046 if (tok == 0)
3047 break; /* out of inner for-loop */
3048 }
3049
3050 /*
3051 * We now rely on the assumption that flex has placed a zero
3052 * byte after the text of the current token in scanbuf.
3053 */
3054 locs[i].length = strlen(yyextra.scanbuf + loc);
3055 break; /* out of inner for-loop */
3056 }
3057 }
3058
3059 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3060 if (tok == 0)
3061 break;
3062 }
3063
3064 scanner_finish(yyscanner);
3065}
3066
3067/*
3068 * comp_location: comparator for qsorting LocationLen structs by location
3069 */
3070static int
3071comp_location(const void *a, const void *b)
3072{
3073 int l = ((const LocationLen *) a)->location;
3074 int r = ((const LocationLen *) b)->location;
3075
3076 return pg_cmp_s32(l, r);
3077}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5284
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
static Datum values[MAXATTR]
Definition bootstrap.c:155
#define CStringGetTextDatum(s)
Definition builtins.h:97
#define INT64CONST(x)
Definition c.h:560
#define Min(x, y)
Definition c.h:997
#define PG_BINARY_R
Definition c.h:1289
#define MAXALIGN(LEN)
Definition c.h:826
#define Max(x, y)
Definition c.h:991
#define Assert(condition)
Definition c.h:873
int64_t int64
Definition c.h:543
#define PG_BINARY
Definition c.h:1287
#define UINT64_FORMAT
Definition c.h:565
int32_t int32
Definition c.h:542
uint64_t uint64
Definition c.h:547
#define unlikely(x)
Definition c.h:412
uint32_t uint32
Definition c.h:546
#define PG_BINARY_W
Definition c.h:1290
size_t Size
Definition c.h:619
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
Size hash_estimate_size(int64 num_entries, Size entrysize)
Definition dynahash.c:783
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1509
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1336
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
int errcode_for_file_access(void)
Definition elog.c:886
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define LOG
Definition elog.h:31
#define PG_TRY(...)
Definition elog.h:372
#define PG_END_TRY(...)
Definition elog.h:397
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define PG_FINALLY(...)
Definition elog.h:389
#define ereport(elevel,...)
Definition elog.h:150
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:71
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:70
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:68
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:141
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:69
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:307
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:475
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:415
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:782
int CloseTransientFile(int fd)
Definition fd.c:2854
int FreeFile(FILE *file)
Definition fd.c:2826
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2627
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2677
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:120
Oid MyDatabaseId
Definition globals.c:94
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5114
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5011
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5148
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5035
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1117
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:193
void InstrEndLoop(Instrumentation *instr)
Definition instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition instrument.c:31
WalUsage pgWalUsage
Definition instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:285
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:245
@ INSTRUMENT_ALL
Definition instrument.h:67
static int pg_cmp_s32(int32 a, int32 b)
Definition int.h:713
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
shmem_startup_hook_type shmem_startup_hook
Definition ipci.c:59
void RequestAddinShmemSpace(Size size)
Definition ipci.c:75
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
PGDLLIMPORT const ScanKeywordList ScanKeywords
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition lwlock.c:566
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition lwlock.c:649
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
int GetDatabaseEncoding(void)
Definition mbutils.c:1264
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:679
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
#define MaxAllocHugeSize
Definition memutils.h:45
Oid GetUserId(void)
Definition miscinit.c:469
shmem_request_hook_type shmem_request_hook
Definition miscinit.c:1789
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1786
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:68
void * arg
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:281
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:333
PlannedStmtOrigin
Definition plannodes.h:38
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:39
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:43
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:42
#define sprintf
Definition port.h:262
#define pg_pwrite
Definition port.h:248
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum Int64GetDatum(int64 X)
Definition postgres.h:423
#define Int64GetDatumFast(X)
Definition postgres.h:535
#define Float8GetDatumFast(X)
Definition postgres.h:537
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:380
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition scan.l:1224
#define yylloc
Definition scan.l:1097
void scanner_finish(core_yyscan_t yyscanner)
Definition scan.l:1264
#define yyextra
Definition scan.l:1093
const uint16 ScanKeywordTokens[]
Definition scan.l:80
void * core_yyscan_t
Definition scanner.h:118
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition sdir.h:25
Size add_size(Size s1, Size s2)
Definition shmem.c:482
HTAB * ShmemInitHash(const char *name, int64 init_size, int64 max_size, HASHCTL *infoP, int hash_flags)
Definition shmem.c:323
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:378
#define free(a)
#define malloc(a)
#define SpinLockInit(lock)
Definition spin.h:57
#define SpinLockRelease(lock)
Definition spin.h:61
#define SpinLockAcquire(lock)
Definition spin.h:59
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
struct JitContext * es_jit
Definition execnodes.h:766
uint64 es_total_processed
Definition execnodes.h:718
MemoryContext es_query_cxt
Definition execnodes.h:712
Size keysize
Definition hsearch.h:75
Size entrysize
Definition hsearch.h:76
WalUsage walusage
Definition instrument.h:94
instr_time total
Definition instrument.h:87
BufferUsage bufusage
Definition instrument.h:93
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:195
int64 queryId
Definition plannodes.h:71
ParseLoc stmt_len
Definition plannodes.h:165
PlannedStmtOrigin planOrigin
Definition plannodes.h:77
ParseLoc stmt_location
Definition plannodes.h:163
Node * utilityStmt
Definition plannodes.h:150
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:48
PlannedStmt * plannedstmt
Definition execdesc.h:37
struct Instrumentation * totaltime
Definition execdesc.h:55
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:255
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:545
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:71
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
#define fstat
Definition win32_port.h:73

◆ PGSS_NUMKIND

#define PGSS_NUMKIND   (PGSS_EXEC + 1)

Definition at line 134 of file pg_stat_statements.c.

◆ PGSS_TEXT_FILE

#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"

Definition at line 86 of file pg_stat_statements.c.

◆ record_gc_qtexts

#define record_gc_qtexts ( )
Value:

Definition at line 310 of file pg_stat_statements.c.

311 do { \
312 SpinLockAcquire(&pgss->mutex); \
313 pgss->gc_count++; \
314 SpinLockRelease(&pgss->mutex); \
315 } while(0)

◆ SINGLE_ENTRY_RESET

#define SINGLE_ENTRY_RESET (   e)
Value:
if (e) { \
if (minmax_only) { \
/* When requested reset only min/max statistics of an entry */ \
for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
{ \
e->counters.max_time[kind] = 0; \
e->counters.min_time[kind] = 0; \
} \
e->minmax_stats_since = stats_reset; \
} \
else \
{ \
/* Remove the key otherwise */ \
hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
num_remove++; \
} \
}

Definition at line 2690 of file pg_stat_statements.c.

2691if (e) { \
2692 if (minmax_only) { \
2693 /* When requested reset only min/max statistics of an entry */ \
2694 for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
2695 { \
2696 e->counters.max_time[kind] = 0; \
2697 e->counters.min_time[kind] = 0; \
2698 } \
2699 e->minmax_stats_since = stats_reset; \
2700 } \
2701 else \
2702 { \
2703 /* Remove the key otherwise */ \
2704 hash_search(pgss_hash, &e->key, HASH_REMOVE, NULL); \
2705 num_remove++; \
2706 } \
2707}

◆ STICKY_DECREASE_FACTOR

#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */

Definition at line 100 of file pg_stat_statements.c.

◆ USAGE_DEALLOC_PERCENT

#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */

Definition at line 101 of file pg_stat_statements.c.

◆ USAGE_DECREASE_FACTOR

#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */

Definition at line 99 of file pg_stat_statements.c.

◆ USAGE_EXEC

#define USAGE_EXEC (   duration)    (1.0)

Definition at line 95 of file pg_stat_statements.c.

◆ USAGE_INIT

#define USAGE_INIT   (1.0) /* including initial planning */

Definition at line 96 of file pg_stat_statements.c.

Typedef Documentation

◆ Counters

◆ pgssEntry

◆ pgssGlobalStats

◆ pgssHashKey

◆ pgssSharedState

◆ pgssStoreKind

◆ pgssVersion

Enumeration Type Documentation

◆ pgssStoreKind

Enumerator
PGSS_INVALID 
PGSS_PLAN 
PGSS_EXEC 

Definition at line 121 of file pg_stat_statements.c.

122{
123 PGSS_INVALID = -1,
124
125 /*
126 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
127 * reference the underlying values in the arrays in the Counters struct,
128 * and this order is required in pg_stat_statements_internal().
129 */
130 PGSS_PLAN = 0,
131 PGSS_EXEC,
132} pgssStoreKind;

◆ PGSSTrackLevel

Enumerator
PGSS_TRACK_NONE 
PGSS_TRACK_TOP 
PGSS_TRACK_ALL 

Definition at line 283 of file pg_stat_statements.c.

284{
285 PGSS_TRACK_NONE, /* track no statements */
286 PGSS_TRACK_TOP, /* only top level statements */
287 PGSS_TRACK_ALL, /* all statements, including nested ones */
288} PGSSTrackLevel;

◆ pgssVersion

Enumerator
PGSS_V1_0 
PGSS_V1_1 
PGSS_V1_2 
PGSS_V1_3 
PGSS_V1_8 
PGSS_V1_9 
PGSS_V1_10 
PGSS_V1_11 
PGSS_V1_12 
PGSS_V1_13 

Definition at line 107 of file pg_stat_statements.c.

108{
109 PGSS_V1_0 = 0,
110 PGSS_V1_1,
111 PGSS_V1_2,
112 PGSS_V1_3,
113 PGSS_V1_8,
114 PGSS_V1_9,
115 PGSS_V1_10,
116 PGSS_V1_11,
117 PGSS_V1_12,
118 PGSS_V1_13,
119} pgssVersion;

Function Documentation

◆ _PG_init()

void _PG_init ( void  )

Definition at line 391 of file pg_stat_statements.c.

392{
393 /*
394 * In order to create our shared memory area, we have to be loaded via
395 * shared_preload_libraries. If not, fall out without hooking into any of
396 * the main system. (We don't throw error here because it seems useful to
397 * allow the pg_stat_statements functions to be created even when the
398 * module isn't active. The functions must protect themselves against
399 * being called then, however.)
400 */
401 if (!process_shared_preload_libraries_in_progress)
402 return;
403
404 /*
405 * Inform the postmaster that we want to enable query_id calculation if
406 * compute_query_id is set to auto.
407 */
408 EnableQueryId();
409
410 /*
411 * Define (or redefine) custom GUC variables.
412 */
413 DefineCustomIntVariable("pg_stat_statements.max",
414 "Sets the maximum number of statements tracked by pg_stat_statements.",
415 NULL,
416 &pgss_max,
417 5000,
418 100,
419 INT_MAX / 2,
421 0,
422 NULL,
423 NULL,
424 NULL);
425
426 DefineCustomEnumVariable("pg_stat_statements.track",
427 "Selects which statements are tracked by pg_stat_statements.",
428 NULL,
429 &pgss_track,
432 PGC_SUSET,
433 0,
434 NULL,
435 NULL,
436 NULL);
437
438 DefineCustomBoolVariable("pg_stat_statements.track_utility",
439 "Selects whether utility commands are tracked by pg_stat_statements.",
440 NULL,
442 true,
443 PGC_SUSET,
444 0,
445 NULL,
446 NULL,
447 NULL);
448
449 DefineCustomBoolVariable("pg_stat_statements.track_planning",
450 "Selects whether planning duration is tracked by pg_stat_statements.",
451 NULL,
453 false,
454 PGC_SUSET,
455 0,
456 NULL,
457 NULL,
458 NULL);
459
460 DefineCustomBoolVariable("pg_stat_statements.save",
461 "Save pg_stat_statements statistics across server shutdowns.",
462 NULL,
463 &pgss_save,
464 true,
466 0,
467 NULL,
468 NULL,
469 NULL);
470
471 MarkGUCPrefixReserved("pg_stat_statements");
472
473 /*
474 * Install hooks.
475 */
494}

References DefineCustomBoolVariable(), DefineCustomEnumVariable(), DefineCustomIntVariable(), EnableQueryId(), ExecutorEnd_hook, ExecutorFinish_hook, ExecutorRun_hook, ExecutorStart_hook, fb(), MarkGUCPrefixReserved(), PGC_POSTMASTER, PGC_SIGHUP, PGC_SUSET, pgss_ExecutorEnd(), pgss_ExecutorFinish(), pgss_ExecutorRun(), pgss_ExecutorStart(), pgss_max, pgss_planner(), pgss_post_parse_analyze(), pgss_ProcessUtility(), pgss_save, pgss_shmem_request(), pgss_shmem_startup(), pgss_track, pgss_track_planning, PGSS_TRACK_TOP, pgss_track_utility, planner_hook, post_parse_analyze_hook, prev_ExecutorEnd, prev_ExecutorFinish, prev_ExecutorRun, prev_ExecutorStart, prev_planner_hook, prev_post_parse_analyze_hook, prev_ProcessUtility, prev_shmem_request_hook, prev_shmem_startup_hook, process_shared_preload_libraries_in_progress, ProcessUtility_hook, shmem_request_hook, shmem_startup_hook, and track_options.

◆ comp_location()

static int comp_location ( const void *a,
const void *b 
)
static

Definition at line 3072 of file pg_stat_statements.c.

3073{
3074 int l = ((const LocationLen *) a)->location;
3075 int r = ((const LocationLen *) b)->location;
3076
3077 return pg_cmp_s32(l, r);
3078}

References a, b, and pg_cmp_s32().

Referenced by fill_in_constant_lengths().

◆ entry_alloc()

static pgssEntry * entry_alloc ( pgssHashKey *key,
Size  query_offset,
int  query_len,
int  encoding,
bool  sticky 
)
static

Definition at line 2117 of file pg_stat_statements.c.

2119{
2120 pgssEntry *entry;
2121 bool found;
2122
2123 /* Make space if needed */
2124 while (hash_get_num_entries(pgss_hash) >= pgss_max)
2125 entry_dealloc();
2126
2127 /* Find or create an entry with desired hash code */
2128 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2129
2130 if (!found)
2131 {
2132 /* New entry, initialize it */
2133
2134 /* reset the statistics */
2135 memset(&entry->counters, 0, sizeof(Counters));
2136 /* set the appropriate initial usage count */
2138 /* re-initialize the mutex each time ... we assume no one using it */
2139 SpinLockInit(&entry->mutex);
2140 /* ... and don't forget the query text metadata */
2141 Assert(query_len >= 0);
2142 entry->query_offset = query_offset;
2143 entry->query_len = query_len;
2144 entry->encoding = encoding;
2146 entry->minmax_stats_since = entry->stats_since;
2147 }
2148
2149 return entry;
2150}

References Assert, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssEntry::encoding, encoding, entry_dealloc(), fb(), GetCurrentTimestamp(), HASH_ENTER, hash_get_num_entries(), hash_search(), pgssEntry::minmax_stats_since, pgssEntry::mutex, pgss, pgss_hash, pgss_max, pgssEntry::query_len, pgssEntry::query_offset, SpinLockInit, pgssEntry::stats_since, Counters::usage, and USAGE_INIT.

Referenced by pgss_shmem_startup(), and pgss_store().

◆ entry_cmp()

static int entry_cmp ( const void *lhs,
const void *rhs 
)
static

Definition at line 2156 of file pg_stat_statements.c.

2157{
2158 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2159 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2160
2161 if (l_usage < r_usage)
2162 return -1;
2163 else if (l_usage > r_usage)
2164 return +1;
2165 else
2166 return 0;
2167}

References pgssEntry::counters, fb(), and Counters::usage.

Referenced by entry_dealloc().

◆ entry_dealloc()

static void entry_dealloc ( void  )
static

Definition at line 2175 of file pg_stat_statements.c.

2176{
2178 pgssEntry **entries;
2179 pgssEntry *entry;
2180 int nvictims;
2181 int i;
2183 int nvalidtexts;
2184
2185 /*
2186 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2187 * While we're scanning the table, apply the decay factor to the usage
2188 * values, and update the mean query length.
2189 *
2190 * Note that the mean query length is almost immediately obsolete, since
2191 * we compute it before not after discarding the least-used entries.
2192 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2193 * making two passes to get a more current result. Likewise, the new
2194 * cur_median_usage includes the entries we're about to zap.
2195 */
2196
2197 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2198
2199 i = 0;
2200 tottextlen = 0;
2201 nvalidtexts = 0;
2202
2204 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2205 {
2206 entries[i++] = entry;
2207 /* "Sticky" entries get a different usage decay rate. */
2208 if (IS_STICKY(entry->counters))
2210 else
2212 /* In the mean length computation, ignore dropped texts. */
2213 if (entry->query_len >= 0)
2214 {
2215 tottextlen += entry->query_len + 1;
2216 nvalidtexts++;
2217 }
2218 }
2219
2220 /* Sort into increasing order by usage */
2221 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2222
2223 /* Record the (approximate) median usage */
2224 if (i > 0)
2225 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2226 /* Record the mean query length */
2227 if (nvalidtexts > 0)
2229 else
2231
2232 /* Now zap an appropriate fraction of lowest-usage entries */
2233 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2234 nvictims = Min(nvictims, i);
2235
2236 for (i = 0; i < nvictims; i++)
2237 {
2238 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2239 }
2240
2241 pfree(entries);
2242
2243 /* Increment the number of times entries are deallocated */
2245 pgss->stats.dealloc += 1;
2247}

References ASSUMED_LENGTH_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_cmp(), fb(), hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), i, IS_STICKY, Max, pgssSharedState::mean_query_len, Min, pgssSharedState::mutex, palloc(), pfree(), pgss, pgss_hash, qsort, pgssEntry::query_len, SpinLockAcquire, SpinLockRelease, pgssSharedState::stats, STICKY_DECREASE_FACTOR, Counters::usage, USAGE_DEALLOC_PERCENT, and USAGE_DECREASE_FACTOR.

Referenced by entry_alloc().

◆ entry_reset()

static TimestampTz entry_reset ( Oid  userid,
Oid  dbid,
int64  queryid,
bool  minmax_only 
)
static

Definition at line 2713 of file pg_stat_statements.c.

2714{
2716 pgssEntry *entry;
2717 FILE *qfile;
2718 int64 num_entries;
2719 int64 num_remove = 0;
2721 TimestampTz stats_reset;
2722
2723 if (!pgss || !pgss_hash)
2724 ereport(ERROR,
2726 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2727
2728 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
2729 num_entries = hash_get_num_entries(pgss_hash);
2730
2731 stats_reset = GetCurrentTimestamp();
2732
2733 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2734 {
2735 /* If all the parameters are available, use the fast path. */
2736 memset(&key, 0, sizeof(pgssHashKey));
2737 key.userid = userid;
2738 key.dbid = dbid;
2739 key.queryid = queryid;
2740
2741 /*
2742 * Reset the entry if it exists, starting with the non-top-level
2743 * entry.
2744 */
2745 key.toplevel = false;
2746 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2747
2748 SINGLE_ENTRY_RESET(entry);
2749
2750 /* Also reset the top-level entry if it exists. */
2751 key.toplevel = true;
2752 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2753
2754 SINGLE_ENTRY_RESET(entry);
2755 }
2756 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2757 {
2758 /* Reset entries corresponding to valid parameters. */
2760 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2761 {
2762 if ((!userid || entry->key.userid == userid) &&
2763 (!dbid || entry->key.dbid == dbid) &&
2764 (!queryid || entry->key.queryid == queryid))
2765 {
2766 SINGLE_ENTRY_RESET(entry);
2767 }
2768 }
2769 }
2770 else
2771 {
2772 /* Reset all entries. */
2774 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2775 {
2776 SINGLE_ENTRY_RESET(entry);
2777 }
2778 }
2779
2780 /* All entries are removed? */
2781 if (num_entries != num_remove)
2782 goto release_lock;
2783
2784 /*
2785 * Reset global statistics for pg_stat_statements since all entries are
2786 * removed.
2787 */
2789 pgss->stats.dealloc = 0;
2790 pgss->stats.stats_reset = stats_reset;
2792
2793 /*
2794 * Write new empty query file, perhaps even creating a new one to recover
2795 * if the file was missing.
2796 */
2798 if (qfile == NULL)
2799 {
2800 ereport(LOG,
2802 errmsg("could not create file \"%s\": %m",
2803 PGSS_TEXT_FILE)));
2804 goto done;
2805 }
2806
2807 /* If ftruncate fails, log it, but it's not a fatal problem */
2808 if (ftruncate(fileno(qfile), 0) != 0)
2809 ereport(LOG,
2811 errmsg("could not truncate file \"%s\": %m",
2812 PGSS_TEXT_FILE)));
2813
2814 FreeFile(qfile);
2815
2816done:
2817 pgss->extent = 0;
2818 /* This counts as a query text garbage collection for our purposes */
2819 record_gc_qtexts();
2820
2821release_lock:
2822 LWLockRelease(pgss->lock);
2823
2824 return stats_reset;
2825}

References AllocateFile(), pgssHashKey::dbid, pgssGlobalStats::dealloc, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, pgssSharedState::extent, fb(), FreeFile(), GetCurrentTimestamp(), HASH_FIND, hash_get_num_entries(), hash_search(), hash_seq_init(), hash_seq_search(), INT64CONST, pgssEntry::key, pgssSharedState::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pgssSharedState::mutex, PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, pgssHashKey::queryid, record_gc_qtexts, SINGLE_ENTRY_RESET, SpinLockAcquire, SpinLockRelease, pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssHashKey::userid.

Referenced by pg_stat_statements_reset(), pg_stat_statements_reset_1_11(), and pg_stat_statements_reset_1_7().

◆ fill_in_constant_lengths()

static void fill_in_constant_lengths ( JumbleState *jstate,
const char *query,
int  query_loc 
)
static

Definition at line 2970 of file pg_stat_statements.c.

2972{
2974 core_yyscan_t yyscanner;
2978
2979 /*
2980 * Sort the records by location so that we can process them in order while
2981 * scanning the query text.
2982 */
2983 if (jstate->clocations_count > 1)
2984 qsort(jstate->clocations, jstate->clocations_count,
2985 sizeof(LocationLen), comp_location);
2986 locs = jstate->clocations;
2987
2988 /* initialize the flex scanner --- should match raw_parser() */
2989 yyscanner = scanner_init(query,
2990 &yyextra,
2991 &ScanKeywords,
2993
2994 /* Search for each constant, in sequence */
2995 for (int i = 0; i < jstate->clocations_count; i++)
2996 {
2997 int loc;
2998 int tok;
2999
3000 /* Ignore constants after the first one in the same location */
3001 if (i > 0 && locs[i].location == locs[i - 1].location)
3002 {
3003 locs[i].length = -1;
3004 continue;
3005 }
3006
3007 if (locs[i].squashed)
3008 continue; /* squashable list, ignore */
3009
3010 /* Adjust recorded location if we're dealing with partial string */
3011 loc = locs[i].location - query_loc;
3012 Assert(loc >= 0);
3013
3014 /*
3015 * We have a valid location for a constant that's not a dupe. Lex
3016 * tokens until we find the desired constant.
3017 */
3018 for (;;)
3019 {
3020 tok = core_yylex(&yylval, &yylloc, yyscanner);
3021
3022 /* We should not hit end-of-string, but if we do, behave sanely */
3023 if (tok == 0)
3024 break; /* out of inner for-loop */
3025
3026 /*
3027 * We should find the token position exactly, but if we somehow
3028 * run past it, work with that.
3029 */
3030 if (yylloc >= loc)
3031 {
3032 if (query[loc] == '-')
3033 {
3034 /*
3035 * It's a negative value - this is the one and only case
3036 * where we replace more than a single token.
3037 *
3038 * Do not compensate for the core system's special-case
3039 * adjustment of location to that of the leading '-'
3040 * operator in the event of a negative constant. It is
3041 * also useful for our purposes to start from the minus
3042 * symbol. In this way, queries like "select * from foo
3043 * where bar = 1" and "select * from foo where bar = -2"
3044 * will have identical normalized query strings.
3045 */
3046 tok = core_yylex(&yylval, &yylloc, yyscanner);
3047 if (tok == 0)
3048 break; /* out of inner for-loop */
3049 }
3050
3051 /*
3052 * We now rely on the assumption that flex has placed a zero
3053 * byte after the text of the current token in scanbuf.
3054 */
3055 locs[i].length = strlen(yyextra.scanbuf + loc);
3056 break; /* out of inner for-loop */
3057 }
3058 }
3059
3060 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3061 if (tok == 0)
3062 break;
3063 }
3064
3065 scanner_finish(yyscanner);
3066}

References Assert, comp_location(), core_yylex(), fb(), i, qsort, ScanKeywords, ScanKeywordTokens, scanner_finish(), scanner_init(), yyextra, and yylloc.

Referenced by generate_normalized_query().

◆ gc_qtexts()

static void gc_qtexts ( void  )
static

Definition at line 2511 of file pg_stat_statements.c.

2512{
2513 char *qbuffer;
2515 FILE *qfile = NULL;
2517 pgssEntry *entry;
2518 Size extent;
2519 int nentries;
2520
2521 /*
2522 * When called from pgss_store, some other session might have proceeded
2523 * with garbage collection in the no-lock-held interim of lock strength
2524 * escalation. Check once more that this is actually necessary.
2525 */
2526 if (!need_gc_qtexts())
2527 return;
2528
2529 /*
2530 * Load the old texts file. If we fail (out of memory, for instance),
2531 * invalidate query texts. Hopefully this is rare. It might seem better
2532 * to leave things alone on an OOM failure, but the problem is that the
2533 * file is only going to get bigger; hoping for a future non-OOM result is
2534 * risky and can easily lead to complete denial of service.
2535 */
2537 if (qbuffer == NULL)
2538 goto gc_fail;
2539
2540 /*
2541 * We overwrite the query texts file in place, so as to reduce the risk of
2542 * an out-of-disk-space failure. Since the file is guaranteed not to get
2543 * larger, this should always work on traditional filesystems; though we
2544 * could still lose on copy-on-write filesystems.
2545 */
2547 if (qfile == NULL)
2548 {
2549 ereport(LOG,
2551 errmsg("could not write file \"%s\": %m",
2552 PGSS_TEXT_FILE)));
2553 goto gc_fail;
2554 }
2555
2556 extent = 0;
2557 nentries = 0;
2558
2560 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2561 {
2562 int query_len = entry->query_len;
2563 char *qry = qtext_fetch(entry->query_offset,
2564 query_len,
2565 qbuffer,
2566 qbuffer_size);
2567
2568 if (qry == NULL)
2569 {
2570 /* Trouble ... drop the text */
2571 entry->query_offset = 0;
2572 entry->query_len = -1;
2573 /* entry will not be counted in mean query length computation */
2574 continue;
2575 }
2576
2577 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2578 {
2579 ereport(LOG,
2581 errmsg("could not write file \"%s\": %m",
2582 PGSS_TEXT_FILE)));
2584 goto gc_fail;
2585 }
2586
2587 entry->query_offset = extent;
2588 extent += query_len + 1;
2589 nentries++;
2590 }
2591
2592 /*
2593 * Truncate away any now-unused space. If this fails for some odd reason,
2594 * we log it, but there's no need to fail.
2595 */
2596 if (ftruncate(fileno(qfile), extent) != 0)
2597 ereport(LOG,
2599 errmsg("could not truncate file \"%s\": %m",
2600 PGSS_TEXT_FILE)));
2601
2602 if (FreeFile(qfile))
2603 {
2604 ereport(LOG,
2606 errmsg("could not write file \"%s\": %m",
2607 PGSS_TEXT_FILE)));
2608 qfile = NULL;
2609 goto gc_fail;
2610 }
2611
2612 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2613 pgss->extent, extent);
2614
2615 /* Reset the shared extent pointer */
2616 pgss->extent = extent;
2617
2618 /*
2619 * Also update the mean query length, to be sure that need_gc_qtexts()
2620 * won't still think we have a problem.
2621 */
2622 if (nentries > 0)
2623 pgss->mean_query_len = extent / nentries;
2624 else
2626
2627 free(qbuffer);
2628
2629 /*
2630 * OK, count a garbage collection cycle. (Note: even though we have
2631 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2632 * other processes may examine gc_count while holding only the mutex.
2633 * Also, we have to advance the count *after* we've rewritten the file,
2634 * else other processes might not realize they read a stale file.)
2635 */
2637
2638 return;
2639
2640gc_fail:
2641 /* clean up resources */
2642 if (qfile)
2643 FreeFile(qfile);
2644 free(qbuffer);
2645
2646 /*
2647 * Since the contents of the external file are now uncertain, mark all
2648 * hashtable entries as having invalid texts.
2649 */
2651 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2652 {
2653 entry->query_offset = 0;
2654 entry->query_len = -1;
2655 }
2656
2657 /*
2658 * Destroy the query text file and create a new, empty one
2659 */
2662 if (qfile == NULL)
2663 ereport(LOG,
2665 errmsg("could not recreate file \"%s\": %m",
2666 PGSS_TEXT_FILE)));
2667 else
2668 FreeFile(qfile);
2669
2670 /* Reset the shared extent pointer */
2671 pgss->extent = 0;
2672
2673 /* Reset mean_query_len to match the new state */
2675
2676 /*
2677 * Bump the GC count even though we failed.
2678 *
2679 * This is needed to make concurrent readers of file without any lock on
2680 * pgss->lock notice existence of new version of file. Once readers
2681 * subsequently observe a change in GC count with pgss->lock held, that
2682 * forces a safe reopen of file. Writers also require that we bump here,
2683 * of course. (As required by locking protocol, readers and writers don't
2684 * trust earlier file contents until gc_count is found unchanged after
2685 * pgss->lock acquired in shared or exclusive mode respectively.)
2686 */
2688}

References AllocateFile(), ASSUMED_LENGTH_INIT, DEBUG1, elog, ereport, errcode_for_file_access(), errmsg(), pgssSharedState::extent, fb(), free, FreeFile(), hash_seq_init(), hash_seq_search(), hash_seq_term(), LOG, pgssSharedState::mean_query_len, need_gc_qtexts(), PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and record_gc_qtexts.

Referenced by pgss_store().

◆ generate_normalized_query()

static char * generate_normalized_query ( JumbleState *jstate,
const char *query,
int  query_loc,
int *query_len_p 
)
static

Definition at line 2847 of file pg_stat_statements.c.

2849{
2850 char *norm_query;
2851 int query_len = *query_len_p;
2852 int norm_query_buflen, /* Space allowed for norm_query */
2853 len_to_wrt, /* Length (in bytes) to write */
2854 quer_loc = 0, /* Source query byte location */
2855 n_quer_loc = 0, /* Normalized query byte location */
2856 last_off = 0, /* Offset from start for previous tok */
2857 last_tok_len = 0; /* Length (in bytes) of that tok */
2858 int num_constants_replaced = 0;
2859
2860 /*
2861 * Get constants' lengths (core system only gives us locations). Note
2862 * this also ensures the items are sorted by location.
2863 */
2865
2866 /*
2867 * Allow for $n symbols to be longer than the constants they replace.
2868 * Constants must take at least one byte in text form, while a $n symbol
2869 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2870 * could refine that limit based on the max value of n for the current
2871 * query, but it hardly seems worth any extra effort to do so.
2872 */
2873 norm_query_buflen = query_len + jstate->clocations_count * 10;
2874
2875 /* Allocate result buffer */
2877
2878 for (int i = 0; i < jstate->clocations_count; i++)
2879 {
2880 int off, /* Offset from start for cur tok */
2881 tok_len; /* Length (in bytes) of that tok */
2882
2883 /*
2884 * If we have an external param at this location, but no lists are
2885 * being squashed across the query, then we skip here; this will make
2886 * us print the characters found in the original query that represent
2887 * the parameter in the next iteration (or after the loop is done),
2888 * which is a bit odd but seems to work okay in most cases.
2889 */
2890 if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
2891 continue;
2892
2893 off = jstate->clocations[i].location;
2894
2895 /* Adjust recorded location if we're dealing with partial string */
2896 off -= query_loc;
2897
2898 tok_len = jstate->clocations[i].length;
2899
2900 if (tok_len < 0)
2901 continue; /* ignore any duplicates */
2902
2903 /* Copy next chunk (what precedes the next constant) */
2904 len_to_wrt = off - last_off;
2906 Assert(len_to_wrt >= 0);
2909
2910 /*
2911 * And insert a param symbol in place of the constant token; and, if
2912 * we have a squashable list, insert a placeholder comment starting
2913 * from the list's second value.
2914 */
2916 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2917 jstate->clocations[i].squashed ? " /*, ... */" : "");
2919
2920 /* move forward */
2921 quer_loc = off + tok_len;
2922 last_off = off;
2924 }
2925
2926 /*
2927 * We've copied up until the last ignorable constant. Copy over the
2928 * remaining bytes of the original query string.
2929 */
2930 len_to_wrt = query_len - quer_loc;
2931
2932 Assert(len_to_wrt >= 0);
2935
2937 norm_query[n_quer_loc] = '\0';
2938
2940 return norm_query;
2941}

References Assert, fb(), fill_in_constant_lengths(), i, palloc(), and sprintf.

Referenced by pgss_store().

◆ need_gc_qtexts()

static bool need_gc_qtexts ( void  )
static

Definition at line 2462 of file pg_stat_statements.c.

2463{
2464 Size extent;
2465
2466 /* Read shared extent pointer */
2467 SpinLockAcquire(&pgss->mutex);
2468 extent = pgss->extent;
2469 SpinLockRelease(&pgss->mutex);
2470
2471 /*
2472 * Don't proceed if file does not exceed 512 bytes per possible entry.
2473 *
2474 * Here and in the next test, 32-bit machines have overflow hazards if
2475 * pgss_max and/or mean_query_len are large. Force the multiplications
2476 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2477 */
2478 if ((uint64) extent < (uint64) 512 * pgss_max)
2479 return false;
2480
2481 /*
2482 * Don't proceed if file is less than about 50% bloat. Nothing can or
2483 * should be done in the event of unusually large query texts accounting
2484 * for file's large size. We go to the trouble of maintaining the mean
2485 * query length in order to prevent garbage collection from thrashing
2486 * uselessly.
2487 */
2488 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2489 return false;
2490
2491 return true;
2492}

References pgssSharedState::extent, pgssSharedState::mean_query_len, pgssSharedState::mutex, pgss, pgss_max, SpinLockAcquire, and SpinLockRelease.

Referenced by gc_qtexts(), and pgss_store().

◆ PG_FUNCTION_INFO_V1() [1/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements  )

◆ PG_FUNCTION_INFO_V1() [2/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_10  )

◆ PG_FUNCTION_INFO_V1() [3/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_11  )

◆ PG_FUNCTION_INFO_V1() [4/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_12  )

◆ PG_FUNCTION_INFO_V1() [5/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_13  )

◆ PG_FUNCTION_INFO_V1() [6/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_2  )

◆ PG_FUNCTION_INFO_V1() [7/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_3  )

◆ PG_FUNCTION_INFO_V1() [8/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_8  )

◆ PG_FUNCTION_INFO_V1() [9/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_9  )

◆ PG_FUNCTION_INFO_V1() [10/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_info  )

◆ PG_FUNCTION_INFO_V1() [11/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset  )

◆ PG_FUNCTION_INFO_V1() [12/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_11  )

◆ PG_FUNCTION_INFO_V1() [13/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_7  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "pg_stat_statements",
version = PG_VERSION 
)

◆ pg_stat_statements()

Datum pg_stat_statements ( PG_FUNCTION_ARGS  )

Definition at line 1681 of file pg_stat_statements.c.

1682{
1683 /* If it's really API 1.1, we'll figure that out below */
1684 pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1685
1686 return (Datum) 0;
1687}

References pg_stat_statements_internal(), and PGSS_V1_0.

◆ pg_stat_statements_1_10()

Datum pg_stat_statements_1_10 ( PG_FUNCTION_ARGS  )

Definition at line 1627 of file pg_stat_statements.c.

1628{
1629 bool showtext = PG_GETARG_BOOL(0);
1630
1631 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1632
1633 return (Datum) 0;
1634}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_10.

◆ pg_stat_statements_1_11()

Datum pg_stat_statements_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1617 of file pg_stat_statements.c.

1618{
1619 bool showtext = PG_GETARG_BOOL(0);
1620
1621 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1622
1623 return (Datum) 0;
1624}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_11.

◆ pg_stat_statements_1_12()

Datum pg_stat_statements_1_12 ( PG_FUNCTION_ARGS  )

Definition at line 1607 of file pg_stat_statements.c.

1608{
1609 bool showtext = PG_GETARG_BOOL(0);
1610
1611 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1612
1613 return (Datum) 0;
1614}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_12.

◆ pg_stat_statements_1_13()

Datum pg_stat_statements_1_13 ( PG_FUNCTION_ARGS  )

Definition at line 1597 of file pg_stat_statements.c.

1598{
1599 bool showtext = PG_GETARG_BOOL(0);
1600
1601 pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1602
1603 return (Datum) 0;
1604}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_13.

◆ pg_stat_statements_1_2()

Datum pg_stat_statements_1_2 ( PG_FUNCTION_ARGS  )

Definition at line 1667 of file pg_stat_statements.c.

1668{
1669 bool showtext = PG_GETARG_BOOL(0);
1670
1671 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1672
1673 return (Datum) 0;
1674}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_2.

◆ pg_stat_statements_1_3()

Datum pg_stat_statements_1_3 ( PG_FUNCTION_ARGS  )

Definition at line 1657 of file pg_stat_statements.c.

1658{
1659 bool showtext = PG_GETARG_BOOL(0);
1660
1661 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1662
1663 return (Datum) 0;
1664}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_3.

◆ pg_stat_statements_1_8()

Datum pg_stat_statements_1_8 ( PG_FUNCTION_ARGS  )

Definition at line 1647 of file pg_stat_statements.c.

1648{
1649 bool showtext = PG_GETARG_BOOL(0);
1650
1651 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1652
1653 return (Datum) 0;
1654}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_8.

◆ pg_stat_statements_1_9()

Datum pg_stat_statements_1_9 ( PG_FUNCTION_ARGS  )

Definition at line 1637 of file pg_stat_statements.c.

1638{
1639 bool showtext = PG_GETARG_BOOL(0);
1640
1641 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1642
1643 return (Datum) 0;
1644}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_9.

◆ pg_stat_statements_info()

Datum pg_stat_statements_info ( PG_FUNCTION_ARGS  )

Definition at line 2058 of file pg_stat_statements.c.

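2059{
2060 pgssGlobalStats stats;
2061 TupleDesc tupdesc;
2062 Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2063 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2064
2065 if (!pgss || !pgss_hash)
2066 ereport(ERROR,
2067 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2068 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2069
2070 /* Build a tuple descriptor for our result type */
2071 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2072 elog(ERROR, "return type must be a row type");
2073
2074 /* Read global statistics for pg_stat_statements */
2075 SpinLockAcquire(&pgss->mutex);
2076 stats = pgss->stats;
2077 SpinLockRelease(&pgss->mutex);
2078
2079 values[0] = Int64GetDatum(stats.dealloc);
2080 values[1] = TimestampTzGetDatum(stats.stats_reset);
2081
2082 PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2083}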

References pgssGlobalStats::dealloc, elog, ereport, errcode(), errmsg(), ERROR, fb(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int64GetDatum(), pgssSharedState::mutex, PG_RETURN_DATUM, PG_STAT_STATEMENTS_INFO_COLS, pgss, pgss_hash, SpinLockAcquire, SpinLockRelease, pgssSharedState::stats, pgssGlobalStats::stats_reset, TimestampTzGetDatum(), TYPEFUNC_COMPOSITE, and values.

◆ pg_stat_statements_internal()

static void pg_stat_statements_internal ( FunctionCallInfo  fcinfo,
pgssVersion  api_version,
bool  showtext 
)
static

Definition at line 1691 of file pg_stat_statements.c.

1694{
1696 Oid userid = GetUserId();
1697 bool is_allowed_role = false;
1698 char *qbuffer = NULL;
1699 Size qbuffer_size = 0;
1700 Size extent = 0;
1701 int gc_count = 0;
1703 pgssEntry *entry;
1704
1705 /*
1706 * Superusers or roles with the privileges of pg_read_all_stats members
1707 * are allowed
1708 */
1710
1711 /* hash table must exist already */
1712 if (!pgss || !pgss_hash)
1713 ereport(ERROR,
1715 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1716
1717 InitMaterializedSRF(fcinfo, 0);
1718
1719 /*
1720 * Check we have the expected number of output arguments. Aside from
1721 * being a good safety check, we need a kluge here to detect API version
1722 * 1.1, which was wedged into the code in an ill-considered way.
1723 */
1724 switch (rsinfo->setDesc->natts)
1725 {
1727 if (api_version != PGSS_V1_0)
1728 elog(ERROR, "incorrect number of output arguments");
1729 break;
1731 /* pg_stat_statements() should have told us 1.0 */
1732 if (api_version != PGSS_V1_0)
1733 elog(ERROR, "incorrect number of output arguments");
1734 api_version = PGSS_V1_1;
1735 break;
1737 if (api_version != PGSS_V1_2)
1738 elog(ERROR, "incorrect number of output arguments");
1739 break;
1741 if (api_version != PGSS_V1_3)
1742 elog(ERROR, "incorrect number of output arguments");
1743 break;
1745 if (api_version != PGSS_V1_8)
1746 elog(ERROR, "incorrect number of output arguments");
1747 break;
1749 if (api_version != PGSS_V1_9)
1750 elog(ERROR, "incorrect number of output arguments");
1751 break;
1753 if (api_version != PGSS_V1_10)
1754 elog(ERROR, "incorrect number of output arguments");
1755 break;
1757 if (api_version != PGSS_V1_11)
1758 elog(ERROR, "incorrect number of output arguments");
1759 break;
1761 if (api_version != PGSS_V1_12)
1762 elog(ERROR, "incorrect number of output arguments");
1763 break;
1765 if (api_version != PGSS_V1_13)
1766 elog(ERROR, "incorrect number of output arguments");
1767 break;
1768 default:
1769 elog(ERROR, "incorrect number of output arguments");
1770 }
1771
1772 /*
1773 * We'd like to load the query text file (if needed) while not holding any
1774 * lock on pgss->lock. In the worst case we'll have to do this again
1775 * after we have the lock, but it's unlikely enough to make this a win
1776 * despite occasional duplicated work. We need to reload if anybody
1777 * writes to the file (either a retail qtext_store(), or a garbage
1778 * collection) between this point and where we've gotten shared lock. If
1779 * a qtext_store is actually in progress when we look, we might as well
1780 * skip the speculative load entirely.
1781 */
1782 if (showtext)
1783 {
1784 int n_writers;
1785
1786 /* Take the mutex so we can examine variables */
1788 extent = pgss->extent;
1789 n_writers = pgss->n_writers;
1790 gc_count = pgss->gc_count;
1792
1793 /* No point in loading file now if there are active writers */
1794 if (n_writers == 0)
1796 }
1797
1798 /*
1799 * Get shared lock, load or reload the query text file if we must, and
1800 * iterate over the hashtable entries.
1801 *
1802 * With a large hash table, we might be holding the lock rather longer
1803 * than one could wish. However, this only blocks creation of new hash
1804 * table entries, and the larger the hash table the less likely that is to
1805 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1806 * we need to partition the hash table to limit the time spent holding any
1807 * one lock.
1808 */
1810
1811 if (showtext)
1812 {
1813 /*
1814 * Here it is safe to examine extent and gc_count without taking the
1815 * mutex. Note that although other processes might change
1816 * pgss->extent just after we look at it, the strings they then write
1817 * into the file cannot yet be referenced in the hashtable, so we
1818 * don't care whether we see them or not.
1819 *
1820 * If qtext_load_file fails, we just press on; we'll return NULL for
1821 * every query text.
1822 */
1823 if (qbuffer == NULL ||
1824 pgss->extent != extent ||
1825 pgss->gc_count != gc_count)
1826 {
1827 free(qbuffer);
1829 }
1830 }
1831
1833 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1834 {
1836 bool nulls[PG_STAT_STATEMENTS_COLS];
1837 int i = 0;
1838 Counters tmp;
1839 double stddev;
1840 int64 queryid = entry->key.queryid;
1841 TimestampTz stats_since;
1842 TimestampTz minmax_stats_since;
1843
1844 memset(values, 0, sizeof(values));
1845 memset(nulls, 0, sizeof(nulls));
1846
1847 values[i++] = ObjectIdGetDatum(entry->key.userid);
1848 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1849 if (api_version >= PGSS_V1_9)
1850 values[i++] = BoolGetDatum(entry->key.toplevel);
1851
1852 if (is_allowed_role || entry->key.userid == userid)
1853 {
1854 if (api_version >= PGSS_V1_2)
1855 values[i++] = Int64GetDatumFast(queryid);
1856
1857 if (showtext)
1858 {
1859 char *qstr = qtext_fetch(entry->query_offset,
1860 entry->query_len,
1861 qbuffer,
1862 qbuffer_size);
1863
1864 if (qstr)
1865 {
1866 char *enc;
1867
1869 entry->query_len,
1870 entry->encoding);
1871
1873
1874 if (enc != qstr)
1875 pfree(enc);
1876 }
1877 else
1878 {
1879 /* Just return a null if we fail to find the text */
1880 nulls[i++] = true;
1881 }
1882 }
1883 else
1884 {
1885 /* Query text not requested */
1886 nulls[i++] = true;
1887 }
1888 }
1889 else
1890 {
1891 /* Don't show queryid */
1892 if (api_version >= PGSS_V1_2)
1893 nulls[i++] = true;
1894
1895 /*
1896 * Don't show query text, but hint as to the reason for not doing
1897 * so if it was requested
1898 */
1899 if (showtext)
1900 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1901 else
1902 nulls[i++] = true;
1903 }
1904
1905 /* copy counters to a local variable to keep locking time short */
1906 SpinLockAcquire(&entry->mutex);
1907 tmp = entry->counters;
1908 SpinLockRelease(&entry->mutex);
1909
1910 /*
1911 * The spinlock is not required when reading these two as they are
1912 * always updated when holding pgss->lock exclusively.
1913 */
1914 stats_since = entry->stats_since;
1915 minmax_stats_since = entry->minmax_stats_since;
1916
1917 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1918 if (IS_STICKY(tmp))
1919 continue;
1920
1921 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1922 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1923 {
1924 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1925 {
1926 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1927 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1928 }
1929
1930 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1931 api_version >= PGSS_V1_8)
1932 {
1933 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1934 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1935 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1936
1937 /*
1938 * Note we are calculating the population variance here, not
1939 * the sample variance, as we have data for the whole
1940 * population, so Bessel's correction is not used, and we
1941 * don't divide by tmp.calls - 1.
1942 */
1943 if (tmp.calls[kind] > 1)
1944 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1945 else
1946 stddev = 0.0;
1947 values[i++] = Float8GetDatumFast(stddev);
1948 }
1949 }
1950 values[i++] = Int64GetDatumFast(tmp.rows);
1953 if (api_version >= PGSS_V1_1)
1958 if (api_version >= PGSS_V1_1)
1963 if (api_version >= PGSS_V1_1)
1964 {
1967 }
1968 if (api_version >= PGSS_V1_11)
1969 {
1972 }
1973 if (api_version >= PGSS_V1_10)
1974 {
1977 }
1978 if (api_version >= PGSS_V1_8)
1979 {
1980 char buf[256];
1981 Datum wal_bytes;
1982
1985
1986 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1987
1988 /* Convert to numeric. */
1989 wal_bytes = DirectFunctionCall3(numeric_in,
1992 Int32GetDatum(-1));
1993 values[i++] = wal_bytes;
1994 }
1995 if (api_version >= PGSS_V1_12)
1996 {
1998 }
1999 if (api_version >= PGSS_V1_10)
2000 {
2009 }
2010 if (api_version >= PGSS_V1_11)
2011 {
2014 }
2015 if (api_version >= PGSS_V1_12)
2016 {
2019 }
2020 if (api_version >= PGSS_V1_13)
2021 {
2024 }
2025 if (api_version >= PGSS_V1_11)
2026 {
2027 values[i++] = TimestampTzGetDatum(stats_since);
2028 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2029 }
2030
2031 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2032 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2033 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2034 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2035 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2036 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2037 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2038 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2039 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2040 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2041 -1 /* fail if you forget to update this assert */ ));
2042
2043 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2044 }
2045
2047
2048 free(qbuffer);
2049}

References Assert, BoolGetDatum(), buf, Counters::calls, pgssEntry::counters, CStringGetDatum(), CStringGetTextDatum, Counters::custom_plan_calls, pgssHashKey::dbid, DirectFunctionCall3, elog, enc, pgssEntry::encoding, ereport, errcode(), errmsg(), ERROR, pgssSharedState::extent, fb(), Float8GetDatumFast, free, pgssSharedState::gc_count, Counters::generic_plan_calls, GetUserId(), has_privs_of_role(), hash_seq_init(), hash_seq_search(), i, InitMaterializedSRF(), Int32GetDatum(), Int64GetDatumFast, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, pgssEntry::key, Counters::local_blk_read_time, Counters::local_blk_write_time, Counters::local_blks_dirtied, Counters::local_blks_hit, Counters::local_blks_read, Counters::local_blks_written, pgssSharedState::lock, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::minmax_stats_since, pgssEntry::mutex, pgssSharedState::mutex, pgssSharedState::n_writers, numeric_in(), ObjectIdGetDatum(), Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pg_any_to_server(), PG_STAT_STATEMENTS_COLS, PG_STAT_STATEMENTS_COLS_V1_0, PG_STAT_STATEMENTS_COLS_V1_1, PG_STAT_STATEMENTS_COLS_V1_10, PG_STAT_STATEMENTS_COLS_V1_11, PG_STAT_STATEMENTS_COLS_V1_12, PG_STAT_STATEMENTS_COLS_V1_13, PG_STAT_STATEMENTS_COLS_V1_2, PG_STAT_STATEMENTS_COLS_V1_3, PG_STAT_STATEMENTS_COLS_V1_8, PG_STAT_STATEMENTS_COLS_V1_9, pgss, PGSS_EXEC, pgss_hash, PGSS_NUMKIND, PGSS_V1_0, PGSS_V1_1, PGSS_V1_10, PGSS_V1_11, PGSS_V1_12, PGSS_V1_13, PGSS_V1_2, PGSS_V1_3, PGSS_V1_8, PGSS_V1_9, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, pgssHashKey::queryid, FunctionCallInfoBaseData::resultinfo, 
Counters::rows, Counters::shared_blk_read_time, Counters::shared_blk_write_time, Counters::shared_blks_dirtied, Counters::shared_blks_hit, Counters::shared_blks_read, Counters::shared_blks_written, snprintf, SpinLockAcquire, SpinLockRelease, pgssEntry::stats_since, Counters::sum_var_time, Counters::temp_blk_read_time, Counters::temp_blk_write_time, Counters::temp_blks_read, Counters::temp_blks_written, TimestampTzGetDatum(), pgssHashKey::toplevel, Counters::total_time, tuplestore_putvalues(), UINT64_FORMAT, pgssHashKey::userid, values, Counters::wal_buffers_full, Counters::wal_bytes, Counters::wal_fpi, and Counters::wal_records.

Referenced by pg_stat_statements(), pg_stat_statements_1_10(), pg_stat_statements_1_11(), pg_stat_statements_1_12(), pg_stat_statements_1_13(), pg_stat_statements_1_2(), pg_stat_statements_1_3(), pg_stat_statements_1_8(), and pg_stat_statements_1_9().

◆ pg_stat_statements_reset()

Datum pg_stat_statements_reset ( PG_FUNCTION_ARGS  )

Definition at line 1566 of file pg_stat_statements.c.

1567{
1568 entry_reset(0, 0, 0, false);
1569
1570 PG_RETURN_VOID();
1571}

References entry_reset(), and PG_RETURN_VOID.

◆ pg_stat_statements_reset_1_11()

Datum pg_stat_statements_reset_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1547 of file pg_stat_statements.c.

1548{
1549 Oid userid;
1550 Oid dbid;
1551 int64 queryid;
1552 bool minmax_only;
1553
1554 userid = PG_GETARG_OID(0);
1555 dbid = PG_GETARG_OID(1);
1556 queryid = PG_GETARG_INT64(2);
1557 minmax_only = PG_GETARG_BOOL(3);
1558
1559 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1560}

References entry_reset(), fb(), PG_GETARG_BOOL, PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_TIMESTAMPTZ.

◆ pg_stat_statements_reset_1_7()

Datum pg_stat_statements_reset_1_7 ( PG_FUNCTION_ARGS  )

Definition at line 1531 of file pg_stat_statements.c.

1532{
1533 Oid userid;
1534 Oid dbid;
1535 int64 queryid;
1536
1537 userid = PG_GETARG_OID(0);
1538 dbid = PG_GETARG_OID(1);
1539 queryid = PG_GETARG_INT64(2);
1540
1541 entry_reset(userid, dbid, queryid, false);
1542
1543 PG_RETURN_VOID();
1544}

References entry_reset(), PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_VOID.

◆ pgss_ExecutorEnd()

static void pgss_ExecutorEnd ( QueryDesc * queryDesc)
static

Definition at line 1078 of file pg_stat_statements.c.

1079{
1080 int64 queryId = queryDesc->plannedstmt->queryId;
1081
1082 if (queryId != INT64CONST(0) && queryDesc->totaltime &&
1083 pgss_enabled(nesting_level))
1084 {
1085 /*
1086 * Make sure stats accumulation is done. (Note: it's okay if several
1087 * levels of hook all do this.)
1088 */
1089 InstrEndLoop(queryDesc->totaltime);
1090
1091 pgss_store(queryDesc->sourceText,
1092 queryId,
1093 queryDesc->plannedstmt->stmt_location,
1094 queryDesc->plannedstmt->stmt_len,
1095 PGSS_EXEC,
1096 INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
1097 queryDesc->estate->es_total_processed,
1098 &queryDesc->totaltime->bufusage,
1099 &queryDesc->totaltime->walusage,
1100 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1101 NULL,
1102 queryDesc->estate->es_parallel_workers_to_launch,
1103 queryDesc->estate->es_parallel_workers_launched,
1104 queryDesc->plannedstmt->planOrigin);
1105 }
1106
1107 if (prev_ExecutorEnd)
1108 prev_ExecutorEnd(queryDesc);
1109 else
1110 standard_ExecutorEnd(queryDesc);
1111}

References Instrumentation::bufusage, EState::es_jit, EState::es_parallel_workers_launched, EState::es_parallel_workers_to_launch, EState::es_total_processed, QueryDesc::estate, fb(), JitContext::instr, INSTR_TIME_GET_MILLISEC, InstrEndLoop(), INT64CONST, nesting_level, pgss_enabled, PGSS_EXEC, pgss_store(), QueryDesc::plannedstmt, PlannedStmt::planOrigin, prev_ExecutorEnd, PlannedStmt::queryId, QueryDesc::sourceText, standard_ExecutorEnd(), PlannedStmt::stmt_len, PlannedStmt::stmt_location, Instrumentation::total, QueryDesc::totaltime, and Instrumentation::walusage.

Referenced by _PG_init().

◆ pgss_ExecutorFinish()

static void pgss_ExecutorFinish ( QueryDesc * queryDesc)
static

Definition at line 1057 of file pg_stat_statements.c.

1058{
1059 nesting_level++;
1060 PG_TRY();
1061 {
1062 if (prev_ExecutorFinish)
1063 prev_ExecutorFinish(queryDesc);
1064 else
1065 standard_ExecutorFinish(queryDesc);
1066 }
1067 PG_FINALLY();
1068 {
1069 nesting_level--;
1070 }
1071 PG_END_TRY();
1072}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorFinish, and standard_ExecutorFinish().

Referenced by _PG_init().

◆ pgss_ExecutorRun()

static void pgss_ExecutorRun ( QueryDesc * queryDesc,
ScanDirection  direction,
uint64  count 
)
static

Definition at line 1036 of file pg_stat_statements.c.

1037{
1038 nesting_level++;
1039 PG_TRY();
1040 {
1041 if (prev_ExecutorRun)
1042 prev_ExecutorRun(queryDesc, direction, count);
1043 else
1044 standard_ExecutorRun(queryDesc, direction, count);
1045 }
1046 PG_FINALLY();
1047 {
1048 nesting_level--;
1049 }
1050 PG_END_TRY();
1051}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorRun, and standard_ExecutorRun().

Referenced by _PG_init().

◆ pgss_ExecutorStart()

static void pgss_ExecutorStart ( QueryDesc * queryDesc,
int  eflags 
)
static

Definition at line 1002 of file pg_stat_statements.c.

1003{
1004 if (prev_ExecutorStart)
1005 prev_ExecutorStart(queryDesc, eflags);
1006 else
1007 standard_ExecutorStart(queryDesc, eflags);
1008
1009 /*
1010 * If query has queryId zero, don't track it. This prevents double
1011 * counting of optimizable statements that are directly contained in
1012 * utility statements.
1013 */
1014 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1015 {
1016 /*
1017 * Set up to track total elapsed time in ExecutorRun. Make sure the
1018 * space is allocated in the per-query context so it will go away at
1019 * ExecutorEnd.
1020 */
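1021 if (queryDesc->totaltime == NULL)
1022 {
1023 MemoryContext oldcxt;
1024
1025 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1026 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1027 MemoryContextSwitchTo(oldcxt);
1028 }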
1029 }
1030}

References EState::es_query_cxt, QueryDesc::estate, fb(), InstrAlloc(), INSTRUMENT_ALL, INT64CONST, MemoryContextSwitchTo(), nesting_level, pgss_enabled, QueryDesc::plannedstmt, prev_ExecutorStart, PlannedStmt::queryId, standard_ExecutorStart(), and QueryDesc::totaltime.

Referenced by _PG_init().

◆ pgss_memsize()

static Size pgss_memsize ( void  )
static

Definition at line 2089 of file pg_stat_statements.c.

2090{
2091 Size size;
2092
2093 size = MAXALIGN(sizeof(pgssSharedState));
2094 size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2095
2096 return size;
2097}

References add_size(), hash_estimate_size(), MAXALIGN, and pgss_max.

Referenced by pgss_shmem_request().

◆ pgss_planner()

static PlannedStmt * pgss_planner ( Query * parse,
const char * query_string,
int  cursorOptions,
ParamListInfo  boundParams,
ExplainState * es 
)
static

Definition at line 895 of file pg_stat_statements.c.

900{
901 PlannedStmt *result;
902
903 /*
904 * We can't process the query if no query_string is provided, as
905 * pgss_store needs it. We also ignore query without queryid, as it would
906 * be treated as a utility statement, which may not be the case.
907 */
908 if (pgss_enabled(nesting_level)
909 && pgss_track_planning && query_string
910 && parse->queryId != INT64CONST(0))
911 {
912 instr_time start;
913 instr_time duration;
914 BufferUsage bufusage_start,
915 bufusage;
916 WalUsage walusage_start,
917 walusage;
918
919 /* We need to track buffer usage as the planner can access them. */
920 bufusage_start = pgBufferUsage;
921
922 /*
923 * Similarly the planner could write some WAL records in some cases
924 * (e.g. setting a hint bit with those being WAL-logged)
925 */
926 walusage_start = pgWalUsage;
927 INSTR_TIME_SET_CURRENT(start);
928
929 nesting_level++;
930 PG_TRY();
931 {
932 if (prev_planner_hook)
933 result = prev_planner_hook(parse, query_string, cursorOptions,
934 boundParams, es);
935 else
936 result = standard_planner(parse, query_string, cursorOptions,
937 boundParams, es);
938 }
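939 PG_FINALLY();
940 {
941 nesting_level--;
942 }
943 PG_END_TRY();
944
945 INSTR_TIME_SET_CURRENT(duration);
946 INSTR_TIME_SUBTRACT(duration, start);
947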
948 /* calc differences of buffer counters. */
949 memset(&bufusage, 0, sizeof(BufferUsage));
950 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
951
952 /* calc differences of WAL counters. */
953 memset(&walusage, 0, sizeof(WalUsage));
954 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
955
956 pgss_store(query_string,
957 parse->queryId,
958 parse->stmt_location,
959 parse->stmt_len,
960 PGSS_PLAN,
961 INSTR_TIME_GET_MILLISEC(duration),
962 0,
963 &bufusage,
964 &walusage,
965 NULL,
966 NULL,
967 0,
968 0,
969 result->planOrigin);
970 }
971 else
972 {
973 /*
974 * Even though we're not tracking plan time for this statement, we
975 * must still increment the nesting level, to ensure that functions
976 * evaluated during planning are not seen as top-level calls.
977 */
978 nesting_level++;
979 PG_TRY();
980 {
981 if (prev_planner_hook)
982 result = prev_planner_hook(parse, query_string, cursorOptions,
983 boundParams, es);
984 else
985 result = standard_planner(parse, query_string, cursorOptions,
986 boundParams, es);
987 }
988 PG_FINALLY();
989 {
990 nesting_level--;
991 }
992 PG_END_TRY();
993 }
994
995 return result;
996}

References BufferUsageAccumDiff(), duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, nesting_level, parse(), PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_PLAN, pgss_store(), pgss_track_planning, pgWalUsage, PlannedStmt::planOrigin, prev_planner_hook, standard_planner(), start, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_post_parse_analyze()

static void pgss_post_parse_analyze ( ParseState * pstate,
Query * query,
JumbleState * jstate 
)
static

Definition at line 842 of file pg_stat_statements.c.

843{
844 if (prev_post_parse_analyze_hook)
845 prev_post_parse_analyze_hook(pstate, query, jstate);
846
847 /* Safety check... */
848 if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
849 return;
850
851 /*
852 * If it's EXECUTE, clear the queryId so that stats will accumulate for
853 * the underlying PREPARE. But don't do this if we're not tracking
854 * utility statements, to avoid messing up another extension that might be
855 * tracking them.
856 */
857 if (query->utilityStmt)
858 {
859 if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
860 {
861 query->queryId = INT64CONST(0);
862 return;
863 }
864 }
865
866 /*
867 * If query jumbling were able to identify any ignorable constants, we
868 * immediately create a hash table entry for the query, so that we can
869 * record the normalized form of the query string. If there were no such
870 * constants, the normalized string would be the same as the query text
871 * anyway, so there's no need for an early entry.
872 */
873 if (jstate && jstate->clocations_count > 0)
874 pgss_store(pstate->p_sourcetext,
875 query->queryId,
876 query->stmt_location,
877 query->stmt_len,
878 PGSS_INVALID,
879 0,
880 0,
881 NULL,
882 NULL,
883 NULL,
884 jstate,
885 0,
886 0,
887 PLAN_STMT_UNKNOWN);
888}

References fb(), INT64CONST, IsA, nesting_level, ParseState::p_sourcetext, pgss, pgss_enabled, pgss_hash, PGSS_INVALID, pgss_store(), pgss_track_utility, PLAN_STMT_UNKNOWN, prev_post_parse_analyze_hook, Query::stmt_location, and Query::utilityStmt.

Referenced by _PG_init().

◆ pgss_ProcessUtility()

static void pgss_ProcessUtility ( PlannedStmt * pstmt,
const char * queryString,
bool  readOnlyTree,
ProcessUtilityContext  context,
ParamListInfo  params,
QueryEnvironment * queryEnv,
DestReceiver * dest,
QueryCompletion * qc 
)
static

Definition at line 1117 of file pg_stat_statements.c.

1122{
1123 Node *parsetree = pstmt->utilityStmt;
1124 int64 saved_queryId = pstmt->queryId;
1126 int saved_stmt_len = pstmt->stmt_len;
1128
1129 /*
1130 * Force utility statements to get queryId zero. We do this even in cases
1131 * where the statement contains an optimizable statement for which a
1132 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1133 * cases, runtime control will first go through ProcessUtility and then
1134 * the executor, and we don't want the executor hooks to do anything,
1135 * since we are already measuring the statement's costs at the utility
1136 * level.
1137 *
1138 * Note that this is only done if pg_stat_statements is enabled and
1139 * configured to track utility statements, in the unlikely possibility
1140 * that user configured another extension to handle utility statements
1141 * only.
1142 */
1143 if (enabled)
1144 pstmt->queryId = INT64CONST(0);
1145
1146 /*
1147 * If it's an EXECUTE statement, we don't track it and don't increment the
1148 * nesting level. This allows the cycles to be charged to the underlying
1149 * PREPARE instead (by the Executor hooks), which is much more useful.
1150 *
1151 * We also don't track execution of PREPARE. If we did, we would get one
1152 * hash table entry for the PREPARE (with hash calculated from the query
1153 * string), and then a different one with the same query string (but hash
1154 * calculated from the query tree) would be used to accumulate costs of
1155 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1156 * actually run the planner (only parse+rewrite), its costs are generally
1157 * pretty negligible and it seems okay to just ignore it.
1158 */
1159 if (enabled &&
1160 !IsA(parsetree, ExecuteStmt) &&
1161 !IsA(parsetree, PrepareStmt))
1162 {
1165 uint64 rows;
1166 BufferUsage bufusage_start,
1167 bufusage;
1168 WalUsage walusage_start,
1169 walusage;
1170
1171 bufusage_start = pgBufferUsage;
1172 walusage_start = pgWalUsage;
1174
1175 nesting_level++;
1176 PG_TRY();
1177 {
1179 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1180 context, params, queryEnv,
1181 dest, qc);
1182 else
1183 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1184 context, params, queryEnv,
1185 dest, qc);
1186 }
1187 PG_FINALLY();
1188 {
1189 nesting_level--;
1190 }
1191 PG_END_TRY();
1192
1193 /*
1194 * CAUTION: do not access the *pstmt data structure again below here.
1195 * If it was a ROLLBACK or similar, that data structure may have been
1196 * freed. We must copy everything we still need into local variables,
1197 * which we did above.
1198 *
1199 * For the same reason, we can't risk restoring pstmt->queryId to its
1200 * former value, which'd otherwise be a good idea.
1201 */
1202
1205
1206 /*
1207 * Track the total number of rows retrieved or affected by the utility
1208 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1209 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1210 */
1211 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1212 qc->commandTag == CMDTAG_FETCH ||
1213 qc->commandTag == CMDTAG_SELECT ||
1215 qc->nprocessed : 0;
1216
1217 /* calc differences of buffer counters. */
1218 memset(&bufusage, 0, sizeof(BufferUsage));
1219 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1220
1221 /* calc differences of WAL counters. */
1222 memset(&walusage, 0, sizeof(WalUsage));
1223 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1224
1225 pgss_store(queryString,
1229 PGSS_EXEC,
1231 rows,
1232 &bufusage,
1233 &walusage,
1234 NULL,
1235 NULL,
1236 0,
1237 0,
1238 pstmt->planOrigin);
1239 }
1240 else
1241 {
1242 /*
1243 * Even though we're not tracking execution time for this statement,
1244 * we must still increment the nesting level, to ensure that functions
1245 * evaluated within it are not seen as top-level calls. But don't do
1246 * so for EXECUTE; that way, when control reaches pgss_planner or
1247 * pgss_ExecutorStart, we will treat the costs as top-level if
1248 * appropriate. Likewise, don't bump for PREPARE, so that parse
1249 * analysis will treat the statement as top-level if appropriate.
1250 *
1251 * To be absolutely certain we don't mess up the nesting level,
1252 * evaluate the bump_level condition just once.
1253 */
1254 bool bump_level =
1255 !IsA(parsetree, ExecuteStmt) &&
1256 !IsA(parsetree, PrepareStmt);
1257
1258 if (bump_level)
1259 nesting_level++;
1260 PG_TRY();
1261 {
1263 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1264 context, params, queryEnv,
1265 dest, qc);
1266 else
1267 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1268 context, params, queryEnv,
1269 dest, qc);
1270 }
1271 PG_FINALLY();
1272 {
1273 if (bump_level)
1274 nesting_level--;
1275 }
1276 PG_END_TRY();
1277 }
1278}

References BufferUsageAccumDiff(), QueryCompletion::commandTag, duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, IsA, nesting_level, QueryCompletion::nprocessed, PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_EXEC, pgss_store(), pgss_track_utility, pgWalUsage, PlannedStmt::planOrigin, prev_ProcessUtility, PlannedStmt::queryId, standard_ProcessUtility(), start, PlannedStmt::stmt_len, PlannedStmt::stmt_location, PlannedStmt::utilityStmt, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_shmem_request()

static void pgss_shmem_request ( void  )
static

◆ pgss_shmem_shutdown()

static void pgss_shmem_shutdown ( int  code,
Datum  arg 
)
static

Definition at line 743 of file pg_stat_statements.c.

744{
745 FILE *file;
746 char *qbuffer = NULL;
747 Size qbuffer_size = 0;
749 int32 num_entries;
750 pgssEntry *entry;
751
752 /* Don't try to dump during a crash. */
753 if (code)
754 return;
755
756 /* Safety check ... shouldn't get here unless shmem is set up. */
757 if (!pgss || !pgss_hash)
758 return;
759
760 /* Don't dump if told not to. */
761 if (!pgss_save)
762 return;
763
765 if (file == NULL)
766 goto error;
767
768 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
769 goto error;
770 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
771 goto error;
772 num_entries = hash_get_num_entries(pgss_hash);
773 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
774 goto error;
775
777 if (qbuffer == NULL)
778 goto error;
779
780 /*
781 * When serializing to disk, we store query texts immediately after their
782 * entry data. Any orphaned query texts are thereby excluded.
783 */
785 while ((entry = hash_seq_search(&hash_seq)) != NULL)
786 {
787 int len = entry->query_len;
788 char *qstr = qtext_fetch(entry->query_offset, len,
790
791 if (qstr == NULL)
792 continue; /* Ignore any entries with bogus texts */
793
794 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
795 fwrite(qstr, 1, len + 1, file) != len + 1)
796 {
797 /* note: we assume hash_seq_term won't change errno */
799 goto error;
800 }
801 }
802
803 /* Dump global statistics for pg_stat_statements */
804 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
805 goto error;
806
807 free(qbuffer);
808 qbuffer = NULL;
809
810 if (FreeFile(file))
811 {
812 file = NULL;
813 goto error;
814 }
815
816 /*
817 * Rename file into place, so we atomically replace any old one.
818 */
820
821 /* Unlink query-texts file; it's not needed while shutdown */
823
824 return;
825
826error:
827 ereport(LOG,
829 errmsg("could not write file \"%s\": %m",
830 PGSS_DUMP_FILE ".tmp")));
831 free(qbuffer);
832 if (file)
833 FreeFile(file);
834 unlink(PGSS_DUMP_FILE ".tmp");
836}

References AllocateFile(), durable_rename(), ereport, errcode_for_file_access(), errmsg(), error(), fb(), free, FreeFile(), hash_get_num_entries(), hash_seq_init(), hash_seq_search(), hash_seq_term(), len, LOG, PG_BINARY_W, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, PGSS_PG_MAJOR_VERSION, pgss_save, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and pgssSharedState::stats.

Referenced by pgss_shmem_startup().

◆ pgss_shmem_startup()

static void pgss_shmem_startup ( void  )
static

Definition at line 517 of file pg_stat_statements.c.

518{
519 bool found;
520 HASHCTL info;
521 FILE *file = NULL;
522 FILE *qfile = NULL;
523 uint32 header;
524 int32 num;
525 int32 pgver;
526 int32 i;
527 int buffer_size;
528 char *buffer = NULL;
529
532
533 /* reset in case this is a restart within the postmaster */
534 pgss = NULL;
535 pgss_hash = NULL;
536
537 /*
538 * Create or attach to the shared memory state, including hash table
539 */
541
542 pgss = ShmemInitStruct("pg_stat_statements",
543 sizeof(pgssSharedState),
544 &found);
545
546 if (!found)
547 {
548 /* First time through ... */
549 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
553 pgss->extent = 0;
554 pgss->n_writers = 0;
555 pgss->gc_count = 0;
556 pgss->stats.dealloc = 0;
558 }
559
560 info.keysize = sizeof(pgssHashKey);
561 info.entrysize = sizeof(pgssEntry);
562 pgss_hash = ShmemInitHash("pg_stat_statements hash",
564 &info,
566
568
569 /*
570 * If we're in the postmaster (or a standalone backend...), set up a shmem
571 * exit hook to dump the statistics to disk.
572 */
575
576 /*
577 * Done if some other process already completed our initialization.
578 */
579 if (found)
580 return;
581
582 /*
583 * Note: we don't bother with locks here, because there should be no other
584 * processes running when this code is reached.
585 */
586
587 /* Unlink query text file possibly left over from crash */
589
590 /* Allocate new query text temp file */
592 if (qfile == NULL)
593 goto write_error;
594
595 /*
596 * If we were told not to load old statistics, we're done. (Note we do
597 * not try to unlink any old dump file in this case. This seems a bit
598 * questionable but it's the historical behavior.)
599 */
600 if (!pgss_save)
601 {
603 return;
604 }
605
606 /*
607 * Attempt to load old statistics from the dump file.
608 */
610 if (file == NULL)
611 {
612 if (errno != ENOENT)
613 goto read_error;
614 /* No existing persisted stats file, so we're done */
616 return;
617 }
618
619 buffer_size = 2048;
620 buffer = (char *) palloc(buffer_size);
621
622 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
623 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
624 fread(&num, sizeof(int32), 1, file) != 1)
625 goto read_error;
626
627 if (header != PGSS_FILE_HEADER ||
629 goto data_error;
630
631 for (i = 0; i < num; i++)
632 {
634 pgssEntry *entry;
635 Size query_offset;
636
637 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
638 goto read_error;
639
640 /* Encoding is the only field we can easily sanity-check */
641 if (!PG_VALID_BE_ENCODING(temp.encoding))
642 goto data_error;
643
644 /* Resize buffer as needed */
645 if (temp.query_len >= buffer_size)
646 {
647 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
648 buffer = repalloc(buffer, buffer_size);
649 }
650
651 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
652 goto read_error;
653
654 /* Should have a trailing null, but let's make sure */
655 buffer[temp.query_len] = '\0';
656
657 /* Skip loading "sticky" entries */
658 if (IS_STICKY(temp.counters))
659 continue;
660
661 /* Store the query text */
662 query_offset = pgss->extent;
663 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
664 goto write_error;
665 pgss->extent += temp.query_len + 1;
666
667 /* make the hashtable entry (discards old entries if too many) */
668 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
669 temp.encoding,
670 false);
671
672 /* copy in the actual stats */
673 entry->counters = temp.counters;
674 entry->stats_since = temp.stats_since;
675 entry->minmax_stats_since = temp.minmax_stats_since;
676 }
677
678 /* Read global statistics for pg_stat_statements */
679 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
680 goto read_error;
681
682 pfree(buffer);
683 FreeFile(file);
685
686 /*
687 * Remove the persisted stats file so it's not included in
688 * backups/replication standbys, etc. A new file will be written on next
689 * shutdown.
690 *
691 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
692 * because we remove that file on startup; it acts inversely to
693 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
694 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
695 * when the server is not running. Leaving the file creates no danger of
696 * a newly restored database having a spurious record of execution costs,
697 * which is what we're really concerned about here.
698 */
700
701 return;
702
704 ereport(LOG,
706 errmsg("could not read file \"%s\": %m",
708 goto fail;
710 ereport(LOG,
712 errmsg("ignoring invalid data in file \"%s\"",
714 goto fail;
716 ereport(LOG,
718 errmsg("could not write file \"%s\": %m",
720fail:
721 if (buffer)
722 pfree(buffer);
723 if (file)
724 FreeFile(file);
725 if (qfile)
727 /* If possible, throw away the bogus file; ignore any error */
729
730 /*
731 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
732 * server is running with pg_stat_statements enabled
733 */
734}

References AllocateFile(), ASSUMED_LENGTH_INIT, ASSUMED_MEDIAN_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_alloc(), HASHCTL::entrysize, ereport, errcode(), errcode_for_file_access(), errmsg(), pgssSharedState::extent, fb(), FreeFile(), pgssSharedState::gc_count, GetCurrentTimestamp(), GetNamedLWLockTranche(), HASH_BLOBS, HASH_ELEM, i, IS_STICKY, IsUnderPostmaster, HASHCTL::keysize, pgssSharedState::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), Max, pgssSharedState::mean_query_len, pgssEntry::minmax_stats_since, pgssSharedState::mutex, pgssSharedState::n_writers, on_shmem_exit(), palloc(), pfree(), PG_BINARY_R, PG_BINARY_W, PG_VALID_BE_ENCODING, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, pgss_max, PGSS_PG_MAJOR_VERSION, pgss_save, pgss_shmem_shutdown(), PGSS_TEXT_FILE, prev_shmem_startup_hook, repalloc(), ShmemInitHash(), ShmemInitStruct(), SpinLockInit, pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssEntry::stats_since.

Referenced by _PG_init().

◆ pgss_store()

static void pgss_store ( const char * query,
int64  queryId,
int  query_location,
int  query_len,
pgssStoreKind  kind,
double  total_time,
uint64  rows,
const BufferUsage * bufusage,
const WalUsage * walusage,
const struct JitInstrumentation * jitusage,
JumbleState * jstate,
int  parallel_workers_to_launch,
int  parallel_workers_launched,
PlannedStmtOrigin  planOrigin 
)
static

Definition at line 1292 of file pg_stat_statements.c.

1303{
1305 pgssEntry *entry;
1306 char *norm_query = NULL;
1308
1309 Assert(query != NULL);
1310
1311 /* Safety check... */
1312 if (!pgss || !pgss_hash)
1313 return;
1314
1315 /*
1316 * Nothing to do if compute_query_id isn't enabled and no other module
1317 * computed a query identifier.
1318 */
1319 if (queryId == INT64CONST(0))
1320 return;
1321
1322 /*
1323 * Confine our attention to the relevant part of the string, if the query
1324 * is a portion of a multi-statement source string, and update query
1325 * location and length if needed.
1326 */
1327 query = CleanQuerytext(query, &query_location, &query_len);
1328
1329 /* Set up key for hashtable search */
1330
1331 /* clear padding */
1332 memset(&key, 0, sizeof(pgssHashKey));
1333
1334 key.userid = GetUserId();
1335 key.dbid = MyDatabaseId;
1336 key.queryid = queryId;
1337 key.toplevel = (nesting_level == 0);
1338
1339 /* Lookup the hash table entry with shared lock. */
1341
1342 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1343
1344 /* Create new entry, if not present */
1345 if (!entry)
1346 {
1347 Size query_offset;
1348 int gc_count;
1349 bool stored;
1350 bool do_gc;
1351
1352 /*
1353 * Create a new, normalized query string if caller asked. We don't
1354 * need to hold the lock while doing this work. (Note: in any case,
1355 * it's possible that someone else creates a duplicate hashtable entry
1356 * in the interval where we don't hold the lock below. That case is
1357 * handled by entry_alloc.)
1358 */
1359 if (jstate)
1360 {
1364 &query_len);
1366 }
1367
1368 /* Append new query text to file with only shared lock held */
1369 stored = qtext_store(norm_query ? norm_query : query, query_len,
1370 &query_offset, &gc_count);
1371
1372 /*
1373 * Determine whether we need to garbage collect external query texts
1374 * while the shared lock is still held. This micro-optimization
1375 * avoids taking the time to decide this while holding exclusive lock.
1376 */
1378
1379 /* Need exclusive lock to make a new hashtable entry - promote */
1382
1383 /*
1384 * A garbage collection may have occurred while we weren't holding the
1385 * lock. In the unlikely event that this happens, the query text we
1386 * stored above will have been garbage collected, so write it again.
1387 * This should be infrequent enough that doing it while holding
1388 * exclusive lock isn't a performance problem.
1389 */
1390 if (!stored || pgss->gc_count != gc_count)
1391 stored = qtext_store(norm_query ? norm_query : query, query_len,
1392 &query_offset, NULL);
1393
1394 /* If we failed to write to the text file, give up */
1395 if (!stored)
1396 goto done;
1397
1398 /* OK to create a new hashtable entry */
1399 entry = entry_alloc(&key, query_offset, query_len, encoding,
1400 jstate != NULL);
1401
1402 /* If needed, perform garbage collection while exclusive lock held */
1403 if (do_gc)
1404 gc_qtexts();
1405 }
1406
1407 /* Increment the counts, except when jstate is not NULL */
1408 if (!jstate)
1409 {
1410 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1411
1412 /*
1413 * Grab the spinlock while updating the counters (see comment about
1414 * locking rules at the head of the file)
1415 */
1416 SpinLockAcquire(&entry->mutex);
1417
1418 /* "Unstick" entry if it was previously sticky */
1419 if (IS_STICKY(entry->counters))
1420 entry->counters.usage = USAGE_INIT;
1421
1422 entry->counters.calls[kind] += 1;
1423 entry->counters.total_time[kind] += total_time;
1424
1425 if (entry->counters.calls[kind] == 1)
1426 {
1427 entry->counters.min_time[kind] = total_time;
1428 entry->counters.max_time[kind] = total_time;
1429 entry->counters.mean_time[kind] = total_time;
1430 }
1431 else
1432 {
1433 /*
1434 * Welford's method for accurately computing variance. See
1435 * <http://www.johndcook.com/blog/standard_deviation/>
1436 */
1437 double old_mean = entry->counters.mean_time[kind];
1438
1439 entry->counters.mean_time[kind] +=
1440 (total_time - old_mean) / entry->counters.calls[kind];
1441 entry->counters.sum_var_time[kind] +=
1442 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1443
1444 /*
1445 * Calculate min and max time. min = 0 and max = 0 means that the
1446 * min/max statistics were reset
1447 */
1448 if (entry->counters.min_time[kind] == 0
1449 && entry->counters.max_time[kind] == 0)
1450 {
1451 entry->counters.min_time[kind] = total_time;
1452 entry->counters.max_time[kind] = total_time;
1453 }
1454 else
1455 {
1456 if (entry->counters.min_time[kind] > total_time)
1457 entry->counters.min_time[kind] = total_time;
1458 if (entry->counters.max_time[kind] < total_time)
1459 entry->counters.max_time[kind] = total_time;
1460 }
1461 }
1462 entry->counters.rows += rows;
1463 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1464 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1467 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1468 entry->counters.local_blks_read += bufusage->local_blks_read;
1471 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1472 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1479 entry->counters.usage += USAGE_EXEC(total_time);
1480 entry->counters.wal_records += walusage->wal_records;
1481 entry->counters.wal_fpi += walusage->wal_fpi;
1482 entry->counters.wal_bytes += walusage->wal_bytes;
1483 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1484 if (jitusage)
1485 {
1486 entry->counters.jit_functions += jitusage->created_functions;
1487 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1488
1489 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1490 entry->counters.jit_deform_count++;
1491 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1492
1493 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1495 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1496
1497 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1499 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1500
1501 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1503 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1504 }
1505
1506 /* parallel worker counters */
1507 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1508 entry->counters.parallel_workers_launched += parallel_workers_launched;
1509
1510 /* plan cache counters */
1511 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1513 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1514 entry->counters.custom_plan_calls++;
1515
1516 SpinLockRelease(&entry->mutex);
1517 }
1518
1519done:
1521
1522 /* We postpone this clean-up until we're out of the lock */
1523 if (norm_query)
1525}

References Assert, Counters::calls, CleanQuerytext(), pgssEntry::counters, Counters::custom_plan_calls, encoding, entry_alloc(), fb(), pgssSharedState::gc_count, gc_qtexts(), generate_normalized_query(), Counters::generic_plan_calls, GetDatabaseEncoding(), GetUserId(), HASH_FIND, hash_search(), INSTR_TIME_GET_MILLISEC, INT64CONST, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, Counters::local_blk_read_time, BufferUsage::local_blk_read_time, Counters::local_blk_write_time, BufferUsage::local_blk_write_time, Counters::local_blks_dirtied, BufferUsage::local_blks_dirtied, Counters::local_blks_hit, BufferUsage::local_blks_hit, Counters::local_blks_read, BufferUsage::local_blks_read, Counters::local_blks_written, BufferUsage::local_blks_written, pgssSharedState::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::mutex, MyDatabaseId, need_gc_qtexts(), nesting_level, Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pgss, PGSS_EXEC, pgss_hash, PGSS_PLAN, PLAN_STMT_CACHE_CUSTOM, PLAN_STMT_CACHE_GENERIC, qtext_store(), Counters::rows, Counters::shared_blk_read_time, BufferUsage::shared_blk_read_time, Counters::shared_blk_write_time, BufferUsage::shared_blk_write_time, Counters::shared_blks_dirtied, BufferUsage::shared_blks_dirtied, Counters::shared_blks_hit, BufferUsage::shared_blks_hit, Counters::shared_blks_read, BufferUsage::shared_blks_read, Counters::shared_blks_written, BufferUsage::shared_blks_written, SpinLockAcquire, SpinLockRelease, Counters::sum_var_time, Counters::temp_blk_read_time, BufferUsage::temp_blk_read_time, Counters::temp_blk_write_time, BufferUsage::temp_blk_write_time, 
Counters::temp_blks_read, BufferUsage::temp_blks_read, Counters::temp_blks_written, BufferUsage::temp_blks_written, Counters::total_time, Counters::usage, USAGE_EXEC, USAGE_INIT, Counters::wal_buffers_full, WalUsage::wal_buffers_full, Counters::wal_bytes, WalUsage::wal_bytes, Counters::wal_fpi, WalUsage::wal_fpi, Counters::wal_records, and WalUsage::wal_records.

Referenced by pgss_ExecutorEnd(), pgss_planner(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ qtext_fetch()

static char * qtext_fetch ( Size  query_offset,
int  query_len,
char * buffer,
Size  buffer_size 
)
static

Definition at line 2439 of file pg_stat_statements.c.

2441{
2442 /* File read failed? */
2443 if (buffer == NULL)
2444 return NULL;
2445 /* Bogus offset/length? */
2446 if (query_len < 0 ||
2447 query_offset + query_len >= buffer_size)
2448 return NULL;
2449 /* As a further sanity check, make sure there's a trailing null */
2450 if (buffer[query_offset + query_len] != '\0')
2451 return NULL;
2452 /* Looks OK */
2453 return buffer + query_offset;
2454}

References fb().

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_load_file()

static char * qtext_load_file ( Size * buffer_size)
static

Definition at line 2346 of file pg_stat_statements.c.

2347{
2348 char *buf;
2349 int fd;
2350 struct stat stat;
2351 Size nread;
2352
2354 if (fd < 0)
2355 {
2356 if (errno != ENOENT)
2357 ereport(LOG,
2359 errmsg("could not read file \"%s\": %m",
2360 PGSS_TEXT_FILE)));
2361 return NULL;
2362 }
2363
2364 /* Get file length */
2365 if (fstat(fd, &stat))
2366 {
2367 ereport(LOG,
2369 errmsg("could not stat file \"%s\": %m",
2370 PGSS_TEXT_FILE)));
2372 return NULL;
2373 }
2374
2375 /* Allocate buffer; beware that off_t might be wider than size_t */
2377 buf = (char *) malloc(stat.st_size);
2378 else
2379 buf = NULL;
2380 if (buf == NULL)
2381 {
2382 ereport(LOG,
2384 errmsg("out of memory"),
2385 errdetail("Could not allocate enough memory to read file \"%s\".",
2386 PGSS_TEXT_FILE)));
2388 return NULL;
2389 }
2390
2391 /*
2392 * OK, slurp in the file. Windows fails if we try to read more than
2393 * INT_MAX bytes at once, and other platforms might not like that either,
2394 * so read a very large file in 1GB segments.
2395 */
2396 nread = 0;
2397 while (nread < stat.st_size)
2398 {
2399 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2400
2401 /*
2402 * If we get a short read and errno doesn't get set, the reason is
2403 * probably that garbage collection truncated the file since we did
2404 * the fstat(), so we don't log a complaint --- but we don't return
2405 * the data, either, since it's most likely corrupt due to concurrent
2406 * writes from garbage collection.
2407 */
2408 errno = 0;
2409 if (read(fd, buf + nread, toread) != toread)
2410 {
2411 if (errno)
2412 ereport(LOG,
2414 errmsg("could not read file \"%s\": %m",
2415 PGSS_TEXT_FILE)));
2416 free(buf);
2418 return NULL;
2419 }
2420 nread += toread;
2421 }
2422
2423 if (CloseTransientFile(fd) != 0)
2424 ereport(LOG,
2426 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2427
2428 *buffer_size = nread;
2429 return buf;
2430}

References buf, CloseTransientFile(), ereport, errcode(), errcode_for_file_access(), errdetail(), errmsg(), fb(), fd(), free, fstat, LOG, malloc, MaxAllocHugeSize, Min, OpenTransientFile(), PG_BINARY, PGSS_TEXT_FILE, read, and stat::st_size.

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_store()

static bool qtext_store ( const char * query,
int  query_len,
Size * query_offset,
int * gc_count 
)
static

Definition at line 2266 of file pg_stat_statements.c.

2268{
2269 Size off;
2270 int fd;
2271
2272 /*
2273 * We use a spinlock to protect extent/n_writers/gc_count, so that
2274 * multiple processes may execute this function concurrently.
2275 */
2277 off = pgss->extent;
2278 pgss->extent += query_len + 1;
2279 pgss->n_writers++;
2280 if (gc_count)
2281 *gc_count = pgss->gc_count;
2283
2284 *query_offset = off;
2285
2286 /*
2287 * Don't allow the file to grow larger than what qtext_load_file can
2288 * (theoretically) handle. This has been seen to be reachable on 32-bit
2289 * platforms.
2290 */
2291 if (unlikely(query_len >= MaxAllocHugeSize - off))
2292 {
2293 errno = EFBIG; /* not quite right, but it'll do */
2294 fd = -1;
2295 goto error;
2296 }
2297
2298 /* Now write the data into the successfully-reserved part of the file */
2300 if (fd < 0)
2301 goto error;
2302
2303 if (pg_pwrite(fd, query, query_len, off) != query_len)
2304 goto error;
2305 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2306 goto error;
2307
2309
2310 /* Mark our write complete */
2312 pgss->n_writers--;
2314
2315 return true;
2316
2317error:
2318 ereport(LOG,
2320 errmsg("could not write file \"%s\": %m",
2321 PGSS_TEXT_FILE)));
2322
2323 if (fd >= 0)
2325
2326 /* Mark our write complete */
2328 pgss->n_writers--;
2330
2331 return false;
2332}

References CloseTransientFile(), ereport, errcode_for_file_access(), errmsg(), error(), pgssSharedState::extent, fb(), fd(), pgssSharedState::gc_count, LOG, MaxAllocHugeSize, pgssSharedState::mutex, pgssSharedState::n_writers, OpenTransientFile(), PG_BINARY, pg_pwrite, pgss, PGSS_TEXT_FILE, SpinLockAcquire, SpinLockRelease, and unlikely.

Referenced by pgss_store().

Variable Documentation

◆ nesting_level

◆ pgss

◆ PGSS_FILE_HEADER

const uint32 PGSS_FILE_HEADER = 0x20250731
static

Definition at line 89 of file pg_stat_statements.c.

Referenced by pgss_shmem_shutdown(), and pgss_shmem_startup().

◆ pgss_hash

◆ pgss_max

int pgss_max = 5000
static

◆ PGSS_PG_MAJOR_VERSION

const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
static

Definition at line 92 of file pg_stat_statements.c.

Referenced by pgss_shmem_shutdown(), and pgss_shmem_startup().

◆ pgss_save

bool pgss_save = true
static

Definition at line 303 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_shmem_shutdown(), and pgss_shmem_startup().

◆ pgss_track

int pgss_track = PGSS_TRACK_TOP
static

Definition at line 299 of file pg_stat_statements.c.

Referenced by _PG_init().

◆ pgss_track_planning

bool pgss_track_planning = false
static

Definition at line 301 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ pgss_track_utility

bool pgss_track_utility = true
static

Definition at line 300 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ prev_ExecutorEnd

ExecutorEnd_hook_type prev_ExecutorEnd = NULL
static

Definition at line 274 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorEnd().

◆ prev_ExecutorFinish

ExecutorFinish_hook_type prev_ExecutorFinish = NULL
static

Definition at line 273 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorFinish().

◆ prev_ExecutorRun

ExecutorRun_hook_type prev_ExecutorRun = NULL
static

Definition at line 272 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorRun().

◆ prev_ExecutorStart

ExecutorStart_hook_type prev_ExecutorStart = NULL
static

Definition at line 271 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorStart().

◆ prev_planner_hook

planner_hook_type prev_planner_hook = NULL
static

Definition at line 270 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ prev_post_parse_analyze_hook

post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
static

Definition at line 269 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_post_parse_analyze().

◆ prev_ProcessUtility

ProcessUtility_hook_type prev_ProcessUtility = NULL
static

Definition at line 275 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ProcessUtility().

◆ prev_shmem_request_hook

shmem_request_hook_type prev_shmem_request_hook = NULL
static

Definition at line 267 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_shmem_request().

◆ prev_shmem_startup_hook

shmem_startup_hook_type prev_shmem_startup_hook = NULL
static

Definition at line 268 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_shmem_startup().

◆ track_options

const struct config_enum_entry track_options[]
static
Initial value:
=
{
{"none", PGSS_TRACK_NONE, false},
{"top", PGSS_TRACK_TOP, false},
{"all", PGSS_TRACK_ALL, false},
{NULL, 0, false}
}

Definition at line 290 of file pg_stat_statements.c.

291{
292 {"none", PGSS_TRACK_NONE, false},
293 {"top", PGSS_TRACK_TOP, false},
294 {"all", PGSS_TRACK_ALL, false},
295 {NULL, 0, false}
296};

Referenced by _PG_init().