PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c File Reference
#include "postgres.h"
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/htup_details.h"
#include "access/parallel.h"
#include "catalog/pg_authid.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "optimizer/planner.h"
#include "parser/analyze.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "utils/tuplestore.h"
Include dependency graph for pg_stat_statements.c:

Go to the source code of this file.

Data Structures

struct  pgssHashKey
 
struct  Counters
 
struct  pgssGlobalStats
 
struct  pgssEntry
 
struct  pgssSharedState
 

Macros

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
 
#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"
 
#define USAGE_EXEC(duration)   (1.0)
 
#define USAGE_INIT   (1.0) /* including initial planning */
 
#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */
 
#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */
 
#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */
 
#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */
 
#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */
 
#define IS_STICKY(c)   ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
#define PGSS_NUMKIND   (PGSS_EXEC + 1)
 
#define pgss_enabled(level)
 
#define record_gc_qtexts()
 
#define PG_STAT_STATEMENTS_COLS_V1_0   14
 
#define PG_STAT_STATEMENTS_COLS_V1_1   18
 
#define PG_STAT_STATEMENTS_COLS_V1_2   19
 
#define PG_STAT_STATEMENTS_COLS_V1_3   23
 
#define PG_STAT_STATEMENTS_COLS_V1_8   32
 
#define PG_STAT_STATEMENTS_COLS_V1_9   33
 
#define PG_STAT_STATEMENTS_COLS_V1_10   43
 
#define PG_STAT_STATEMENTS_COLS_V1_11   49
 
#define PG_STAT_STATEMENTS_COLS_V1_12   52
 
#define PG_STAT_STATEMENTS_COLS_V1_13   54
 
#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */
 
#define PG_STAT_STATEMENTS_INFO_COLS   2
 
#define SINGLE_ENTRY_RESET(e)
 

Typedefs

typedef enum pgssVersion pgssVersion
 
typedef enum pgssStoreKind pgssStoreKind
 
typedef struct pgssHashKey pgssHashKey
 
typedef struct Counters Counters
 
typedef struct pgssGlobalStats pgssGlobalStats
 
typedef struct pgssEntry pgssEntry
 
typedef struct pgssSharedState pgssSharedState
 

Enumerations

enum  pgssVersion {
  PGSS_V1_0 = 0 , PGSS_V1_1 , PGSS_V1_2 , PGSS_V1_3 ,
  PGSS_V1_8 , PGSS_V1_9 , PGSS_V1_10 , PGSS_V1_11 ,
  PGSS_V1_12 , PGSS_V1_13
}
 
enum  pgssStoreKind { PGSS_INVALID = -1 , PGSS_PLAN = 0 , PGSS_EXEC }
 
enum  PGSSTrackLevel { PGSS_TRACK_NONE , PGSS_TRACK_TOP , PGSS_TRACK_ALL }
 

Functions

 PG_MODULE_MAGIC_EXT (.name="pg_stat_statements",.version=PG_VERSION)
 
static void pgss_shmem_request (void *arg)
 
static void pgss_shmem_init (void *arg)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_7)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_2)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_3)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_8)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_9)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_10)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_12)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_13)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_info)
 
static void pgss_shmem_shutdown (int code, Datum arg)
 
static void pgss_post_parse_analyze (ParseState *pstate, Query *query, const JumbleState *jstate)
 
static PlannedStmt * pgss_planner (Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
 
static void pgss_ExecutorStart (QueryDesc *queryDesc, int eflags)
 
static void pgss_ExecutorRun (QueryDesc *queryDesc, ScanDirection direction, uint64 count)
 
static void pgss_ExecutorFinish (QueryDesc *queryDesc)
 
static void pgss_ExecutorEnd (QueryDesc *queryDesc)
 
static void pgss_ProcessUtility (PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
 
static void pgss_store (const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
 
static void pg_stat_statements_internal (FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
 
static pgssEntry * entry_alloc (pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
 
static void entry_dealloc (void)
 
static bool qtext_store (const char *query, int query_len, Size *query_offset, int *gc_count)
 
static char * qtext_load_file (Size *buffer_size)
 
static char * qtext_fetch (Size query_offset, int query_len, char *buffer, Size buffer_size)
 
static bool need_gc_qtexts (void)
 
static void gc_qtexts (void)
 
static TimestampTz entry_reset (Oid userid, Oid dbid, int64 queryid, bool minmax_only)
 
static char * generate_normalized_query (const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
 
void _PG_init (void)
 
Datum pg_stat_statements_reset_1_7 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_13 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_12 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_10 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_9 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_8 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_3 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_2 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_info (PG_FUNCTION_ARGS)
 
static int entry_cmp (const void *lhs, const void *rhs)
 

Variables

static const uint32 PGSS_FILE_HEADER = 0x20250731
 
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
 
static pgssSharedState * pgss
 
static HTAB * pgss_hash
 
static const ShmemCallbacks pgss_shmem_callbacks
 
static int nesting_level = 0
 
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
 
static planner_hook_type prev_planner_hook = NULL
 
static ExecutorStart_hook_type prev_ExecutorStart = NULL
 
static ExecutorRun_hook_type prev_ExecutorRun = NULL
 
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL
 
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL
 
static ProcessUtility_hook_type prev_ProcessUtility = NULL
 
static const struct config_enum_entry track_options []
 
static int pgss_max = 5000
 
static int pgss_track = PGSS_TRACK_TOP
 
static bool pgss_track_utility = true
 
static bool pgss_track_planning = false
 
static bool pgss_save = true
 

Macro Definition Documentation

◆ ASSUMED_LENGTH_INIT

#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */

Definition at line 97 of file pg_stat_statements.c.

◆ ASSUMED_MEDIAN_INIT

#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */

Definition at line 96 of file pg_stat_statements.c.

◆ IS_STICKY

#define IS_STICKY (   c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)

Definition at line 101 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS

#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */

Definition at line 1559 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_0

#define PG_STAT_STATEMENTS_COLS_V1_0   14

Definition at line 1549 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_1

#define PG_STAT_STATEMENTS_COLS_V1_1   18

Definition at line 1550 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_10

#define PG_STAT_STATEMENTS_COLS_V1_10   43

Definition at line 1555 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_11

#define PG_STAT_STATEMENTS_COLS_V1_11   49

Definition at line 1556 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_12

#define PG_STAT_STATEMENTS_COLS_V1_12   52

Definition at line 1557 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_13

#define PG_STAT_STATEMENTS_COLS_V1_13   54

Definition at line 1558 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_2

#define PG_STAT_STATEMENTS_COLS_V1_2   19

Definition at line 1551 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_3

#define PG_STAT_STATEMENTS_COLS_V1_3   23

Definition at line 1552 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_8

#define PG_STAT_STATEMENTS_COLS_V1_8   32

Definition at line 1553 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_9

#define PG_STAT_STATEMENTS_COLS_V1_9   33

Definition at line 1554 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_INFO_COLS

#define PG_STAT_STATEMENTS_INFO_COLS   2

Definition at line 2029 of file pg_stat_statements.c.

◆ PGSS_DUMP_FILE

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"

Definition at line 80 of file pg_stat_statements.c.

◆ pgss_enabled

#define pgss_enabled (   level)
Value:
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
(pgss_track == PGSS_TRACK_TOP && (level) == 0)))
#define IsParallelWorker()
Definition parallel.h:62
static int pgss_track
@ PGSS_TRACK_ALL
@ PGSS_TRACK_TOP

Definition at line 310 of file pg_stat_statements.c.

315 { \
317 pgss->gc_count++; \
319 } while(0)
320
321/*---- Function declarations ----*/
322
336
337static void pgss_shmem_shutdown(int code, Datum arg);
338static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
339 const JumbleState *jstate);
340static PlannedStmt *pgss_planner(Query *parse,
341 const char *query_string,
342 int cursorOptions,
343 ParamListInfo boundParams,
344 ExplainState *es);
345static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
346static void pgss_ExecutorRun(QueryDesc *queryDesc,
347 ScanDirection direction,
348 uint64 count);
349static void pgss_ExecutorFinish(QueryDesc *queryDesc);
350static void pgss_ExecutorEnd(QueryDesc *queryDesc);
351static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
352 bool readOnlyTree,
353 ProcessUtilityContext context, ParamListInfo params,
354 QueryEnvironment *queryEnv,
355 DestReceiver *dest, QueryCompletion *qc);
356static void pgss_store(const char *query, int64 queryId,
357 int query_location, int query_len,
358 pgssStoreKind kind,
359 double total_time, uint64 rows,
360 const BufferUsage *bufusage,
361 const WalUsage *walusage,
362 const struct JitInstrumentation *jitusage,
363 const JumbleState *jstate,
364 int parallel_workers_to_launch,
365 int parallel_workers_launched,
366 PlannedStmtOrigin planOrigin);
368 pgssVersion api_version,
369 bool showtext);
370static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
371 int encoding, bool sticky);
372static void entry_dealloc(void);
373static bool qtext_store(const char *query, int query_len,
374 Size *query_offset, int *gc_count);
375static char *qtext_load_file(Size *buffer_size);
376static char *qtext_fetch(Size query_offset, int query_len,
377 char *buffer, Size buffer_size);
378static bool need_gc_qtexts(void);
379static void gc_qtexts(void);
380static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
382 const char *query,
383 int query_loc, int *query_len_p);
384
385/*
386 * Module load callback
387 */
388void
389_PG_init(void)
390{
391 /*
392 * In order to create our shared memory area, we have to be loaded via
393 * shared_preload_libraries. If not, fall out without hooking into any of
394 * the main system. (We don't throw error here because it seems useful to
395 * allow the pg_stat_statements functions to be created even when the
396 * module isn't active. The functions must protect themselves against
397 * being called then, however.)
398 */
400 return;
401
402 /*
403 * Inform the postmaster that we want to enable query_id calculation if
404 * compute_query_id is set to auto.
405 */
407
408 /*
409 * Define (or redefine) custom GUC variables.
410 */
411 DefineCustomIntVariable("pg_stat_statements.max",
412 "Sets the maximum number of statements tracked by pg_stat_statements.",
413 NULL,
414 &pgss_max,
415 5000,
416 100,
417 INT_MAX / 2,
419 0,
420 NULL,
421 NULL,
422 NULL);
423
424 DefineCustomEnumVariable("pg_stat_statements.track",
425 "Selects which statements are tracked by pg_stat_statements.",
426 NULL,
427 &pgss_track,
430 PGC_SUSET,
431 0,
432 NULL,
433 NULL,
434 NULL);
435
436 DefineCustomBoolVariable("pg_stat_statements.track_utility",
437 "Selects whether utility commands are tracked by pg_stat_statements.",
438 NULL,
440 true,
441 PGC_SUSET,
442 0,
443 NULL,
444 NULL,
445 NULL);
446
447 DefineCustomBoolVariable("pg_stat_statements.track_planning",
448 "Selects whether planning duration is tracked by pg_stat_statements.",
449 NULL,
451 false,
452 PGC_SUSET,
453 0,
454 NULL,
455 NULL,
456 NULL);
457
458 DefineCustomBoolVariable("pg_stat_statements.save",
459 "Save pg_stat_statements statistics across server shutdowns.",
460 NULL,
461 &pgss_save,
462 true,
464 0,
465 NULL,
466 NULL,
467 NULL);
468
469 MarkGUCPrefixReserved("pg_stat_statements");
470
471 /*
472 * Register our shared memory needs.
473 */
475
476 /*
477 * Install hooks.
478 */
493}
494
495/*
496 * shmem request callback: Request shared memory resources.
497 *
498 * This is called at postmaster startup. Note that the shared memory isn't
499 * allocated here yet; this merely registers our needs.
500 *
501 * In EXEC_BACKEND mode, this is also called in each backend, to re-attach to
502 * the shared memory area that was already initialized.
503 */
504static void
506{
507 ShmemRequestHash(.name = "pg_stat_statements hash",
508 .nelems = pgss_max,
509 .hash_info.keysize = sizeof(pgssHashKey),
510 .hash_info.entrysize = sizeof(pgssEntry),
511 .hash_flags = HASH_ELEM | HASH_BLOBS,
512 .ptr = &pgss_hash,
513 );
514 ShmemRequestStruct(.name = "pg_stat_statements",
515 .size = sizeof(pgssSharedState),
516 .ptr = (void **) &pgss,
517 );
518}
519
520/*
521 * shmem init callback: Initialize our shared memory data structures at
522 * postmaster startup.
523 *
524 * Load any pre-existing statistics from file. Also create and load the
525 * query-texts file, which is expected to exist (even if empty) while the
526 * module is enabled.
527 */
528static void
529pgss_shmem_init(void *arg)
530{
531 int tranche_id;
532 FILE *file = NULL;
533 FILE *qfile = NULL;
534 uint32 header;
535 int32 num;
536 int32 pgver;
537 int32 i;
538 int buffer_size;
539 char *buffer = NULL;
540
541 /*
542 * We already checked that we're loaded from shared_preload_libraries in
543 * _PG_init(), so we should not get here after postmaster startup.
544 */
546
547 /*
548 * Initialize the shmem area with no statistics.
549 */
550 tranche_id = LWLockNewTrancheId("pg_stat_statements");
551 LWLockInitialize(&pgss->lock.lock, tranche_id);
555 pgss->extent = 0;
556 pgss->n_writers = 0;
557 pgss->gc_count = 0;
558 pgss->stats.dealloc = 0;
560
561 /* The hash table must've also been initialized by now */
563
564 /*
565 * Set up a shmem exit hook to dump the statistics to disk on postmaster
566 * (or standalone backend) exit.
567 */
569
570 /*
571 * Load any pre-existing statistics from file.
572 *
573 * Note: we don't bother with locks here, because there should be no other
574 * processes running when this code is reached.
575 */
576
577 /* Unlink query text file possibly left over from crash */
579
580 /* Allocate new query text temp file */
582 if (qfile == NULL)
583 goto write_error;
584
585 /*
586 * If we were told not to load old statistics, we're done. (Note we do
587 * not try to unlink any old dump file in this case. This seems a bit
588 * questionable but it's the historical behavior.)
589 */
590 if (!pgss_save)
591 {
593 return;
594 }
595
596 /*
597 * Attempt to load old statistics from the dump file.
598 */
600 if (file == NULL)
601 {
602 if (errno != ENOENT)
603 goto read_error;
604 /* No existing persisted stats file, so we're done */
606 return;
607 }
608
609 buffer_size = 2048;
610 buffer = (char *) palloc(buffer_size);
611
612 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
613 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
614 fread(&num, sizeof(int32), 1, file) != 1)
615 goto read_error;
616
617 if (header != PGSS_FILE_HEADER ||
619 goto data_error;
620
621 for (i = 0; i < num; i++)
622 {
624 pgssEntry *entry;
625 Size query_offset;
626
627 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
628 goto read_error;
629
630 /* Encoding is the only field we can easily sanity-check */
631 if (!PG_VALID_BE_ENCODING(temp.encoding))
632 goto data_error;
633
634 /* Resize buffer as needed */
635 if (temp.query_len >= buffer_size)
636 {
637 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
638 buffer = repalloc(buffer, buffer_size);
639 }
640
641 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
642 goto read_error;
643
644 /* Should have a trailing null, but let's make sure */
645 buffer[temp.query_len] = '\0';
646
647 /* Skip loading "sticky" entries */
648 if (IS_STICKY(temp.counters))
649 continue;
650
651 /* Store the query text */
652 query_offset = pgss->extent;
653 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
654 goto write_error;
655 pgss->extent += temp.query_len + 1;
656
657 /* make the hashtable entry (discards old entries if too many) */
658 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
659 temp.encoding,
660 false);
661
662 /* copy in the actual stats */
663 entry->counters = temp.counters;
664 entry->stats_since = temp.stats_since;
665 entry->minmax_stats_since = temp.minmax_stats_since;
666 }
667
668 /* Read global statistics for pg_stat_statements */
669 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
670 goto read_error;
671
672 pfree(buffer);
673 FreeFile(file);
675
676 /*
677 * Remove the persisted stats file so it's not included in
678 * backups/replication standbys, etc. A new file will be written on next
679 * shutdown.
680 *
681 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
682 * because we remove that file on startup; it acts inversely to
683 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
684 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
685 * when the server is not running. Leaving the file creates no danger of
686 * a newly restored database having a spurious record of execution costs,
687 * which is what we're really concerned about here.
688 */
690
691 return;
692
694 ereport(LOG,
696 errmsg("could not read file \"%s\": %m",
698 goto fail;
700 ereport(LOG,
702 errmsg("ignoring invalid data in file \"%s\"",
704 goto fail;
706 ereport(LOG,
708 errmsg("could not write file \"%s\": %m",
710fail:
711 if (buffer)
712 pfree(buffer);
713 if (file)
714 FreeFile(file);
715 if (qfile)
717 /* If possible, throw away the bogus file; ignore any error */
719
720 /*
721 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
722 * server is running with pg_stat_statements enabled
723 */
724}
725
726/*
727 * shmem_shutdown hook: Dump statistics into file.
728 *
729 * Note: we don't bother with acquiring lock, because there should be no
730 * other processes running when this is called.
731 */
732static void
734{
735 FILE *file;
736 char *qbuffer = NULL;
737 Size qbuffer_size = 0;
739 int32 num_entries;
740 pgssEntry *entry;
741
742 /* Don't try to dump during a crash. */
743 if (code)
744 return;
745
746 /* Safety check ... shouldn't get here unless shmem is set up. */
747 if (!pgss || !pgss_hash)
748 return;
749
750 /* Don't dump if told not to. */
751 if (!pgss_save)
752 return;
753
755 if (file == NULL)
756 goto error;
757
758 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
759 goto error;
760 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
761 goto error;
762 num_entries = hash_get_num_entries(pgss_hash);
763 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
764 goto error;
765
767 if (qbuffer == NULL)
768 goto error;
769
770 /*
771 * When serializing to disk, we store query texts immediately after their
772 * entry data. Any orphaned query texts are thereby excluded.
773 */
775 while ((entry = hash_seq_search(&hash_seq)) != NULL)
776 {
777 int len = entry->query_len;
778 char *qstr = qtext_fetch(entry->query_offset, len,
780
781 if (qstr == NULL)
782 continue; /* Ignore any entries with bogus texts */
783
784 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
785 fwrite(qstr, 1, len + 1, file) != len + 1)
786 {
787 /* note: we assume hash_seq_term won't change errno */
789 goto error;
790 }
791 }
792
793 /* Dump global statistics for pg_stat_statements */
794 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
795 goto error;
796
797 pfree(qbuffer);
798 qbuffer = NULL;
799
800 if (FreeFile(file))
801 {
802 file = NULL;
803 goto error;
804 }
805
806 /*
807 * Rename file into place, so we atomically replace any old one.
808 */
810
811 /* Unlink query-texts file; it's not needed while the server is shut down */
813
814 return;
815
816error:
817 ereport(LOG,
819 errmsg("could not write file \"%s\": %m",
820 PGSS_DUMP_FILE ".tmp")));
821 if (qbuffer)
822 pfree(qbuffer);
823 if (file)
824 FreeFile(file);
825 unlink(PGSS_DUMP_FILE ".tmp");
827}
828
829/*
830 * Post-parse-analysis hook: mark query with a queryId
831 */
832static void
834{
836 prev_post_parse_analyze_hook(pstate, query, jstate);
837
838 /* Safety check... */
840 return;
841
842 /*
843 * If it's EXECUTE, clear the queryId so that stats will accumulate for
844 * the underlying PREPARE. But don't do this if we're not tracking
845 * utility statements, to avoid messing up another extension that might be
846 * tracking them.
847 */
848 if (query->utilityStmt)
849 {
851 {
852 query->queryId = INT64CONST(0);
853 return;
854 }
855 }
856
857 /*
858 * If query jumbling was able to identify any ignorable constants, we
859 * immediately create a hash table entry for the query, so that we can
860 * record the normalized form of the query string. If there were no such
861 * constants, the normalized string would be the same as the query text
862 * anyway, so there's no need for an early entry.
863 */
864 if (jstate && jstate->clocations_count > 0)
865 pgss_store(pstate->p_sourcetext,
866 query->queryId,
867 query->stmt_location,
868 query->stmt_len,
870 0,
871 0,
872 NULL,
873 NULL,
874 NULL,
875 jstate,
876 0,
877 0,
879}
880
881/*
882 * Planner hook: forward to regular planner, but measure planning time
883 * if needed.
884 */
885static PlannedStmt *
886pgss_planner(Query *parse,
887 const char *query_string,
888 int cursorOptions,
889 ParamListInfo boundParams,
890 ExplainState *es)
891{
893
894 /*
895 * We can't process the query if no query_string is provided, as
896 * pgss_store needs it. We also ignore queries without a queryId, as they
897 * would be treated as utility statements, which may not be the case.
898 */
900 && pgss_track_planning && query_string
901 && parse->queryId != INT64CONST(0))
902 {
905 BufferUsage bufusage_start,
906 bufusage;
907 WalUsage walusage_start,
908 walusage;
909
910 /* We need to track buffer usage as the planner can access them. */
911 bufusage_start = pgBufferUsage;
912
913 /*
914 * Similarly the planner could write some WAL records in some cases
915 * (e.g. setting a hint bit with those being WAL-logged)
916 */
917 walusage_start = pgWalUsage;
919
921 PG_TRY();
922 {
924 result = prev_planner_hook(parse, query_string, cursorOptions,
925 boundParams, es);
926 else
927 result = standard_planner(parse, query_string, cursorOptions,
928 boundParams, es);
929 }
930 PG_FINALLY();
931 {
933 }
934 PG_END_TRY();
935
938
939 /* calc differences of buffer counters. */
940 memset(&bufusage, 0, sizeof(BufferUsage));
941 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
942
943 /* calc differences of WAL counters. */
944 memset(&walusage, 0, sizeof(WalUsage));
945 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
946
947 pgss_store(query_string,
948 parse->queryId,
949 parse->stmt_location,
950 parse->stmt_len,
951 PGSS_PLAN,
953 0,
954 &bufusage,
955 &walusage,
956 NULL,
957 NULL,
958 0,
959 0,
960 result->planOrigin);
961 }
962 else
963 {
964 /*
965 * Even though we're not tracking plan time for this statement, we
966 * must still increment the nesting level, to ensure that functions
967 * evaluated during planning are not seen as top-level calls.
968 */
970 PG_TRY();
971 {
973 result = prev_planner_hook(parse, query_string, cursorOptions,
974 boundParams, es);
975 else
976 result = standard_planner(parse, query_string, cursorOptions,
977 boundParams, es);
978 }
979 PG_FINALLY();
980 {
982 }
983 PG_END_TRY();
984 }
985
986 return result;
987}
988
989/*
990 * ExecutorStart hook: start up tracking if needed
991 */
992static void
993pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
994{
995 /*
996 * If query has queryId zero, don't track it. This prevents double
997 * counting of optimizable statements that are directly contained in
998 * utility statements.
999 */
1000 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1001 {
1002 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1003 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1004 }
1005
1007 prev_ExecutorStart(queryDesc, eflags);
1008 else
1009 standard_ExecutorStart(queryDesc, eflags);
1010}
1011
1012/*
1013 * ExecutorRun hook: all we need do is track nesting depth
1014 */
1015static void
1016pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1017{
1018 nesting_level++;
1019 PG_TRY();
1020 {
1021 if (prev_ExecutorRun)
1022 prev_ExecutorRun(queryDesc, direction, count);
1023 else
1024 standard_ExecutorRun(queryDesc, direction, count);
1025 }
1026 PG_FINALLY();
1027 {
1028 nesting_level--;
1029 }
1030 PG_END_TRY();
1031}
1032
1033/*
1034 * ExecutorFinish hook: all we need do is track nesting depth
1035 */
1036static void
1038{
1039 nesting_level++;
1040 PG_TRY();
1041 {
1043 prev_ExecutorFinish(queryDesc);
1044 else
1045 standard_ExecutorFinish(queryDesc);
1046 }
1047 PG_FINALLY();
1048 {
1049 nesting_level--;
1050 }
1051 PG_END_TRY();
1052}
1053
1054/*
1055 * ExecutorEnd hook: store results if needed
1056 */
1057static void
1058pgss_ExecutorEnd(QueryDesc *queryDesc)
1059{
1060 int64 queryId = queryDesc->plannedstmt->queryId;
1061
1062 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1064 {
1065 pgss_store(queryDesc->sourceText,
1066 queryId,
1067 queryDesc->plannedstmt->stmt_location,
1068 queryDesc->plannedstmt->stmt_len,
1069 PGSS_EXEC,
1071 queryDesc->estate->es_total_processed,
1072 &queryDesc->query_instr->bufusage,
1073 &queryDesc->query_instr->walusage,
1074 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1075 NULL,
1076 queryDesc->estate->es_parallel_workers_to_launch,
1077 queryDesc->estate->es_parallel_workers_launched,
1078 queryDesc->plannedstmt->planOrigin);
1079 }
1080
1081 if (prev_ExecutorEnd)
1082 prev_ExecutorEnd(queryDesc);
1083 else
1084 standard_ExecutorEnd(queryDesc);
1085}
1086
1087/*
1088 * ProcessUtility hook
1089 */
1090static void
1091pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1092 bool readOnlyTree,
1093 ProcessUtilityContext context,
1094 ParamListInfo params, QueryEnvironment *queryEnv,
1095 DestReceiver *dest, QueryCompletion *qc)
1096{
1097 Node *parsetree = pstmt->utilityStmt;
1098 int64 saved_queryId = pstmt->queryId;
1100 int saved_stmt_len = pstmt->stmt_len;
1102
1103 /*
1104 * Force utility statements to get queryId zero. We do this even in cases
1105 * where the statement contains an optimizable statement for which a
1106 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1107 * cases, runtime control will first go through ProcessUtility and then
1108 * the executor, and we don't want the executor hooks to do anything,
1109 * since we are already measuring the statement's costs at the utility
1110 * level.
1111 *
1112 * Note that this is only done if pg_stat_statements is enabled and
1113 * configured to track utility statements, to allow for the unlikely case
1114 * that the user configured another extension to handle utility statements
1115 * only.
1116 */
1117 if (enabled)
1118 pstmt->queryId = INT64CONST(0);
1119
1120 /*
1121 * If it's an EXECUTE statement, we don't track it and don't increment the
1122 * nesting level. This allows the cycles to be charged to the underlying
1123 * PREPARE instead (by the Executor hooks), which is much more useful.
1124 *
1125 * We also don't track execution of PREPARE. If we did, we would get one
1126 * hash table entry for the PREPARE (with hash calculated from the query
1127 * string), and then a different one with the same query string (but hash
1128 * calculated from the query tree) would be used to accumulate costs of
1129 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1130 * actually run the planner (only parse+rewrite), its costs are generally
1131 * pretty negligible and it seems okay to just ignore it.
1132 */
1133 if (enabled &&
1134 !IsA(parsetree, ExecuteStmt) &&
1135 !IsA(parsetree, PrepareStmt))
1136 {
1139 uint64 rows;
1140 BufferUsage bufusage_start,
1141 bufusage;
1142 WalUsage walusage_start,
1143 walusage;
1144
1145 bufusage_start = pgBufferUsage;
1146 walusage_start = pgWalUsage;
1148
1149 nesting_level++;
1150 PG_TRY();
1151 {
1153 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1154 context, params, queryEnv,
1155 dest, qc);
1156 else
1157 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1158 context, params, queryEnv,
1159 dest, qc);
1160 }
1161 PG_FINALLY();
1162 {
1163 nesting_level--;
1164 }
1165 PG_END_TRY();
1166
1167 /*
1168 * CAUTION: do not access the *pstmt data structure again below here.
1169 * If it was a ROLLBACK or similar, that data structure may have been
1170 * freed. We must copy everything we still need into local variables,
1171 * which we did above.
1172 *
1173 * For the same reason, we can't risk restoring pstmt->queryId to its
1174 * former value, which'd otherwise be a good idea.
1175 */
1176
1179
1180 /*
1181 * Track the total number of rows retrieved or affected by the utility
1182 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1183 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1184 */
1185 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1186 qc->commandTag == CMDTAG_FETCH ||
1187 qc->commandTag == CMDTAG_SELECT ||
1189 qc->nprocessed : 0;
1190
1191 /* calc differences of buffer counters. */
1192 memset(&bufusage, 0, sizeof(BufferUsage));
1193 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1194
1195 /* calc differences of WAL counters. */
1196 memset(&walusage, 0, sizeof(WalUsage));
1197 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1198
1199 pgss_store(queryString,
1203 PGSS_EXEC,
1205 rows,
1206 &bufusage,
1207 &walusage,
1208 NULL,
1209 NULL,
1210 0,
1211 0,
1212 pstmt->planOrigin);
1213 }
1214 else
1215 {
1216 /*
1217 * Even though we're not tracking execution time for this statement,
1218 * we must still increment the nesting level, to ensure that functions
1219 * evaluated within it are not seen as top-level calls. But don't do
1220 * so for EXECUTE; that way, when control reaches pgss_planner or
1221 * pgss_ExecutorStart, we will treat the costs as top-level if
1222 * appropriate. Likewise, don't bump for PREPARE, so that parse
1223 * analysis will treat the statement as top-level if appropriate.
1224 *
1225 * To be absolutely certain we don't mess up the nesting level,
1226 * evaluate the bump_level condition just once.
1227 */
1228 bool bump_level =
1229 !IsA(parsetree, ExecuteStmt) &&
1230 !IsA(parsetree, PrepareStmt);
1231
1232 if (bump_level)
1233 nesting_level++;
1234 PG_TRY();
1235 {
1237 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1238 context, params, queryEnv,
1239 dest, qc);
1240 else
1241 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1242 context, params, queryEnv,
1243 dest, qc);
1244 }
1245 PG_FINALLY();
1246 {
1247 if (bump_level)
1248 nesting_level--;
1249 }
1250 PG_END_TRY();
1251 }
1252}
1253
1254/*
1255 * Store some statistics for a statement.
1256 *
1257 * If jstate is not NULL then we're trying to create an entry for which
1258 * we have no statistics as yet; we just want to record the normalized
1259 * query string. total_time, rows, bufusage and walusage are ignored in this
1260 * case.
1261 *
1262 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1263 * for the arrays in the Counters field.
1264 */
1265static void
1266pgss_store(const char *query, int64 queryId,
1267 int query_location, int query_len,
1268 pgssStoreKind kind,
1269 double total_time, uint64 rows,
1270 const BufferUsage *bufusage,
1271 const WalUsage *walusage,
1272 const struct JitInstrumentation *jitusage,
1273 const JumbleState *jstate,
1274 int parallel_workers_to_launch,
1275 int parallel_workers_launched,
1276 PlannedStmtOrigin planOrigin)
1277{
1279 pgssEntry *entry;
1280 char *norm_query = NULL;
1282
1283 Assert(query != NULL);
1284
1285 /* Safety check... */
1286 if (!pgss || !pgss_hash)
1287 return;
1288
1289 /*
1290 * Nothing to do if compute_query_id isn't enabled and no other module
1291 * computed a query identifier.
1292 */
1293 if (queryId == INT64CONST(0))
1294 return;
1295
1296 /*
1297 * Confine our attention to the relevant part of the string, if the query
1298 * is a portion of a multi-statement source string, and update query
1299 * location and length if needed.
1300 */
1301 query = CleanQuerytext(query, &query_location, &query_len);
1302
1303 /* Set up key for hashtable search */
1304
1305 /* clear padding */
1306 memset(&key, 0, sizeof(pgssHashKey));
1307
1308 key.userid = GetUserId();
1309 key.dbid = MyDatabaseId;
1310 key.queryid = queryId;
1311 key.toplevel = (nesting_level == 0);
1312
1313 /* Lookup the hash table entry with shared lock. */
1315
1316 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1317
1318 /* Create new entry, if not present */
1319 if (!entry)
1320 {
1321 Size query_offset;
1322 int gc_count;
1323 bool stored;
1324 bool do_gc;
1325
1326 /*
1327 * Create a new, normalized query string if caller asked. We don't
1328 * need to hold the lock while doing this work. (Note: in any case,
1329 * it's possible that someone else creates a duplicate hashtable entry
1330 * in the interval where we don't hold the lock below. That case is
1331 * handled by entry_alloc.)
1332 */
1333 if (jstate)
1334 {
1338 &query_len);
1340 }
1341
1342 /* Append new query text to file with only shared lock held */
1343 stored = qtext_store(norm_query ? norm_query : query, query_len,
1344 &query_offset, &gc_count);
1345
1346 /*
1347 * Determine whether we need to garbage collect external query texts
1348 * while the shared lock is still held. This micro-optimization
1349 * avoids taking the time to decide this while holding exclusive lock.
1350 */
1352
1353 /* Need exclusive lock to make a new hashtable entry - promote */
1356
1357 /*
1358 * A garbage collection may have occurred while we weren't holding the
1359 * lock. In the unlikely event that this happens, the query text we
1360 * stored above will have been garbage collected, so write it again.
1361 * This should be infrequent enough that doing it while holding
1362 * exclusive lock isn't a performance problem.
1363 */
1364 if (!stored || pgss->gc_count != gc_count)
1365 stored = qtext_store(norm_query ? norm_query : query, query_len,
1366 &query_offset, NULL);
1367
1368 /* If we failed to write to the text file, give up */
1369 if (!stored)
1370 goto done;
1371
1372 /* OK to create a new hashtable entry */
1373 entry = entry_alloc(&key, query_offset, query_len, encoding,
1374 jstate != NULL);
1375
1376 /* If needed, perform garbage collection while exclusive lock held */
1377 if (do_gc)
1378 gc_qtexts();
1379 }
1380
1381 /* Increment the counts, except when jstate is not NULL */
1382 if (!jstate)
1383 {
1384 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1385
1386 /*
1387 * Grab the spinlock while updating the counters (see comment about
1388 * locking rules at the head of the file)
1389 */
1390 SpinLockAcquire(&entry->mutex);
1391
1392 /* "Unstick" entry if it was previously sticky */
1393 if (IS_STICKY(entry->counters))
1394 entry->counters.usage = USAGE_INIT;
1395
1396 entry->counters.calls[kind] += 1;
1397 entry->counters.total_time[kind] += total_time;
1398
1399 if (entry->counters.calls[kind] == 1)
1400 {
1401 entry->counters.min_time[kind] = total_time;
1402 entry->counters.max_time[kind] = total_time;
1403 entry->counters.mean_time[kind] = total_time;
1404 }
1405 else
1406 {
1407 /*
1408 * Welford's method for accurately computing variance. See
1409 * <http://www.johndcook.com/blog/standard_deviation/>
1410 */
1411 double old_mean = entry->counters.mean_time[kind];
1412
1413 entry->counters.mean_time[kind] +=
1414 (total_time - old_mean) / entry->counters.calls[kind];
1415 entry->counters.sum_var_time[kind] +=
1416 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1417
1418 /*
1419 * Calculate min and max time. min = 0 and max = 0 means that the
1420 * min/max statistics were reset
1421 */
1422 if (entry->counters.min_time[kind] == 0
1423 && entry->counters.max_time[kind] == 0)
1424 {
1425 entry->counters.min_time[kind] = total_time;
1426 entry->counters.max_time[kind] = total_time;
1427 }
1428 else
1429 {
1430 if (entry->counters.min_time[kind] > total_time)
1431 entry->counters.min_time[kind] = total_time;
1432 if (entry->counters.max_time[kind] < total_time)
1433 entry->counters.max_time[kind] = total_time;
1434 }
1435 }
1436 entry->counters.rows += rows;
1437 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1438 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1441 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1442 entry->counters.local_blks_read += bufusage->local_blks_read;
1445 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1446 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1453 entry->counters.usage += USAGE_EXEC(total_time);
1454 entry->counters.wal_records += walusage->wal_records;
1455 entry->counters.wal_fpi += walusage->wal_fpi;
1456 entry->counters.wal_bytes += walusage->wal_bytes;
1457 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1458 if (jitusage)
1459 {
1460 entry->counters.jit_functions += jitusage->created_functions;
1461 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1462
1463 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1464 entry->counters.jit_deform_count++;
1465 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1466
1467 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1469 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1470
1471 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1473 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1474
1475 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1477 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1478 }
1479
1480 /* parallel worker counters */
1481 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1482 entry->counters.parallel_workers_launched += parallel_workers_launched;
1483
1484 /* plan cache counters */
1485 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1487 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1488 entry->counters.custom_plan_calls++;
1489
1490 SpinLockRelease(&entry->mutex);
1491 }
1492
1493done:
1495
1496 /* We postpone this clean-up until we're out of the lock */
1497 if (norm_query)
1499}
1500
1501/*
1502 * Reset statement statistics corresponding to userid, dbid, and queryid.
1503 */
1504Datum
1506{
1507 Oid userid;
1508 Oid dbid;
1509 int64 queryid;
1510
1511 userid = PG_GETARG_OID(0);
1512 dbid = PG_GETARG_OID(1);
1513 queryid = PG_GETARG_INT64(2);
1514
1515 entry_reset(userid, dbid, queryid, false);
1516
1518}
1519
1520Datum
1522{
1523 Oid userid;
1524 Oid dbid;
1525 int64 queryid;
1526 bool minmax_only;
1527
1528 userid = PG_GETARG_OID(0);
1529 dbid = PG_GETARG_OID(1);
1530 queryid = PG_GETARG_INT64(2);
1532
1533 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1534}
1535
1536/*
1537 * Reset statement statistics.
1538 */
1539Datum
1541{
1542 entry_reset(0, 0, 0, false);
1543
1545}
1546
/*
 * Number of output arguments (columns) returned for each supported API
 * version of pg_stat_statements().
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49
#define PG_STAT_STATEMENTS_COLS_V1_12	52
#define PG_STAT_STATEMENTS_COLS_V1_13	54

/* Widest row of any version, i.e. the maximum of the above */
#define PG_STAT_STATEMENTS_COLS			PG_STAT_STATEMENTS_COLS_V1_13
1559
1560/*
1561 * Retrieve statement statistics.
1562 *
1563 * The SQL API of this function has changed multiple times, and will likely
1564 * do so again in future. To support the case where a newer version of this
1565 * loadable module is being used with an old SQL declaration of the function,
1566 * we continue to support the older API versions. For 1.2 and later, the
1567 * expected API version is identified by embedding it in the C name of the
1568 * function. Unfortunately we weren't bright enough to do that for 1.1.
1569 */
1570Datum
1572{
1573 bool showtext = PG_GETARG_BOOL(0);
1574
1576
1577 return (Datum) 0;
1578}
1579
1580Datum
1582{
1583 bool showtext = PG_GETARG_BOOL(0);
1584
1586
1587 return (Datum) 0;
1588}
1589
1590Datum
1592{
1593 bool showtext = PG_GETARG_BOOL(0);
1594
1596
1597 return (Datum) 0;
1598}
1599
1600Datum
1602{
1603 bool showtext = PG_GETARG_BOOL(0);
1604
1606
1607 return (Datum) 0;
1608}
1609
1610Datum
1612{
1613 bool showtext = PG_GETARG_BOOL(0);
1614
1616
1617 return (Datum) 0;
1618}
1619
1620Datum
1622{
1623 bool showtext = PG_GETARG_BOOL(0);
1624
1626
1627 return (Datum) 0;
1628}
1629
1630Datum
1632{
1633 bool showtext = PG_GETARG_BOOL(0);
1634
1636
1637 return (Datum) 0;
1638}
1639
1640Datum
1642{
1643 bool showtext = PG_GETARG_BOOL(0);
1644
1646
1647 return (Datum) 0;
1648}
1649
1650/*
1651 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1652 * This can be removed someday, perhaps.
1653 */
1654Datum
1656{
1657 /* If it's really API 1.1, we'll figure that out below */
1659
1660 return (Datum) 0;
1661}
1662
1663/* Common code for all versions of pg_stat_statements() */
1664static void
1666 pgssVersion api_version,
1667 bool showtext)
1668{
1670 Oid userid = GetUserId();
1671 bool is_allowed_role = false;
1672 char *qbuffer = NULL;
1673 Size qbuffer_size = 0;
1674 Size extent = 0;
1675 int gc_count = 0;
1677 pgssEntry *entry;
1678
1679 /*
1680 * Superusers or roles with the privileges of pg_read_all_stats members
1681 * are allowed
1682 */
1684
1685 /* hash table must exist already */
1686 if (!pgss || !pgss_hash)
1687 ereport(ERROR,
1689 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1690
1691 InitMaterializedSRF(fcinfo, 0);
1692
1693 /*
1694 * Check we have the expected number of output arguments. Aside from
1695 * being a good safety check, we need a kluge here to detect API version
1696 * 1.1, which was wedged into the code in an ill-considered way.
1697 */
1698 switch (rsinfo->setDesc->natts)
1699 {
1701 if (api_version != PGSS_V1_0)
1702 elog(ERROR, "incorrect number of output arguments");
1703 break;
1705 /* pg_stat_statements() should have told us 1.0 */
1706 if (api_version != PGSS_V1_0)
1707 elog(ERROR, "incorrect number of output arguments");
1708 api_version = PGSS_V1_1;
1709 break;
1711 if (api_version != PGSS_V1_2)
1712 elog(ERROR, "incorrect number of output arguments");
1713 break;
1715 if (api_version != PGSS_V1_3)
1716 elog(ERROR, "incorrect number of output arguments");
1717 break;
1719 if (api_version != PGSS_V1_8)
1720 elog(ERROR, "incorrect number of output arguments");
1721 break;
1723 if (api_version != PGSS_V1_9)
1724 elog(ERROR, "incorrect number of output arguments");
1725 break;
1727 if (api_version != PGSS_V1_10)
1728 elog(ERROR, "incorrect number of output arguments");
1729 break;
1731 if (api_version != PGSS_V1_11)
1732 elog(ERROR, "incorrect number of output arguments");
1733 break;
1735 if (api_version != PGSS_V1_12)
1736 elog(ERROR, "incorrect number of output arguments");
1737 break;
1739 if (api_version != PGSS_V1_13)
1740 elog(ERROR, "incorrect number of output arguments");
1741 break;
1742 default:
1743 elog(ERROR, "incorrect number of output arguments");
1744 }
1745
1746 /*
1747 * We'd like to load the query text file (if needed) while not holding any
1748 * lock on pgss->lock. In the worst case we'll have to do this again
1749 * after we have the lock, but it's unlikely enough to make this a win
1750 * despite occasional duplicated work. We need to reload if anybody
1751 * writes to the file (either a retail qtext_store(), or a garbage
1752 * collection) between this point and where we've gotten shared lock. If
1753 * a qtext_store is actually in progress when we look, we might as well
1754 * skip the speculative load entirely.
1755 */
1756 if (showtext)
1757 {
1758 int n_writers;
1759
1760 /* Take the mutex so we can examine variables */
1762 extent = pgss->extent;
1763 n_writers = pgss->n_writers;
1764 gc_count = pgss->gc_count;
1766
1767 /* No point in loading file now if there are active writers */
1768 if (n_writers == 0)
1770 }
1771
1772 /*
1773 * Get shared lock, load or reload the query text file if we must, and
1774 * iterate over the hashtable entries.
1775 *
1776 * With a large hash table, we might be holding the lock rather longer
1777 * than one could wish. However, this only blocks creation of new hash
1778 * table entries, and the larger the hash table the less likely that is to
1779 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1780 * we need to partition the hash table to limit the time spent holding any
1781 * one lock.
1782 */
1784
1785 if (showtext)
1786 {
1787 /*
1788 * Here it is safe to examine extent and gc_count without taking the
1789 * mutex. Note that although other processes might change
1790 * pgss->extent just after we look at it, the strings they then write
1791 * into the file cannot yet be referenced in the hashtable, so we
1792 * don't care whether we see them or not.
1793 *
1794 * If qtext_load_file fails, we just press on; we'll return NULL for
1795 * every query text.
1796 */
1797 if (qbuffer == NULL ||
1798 pgss->extent != extent ||
1799 pgss->gc_count != gc_count)
1800 {
1801 if (qbuffer)
1802 pfree(qbuffer);
1804 }
1805 }
1806
1808 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1809 {
1811 bool nulls[PG_STAT_STATEMENTS_COLS];
1812 int i = 0;
1813 Counters tmp;
1814 double stddev;
1815 int64 queryid = entry->key.queryid;
1816 TimestampTz stats_since;
1817 TimestampTz minmax_stats_since;
1818
1819 memset(values, 0, sizeof(values));
1820 memset(nulls, 0, sizeof(nulls));
1821
1822 values[i++] = ObjectIdGetDatum(entry->key.userid);
1823 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1824 if (api_version >= PGSS_V1_9)
1825 values[i++] = BoolGetDatum(entry->key.toplevel);
1826
1827 if (is_allowed_role || entry->key.userid == userid)
1828 {
1829 if (api_version >= PGSS_V1_2)
1830 values[i++] = Int64GetDatumFast(queryid);
1831
1832 if (showtext)
1833 {
1834 char *qstr = qtext_fetch(entry->query_offset,
1835 entry->query_len,
1836 qbuffer,
1837 qbuffer_size);
1838
1839 if (qstr)
1840 {
1841 char *enc;
1842
1844 entry->query_len,
1845 entry->encoding);
1846
1848
1849 if (enc != qstr)
1850 pfree(enc);
1851 }
1852 else
1853 {
1854 /* Just return a null if we fail to find the text */
1855 nulls[i++] = true;
1856 }
1857 }
1858 else
1859 {
1860 /* Query text not requested */
1861 nulls[i++] = true;
1862 }
1863 }
1864 else
1865 {
1866 /* Don't show queryid */
1867 if (api_version >= PGSS_V1_2)
1868 nulls[i++] = true;
1869
1870 /*
1871 * Don't show query text, but hint as to the reason for not doing
1872 * so if it was requested
1873 */
1874 if (showtext)
1875 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1876 else
1877 nulls[i++] = true;
1878 }
1879
1880 /* copy counters to a local variable to keep locking time short */
1881 SpinLockAcquire(&entry->mutex);
1882 tmp = entry->counters;
1883 SpinLockRelease(&entry->mutex);
1884
1885 /*
1886 * The spinlock is not required when reading these two as they are
1887 * always updated when holding pgss->lock exclusively.
1888 */
1889 stats_since = entry->stats_since;
1890 minmax_stats_since = entry->minmax_stats_since;
1891
1892 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1893 if (IS_STICKY(tmp))
1894 continue;
1895
1896 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1897 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1898 {
1899 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1900 {
1901 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1902 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1903 }
1904
1905 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1906 api_version >= PGSS_V1_8)
1907 {
1908 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1909 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1910 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1911
1912 /*
1913 * Note we are calculating the population variance here, not
1914 * the sample variance, as we have data for the whole
1915 * population, so Bessel's correction is not used, and we
1916 * don't divide by tmp.calls - 1.
1917 */
1918 if (tmp.calls[kind] > 1)
1919 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1920 else
1921 stddev = 0.0;
1922 values[i++] = Float8GetDatumFast(stddev);
1923 }
1924 }
1925 values[i++] = Int64GetDatumFast(tmp.rows);
1928 if (api_version >= PGSS_V1_1)
1933 if (api_version >= PGSS_V1_1)
1938 if (api_version >= PGSS_V1_1)
1939 {
1942 }
1943 if (api_version >= PGSS_V1_11)
1944 {
1947 }
1948 if (api_version >= PGSS_V1_10)
1949 {
1952 }
1953 if (api_version >= PGSS_V1_8)
1954 {
1955 char buf[256];
1956 Datum wal_bytes;
1957
1960
1961 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1962
1963 /* Convert to numeric. */
1964 wal_bytes = DirectFunctionCall3(numeric_in,
1967 Int32GetDatum(-1));
1968 values[i++] = wal_bytes;
1969 }
1970 if (api_version >= PGSS_V1_12)
1971 {
1973 }
1974 if (api_version >= PGSS_V1_10)
1975 {
1984 }
1985 if (api_version >= PGSS_V1_11)
1986 {
1989 }
1990 if (api_version >= PGSS_V1_12)
1991 {
1994 }
1995 if (api_version >= PGSS_V1_13)
1996 {
1999 }
2000 if (api_version >= PGSS_V1_11)
2001 {
2002 values[i++] = TimestampTzGetDatum(stats_since);
2003 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2004 }
2005
2006 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2007 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2008 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2009 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2010 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2011 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2012 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2013 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2014 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2015 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2016 -1 /* fail if you forget to update this assert */ ));
2017
2018 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2019 }
2020
2022
2023 if (qbuffer)
2024 pfree(qbuffer);
2025}
2026
/* Width, in output arguments (columns), of a pg_stat_statements_info row */
#define PG_STAT_STATEMENTS_INFO_COLS	2
2029
2030/*
2031 * Return statistics of pg_stat_statements.
2032 */
2033Datum
2035{
2036 pgssGlobalStats stats;
2037 TupleDesc tupdesc;
2039 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2040
2041 if (!pgss || !pgss_hash)
2042 ereport(ERROR,
2044 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2045
2046 /* Build a tuple descriptor for our result type */
2047 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2048 elog(ERROR, "return type must be a row type");
2049
2050 /* Read global statistics for pg_stat_statements */
2052 stats = pgss->stats;
2054
2055 values[0] = Int64GetDatum(stats.dealloc);
2057
2059}
2060
2061/*
2062 * Allocate a new hashtable entry.
2063 * caller must hold an exclusive lock on pgss->lock
2064 *
2065 * "query" need not be null-terminated; we rely on query_len instead
2066 *
2067 * If "sticky" is true, make the new entry artificially sticky so that it will
2068 * probably still be there when the query finishes execution. We do this by
2069 * giving it a median usage value rather than the normal value. (Strictly
2070 * speaking, query strings are normalized on a best effort basis, though it
2071 * would be difficult to demonstrate this even under artificial conditions.)
2072 *
2073 * Note: despite needing exclusive lock, it's not an error for the target
2074 * entry to already exist. This is because pgss_store releases and
2075 * reacquires lock after failing to find a match; so someone else could
2076 * have made the entry while we waited to get exclusive lock.
2077 */
2078static pgssEntry *
2079entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2080 bool sticky)
2081{
2082 pgssEntry *entry;
2083 bool found;
2084
2085 /* Make space if needed */
2087 entry_dealloc();
2088
2089 /* Find or create an entry with desired hash code */
2090 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2091
2092 if (!found)
2093 {
2094 /* New entry, initialize it */
2095
2096 /* reset the statistics */
2097 memset(&entry->counters, 0, sizeof(Counters));
2098 /* set the appropriate initial usage count */
2100 /* re-initialize the mutex each time ... we assume no one using it */
2101 SpinLockInit(&entry->mutex);
2102 /* ... and don't forget the query text metadata */
2103 Assert(query_len >= 0);
2104 entry->query_offset = query_offset;
2105 entry->query_len = query_len;
2106 entry->encoding = encoding;
2108 entry->minmax_stats_since = entry->stats_since;
2109 }
2110
2111 return entry;
2112}
2113
2114/*
2115 * qsort comparator for sorting into increasing usage order
2116 */
2117static int
2118entry_cmp(const void *lhs, const void *rhs)
2119{
2120 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2121 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2122
2123 if (l_usage < r_usage)
2124 return -1;
2125 else if (l_usage > r_usage)
2126 return +1;
2127 else
2128 return 0;
2129}
2130
2131/*
2132 * Deallocate least-used entries.
2133 *
2134 * Caller must hold an exclusive lock on pgss->lock.
2135 */
2136static void
2137entry_dealloc(void)
2138{
2140 pgssEntry **entries;
2141 pgssEntry *entry;
2142 int nvictims;
2143 int i;
2145 int nvalidtexts;
2146
2147 /*
2148 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2149 * While we're scanning the table, apply the decay factor to the usage
2150 * values, and update the mean query length.
2151 *
2152 * Note that the mean query length is almost immediately obsolete, since
2153 * we compute it before not after discarding the least-used entries.
2154 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2155 * making two passes to get a more current result. Likewise, the new
2156 * cur_median_usage includes the entries we're about to zap.
2157 */
2158
2159 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2160
2161 i = 0;
2162 tottextlen = 0;
2163 nvalidtexts = 0;
2164
2166 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2167 {
2168 entries[i++] = entry;
2169 /* "Sticky" entries get a different usage decay rate. */
2170 if (IS_STICKY(entry->counters))
2172 else
2174 /* In the mean length computation, ignore dropped texts. */
2175 if (entry->query_len >= 0)
2176 {
2177 tottextlen += entry->query_len + 1;
2178 nvalidtexts++;
2179 }
2180 }
2181
2182 /* Sort into increasing order by usage */
2183 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2184
2185 /* Record the (approximate) median usage */
2186 if (i > 0)
2187 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2188 /* Record the mean query length */
2189 if (nvalidtexts > 0)
2191 else
2193
2194 /* Now zap an appropriate fraction of lowest-usage entries */
2195 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2196 nvictims = Min(nvictims, i);
2197
2198 for (i = 0; i < nvictims; i++)
2199 {
2200 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2201 }
2202
2203 pfree(entries);
2204
2205 /* Increment the number of times entries are deallocated */
2207 pgss->stats.dealloc += 1;
2209}
2210
2211/*
2212 * Given a query string (not necessarily null-terminated), allocate a new
2213 * entry in the external query text file and store the string there.
2214 *
2215 * If successful, returns true, and stores the new entry's offset in the file
2216 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2217 * number of garbage collections that have occurred so far.
2218 *
2219 * On failure, returns false.
2220 *
2221 * At least a shared lock on pgss->lock must be held by the caller, so as
2222 * to prevent a concurrent garbage collection. Share-lock-holding callers
2223 * should pass a gc_count pointer to obtain the number of garbage collections,
2224 * so that they can recheck the count after obtaining exclusive lock to
2225 * detect whether a garbage collection occurred (and removed this entry).
2226 */
2227static bool
2228qtext_store(const char *query, int query_len,
2229 Size *query_offset, int *gc_count)
2230{
2231 Size off;
2232 int fd;
2233
2234 /*
2235 * We use a spinlock to protect extent/n_writers/gc_count, so that
2236 * multiple processes may execute this function concurrently.
2237 */
2239 off = pgss->extent;
2240 pgss->extent += query_len + 1;
2241 pgss->n_writers++;
2242 if (gc_count)
2243 *gc_count = pgss->gc_count;
2245
2246 *query_offset = off;
2247
2248 /*
2249 * Don't allow the file to grow larger than what qtext_load_file can
2250 * (theoretically) handle. This has been seen to be reachable on 32-bit
2251 * platforms.
2252 */
2253 if (unlikely(query_len >= MaxAllocHugeSize - off))
2254 {
2255 errno = EFBIG; /* not quite right, but it'll do */
2256 fd = -1;
2257 goto error;
2258 }
2259
2260 /* Now write the data into the successfully-reserved part of the file */
2262 if (fd < 0)
2263 goto error;
2264
2265 if (pg_pwrite(fd, query, query_len, off) != query_len)
2266 goto error;
2267 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2268 goto error;
2269
2271
2272 /* Mark our write complete */
2274 pgss->n_writers--;
2276
2277 return true;
2278
2279error:
2280 ereport(LOG,
2282 errmsg("could not write file \"%s\": %m",
2283 PGSS_TEXT_FILE)));
2284
2285 if (fd >= 0)
2287
2288 /* Mark our write complete */
2290 pgss->n_writers--;
2292
2293 return false;
2294}
2295
2296/*
2297 * Read the external query text file into a palloc'd buffer.
2298 *
2299 * Returns NULL (without throwing an error) if unable to read, eg
2300 * file not there or insufficient memory.
2301 *
2302 * On success, the buffer size is also returned into *buffer_size.
2303 *
2304 * This can be called without any lock on pgss->lock, but in that case
2305 * the caller is responsible for verifying that the result is sane.
2306 */
2307static char *
2308qtext_load_file(Size *buffer_size)
2309{
2310 char *buf;
2311 int fd;
2312 struct stat stat;
2313 Size nread;
2314
2316 if (fd < 0)
2317 {
2318 if (errno != ENOENT)
2319 ereport(LOG,
2321 errmsg("could not read file \"%s\": %m",
2322 PGSS_TEXT_FILE)));
2323 return NULL;
2324 }
2325
2326 /* Get file length */
2327 if (fstat(fd, &stat))
2328 {
2329 ereport(LOG,
2331 errmsg("could not stat file \"%s\": %m",
2332 PGSS_TEXT_FILE)));
2334 return NULL;
2335 }
2336
2337 /* Allocate buffer; beware that off_t might be wider than size_t */
2340 else
2341 buf = NULL;
2342 if (buf == NULL)
2343 {
2344 ereport(LOG,
2346 errmsg("out of memory"),
2347 errdetail("Could not allocate enough memory to read file \"%s\".",
2348 PGSS_TEXT_FILE)));
2350 return NULL;
2351 }
2352
2353 /*
2354 * OK, slurp in the file. Windows fails if we try to read more than
2355 * INT_MAX bytes at once, and other platforms might not like that either,
2356 * so read a very large file in 1GB segments.
2357 */
2358 nread = 0;
2359 while (nread < stat.st_size)
2360 {
2361 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2362
2363 /*
2364 * If we get a short read and errno doesn't get set, the reason is
2365 * probably that garbage collection truncated the file since we did
2366 * the fstat(), so we don't log a complaint --- but we don't return
2367 * the data, either, since it's most likely corrupt due to concurrent
2368 * writes from garbage collection.
2369 */
2370 errno = 0;
2371 if (read(fd, buf + nread, toread) != toread)
2372 {
2373 if (errno)
2374 ereport(LOG,
2376 errmsg("could not read file \"%s\": %m",
2377 PGSS_TEXT_FILE)));
2378 pfree(buf);
2380 return NULL;
2381 }
2382 nread += toread;
2383 }
2384
2385 if (CloseTransientFile(fd) != 0)
2386 ereport(LOG,
2388 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2389
2390 *buffer_size = nread;
2391 return buf;
2392}
2393
2394/*
2395 * Locate a query text in the file image previously read by qtext_load_file().
2396 *
2397 * We validate the given offset/length, and return NULL if bogus. Otherwise,
2398 * the result points to a null-terminated string within the buffer.
2399 */
2400static char *
2401qtext_fetch(Size query_offset, int query_len,
2402 char *buffer, Size buffer_size)
2403{
2404 /* File read failed? */
2405 if (buffer == NULL)
2406 return NULL;
2407 /* Bogus offset/length? */
2408 if (query_len < 0 ||
2409 query_offset + query_len >= buffer_size)
2410 return NULL;
2411 /* As a further sanity check, make sure there's a trailing null */
2412 if (buffer[query_offset + query_len] != '\0')
2413 return NULL;
2414 /* Looks OK */
2415 return buffer + query_offset;
2416}
2417
2418/*
2419 * Do we need to garbage-collect the external query text file?
2420 *
2421 * Caller should hold at least a shared lock on pgss->lock.
2422 */
2423static bool
2424need_gc_qtexts(void)
2425{
2426 Size extent;
2427
2428 /* Read shared extent pointer */
2430 extent = pgss->extent;
2432
2433 /*
2434 * Don't proceed if file does not exceed 512 bytes per possible entry.
2435 *
2436 * Here and in the next test, 32-bit machines have overflow hazards if
2437 * pgss_max and/or mean_query_len are large. Force the multiplications
2438 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2439 */
2440 if ((uint64) extent < (uint64) 512 * pgss_max)
2441 return false;
2442
2443 /*
2444 * Don't proceed if file is less than about 50% bloat. Nothing can or
2445 * should be done in the event of unusually large query texts accounting
2446 * for file's large size. We go to the trouble of maintaining the mean
2447 * query length in order to prevent garbage collection from thrashing
2448 * uselessly.
2449 */
2450 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2451 return false;
2452
2453 return true;
2454}
2455
2456/*
2457 * Garbage-collect orphaned query texts in external file.
2458 *
2459 * This won't be called often in the typical case, since it's likely that
2460 * there won't be too much churn, and besides, a similar compaction process
2461 * occurs when serializing to disk at shutdown or as part of resetting.
2462 * Despite this, it seems prudent to plan for the edge case where the file
2463 * becomes unreasonably large, with no other method of compaction likely to
2464 * occur in the foreseeable future.
2465 *
2466 * The caller must hold an exclusive lock on pgss->lock.
2467 *
2468 * At the first sign of trouble we unlink the query text file to get a clean
2469 * slate (although existing statistics are retained), rather than risk
2470 * thrashing by allowing the same problem case to recur indefinitely.
2471 */
2472static void
2473gc_qtexts(void)
2474{
2475 char *qbuffer;
2477 FILE *qfile = NULL;
2479 pgssEntry *entry;
2480 Size extent;
2481 int nentries;
2482
2483 /*
2484 * When called from pgss_store, some other session might have proceeded
2485 * with garbage collection in the no-lock-held interim of lock strength
2486 * escalation. Check once more that this is actually necessary.
2487 */
2488 if (!need_gc_qtexts())
2489 return;
2490
2491 /*
2492 * Load the old texts file. If we fail (out of memory, for instance),
2493 * invalidate query texts. Hopefully this is rare. It might seem better
2494 * to leave things alone on an OOM failure, but the problem is that the
2495 * file is only going to get bigger; hoping for a future non-OOM result is
2496 * risky and can easily lead to complete denial of service.
2497 */
2499 if (qbuffer == NULL)
2500 goto gc_fail;
2501
2502 /*
2503 * We overwrite the query texts file in place, so as to reduce the risk of
2504 * an out-of-disk-space failure. Since the file is guaranteed not to get
2505 * larger, this should always work on traditional filesystems; though we
2506 * could still lose on copy-on-write filesystems.
2507 */
2509 if (qfile == NULL)
2510 {
2511 ereport(LOG,
2513 errmsg("could not write file \"%s\": %m",
2514 PGSS_TEXT_FILE)));
2515 goto gc_fail;
2516 }
2517
2518 extent = 0;
2519 nentries = 0;
2520
2522 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2523 {
2524 int query_len = entry->query_len;
2525 char *qry = qtext_fetch(entry->query_offset,
2526 query_len,
2527 qbuffer,
2528 qbuffer_size);
2529
2530 if (qry == NULL)
2531 {
2532 /* Trouble ... drop the text */
2533 entry->query_offset = 0;
2534 entry->query_len = -1;
2535 /* entry will not be counted in mean query length computation */
2536 continue;
2537 }
2538
2539 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2540 {
2541 ereport(LOG,
2543 errmsg("could not write file \"%s\": %m",
2544 PGSS_TEXT_FILE)));
2546 goto gc_fail;
2547 }
2548
2549 entry->query_offset = extent;
2550 extent += query_len + 1;
2551 nentries++;
2552 }
2553
2554 /*
2555 * Truncate away any now-unused space. If this fails for some odd reason,
2556 * we log it, but there's no need to fail.
2557 */
2558 if (ftruncate(fileno(qfile), extent) != 0)
2559 ereport(LOG,
2561 errmsg("could not truncate file \"%s\": %m",
2562 PGSS_TEXT_FILE)));
2563
2564 if (FreeFile(qfile))
2565 {
2566 ereport(LOG,
2568 errmsg("could not write file \"%s\": %m",
2569 PGSS_TEXT_FILE)));
2570 qfile = NULL;
2571 goto gc_fail;
2572 }
2573
2574 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2575 pgss->extent, extent);
2576
2577 /* Reset the shared extent pointer */
2578 pgss->extent = extent;
2579
2580 /*
2581 * Also update the mean query length, to be sure that need_gc_qtexts()
2582 * won't still think we have a problem.
2583 */
2584 if (nentries > 0)
2585 pgss->mean_query_len = extent / nentries;
2586 else
2588
2589 pfree(qbuffer);
2590
2591 /*
2592 * OK, count a garbage collection cycle. (Note: even though we have
2593 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2594 * other processes may examine gc_count while holding only the mutex.
2595 * Also, we have to advance the count *after* we've rewritten the file,
2596 * else other processes might not realize they read a stale file.)
2597 */
2599
2600 return;
2601
2602gc_fail:
2603 /* clean up resources */
2604 if (qfile)
2605 FreeFile(qfile);
2606 if (qbuffer)
2607 pfree(qbuffer);
2608
2609 /*
2610 * Since the contents of the external file are now uncertain, mark all
2611 * hashtable entries as having invalid texts.
2612 */
2614 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2615 {
2616 entry->query_offset = 0;
2617 entry->query_len = -1;
2618 }
2619
2620 /*
2621 * Destroy the query text file and create a new, empty one
2622 */
2625 if (qfile == NULL)
2626 ereport(LOG,
2628 errmsg("could not recreate file \"%s\": %m",
2629 PGSS_TEXT_FILE)));
2630 else
2631 FreeFile(qfile);
2632
2633 /* Reset the shared extent pointer */
2634 pgss->extent = 0;
2635
2636 /* Reset mean_query_len to match the new state */
2638
2639 /*
2640 * Bump the GC count even though we failed.
2641 *
2642 * This is needed to make concurrent readers of file without any lock on
2643 * pgss->lock notice existence of new version of file. Once readers
2644 * subsequently observe a change in GC count with pgss->lock held, that
2645 * forces a safe reopen of file. Writers also require that we bump here,
2646 * of course. (As required by locking protocol, readers and writers don't
2647 * trust earlier file contents until gc_count is found unchanged after
2648 * pgss->lock acquired in shared or exclusive mode respectively.)
2649 */
2651}
2652
/*
 * Reset one hashtable entry "e" (a pgssEntry pointer); a NULL "e" is ignored.
 *
 * This expands inside entry_reset() and uses that function's locals:
 * minmax_only selects between clearing only the min/max timings (stamping
 * minmax_stats_since with stats_reset) and removing the entry from pgss_hash
 * altogether, in which case num_remove is incremented.
 *
 * Wrapped in do { ... } while (0) so that an invocation followed by a
 * semicolon is a single statement, safe in unbraced if/else bodies.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2671
2672/*
2673 * Reset entries corresponding to parameters passed.
2674 */
2675static TimestampTz
2676entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2677{
2679 pgssEntry *entry;
2680 FILE *qfile;
2681 int64 num_entries;
2682 int64 num_remove = 0;
2684 TimestampTz stats_reset;
2685
2686 if (!pgss || !pgss_hash)
2687 ereport(ERROR,
2689 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2690
2692 num_entries = hash_get_num_entries(pgss_hash);
2693
2694 stats_reset = GetCurrentTimestamp();
2695
2696 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2697 {
2698 /* If all the parameters are available, use the fast path. */
2699 memset(&key, 0, sizeof(pgssHashKey));
2700 key.userid = userid;
2701 key.dbid = dbid;
2702 key.queryid = queryid;
2703
2704 /*
2705 * Reset the entry if it exists, starting with the non-top-level
2706 * entry.
2707 */
2708 key.toplevel = false;
2709 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2710
2711 SINGLE_ENTRY_RESET(entry);
2712
2713 /* Also reset the top-level entry if it exists. */
2714 key.toplevel = true;
2715 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2716
2717 SINGLE_ENTRY_RESET(entry);
2718 }
2719 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2720 {
2721 /* Reset entries corresponding to valid parameters. */
2723 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2724 {
2725 if ((!userid || entry->key.userid == userid) &&
2726 (!dbid || entry->key.dbid == dbid) &&
2727 (!queryid || entry->key.queryid == queryid))
2728 {
2729 SINGLE_ENTRY_RESET(entry);
2730 }
2731 }
2732 }
2733 else
2734 {
2735 /* Reset all entries. */
2737 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2738 {
2739 SINGLE_ENTRY_RESET(entry);
2740 }
2741 }
2742
2743 /* All entries are removed? */
2744 if (num_entries != num_remove)
2745 goto release_lock;
2746
2747 /*
2748 * Reset global statistics for pg_stat_statements since all entries are
2749 * removed.
2750 */
2752 pgss->stats.dealloc = 0;
2753 pgss->stats.stats_reset = stats_reset;
2755
2756 /*
2757 * Write new empty query file, perhaps even creating a new one to recover
2758 * if the file was missing.
2759 */
2761 if (qfile == NULL)
2762 {
2763 ereport(LOG,
2765 errmsg("could not create file \"%s\": %m",
2766 PGSS_TEXT_FILE)));
2767 goto done;
2768 }
2769
2770 /* If ftruncate fails, log it, but it's not a fatal problem */
2771 if (ftruncate(fileno(qfile), 0) != 0)
2772 ereport(LOG,
2774 errmsg("could not truncate file \"%s\": %m",
2775 PGSS_TEXT_FILE)));
2776
2777 FreeFile(qfile);
2778
2779done:
2780 pgss->extent = 0;
2781 /* This counts as a query text garbage collection for our purposes */
2783
2786
2787 return stats_reset;
2788}
2789
2790/*
2791 * Generate a normalized version of the query string that will be used to
2792 * represent all similar queries.
2793 *
2794 * Note that the normalized representation may well vary depending on
2795 * just which "equivalent" query is used to create the hashtable entry.
2796 * We assume this is OK.
2797 *
2798 * If query_loc > 0, then "query" has been advanced by that much compared to
2799 * the original string start, so we need to translate the provided locations
2800 * to compensate. (This lets us avoid re-scanning statements before the one
2801 * of interest, so it's worth doing.)
2802 *
2803 * *query_len_p contains the input string length, and is updated with
2804 * the result string length on exit. The resulting string might be longer
2805 * or shorter depending on what happens with replacement of constants.
2806 *
2807 * Returns a palloc'd string.
2808 */
2809static char *
2810generate_normalized_query(const JumbleState *jstate, const char *query,
2811 int query_loc, int *query_len_p)
2812{
2813 char *norm_query;
2814 int query_len = *query_len_p;
2815 int norm_query_buflen, /* Space allowed for norm_query */
2816 len_to_wrt, /* Length (in bytes) to write */
2817 quer_loc = 0, /* Source query byte location */
2818 n_quer_loc = 0, /* Normalized query byte location */
2819 last_off = 0, /* Offset from start for previous tok */
2820 last_tok_len = 0; /* Length (in bytes) of that tok */
2821 int num_constants_replaced = 0;
2823
2824 /*
2825 * Determine constants' lengths (core system only gives us locations), and
2826 * return a sorted copy of jstate's LocationLen data with lengths filled
2827 * in.
2828 */
2830
2831 /*
2832 * Allow for $n symbols to be longer than the constants they replace.
2833 * Constants must take at least one byte in text form, while a $n symbol
2834 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2835 * could refine that limit based on the max value of n for the current
2836 * query, but it hardly seems worth any extra effort to do so.
2837 */
2838 norm_query_buflen = query_len + jstate->clocations_count * 10;
2839
2840 /* Allocate result buffer */
2842
2843 for (int i = 0; i < jstate->clocations_count; i++)
2844 {
2845 int off, /* Offset from start for cur tok */
2846 tok_len; /* Length (in bytes) of that tok */
2847
2848 /*
2849 * If we have an external param at this location, but no lists are
2850 * being squashed across the query, then we skip here; this will make
2851 * us print the characters found in the original query that represent
2852 * the parameter in the next iteration (or after the loop is done),
2853 * which is a bit odd but seems to work okay in most cases.
2854 */
2855 if (locs[i].extern_param && !jstate->has_squashed_lists)
2856 continue;
2857
2858 off = locs[i].location;
2859
2860 /* Adjust recorded location if we're dealing with partial string */
2861 off -= query_loc;
2862
2863 tok_len = locs[i].length;
2864
2865 if (tok_len < 0)
2866 continue; /* ignore any duplicates */
2867
2868 /* Copy next chunk (what precedes the next constant) */
2869 len_to_wrt = off - last_off;
2871 Assert(len_to_wrt >= 0);
2874
2875 /*
2876 * And insert a param symbol in place of the constant token; and, if
2877 * we have a squashable list, insert a placeholder comment starting
2878 * from the list's second value.
2879 */
2881 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2882 locs[i].squashed ? " /*, ... */" : "");
2884
2885 /* move forward */
2886 quer_loc = off + tok_len;
2887 last_off = off;
2889 }
2890
2891 /* Clean up, if needed */
2892 if (locs)
2893 pfree(locs);
2894
2895 /*
2896 * We've copied up until the last ignorable constant. Copy over the
2897 * remaining bytes of the original query string.
2898 */
2899 len_to_wrt = query_len - quer_loc;
2900
2901 Assert(len_to_wrt >= 0);
2904
2906 norm_query[n_quer_loc] = '\0';
2907
2909 return norm_query;
2910}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5314
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
static Datum values[MAXATTR]
Definition bootstrap.c:190
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define INT64CONST(x)
Definition c.h:630
#define Min(x, y)
Definition c.h:1091
#define PG_BINARY_R
Definition c.h:1376
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
#define PG_BINARY
Definition c.h:1374
#define UINT64_FORMAT
Definition c.h:635
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define PG_BINARY_W
Definition c.h:1377
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1444
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1273
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:374
#define PG_END_TRY(...)
Definition elog.h:399
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define PG_FINALLY(...)
Definition elog.h:391
#define ereport(elevel,...)
Definition elog.h:152
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:73
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:72
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:70
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:143
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:71
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:318
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:486
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:426
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
int CloseTransientFile(int fd)
Definition fd.c:2855
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define MCXT_ALLOC_HUGE
Definition fe_memutils.h:28
#define MCXT_ALLOC_NO_OOM
Definition fe_memutils.h:29
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:122
Oid MyDatabaseId
Definition globals.c:96
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5152
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5049
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5186
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5073
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_BLOBS
Definition hsearch.h:92
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:426
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:436
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:451
WalUsage pgWalUsage
Definition instrument.c:27
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:367
BufferUsage pgBufferUsage
Definition instrument.c:25
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:327
@ INSTRUMENT_ALL
Definition instrument.h:68
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
int LWLockNewTrancheId(const char *name)
Definition lwlock.c:562
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
int GetDatabaseEncoding(void)
Definition mbutils.c:1388
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
void * palloc_extended(Size size, int flags)
Definition mcxt.c:1439
#define MaxAllocHugeSize
Definition memutils.h:45
Oid GetUserId(void)
Definition miscinit.c:470
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1788
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static char * errmsg
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:74
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * generate_normalized_query(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
static const ShmemCallbacks pgss_shmem_callbacks
#define PG_STAT_STATEMENTS_COLS_V1_3
#define PGSS_NUMKIND
static bool pgss_save
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
static void pgss_shmem_init(void *arg)
static void pgss_shmem_request(void *arg)
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:134
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:333
PlannedStmtOrigin
Definition plannodes.h:36
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:37
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:41
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:40
#define sprintf
Definition port.h:262
#define pg_pwrite
Definition port.h:248
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
#define Int64GetDatumFast(X)
Definition postgres.h:525
#define Float8GetDatumFast(X)
Definition postgres.h:527
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:370
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
LocationLen * ComputeConstantLengths(const JumbleState *jstate, const char *query, int query_loc)
ScanDirection
Definition sdir.h:25
void RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
Definition shmem.c:874
#define ShmemRequestHash(...)
Definition shmem.h:179
#define ShmemRequestStruct(...)
Definition shmem.h:176
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
struct JitContext * es_jit
Definition execnodes.h:800
uint64 es_total_processed
Definition execnodes.h:752
WalUsage walusage
Definition instrument.h:90
instr_time total
Definition instrument.h:88
BufferUsage bufusage
Definition instrument.h:89
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:214
int64 queryId
Definition plannodes.h:69
ParseLoc stmt_len
Definition plannodes.h:171
PlannedStmtOrigin planOrigin
Definition plannodes.h:75
ParseLoc stmt_location
Definition plannodes.h:169
Node * utilityStmt
Definition plannodes.h:153
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:50
PlannedStmt * plannedstmt
Definition execdesc.h:37
int query_instr_options
Definition execdesc.h:45
struct Instrumentation * query_instr
Definition execdesc.h:57
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:258
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
LWLock lock
Definition lwlock.h:70
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:548
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:72
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
const char * name
#define fstat
Definition win32_port.h:73

◆ PGSS_NUMKIND

#define PGSS_NUMKIND   (PGSS_EXEC + 1)

Definition at line 133 of file pg_stat_statements.c.

◆ PGSS_TEXT_FILE

#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"

Definition at line 85 of file pg_stat_statements.c.

◆ record_gc_qtexts

#define record_gc_qtexts ( )
Value:

Definition at line 315 of file pg_stat_statements.c.

316 { \
318 pgss->gc_count++; \
320 } while(0)

◆ SINGLE_ENTRY_RESET

#define SINGLE_ENTRY_RESET (   e)
Value:
if (e) { \
/* When requested reset only min/max statistics of an entry */ \
for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
{ \
e->counters.max_time[kind] = 0; \
e->counters.min_time[kind] = 0; \
} \
e->minmax_stats_since = stats_reset; \
} \
else \
{ \
/* Remove the key otherwise */ \
} \
}
e

Definition at line 2654 of file pg_stat_statements.c.

2655 { \
2656 if (minmax_only) { \
2657 /* When requested reset only min/max statistics of an entry */ \
2658 for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
2659 { \
2660 e->counters.max_time[kind] = 0; \
2661 e->counters.min_time[kind] = 0; \
2662 } \
2663 e->minmax_stats_since = stats_reset; \
2664 } \
2665 else \
2666 { \
2667 /* Remove the key otherwise */ \
2669 num_remove++; \
2670 } \
2671}

◆ STICKY_DECREASE_FACTOR

#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */

Definition at line 99 of file pg_stat_statements.c.

◆ USAGE_DEALLOC_PERCENT

#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */

Definition at line 100 of file pg_stat_statements.c.

◆ USAGE_DECREASE_FACTOR

#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */

Definition at line 98 of file pg_stat_statements.c.

◆ USAGE_EXEC

#define USAGE_EXEC (   duration)    (1.0)

Definition at line 94 of file pg_stat_statements.c.

◆ USAGE_INIT

#define USAGE_INIT   (1.0) /* including initial planning */

Definition at line 95 of file pg_stat_statements.c.

Typedef Documentation

◆ Counters

◆ pgssEntry

◆ pgssGlobalStats

◆ pgssHashKey

◆ pgssSharedState

◆ pgssStoreKind

◆ pgssVersion

Enumeration Type Documentation

◆ pgssStoreKind

Enumerator
PGSS_INVALID 
PGSS_PLAN 
PGSS_EXEC 

Definition at line 120 of file pg_stat_statements.c.

121{
122 PGSS_INVALID = -1,
123
124 /*
125 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
126 * reference the underlying values in the arrays in the Counters struct,
127 * and this order is required in pg_stat_statements_internal().
128 */
129 PGSS_PLAN = 0,
130 PGSS_EXEC,
131} pgssStoreKind;

◆ PGSSTrackLevel

Enumerator
PGSS_TRACK_NONE 
PGSS_TRACK_TOP 
PGSS_TRACK_ALL 

Definition at line 288 of file pg_stat_statements.c.

289{
290 PGSS_TRACK_NONE, /* track no statements */
291 PGSS_TRACK_TOP, /* only top level statements */
292 PGSS_TRACK_ALL, /* all statements, including nested ones */
293} PGSSTrackLevel;

◆ pgssVersion

Enumerator
PGSS_V1_0 
PGSS_V1_1 
PGSS_V1_2 
PGSS_V1_3 
PGSS_V1_8 
PGSS_V1_9 
PGSS_V1_10 
PGSS_V1_11 
PGSS_V1_12 
PGSS_V1_13 

Definition at line 106 of file pg_stat_statements.c.

107{
108 PGSS_V1_0 = 0,
109 PGSS_V1_1,
110 PGSS_V1_2,
111 PGSS_V1_3,
112 PGSS_V1_8,
113 PGSS_V1_9,
114 PGSS_V1_10,
115 PGSS_V1_11,
116 PGSS_V1_12,
117 PGSS_V1_13,
118} pgssVersion;

Function Documentation

◆ _PG_init()

void _PG_init ( void  )

Definition at line 390 of file pg_stat_statements.c.

391{
392 /*
393 * In order to create our shared memory area, we have to be loaded via
394 * shared_preload_libraries. If not, fall out without hooking into any of
395 * the main system. (We don't throw error here because it seems useful to
396 * allow the pg_stat_statements functions to be created even when the
397 * module isn't active. The functions must protect themselves against
398 * being called then, however.)
399 */
400 if (!process_shared_preload_libraries_in_progress)
401 return;
402
403 /*
404 * Inform the postmaster that we want to enable query_id calculation if
405 * compute_query_id is set to auto.
406 */
407 EnableQueryId();
408
409 /*
410 * Define (or redefine) custom GUC variables.
411 */
412 DefineCustomIntVariable("pg_stat_statements.max",
413 "Sets the maximum number of statements tracked by pg_stat_statements.",
414 NULL,
415 &pgss_max,
416 5000,
417 100,
418 INT_MAX / 2,
419 PGC_POSTMASTER,
420 0,
421 NULL,
422 NULL,
423 NULL);
424
425 DefineCustomEnumVariable("pg_stat_statements.track",
426 "Selects which statements are tracked by pg_stat_statements.",
427 NULL,
428 &pgss_track,
429 PGSS_TRACK_TOP,
430 track_options,
431 PGC_SUSET,
432 0,
433 NULL,
434 NULL,
435 NULL);
436
437 DefineCustomBoolVariable("pg_stat_statements.track_utility",
438 "Selects whether utility commands are tracked by pg_stat_statements.",
439 NULL,
440 &pgss_track_utility,
441 true,
442 PGC_SUSET,
443 0,
444 NULL,
445 NULL,
446 NULL);
447
448 DefineCustomBoolVariable("pg_stat_statements.track_planning",
449 "Selects whether planning duration is tracked by pg_stat_statements.",
450 NULL,
451 &pgss_track_planning,
452 false,
453 PGC_SUSET,
454 0,
455 NULL,
456 NULL,
457 NULL);
458
459 DefineCustomBoolVariable("pg_stat_statements.save",
460 "Save pg_stat_statements statistics across server shutdowns.",
461 NULL,
462 &pgss_save,
463 true,
464 PGC_SIGHUP,
465 0,
466 NULL,
467 NULL,
468 NULL);
469
470 MarkGUCPrefixReserved("pg_stat_statements");
471
472 /*
473 * Register our shared memory needs.
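474 */
475 RegisterShmemCallbacks(&pgss_shmem_callbacks);
476
477 /*
478 * Install hooks.
479 */
480 prev_post_parse_analyze_hook = post_parse_analyze_hook;
481 post_parse_analyze_hook = pgss_post_parse_analyze;
482 prev_planner_hook = planner_hook;
483 planner_hook = pgss_planner;
484 prev_ExecutorStart = ExecutorStart_hook;
485 ExecutorStart_hook = pgss_ExecutorStart;
486 prev_ExecutorRun = ExecutorRun_hook;
487 ExecutorRun_hook = pgss_ExecutorRun;
488 prev_ExecutorFinish = ExecutorFinish_hook;
489 ExecutorFinish_hook = pgss_ExecutorFinish;
490 prev_ExecutorEnd = ExecutorEnd_hook;
491 ExecutorEnd_hook = pgss_ExecutorEnd;
492 prev_ProcessUtility = ProcessUtility_hook;
493 ProcessUtility_hook = pgss_ProcessUtility;
494}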

References DefineCustomBoolVariable(), DefineCustomEnumVariable(), DefineCustomIntVariable(), EnableQueryId(), ExecutorEnd_hook, ExecutorFinish_hook, ExecutorRun_hook, ExecutorStart_hook, fb(), MarkGUCPrefixReserved(), PGC_POSTMASTER, PGC_SIGHUP, PGC_SUSET, pgss_ExecutorEnd(), pgss_ExecutorFinish(), pgss_ExecutorRun(), pgss_ExecutorStart(), pgss_max, pgss_planner(), pgss_post_parse_analyze(), pgss_ProcessUtility(), pgss_save, pgss_shmem_callbacks, pgss_track, pgss_track_planning, PGSS_TRACK_TOP, pgss_track_utility, planner_hook, post_parse_analyze_hook, prev_ExecutorEnd, prev_ExecutorFinish, prev_ExecutorRun, prev_ExecutorStart, prev_planner_hook, prev_post_parse_analyze_hook, prev_ProcessUtility, process_shared_preload_libraries_in_progress, ProcessUtility_hook, RegisterShmemCallbacks(), and track_options.

◆ entry_alloc()

static pgssEntry * entry_alloc ( pgssHashKey key,
Size  query_offset,
int  query_len,
int  encoding,
bool  sticky 
)
static

Definition at line 2080 of file pg_stat_statements.c.

2082{
2083 pgssEntry *entry;
2084 bool found;
2085
2086 /* Make space if needed */
2087 while (hash_get_num_entries(pgss_hash) >= pgss_max)
2088 entry_dealloc();
2089
2090 /* Find or create an entry with desired hash code */
2091 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2092
2093 if (!found)
2094 {
2095 /* New entry, initialize it */
2096
2097 /* reset the statistics */
2098 memset(&entry->counters, 0, sizeof(Counters));
2099 /* set the appropriate initial usage count */
2100 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2101 /* re-initialize the mutex each time ... we assume no one using it */
2102 SpinLockInit(&entry->mutex);
2103 /* ... and don't forget the query text metadata */
2104 Assert(query_len >= 0);
2105 entry->query_offset = query_offset;
2106 entry->query_len = query_len;
2107 entry->encoding = encoding;
2108 entry->stats_since = GetCurrentTimestamp();
2109 entry->minmax_stats_since = entry->stats_since;
2110 }
2111
2112 return entry;
2113}

References Assert, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssEntry::encoding, encoding, entry_dealloc(), fb(), GetCurrentTimestamp(), HASH_ENTER, hash_get_num_entries(), hash_search(), pgssEntry::minmax_stats_since, pgssEntry::mutex, pgss, pgss_hash, pgss_max, pgssEntry::query_len, pgssEntry::query_offset, SpinLockInit(), pgssEntry::stats_since, Counters::usage, and USAGE_INIT.

Referenced by pgss_shmem_init(), and pgss_store().

◆ entry_cmp()

static int entry_cmp ( const void lhs,
const void rhs 
)
static

Definition at line 2119 of file pg_stat_statements.c.

2120{
2121 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2122 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2123
2124 if (l_usage < r_usage)
2125 return -1;
2126 else if (l_usage > r_usage)
2127 return +1;
2128 else
2129 return 0;
2130}

References pgssEntry::counters, fb(), and Counters::usage.

Referenced by entry_dealloc().

◆ entry_dealloc()

static void entry_dealloc ( void  )
static

Definition at line 2138 of file pg_stat_statements.c.

2139{
2140 HASH_SEQ_STATUS hash_seq;
2141 pgssEntry **entries;
2142 pgssEntry *entry;
2143 int nvictims;
2144 int i;
2145 Size tottextlen;
2146 int nvalidtexts;
2147
2148 /*
2149 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2150 * While we're scanning the table, apply the decay factor to the usage
2151 * values, and update the mean query length.
2152 *
2153 * Note that the mean query length is almost immediately obsolete, since
2154 * we compute it before not after discarding the least-used entries.
2155 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2156 * making two passes to get a more current result. Likewise, the new
2157 * cur_median_usage includes the entries we're about to zap.
2158 */
2159
2160 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2161
2162 i = 0;
2163 tottextlen = 0;
2164 nvalidtexts = 0;
2165
2166 hash_seq_init(&hash_seq, pgss_hash);
2167 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2168 {
2169 entries[i++] = entry;
2170 /* "Sticky" entries get a different usage decay rate. */
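2171 if (IS_STICKY(entry->counters))
2172 entry->counters.usage *= STICKY_DECREASE_FACTOR;
2173 else
2174 entry->counters.usage *= USAGE_DECREASE_FACTOR;
2175 /* In the mean length computation, ignore dropped texts. */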
2176 if (entry->query_len >= 0)
2177 {
2178 tottextlen += entry->query_len + 1;
2179 nvalidtexts++;
2180 }
2181 }
2182
2183 /* Sort into increasing order by usage */
2184 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2185
2186 /* Record the (approximate) median usage */
2187 if (i > 0)
2188 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2189 /* Record the mean query length */
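2190 if (nvalidtexts > 0)
2191 pgss->mean_query_len = tottextlen / nvalidtexts;
2192 else
2193 pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2194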
2195 /* Now zap an appropriate fraction of lowest-usage entries */
2196 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2197 nvictims = Min(nvictims, i);
2198
2199 for (i = 0; i < nvictims; i++)
2200 {
2201 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2202 }
2203
2204 pfree(entries);
2205
2206 /* Increment the number of times entries are deallocated */
2207 SpinLockAcquire(&pgss->mutex);
2208 pgss->stats.dealloc += 1;
2209 SpinLockRelease(&pgss->mutex);
2210}

References ASSUMED_LENGTH_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_cmp(), fb(), hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), i, IS_STICKY, Max, pgssSharedState::mean_query_len, Min, pgssSharedState::mutex, palloc(), pfree(), pgss, pgss_hash, qsort, pgssEntry::query_len, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, STICKY_DECREASE_FACTOR, Counters::usage, USAGE_DEALLOC_PERCENT, and USAGE_DECREASE_FACTOR.

Referenced by entry_alloc().

◆ entry_reset()

static TimestampTz entry_reset ( Oid  userid,
Oid  dbid,
int64  queryid,
bool  minmax_only 
)
static

Definition at line 2677 of file pg_stat_statements.c.

2678{
2679 HASH_SEQ_STATUS hash_seq;
2680 pgssEntry *entry;
2681 FILE *qfile;
2682 int64 num_entries;
2683 int64 num_remove = 0;
2684 pgssHashKey key;
2685 TimestampTz stats_reset;
2686
2687 if (!pgss || !pgss_hash)
2688 ereport(ERROR,
2689 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2690 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2691
2693 num_entries = hash_get_num_entries(pgss_hash);
2694
2695 stats_reset = GetCurrentTimestamp();
2696
2697 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2698 {
2699 /* If all the parameters are available, use the fast path. */
2700 memset(&key, 0, sizeof(pgssHashKey));
2701 key.userid = userid;
2702 key.dbid = dbid;
2703 key.queryid = queryid;
2704
2705 /*
2706 * Reset the entry if it exists, starting with the non-top-level
2707 * entry.
2708 */
2709 key.toplevel = false;
2710 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2711
2712 SINGLE_ENTRY_RESET(entry);
2713
2714 /* Also reset the top-level entry if it exists. */
2715 key.toplevel = true;
2716 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2717
2718 SINGLE_ENTRY_RESET(entry);
2719 }
2720 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2721 {
2722 /* Reset entries corresponding to valid parameters. */
2724 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2725 {
2726 if ((!userid || entry->key.userid == userid) &&
2727 (!dbid || entry->key.dbid == dbid) &&
2728 (!queryid || entry->key.queryid == queryid))
2729 {
2730 SINGLE_ENTRY_RESET(entry);
2731 }
2732 }
2733 }
2734 else
2735 {
2736 /* Reset all entries. */
2738 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2739 {
2740 SINGLE_ENTRY_RESET(entry);
2741 }
2742 }
2743
2744 /* All entries are removed? */
2745 if (num_entries != num_remove)
2746 goto release_lock;
2747
2748 /*
2749 * Reset global statistics for pg_stat_statements since all entries are
2750 * removed.
2751 */
2753 pgss->stats.dealloc = 0;
2754 pgss->stats.stats_reset = stats_reset;
2756
2757 /*
2758 * Write new empty query file, perhaps even creating a new one to recover
2759 * if the file was missing.
2760 */
2762 if (qfile == NULL)
2763 {
2764 ereport(LOG,
2766 errmsg("could not create file \"%s\": %m",
2767 PGSS_TEXT_FILE)));
2768 goto done;
2769 }
2770
2771 /* If ftruncate fails, log it, but it's not a fatal problem */
2772 if (ftruncate(fileno(qfile), 0) != 0)
2773 ereport(LOG,
2775 errmsg("could not truncate file \"%s\": %m",
2776 PGSS_TEXT_FILE)));
2777
2778 FreeFile(qfile);
2779
2780done:
2781 pgss->extent = 0;
2782 /* This counts as a query text garbage collection for our purposes */
2784
2787
2788 return stats_reset;
2789}

References AllocateFile(), pgssHashKey::dbid, pgssGlobalStats::dealloc, ereport, errcode(), errcode_for_file_access(), errmsg, ERROR, pgssSharedState::extent, fb(), FreeFile(), GetCurrentTimestamp(), HASH_FIND, hash_get_num_entries(), hash_search(), hash_seq_init(), hash_seq_search(), INT64CONST, pgssEntry::key, pgssSharedState::lock, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pgssSharedState::mutex, PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, pgssHashKey::queryid, record_gc_qtexts, SINGLE_ENTRY_RESET, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssHashKey::userid.

Referenced by pg_stat_statements_reset(), pg_stat_statements_reset_1_11(), and pg_stat_statements_reset_1_7().

◆ gc_qtexts()

static void gc_qtexts ( void  )
static

Definition at line 2474 of file pg_stat_statements.c.

2475{
2476 char *qbuffer;
2478 FILE *qfile = NULL;
2480 pgssEntry *entry;
2481 Size extent;
2482 int nentries;
2483
2484 /*
2485 * When called from pgss_store, some other session might have proceeded
2486 * with garbage collection in the no-lock-held interim of lock strength
2487 * escalation. Check once more that this is actually necessary.
2488 */
2489 if (!need_gc_qtexts())
2490 return;
2491
2492 /*
2493 * Load the old texts file. If we fail (out of memory, for instance),
2494 * invalidate query texts. Hopefully this is rare. It might seem better
2495 * to leave things alone on an OOM failure, but the problem is that the
2496 * file is only going to get bigger; hoping for a future non-OOM result is
2497 * risky and can easily lead to complete denial of service.
2498 */
2500 if (qbuffer == NULL)
2501 goto gc_fail;
2502
2503 /*
2504 * We overwrite the query texts file in place, so as to reduce the risk of
2505 * an out-of-disk-space failure. Since the file is guaranteed not to get
2506 * larger, this should always work on traditional filesystems; though we
2507 * could still lose on copy-on-write filesystems.
2508 */
2510 if (qfile == NULL)
2511 {
2512 ereport(LOG,
2514 errmsg("could not write file \"%s\": %m",
2515 PGSS_TEXT_FILE)));
2516 goto gc_fail;
2517 }
2518
2519 extent = 0;
2520 nentries = 0;
2521
2523 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2524 {
2525 int query_len = entry->query_len;
2526 char *qry = qtext_fetch(entry->query_offset,
2527 query_len,
2528 qbuffer,
2529 qbuffer_size);
2530
2531 if (qry == NULL)
2532 {
2533 /* Trouble ... drop the text */
2534 entry->query_offset = 0;
2535 entry->query_len = -1;
2536 /* entry will not be counted in mean query length computation */
2537 continue;
2538 }
2539
2540 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2541 {
2542 ereport(LOG,
2544 errmsg("could not write file \"%s\": %m",
2545 PGSS_TEXT_FILE)));
2547 goto gc_fail;
2548 }
2549
2550 entry->query_offset = extent;
2551 extent += query_len + 1;
2552 nentries++;
2553 }
2554
2555 /*
2556 * Truncate away any now-unused space. If this fails for some odd reason,
2557 * we log it, but there's no need to fail.
2558 */
2559 if (ftruncate(fileno(qfile), extent) != 0)
2560 ereport(LOG,
2562 errmsg("could not truncate file \"%s\": %m",
2563 PGSS_TEXT_FILE)));
2564
2565 if (FreeFile(qfile))
2566 {
2567 ereport(LOG,
2569 errmsg("could not write file \"%s\": %m",
2570 PGSS_TEXT_FILE)));
2571 qfile = NULL;
2572 goto gc_fail;
2573 }
2574
2575 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2576 pgss->extent, extent);
2577
2578 /* Reset the shared extent pointer */
2579 pgss->extent = extent;
2580
2581 /*
2582 * Also update the mean query length, to be sure that need_gc_qtexts()
2583 * won't still think we have a problem.
2584 */
2585 if (nentries > 0)
2586 pgss->mean_query_len = extent / nentries;
2587 else
2589
2590 pfree(qbuffer);
2591
2592 /*
2593 * OK, count a garbage collection cycle. (Note: even though we have
2594 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2595 * other processes may examine gc_count while holding only the mutex.
2596 * Also, we have to advance the count *after* we've rewritten the file,
2597 * else other processes might not realize they read a stale file.)
2598 */
2600
2601 return;
2602
2603gc_fail:
2604 /* clean up resources */
2605 if (qfile)
2606 FreeFile(qfile);
2607 if (qbuffer)
2608 pfree(qbuffer);
2609
2610 /*
2611 * Since the contents of the external file are now uncertain, mark all
2612 * hashtable entries as having invalid texts.
2613 */
2615 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2616 {
2617 entry->query_offset = 0;
2618 entry->query_len = -1;
2619 }
2620
2621 /*
2622 * Destroy the query text file and create a new, empty one
2623 */
2626 if (qfile == NULL)
2627 ereport(LOG,
2629 errmsg("could not recreate file \"%s\": %m",
2630 PGSS_TEXT_FILE)));
2631 else
2632 FreeFile(qfile);
2633
2634 /* Reset the shared extent pointer */
2635 pgss->extent = 0;
2636
2637 /* Reset mean_query_len to match the new state */
2639
2640 /*
2641 * Bump the GC count even though we failed.
2642 *
2643 * This is needed to make concurrent readers of file without any lock on
2644 * pgss->lock notice existence of new version of file. Once readers
2645 * subsequently observe a change in GC count with pgss->lock held, that
2646 * forces a safe reopen of file. Writers also require that we bump here,
2647 * of course. (As required by locking protocol, readers and writers don't
2648 * trust earlier file contents until gc_count is found unchanged after
2649 * pgss->lock acquired in shared or exclusive mode respectively.)
2650 */
2652}

References AllocateFile(), ASSUMED_LENGTH_INIT, DEBUG1, elog, ereport, errcode_for_file_access(), errmsg, pgssSharedState::extent, fb(), FreeFile(), hash_seq_init(), hash_seq_search(), hash_seq_term(), LOG, pgssSharedState::mean_query_len, need_gc_qtexts(), pfree(), PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and record_gc_qtexts.

Referenced by pgss_store().

◆ generate_normalized_query()

static char * generate_normalized_query ( const JumbleState jstate,
const char query,
int  query_loc,
int query_len_p 
)
static

Definition at line 2811 of file pg_stat_statements.c.

2813{
2814 char *norm_query;
2815 int query_len = *query_len_p;
2816 int norm_query_buflen, /* Space allowed for norm_query */
2817 len_to_wrt, /* Length (in bytes) to write */
2818 quer_loc = 0, /* Source query byte location */
2819 n_quer_loc = 0, /* Normalized query byte location */
2820 last_off = 0, /* Offset from start for previous tok */
2821 last_tok_len = 0; /* Length (in bytes) of that tok */
2822 int num_constants_replaced = 0;
2824
2825 /*
2826 * Determine constants' lengths (core system only gives us locations), and
2827 * return a sorted copy of jstate's LocationLen data with lengths filled
2828 * in.
2829 */
2831
2832 /*
2833 * Allow for $n symbols to be longer than the constants they replace.
2834 * Constants must take at least one byte in text form, while a $n symbol
2835 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2836 * could refine that limit based on the max value of n for the current
2837 * query, but it hardly seems worth any extra effort to do so.
2838 */
2839 norm_query_buflen = query_len + jstate->clocations_count * 10;
2840
2841 /* Allocate result buffer */
2843
2844 for (int i = 0; i < jstate->clocations_count; i++)
2845 {
2846 int off, /* Offset from start for cur tok */
2847 tok_len; /* Length (in bytes) of that tok */
2848
2849 /*
2850 * If we have an external param at this location, but no lists are
2851 * being squashed across the query, then we skip here; this will make
2852 * us print the characters found in the original query that represent
2853 * the parameter in the next iteration (or after the loop is done),
2854 * which is a bit odd but seems to work okay in most cases.
2855 */
2856 if (locs[i].extern_param && !jstate->has_squashed_lists)
2857 continue;
2858
2859 off = locs[i].location;
2860
2861 /* Adjust recorded location if we're dealing with partial string */
2862 off -= query_loc;
2863
2864 tok_len = locs[i].length;
2865
2866 if (tok_len < 0)
2867 continue; /* ignore any duplicates */
2868
2869 /* Copy next chunk (what precedes the next constant) */
2870 len_to_wrt = off - last_off;
2872 Assert(len_to_wrt >= 0);
2875
2876 /*
2877 * And insert a param symbol in place of the constant token; and, if
2878 * we have a squashable list, insert a placeholder comment starting
2879 * from the list's second value.
2880 */
2882 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2883 locs[i].squashed ? " /*, ... */" : "");
2885
2886 /* move forward */
2887 quer_loc = off + tok_len;
2888 last_off = off;
2890 }
2891
2892 /* Clean up, if needed */
2893 if (locs)
2894 pfree(locs);
2895
2896 /*
2897 * We've copied up until the last ignorable constant. Copy over the
2898 * remaining bytes of the original query string.
2899 */
2900 len_to_wrt = query_len - quer_loc;
2901
2902 Assert(len_to_wrt >= 0);
2905
2907 norm_query[n_quer_loc] = '\0';
2908
2910 return norm_query;
2911}

References Assert, ComputeConstantLengths(), fb(), i, memcpy(), palloc(), pfree(), and sprintf.

Referenced by pgss_store().

◆ need_gc_qtexts()

static bool need_gc_qtexts ( void  )
static

Definition at line 2425 of file pg_stat_statements.c.

2426{
2427 Size extent;
2428
2429 /* Read shared extent pointer */
2430 SpinLockAcquire(&pgss->mutex);
2431 extent = pgss->extent;
2432 SpinLockRelease(&pgss->mutex);
2433
2434 /*
2435 * Don't proceed if file does not exceed 512 bytes per possible entry.
2436 *
2437 * Here and in the next test, 32-bit machines have overflow hazards if
2438 * pgss_max and/or mean_query_len are large. Force the multiplications
2439 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2440 */
2441 if ((uint64) extent < (uint64) 512 * pgss_max)
2442 return false;
2443
2444 /*
2445 * Don't proceed if file is less than about 50% bloat. Nothing can or
2446 * should be done in the event of unusually large query texts accounting
2447 * for file's large size. We go to the trouble of maintaining the mean
2448 * query length in order to prevent garbage collection from thrashing
2449 * uselessly.
2450 */
2451 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2452 return false;
2453
2454 return true;
2455}

References pgssSharedState::extent, pgssSharedState::mean_query_len, pgssSharedState::mutex, pgss, pgss_max, SpinLockAcquire(), and SpinLockRelease().

Referenced by gc_qtexts(), and pgss_store().

◆ PG_FUNCTION_INFO_V1() [1/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements  )

◆ PG_FUNCTION_INFO_V1() [2/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_10  )

◆ PG_FUNCTION_INFO_V1() [3/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_11  )

◆ PG_FUNCTION_INFO_V1() [4/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_12  )

◆ PG_FUNCTION_INFO_V1() [5/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_13  )

◆ PG_FUNCTION_INFO_V1() [6/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_2  )

◆ PG_FUNCTION_INFO_V1() [7/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_3  )

◆ PG_FUNCTION_INFO_V1() [8/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_8  )

◆ PG_FUNCTION_INFO_V1() [9/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_9  )

◆ PG_FUNCTION_INFO_V1() [10/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_info  )

◆ PG_FUNCTION_INFO_V1() [11/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset  )

◆ PG_FUNCTION_INFO_V1() [12/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_11  )

◆ PG_FUNCTION_INFO_V1() [13/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_7  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "pg_stat_statements",
version = PG_VERSION 
)

◆ pg_stat_statements()

Datum pg_stat_statements ( PG_FUNCTION_ARGS  )

Definition at line 1656 of file pg_stat_statements.c.

1657{
1658 /* If it's really API 1.1, we'll figure that out below */
1659 pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1660
1661 return (Datum) 0;
1662}

References pg_stat_statements_internal(), and PGSS_V1_0.

◆ pg_stat_statements_1_10()

Datum pg_stat_statements_1_10 ( PG_FUNCTION_ARGS  )

Definition at line 1602 of file pg_stat_statements.c.

1603{
1604 bool showtext = PG_GETARG_BOOL(0);
1605
1606 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1607
1608 return (Datum) 0;
1609}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_10.

◆ pg_stat_statements_1_11()

Datum pg_stat_statements_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1592 of file pg_stat_statements.c.

1593{
1594 bool showtext = PG_GETARG_BOOL(0);
1595
1596 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1597
1598 return (Datum) 0;
1599}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_11.

◆ pg_stat_statements_1_12()

Datum pg_stat_statements_1_12 ( PG_FUNCTION_ARGS  )

Definition at line 1582 of file pg_stat_statements.c.

1583{
1584 bool showtext = PG_GETARG_BOOL(0);
1585
1586 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1587
1588 return (Datum) 0;
1589}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_12.

◆ pg_stat_statements_1_13()

Datum pg_stat_statements_1_13 ( PG_FUNCTION_ARGS  )

Definition at line 1572 of file pg_stat_statements.c.

1573{
1574 bool showtext = PG_GETARG_BOOL(0);
1575
1576 pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1577
1578 return (Datum) 0;
1579}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_13.

◆ pg_stat_statements_1_2()

Datum pg_stat_statements_1_2 ( PG_FUNCTION_ARGS  )

Definition at line 1642 of file pg_stat_statements.c.

1643{
1644 bool showtext = PG_GETARG_BOOL(0);
1645
1646 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1647
1648 return (Datum) 0;
1649}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_2.

◆ pg_stat_statements_1_3()

Datum pg_stat_statements_1_3 ( PG_FUNCTION_ARGS  )

Definition at line 1632 of file pg_stat_statements.c.

1633{
1634 bool showtext = PG_GETARG_BOOL(0);
1635
1636 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1637
1638 return (Datum) 0;
1639}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_3.

◆ pg_stat_statements_1_8()

Datum pg_stat_statements_1_8 ( PG_FUNCTION_ARGS  )

Definition at line 1622 of file pg_stat_statements.c.

1623{
1624 bool showtext = PG_GETARG_BOOL(0);
1625
1626 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1627
1628 return (Datum) 0;
1629}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_8.

◆ pg_stat_statements_1_9()

Datum pg_stat_statements_1_9 ( PG_FUNCTION_ARGS  )

Definition at line 1612 of file pg_stat_statements.c.

1613{
1614 bool showtext = PG_GETARG_BOOL(0);
1615
1616 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1617
1618 return (Datum) 0;
1619}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_9.

◆ pg_stat_statements_info()

Datum pg_stat_statements_info ( PG_FUNCTION_ARGS  )

Definition at line 2035 of file pg_stat_statements.c.

2036{
2037 pgssGlobalStats stats;
2038 TupleDesc tupdesc;
2039 Datum values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2040 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2041
2042 if (!pgss || !pgss_hash)
2043 ereport(ERROR,
2044 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2045 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2046
2047 /* Build a tuple descriptor for our result type */
2048 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2049 elog(ERROR, "return type must be a row type");
2050
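2051 /* Read global statistics for pg_stat_statements */
2052 SpinLockAcquire(&pgss->mutex);
2053 stats = pgss->stats;
2054 SpinLockRelease(&pgss->mutex);
2055
2056 values[0] = Int64GetDatum(stats.dealloc);
2057 values[1] = TimestampTzGetDatum(stats.stats_reset);
2058
2059 PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
2060}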

References pgssGlobalStats::dealloc, elog, ereport, errcode(), errmsg, ERROR, fb(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int64GetDatum(), pgssSharedState::mutex, PG_RETURN_DATUM, PG_STAT_STATEMENTS_INFO_COLS, pgss, pgss_hash, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, pgssGlobalStats::stats_reset, TimestampTzGetDatum(), TYPEFUNC_COMPOSITE, and values.

◆ pg_stat_statements_internal()

static void pg_stat_statements_internal ( FunctionCallInfo  fcinfo,
pgssVersion  api_version,
bool  showtext 
)
static

Definition at line 1666 of file pg_stat_statements.c.

1669{
1671 Oid userid = GetUserId();
1672 bool is_allowed_role = false;
1673 char *qbuffer = NULL;
1674 Size qbuffer_size = 0;
1675 Size extent = 0;
1676 int gc_count = 0;
1678 pgssEntry *entry;
1679
1680 /*
1681 * Superusers or roles with the privileges of pg_read_all_stats members
1682 * are allowed
1683 */
1685
1686 /* hash table must exist already */
1687 if (!pgss || !pgss_hash)
1688 ereport(ERROR,
1690 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1691
1692 InitMaterializedSRF(fcinfo, 0);
1693
1694 /*
1695 * Check we have the expected number of output arguments. Aside from
1696 * being a good safety check, we need a kluge here to detect API version
1697 * 1.1, which was wedged into the code in an ill-considered way.
1698 */
1699 switch (rsinfo->setDesc->natts)
1700 {
1702 if (api_version != PGSS_V1_0)
1703 elog(ERROR, "incorrect number of output arguments");
1704 break;
1706 /* pg_stat_statements() should have told us 1.0 */
1707 if (api_version != PGSS_V1_0)
1708 elog(ERROR, "incorrect number of output arguments");
1709 api_version = PGSS_V1_1;
1710 break;
1712 if (api_version != PGSS_V1_2)
1713 elog(ERROR, "incorrect number of output arguments");
1714 break;
1716 if (api_version != PGSS_V1_3)
1717 elog(ERROR, "incorrect number of output arguments");
1718 break;
1720 if (api_version != PGSS_V1_8)
1721 elog(ERROR, "incorrect number of output arguments");
1722 break;
1724 if (api_version != PGSS_V1_9)
1725 elog(ERROR, "incorrect number of output arguments");
1726 break;
1728 if (api_version != PGSS_V1_10)
1729 elog(ERROR, "incorrect number of output arguments");
1730 break;
1732 if (api_version != PGSS_V1_11)
1733 elog(ERROR, "incorrect number of output arguments");
1734 break;
1736 if (api_version != PGSS_V1_12)
1737 elog(ERROR, "incorrect number of output arguments");
1738 break;
1740 if (api_version != PGSS_V1_13)
1741 elog(ERROR, "incorrect number of output arguments");
1742 break;
1743 default:
1744 elog(ERROR, "incorrect number of output arguments");
1745 }
1746
1747 /*
1748 * We'd like to load the query text file (if needed) while not holding any
1749 * lock on pgss->lock. In the worst case we'll have to do this again
1750 * after we have the lock, but it's unlikely enough to make this a win
1751 * despite occasional duplicated work. We need to reload if anybody
1752 * writes to the file (either a retail qtext_store(), or a garbage
1753 * collection) between this point and where we've gotten shared lock. If
1754 * a qtext_store is actually in progress when we look, we might as well
1755 * skip the speculative load entirely.
1756 */
1757 if (showtext)
1758 {
1759 int n_writers;
1760
1761 /* Take the mutex so we can examine variables */
1763 extent = pgss->extent;
1764 n_writers = pgss->n_writers;
1765 gc_count = pgss->gc_count;
1767
1768 /* No point in loading file now if there are active writers */
1769 if (n_writers == 0)
1771 }
1772
1773 /*
1774 * Get shared lock, load or reload the query text file if we must, and
1775 * iterate over the hashtable entries.
1776 *
1777 * With a large hash table, we might be holding the lock rather longer
1778 * than one could wish. However, this only blocks creation of new hash
1779 * table entries, and the larger the hash table the less likely that is to
1780 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1781 * we need to partition the hash table to limit the time spent holding any
1782 * one lock.
1783 */
1785
1786 if (showtext)
1787 {
1788 /*
1789 * Here it is safe to examine extent and gc_count without taking the
1790 * mutex. Note that although other processes might change
1791 * pgss->extent just after we look at it, the strings they then write
1792 * into the file cannot yet be referenced in the hashtable, so we
1793 * don't care whether we see them or not.
1794 *
1795 * If qtext_load_file fails, we just press on; we'll return NULL for
1796 * every query text.
1797 */
1798 if (qbuffer == NULL ||
1799 pgss->extent != extent ||
1800 pgss->gc_count != gc_count)
1801 {
1802 if (qbuffer)
1803 pfree(qbuffer);
1805 }
1806 }
1807
1809 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1810 {
1812 bool nulls[PG_STAT_STATEMENTS_COLS];
1813 int i = 0;
1814 Counters tmp;
1815 double stddev;
1816 int64 queryid = entry->key.queryid;
1817 TimestampTz stats_since;
1818 TimestampTz minmax_stats_since;
1819
1820 memset(values, 0, sizeof(values));
1821 memset(nulls, 0, sizeof(nulls));
1822
1823 values[i++] = ObjectIdGetDatum(entry->key.userid);
1824 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1825 if (api_version >= PGSS_V1_9)
1826 values[i++] = BoolGetDatum(entry->key.toplevel);
1827
1828 if (is_allowed_role || entry->key.userid == userid)
1829 {
1830 if (api_version >= PGSS_V1_2)
1831 values[i++] = Int64GetDatumFast(queryid);
1832
1833 if (showtext)
1834 {
1835 char *qstr = qtext_fetch(entry->query_offset,
1836 entry->query_len,
1837 qbuffer,
1838 qbuffer_size);
1839
1840 if (qstr)
1841 {
1842 char *enc;
1843
1845 entry->query_len,
1846 entry->encoding);
1847
1849
1850 if (enc != qstr)
1851 pfree(enc);
1852 }
1853 else
1854 {
1855 /* Just return a null if we fail to find the text */
1856 nulls[i++] = true;
1857 }
1858 }
1859 else
1860 {
1861 /* Query text not requested */
1862 nulls[i++] = true;
1863 }
1864 }
1865 else
1866 {
1867 /* Don't show queryid */
1868 if (api_version >= PGSS_V1_2)
1869 nulls[i++] = true;
1870
1871 /*
1872 * Don't show query text, but hint as to the reason for not doing
1873 * so if it was requested
1874 */
1875 if (showtext)
1876 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1877 else
1878 nulls[i++] = true;
1879 }
1880
1881 /* copy counters to a local variable to keep locking time short */
1882 SpinLockAcquire(&entry->mutex);
1883 tmp = entry->counters;
1884 SpinLockRelease(&entry->mutex);
1885
1886 /*
1887 * The spinlock is not required when reading these two as they are
1888 * always updated when holding pgss->lock exclusively.
1889 */
1890 stats_since = entry->stats_since;
1891 minmax_stats_since = entry->minmax_stats_since;
1892
1893 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1894 if (IS_STICKY(tmp))
1895 continue;
1896
1897 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1898 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1899 {
1900 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1901 {
1902 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1903 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1904 }
1905
1906 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1907 api_version >= PGSS_V1_8)
1908 {
1909 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1910 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1911 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1912
1913 /*
1914 * Note we are calculating the population variance here, not
1915 * the sample variance, as we have data for the whole
1916 * population, so Bessel's correction is not used, and we
1917 * don't divide by tmp.calls - 1.
1918 */
1919 if (tmp.calls[kind] > 1)
1920 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1921 else
1922 stddev = 0.0;
1923 values[i++] = Float8GetDatumFast(stddev);
1924 }
1925 }
1926 values[i++] = Int64GetDatumFast(tmp.rows);
1929 if (api_version >= PGSS_V1_1)
1934 if (api_version >= PGSS_V1_1)
1939 if (api_version >= PGSS_V1_1)
1940 {
1943 }
1944 if (api_version >= PGSS_V1_11)
1945 {
1948 }
1949 if (api_version >= PGSS_V1_10)
1950 {
1953 }
1954 if (api_version >= PGSS_V1_8)
1955 {
1956 char buf[256];
1957 Datum wal_bytes;
1958
1961
1962 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1963
1964 /* Convert to numeric. */
1965 wal_bytes = DirectFunctionCall3(numeric_in,
1968 Int32GetDatum(-1));
1969 values[i++] = wal_bytes;
1970 }
1971 if (api_version >= PGSS_V1_12)
1972 {
1974 }
1975 if (api_version >= PGSS_V1_10)
1976 {
1985 }
1986 if (api_version >= PGSS_V1_11)
1987 {
1990 }
1991 if (api_version >= PGSS_V1_12)
1992 {
1995 }
1996 if (api_version >= PGSS_V1_13)
1997 {
2000 }
2001 if (api_version >= PGSS_V1_11)
2002 {
2003 values[i++] = TimestampTzGetDatum(stats_since);
2004 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2005 }
2006
2007 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2008 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2009 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2010 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2011 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2012 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2013 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2014 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2015 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2016 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2017 -1 /* fail if you forget to update this assert */ ));
2018
2019 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2020 }
2021
2023
2024 if (qbuffer)
2025 pfree(qbuffer);
2026}

References Assert, BoolGetDatum(), buf, Counters::calls, pgssEntry::counters, CStringGetDatum(), CStringGetTextDatum, Counters::custom_plan_calls, pgssHashKey::dbid, DirectFunctionCall3, elog, enc, pgssEntry::encoding, ereport, errcode(), errmsg, ERROR, pgssSharedState::extent, fb(), Float8GetDatumFast, pgssSharedState::gc_count, Counters::generic_plan_calls, GetUserId(), has_privs_of_role(), hash_seq_init(), hash_seq_search(), i, InitMaterializedSRF(), Int32GetDatum(), Int64GetDatumFast, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, pgssEntry::key, Counters::local_blk_read_time, Counters::local_blk_write_time, Counters::local_blks_dirtied, Counters::local_blks_hit, Counters::local_blks_read, Counters::local_blks_written, pgssSharedState::lock, LWLockPadded::lock, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::minmax_stats_since, pgssEntry::mutex, pgssSharedState::mutex, pgssSharedState::n_writers, numeric_in(), ObjectIdGetDatum(), Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pg_any_to_server(), PG_STAT_STATEMENTS_COLS, PG_STAT_STATEMENTS_COLS_V1_0, PG_STAT_STATEMENTS_COLS_V1_1, PG_STAT_STATEMENTS_COLS_V1_10, PG_STAT_STATEMENTS_COLS_V1_11, PG_STAT_STATEMENTS_COLS_V1_12, PG_STAT_STATEMENTS_COLS_V1_13, PG_STAT_STATEMENTS_COLS_V1_2, PG_STAT_STATEMENTS_COLS_V1_3, PG_STAT_STATEMENTS_COLS_V1_8, PG_STAT_STATEMENTS_COLS_V1_9, pgss, PGSS_EXEC, pgss_hash, PGSS_NUMKIND, PGSS_V1_0, PGSS_V1_1, PGSS_V1_10, PGSS_V1_11, PGSS_V1_12, PGSS_V1_13, PGSS_V1_2, PGSS_V1_3, PGSS_V1_8, PGSS_V1_9, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, pgssHashKey::queryid, 
FunctionCallInfoBaseData::resultinfo, Counters::rows, Counters::shared_blk_read_time, Counters::shared_blk_write_time, Counters::shared_blks_dirtied, Counters::shared_blks_hit, Counters::shared_blks_read, Counters::shared_blks_written, snprintf, SpinLockAcquire(), SpinLockRelease(), pgssEntry::stats_since, Counters::sum_var_time, Counters::temp_blk_read_time, Counters::temp_blk_write_time, Counters::temp_blks_read, Counters::temp_blks_written, TimestampTzGetDatum(), pgssHashKey::toplevel, Counters::total_time, tuplestore_putvalues(), UINT64_FORMAT, pgssHashKey::userid, values, Counters::wal_buffers_full, Counters::wal_bytes, Counters::wal_fpi, and Counters::wal_records.

Referenced by pg_stat_statements(), pg_stat_statements_1_10(), pg_stat_statements_1_11(), pg_stat_statements_1_12(), pg_stat_statements_1_13(), pg_stat_statements_1_2(), pg_stat_statements_1_3(), pg_stat_statements_1_8(), and pg_stat_statements_1_9().

◆ pg_stat_statements_reset()

Datum pg_stat_statements_reset ( PG_FUNCTION_ARGS  )

Definition at line 1541 of file pg_stat_statements.c.

1542{
1543 entry_reset(0, 0, 0, false);
1544
1546}

References entry_reset(), and PG_RETURN_VOID.

◆ pg_stat_statements_reset_1_11()

Datum pg_stat_statements_reset_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1522 of file pg_stat_statements.c.

1523{
1524 Oid userid;
1525 Oid dbid;
1526 int64 queryid;
1527 bool minmax_only;
1528
1529 userid = PG_GETARG_OID(0);
1530 dbid = PG_GETARG_OID(1);
1531 queryid = PG_GETARG_INT64(2);
1533
1534 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1535}

References entry_reset(), fb(), PG_GETARG_BOOL, PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_TIMESTAMPTZ.

◆ pg_stat_statements_reset_1_7()

Datum pg_stat_statements_reset_1_7 ( PG_FUNCTION_ARGS  )

Definition at line 1506 of file pg_stat_statements.c.

1507{
1508 Oid userid;
1509 Oid dbid;
1510 int64 queryid;
1511
1512 userid = PG_GETARG_OID(0);
1513 dbid = PG_GETARG_OID(1);
1514 queryid = PG_GETARG_INT64(2);
1515
1516 entry_reset(userid, dbid, queryid, false);
1517
1519}

References entry_reset(), PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_VOID.

◆ pgss_ExecutorEnd()

static void pgss_ExecutorEnd ( QueryDesc *queryDesc)
static

Definition at line 1059 of file pg_stat_statements.c.

1060{
1061 int64 queryId = queryDesc->plannedstmt->queryId;
1062
1063 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1065 {
1066 pgss_store(queryDesc->sourceText,
1067 queryId,
1068 queryDesc->plannedstmt->stmt_location,
1069 queryDesc->plannedstmt->stmt_len,
1070 PGSS_EXEC,
1072 queryDesc->estate->es_total_processed,
1073 &queryDesc->query_instr->bufusage,
1074 &queryDesc->query_instr->walusage,
1075 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1076 NULL,
1077 queryDesc->estate->es_parallel_workers_to_launch,
1078 queryDesc->estate->es_parallel_workers_launched,
1079 queryDesc->plannedstmt->planOrigin);
1080 }
1081
1082 if (prev_ExecutorEnd)
1083 prev_ExecutorEnd(queryDesc);
1084 else
1085 standard_ExecutorEnd(queryDesc);
1086}

References Instrumentation::bufusage, EState::es_jit, EState::es_parallel_workers_launched, EState::es_parallel_workers_to_launch, EState::es_total_processed, QueryDesc::estate, fb(), JitContext::instr, INSTR_TIME_GET_MILLISEC, INT64CONST, nesting_level, pgss_enabled, PGSS_EXEC, pgss_store(), QueryDesc::plannedstmt, PlannedStmt::planOrigin, prev_ExecutorEnd, QueryDesc::query_instr, PlannedStmt::queryId, QueryDesc::sourceText, standard_ExecutorEnd(), PlannedStmt::stmt_len, PlannedStmt::stmt_location, Instrumentation::total, and Instrumentation::walusage.

Referenced by _PG_init().

◆ pgss_ExecutorFinish()

static void pgss_ExecutorFinish ( QueryDesc *queryDesc)
static

Definition at line 1038 of file pg_stat_statements.c.

1039{
1040 nesting_level++;
1041 PG_TRY();
1042 {
1044 prev_ExecutorFinish(queryDesc);
1045 else
1046 standard_ExecutorFinish(queryDesc);
1047 }
1048 PG_FINALLY();
1049 {
1050 nesting_level--;
1051 }
1052 PG_END_TRY();
1053}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorFinish, and standard_ExecutorFinish().

Referenced by _PG_init().

◆ pgss_ExecutorRun()

static void pgss_ExecutorRun ( QueryDesc *queryDesc,
ScanDirection  direction,
uint64  count 
)
static

Definition at line 1017 of file pg_stat_statements.c.

1018{
1019 nesting_level++;
1020 PG_TRY();
1021 {
1022 if (prev_ExecutorRun)
1023 prev_ExecutorRun(queryDesc, direction, count);
1024 else
1025 standard_ExecutorRun(queryDesc, direction, count);
1026 }
1027 PG_FINALLY();
1028 {
1029 nesting_level--;
1030 }
1031 PG_END_TRY();
1032}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorRun, and standard_ExecutorRun().

Referenced by _PG_init().

◆ pgss_ExecutorStart()

static void pgss_ExecutorStart ( QueryDesc *queryDesc,
int  eflags 
)
static

Definition at line 994 of file pg_stat_statements.c.

995{
996 /*
997 * If query has queryId zero, don't track it. This prevents double
998 * counting of optimizable statements that are directly contained in
999 * utility statements.
1000 */
1001 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1002 {
1003 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1004 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1005 }
1006
1008 prev_ExecutorStart(queryDesc, eflags);
1009 else
1010 standard_ExecutorStart(queryDesc, eflags);
1011}

References INSTRUMENT_ALL, INT64CONST, nesting_level, pgss_enabled, QueryDesc::plannedstmt, prev_ExecutorStart, QueryDesc::query_instr_options, PlannedStmt::queryId, and standard_ExecutorStart().

Referenced by _PG_init().

◆ pgss_planner()

static PlannedStmt * pgss_planner ( Query *parse,
const char *query_string,
int  cursorOptions,
ParamListInfo  boundParams,
ExplainState *es 
)
static

Definition at line 887 of file pg_stat_statements.c.

892{
894
895 /*
896 * We can't process the query if no query_string is provided, as
897 * pgss_store needs it. We also ignore query without queryid, as it would
898 * be treated as a utility statement, which may not be the case.
899 */
901 && pgss_track_planning && query_string
902 && parse->queryId != INT64CONST(0))
903 {
906 BufferUsage bufusage_start,
907 bufusage;
908 WalUsage walusage_start,
909 walusage;
910
911 /* We need to track buffer usage as the planner can access them. */
912 bufusage_start = pgBufferUsage;
913
914 /*
915 * Similarly the planner could write some WAL records in some cases
916 * (e.g. setting a hint bit with those being WAL-logged)
917 */
918 walusage_start = pgWalUsage;
920
922 PG_TRY();
923 {
925 result = prev_planner_hook(parse, query_string, cursorOptions,
926 boundParams, es);
927 else
928 result = standard_planner(parse, query_string, cursorOptions,
929 boundParams, es);
930 }
931 PG_FINALLY();
932 {
934 }
935 PG_END_TRY();
936
939
940 /* calc differences of buffer counters. */
941 memset(&bufusage, 0, sizeof(BufferUsage));
942 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943
944 /* calc differences of WAL counters. */
945 memset(&walusage, 0, sizeof(WalUsage));
946 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947
948 pgss_store(query_string,
949 parse->queryId,
950 parse->stmt_location,
951 parse->stmt_len,
952 PGSS_PLAN,
954 0,
955 &bufusage,
956 &walusage,
957 NULL,
958 NULL,
959 0,
960 0,
961 result->planOrigin);
962 }
963 else
964 {
965 /*
966 * Even though we're not tracking plan time for this statement, we
967 * must still increment the nesting level, to ensure that functions
968 * evaluated during planning are not seen as top-level calls.
969 */
971 PG_TRY();
972 {
974 result = prev_planner_hook(parse, query_string, cursorOptions,
975 boundParams, es);
976 else
977 result = standard_planner(parse, query_string, cursorOptions,
978 boundParams, es);
979 }
980 PG_FINALLY();
981 {
983 }
984 PG_END_TRY();
985 }
986
987 return result;
988}

References BufferUsageAccumDiff(), duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, nesting_level, parse(), PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_PLAN, pgss_store(), pgss_track_planning, pgWalUsage, prev_planner_hook, result, standard_planner(), start, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_post_parse_analyze()

static void pgss_post_parse_analyze ( ParseState *pstate,
Query *query,
const JumbleState *jstate 
)
static

Definition at line 834 of file pg_stat_statements.c.

835{
837 prev_post_parse_analyze_hook(pstate, query, jstate);
838
839 /* Safety check... */
841 return;
842
843 /*
844 * If it's EXECUTE, clear the queryId so that stats will accumulate for
845 * the underlying PREPARE. But don't do this if we're not tracking
846 * utility statements, to avoid messing up another extension that might be
847 * tracking them.
848 */
849 if (query->utilityStmt)
850 {
852 {
853 query->queryId = INT64CONST(0);
854 return;
855 }
856 }
857
858 /*
859 * If query jumbling were able to identify any ignorable constants, we
860 * immediately create a hash table entry for the query, so that we can
861 * record the normalized form of the query string. If there were no such
862 * constants, the normalized string would be the same as the query text
863 * anyway, so there's no need for an early entry.
864 */
865 if (jstate && jstate->clocations_count > 0)
866 pgss_store(pstate->p_sourcetext,
867 query->queryId,
868 query->stmt_location,
869 query->stmt_len,
871 0,
872 0,
873 NULL,
874 NULL,
875 NULL,
876 jstate,
877 0,
878 0,
880}

References fb(), INT64CONST, IsA, nesting_level, ParseState::p_sourcetext, pgss, pgss_enabled, pgss_hash, PGSS_INVALID, pgss_store(), pgss_track_utility, PLAN_STMT_UNKNOWN, prev_post_parse_analyze_hook, Query::stmt_location, and Query::utilityStmt.

Referenced by _PG_init().

◆ pgss_ProcessUtility()

static void pgss_ProcessUtility ( PlannedStmt *pstmt,
const char *queryString,
bool  readOnlyTree,
ProcessUtilityContext  context,
ParamListInfo  params,
QueryEnvironment *queryEnv,
DestReceiver *dest,
QueryCompletion *qc 
)
static

Definition at line 1092 of file pg_stat_statements.c.

1097{
1098 Node *parsetree = pstmt->utilityStmt;
1099 int64 saved_queryId = pstmt->queryId;
1101 int saved_stmt_len = pstmt->stmt_len;
1103
1104 /*
1105 * Force utility statements to get queryId zero. We do this even in cases
1106 * where the statement contains an optimizable statement for which a
1107 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1108 * cases, runtime control will first go through ProcessUtility and then
1109 * the executor, and we don't want the executor hooks to do anything,
1110 * since we are already measuring the statement's costs at the utility
1111 * level.
1112 *
1113 * Note that this is only done if pg_stat_statements is enabled and
1114 * configured to track utility statements, in the unlikely possibility
1115 * that user configured another extension to handle utility statements
1116 * only.
1117 */
1118 if (enabled)
1119 pstmt->queryId = INT64CONST(0);
1120
1121 /*
1122 * If it's an EXECUTE statement, we don't track it and don't increment the
1123 * nesting level. This allows the cycles to be charged to the underlying
1124 * PREPARE instead (by the Executor hooks), which is much more useful.
1125 *
1126 * We also don't track execution of PREPARE. If we did, we would get one
1127 * hash table entry for the PREPARE (with hash calculated from the query
1128 * string), and then a different one with the same query string (but hash
1129 * calculated from the query tree) would be used to accumulate costs of
1130 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1131 * actually run the planner (only parse+rewrite), its costs are generally
1132 * pretty negligible and it seems okay to just ignore it.
1133 */
1134 if (enabled &&
1135 !IsA(parsetree, ExecuteStmt) &&
1136 !IsA(parsetree, PrepareStmt))
1137 {
1140 uint64 rows;
1141 BufferUsage bufusage_start,
1142 bufusage;
1143 WalUsage walusage_start,
1144 walusage;
1145
1146 bufusage_start = pgBufferUsage;
1147 walusage_start = pgWalUsage;
1149
1150 nesting_level++;
1151 PG_TRY();
1152 {
1154 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1155 context, params, queryEnv,
1156 dest, qc);
1157 else
1158 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1159 context, params, queryEnv,
1160 dest, qc);
1161 }
1162 PG_FINALLY();
1163 {
1164 nesting_level--;
1165 }
1166 PG_END_TRY();
1167
1168 /*
1169 * CAUTION: do not access the *pstmt data structure again below here.
1170 * If it was a ROLLBACK or similar, that data structure may have been
1171 * freed. We must copy everything we still need into local variables,
1172 * which we did above.
1173 *
1174 * For the same reason, we can't risk restoring pstmt->queryId to its
1175 * former value, which'd otherwise be a good idea.
1176 */
1177
1180
1181 /*
1182 * Track the total number of rows retrieved or affected by the utility
1183 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1184 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1185 */
1186 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1187 qc->commandTag == CMDTAG_FETCH ||
1188 qc->commandTag == CMDTAG_SELECT ||
1190 qc->nprocessed : 0;
1191
1192 /* calc differences of buffer counters. */
1193 memset(&bufusage, 0, sizeof(BufferUsage));
1194 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1195
1196 /* calc differences of WAL counters. */
1197 memset(&walusage, 0, sizeof(WalUsage));
1198 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1199
1200 pgss_store(queryString,
1204 PGSS_EXEC,
1206 rows,
1207 &bufusage,
1208 &walusage,
1209 NULL,
1210 NULL,
1211 0,
1212 0,
1213 pstmt->planOrigin);
1214 }
1215 else
1216 {
1217 /*
1218 * Even though we're not tracking execution time for this statement,
1219 * we must still increment the nesting level, to ensure that functions
1220 * evaluated within it are not seen as top-level calls. But don't do
1221 * so for EXECUTE; that way, when control reaches pgss_planner or
1222 * pgss_ExecutorStart, we will treat the costs as top-level if
1223 * appropriate. Likewise, don't bump for PREPARE, so that parse
1224 * analysis will treat the statement as top-level if appropriate.
1225 *
1226 * To be absolutely certain we don't mess up the nesting level,
1227 * evaluate the bump_level condition just once.
1228 */
1229 bool bump_level =
1230 !IsA(parsetree, ExecuteStmt) &&
1231 !IsA(parsetree, PrepareStmt);
1232
1233 if (bump_level)
1234 nesting_level++;
1235 PG_TRY();
1236 {
1238 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1239 context, params, queryEnv,
1240 dest, qc);
1241 else
1242 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1243 context, params, queryEnv,
1244 dest, qc);
1245 }
1246 PG_FINALLY();
1247 {
1248 if (bump_level)
1249 nesting_level--;
1250 }
1251 PG_END_TRY();
1252 }
1253}

References BufferUsageAccumDiff(), QueryCompletion::commandTag, duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, IsA, nesting_level, QueryCompletion::nprocessed, PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_EXEC, pgss_store(), pgss_track_utility, pgWalUsage, PlannedStmt::planOrigin, prev_ProcessUtility, PlannedStmt::queryId, standard_ProcessUtility(), start, PlannedStmt::stmt_len, PlannedStmt::stmt_location, PlannedStmt::utilityStmt, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_shmem_init()

static void pgss_shmem_init ( void *arg)
static

Definition at line 530 of file pg_stat_statements.c.

531{
532 int tranche_id;
533 FILE *file = NULL;
534 FILE *qfile = NULL;
535 uint32 header;
536 int32 num;
537 int32 pgver;
538 int32 i;
539 int buffer_size;
540 char *buffer = NULL;
541
542 /*
543 * We already checked that we're loaded from shared_preload_libraries in
544 * _PG_init(), so we should not get here after postmaster startup.
545 */
547
548 /*
549 * Initialize the shmem area with no statistics.
550 */
551 tranche_id = LWLockNewTrancheId("pg_stat_statements");
552 LWLockInitialize(&pgss->lock.lock, tranche_id);
556 pgss->extent = 0;
557 pgss->n_writers = 0;
558 pgss->gc_count = 0;
559 pgss->stats.dealloc = 0;
561
562 /* The hash table must've also been initialized by now */
564
565 /*
566 * Set up a shmem exit hook to dump the statistics to disk on postmaster
567 * (or standalone backend) exit.
568 */
570
571 /*
572 * Load any pre-existing statistics from file.
573 *
574 * Note: we don't bother with locks here, because there should be no other
575 * processes running when this code is reached.
576 */
577
578 /* Unlink query text file possibly left over from crash */
580
581 /* Allocate new query text temp file */
583 if (qfile == NULL)
584 goto write_error;
585
586 /*
587 * If we were told not to load old statistics, we're done. (Note we do
588 * not try to unlink any old dump file in this case. This seems a bit
589 * questionable but it's the historical behavior.)
590 */
591 if (!pgss_save)
592 {
594 return;
595 }
596
597 /*
598 * Attempt to load old statistics from the dump file.
599 */
601 if (file == NULL)
602 {
603 if (errno != ENOENT)
604 goto read_error;
605 /* No existing persisted stats file, so we're done */
607 return;
608 }
609
610 buffer_size = 2048;
611 buffer = (char *) palloc(buffer_size);
612
613 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
614 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
615 fread(&num, sizeof(int32), 1, file) != 1)
616 goto read_error;
617
618 if (header != PGSS_FILE_HEADER ||
620 goto data_error;
621
622 for (i = 0; i < num; i++)
623 {
625 pgssEntry *entry;
626 Size query_offset;
627
628 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
629 goto read_error;
630
631 /* Encoding is the only field we can easily sanity-check */
632 if (!PG_VALID_BE_ENCODING(temp.encoding))
633 goto data_error;
634
635 /* Resize buffer as needed */
636 if (temp.query_len >= buffer_size)
637 {
638 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
639 buffer = repalloc(buffer, buffer_size);
640 }
641
642 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
643 goto read_error;
644
645 /* Should have a trailing null, but let's make sure */
646 buffer[temp.query_len] = '\0';
647
648 /* Skip loading "sticky" entries */
649 if (IS_STICKY(temp.counters))
650 continue;
651
652 /* Store the query text */
653 query_offset = pgss->extent;
654 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
655 goto write_error;
656 pgss->extent += temp.query_len + 1;
657
658 /* make the hashtable entry (discards old entries if too many) */
659 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
660 temp.encoding,
661 false);
662
663 /* copy in the actual stats */
664 entry->counters = temp.counters;
665 entry->stats_since = temp.stats_since;
666 entry->minmax_stats_since = temp.minmax_stats_since;
667 }
668
669 /* Read global statistics for pg_stat_statements */
670 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
671 goto read_error;
672
673 pfree(buffer);
674 FreeFile(file);
676
677 /*
678 * Remove the persisted stats file so it's not included in
679 * backups/replication standbys, etc. A new file will be written on next
680 * shutdown.
681 *
682 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
683 * because we remove that file on startup; it acts inversely to
684 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
685 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
686 * when the server is not running. Leaving the file creates no danger of
687 * a newly restored database having a spurious record of execution costs,
688 * which is what we're really concerned about here.
689 */
691
692 return;
693
695 ereport(LOG,
697 errmsg("could not read file \"%s\": %m",
699 goto fail;
701 ereport(LOG,
703 errmsg("ignoring invalid data in file \"%s\"",
705 goto fail;
707 ereport(LOG,
709 errmsg("could not write file \"%s\": %m",
711fail:
712 if (buffer)
713 pfree(buffer);
714 if (file)
715 FreeFile(file);
716 if (qfile)
718 /* If possible, throw away the bogus file; ignore any error */
720
721 /*
722 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
723 * server is running with pg_stat_statements enabled
724 */
725}

References AllocateFile(), Assert, ASSUMED_LENGTH_INIT, ASSUMED_MEDIAN_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_alloc(), ereport, errcode(), errcode_for_file_access(), errmsg, pgssSharedState::extent, fb(), FreeFile(), pgssSharedState::gc_count, GetCurrentTimestamp(), i, IS_STICKY, IsUnderPostmaster, pgssSharedState::lock, LWLockPadded::lock, LOG, LWLockInitialize(), LWLockNewTrancheId(), Max, pgssSharedState::mean_query_len, pgssEntry::minmax_stats_since, pgssSharedState::mutex, pgssSharedState::n_writers, on_shmem_exit(), palloc(), pfree(), PG_BINARY_R, PG_BINARY_W, PG_VALID_BE_ENCODING, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, PGSS_PG_MAJOR_VERSION, pgss_save, pgss_shmem_shutdown(), PGSS_TEXT_FILE, repalloc(), SpinLockInit(), pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssEntry::stats_since.

◆ pgss_shmem_request()

static void pgss_shmem_request ( void * arg)
static

Definition at line 506 of file pg_stat_statements.c.

507{
508 ShmemRequestHash(.name = "pg_stat_statements hash",
509 .nelems = pgss_max,
510 .hash_info.keysize = sizeof(pgssHashKey),
511 .hash_info.entrysize = sizeof(pgssEntry),
512 .hash_flags = HASH_ELEM | HASH_BLOBS,
513 .ptr = &pgss_hash,
514 );
515 ShmemRequestStruct(.name = "pg_stat_statements",
516 .size = sizeof(pgssSharedState),
517 .ptr = (void **) &pgss,
518 );
519}

References HASH_BLOBS, HASH_ELEM, name, pgss, pgss_hash, pgss_max, ShmemRequestHash, and ShmemRequestStruct.

◆ pgss_shmem_shutdown()

static void pgss_shmem_shutdown ( int  code,
Datum  arg 
)
static

Definition at line 734 of file pg_stat_statements.c.

735{
736 FILE *file;
737 char *qbuffer = NULL;
738 Size qbuffer_size = 0;
740 int32 num_entries;
741 pgssEntry *entry;
742
743 /* Don't try to dump during a crash. */
744 if (code)
745 return;
746
747 /* Safety check ... shouldn't get here unless shmem is set up. */
748 if (!pgss || !pgss_hash)
749 return;
750
751 /* Don't dump if told not to. */
752 if (!pgss_save)
753 return;
754
756 if (file == NULL)
757 goto error;
758
759 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
760 goto error;
761 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
762 goto error;
763 num_entries = hash_get_num_entries(pgss_hash);
764 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
765 goto error;
766
768 if (qbuffer == NULL)
769 goto error;
770
771 /*
772 * When serializing to disk, we store query texts immediately after their
773 * entry data. Any orphaned query texts are thereby excluded.
774 */
776 while ((entry = hash_seq_search(&hash_seq)) != NULL)
777 {
778 int len = entry->query_len;
779 char *qstr = qtext_fetch(entry->query_offset, len,
781
782 if (qstr == NULL)
783 continue; /* Ignore any entries with bogus texts */
784
785 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
786 fwrite(qstr, 1, len + 1, file) != len + 1)
787 {
788 /* note: we assume hash_seq_term won't change errno */
790 goto error;
791 }
792 }
793
794 /* Dump global statistics for pg_stat_statements */
795 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
796 goto error;
797
798 pfree(qbuffer);
799 qbuffer = NULL;
800
801 if (FreeFile(file))
802 {
803 file = NULL;
804 goto error;
805 }
806
807 /*
808 * Rename file into place, so we atomically replace any old one.
809 */
811
812 /* Unlink query-texts file; it's not needed while shutdown */
814
815 return;
816
817error:
818 ereport(LOG,
820 errmsg("could not write file \"%s\": %m",
821 PGSS_DUMP_FILE ".tmp")));
822 if (qbuffer)
823 pfree(qbuffer);
824 if (file)
825 FreeFile(file);
826 unlink(PGSS_DUMP_FILE ".tmp");
828}

References AllocateFile(), durable_rename(), ereport, errcode_for_file_access(), errmsg, error(), fb(), FreeFile(), hash_get_num_entries(), hash_seq_init(), hash_seq_search(), hash_seq_term(), len, LOG, pfree(), PG_BINARY_W, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, PGSS_PG_MAJOR_VERSION, pgss_save, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and pgssSharedState::stats.

Referenced by pgss_shmem_init().

◆ pgss_store()

static void pgss_store ( const char * query,
int64  queryId,
int  query_location,
int  query_len,
pgssStoreKind  kind,
double  total_time,
uint64  rows,
const BufferUsage * bufusage,
const WalUsage * walusage,
const struct JitInstrumentation * jitusage,
const JumbleState * jstate,
int  parallel_workers_to_launch,
int  parallel_workers_launched,
PlannedStmtOrigin  planOrigin 
)
static

Definition at line 1267 of file pg_stat_statements.c.

1278{
1280 pgssEntry *entry;
1281 char *norm_query = NULL;
1283
1284 Assert(query != NULL);
1285
1286 /* Safety check... */
1287 if (!pgss || !pgss_hash)
1288 return;
1289
1290 /*
1291 * Nothing to do if compute_query_id isn't enabled and no other module
1292 * computed a query identifier.
1293 */
1294 if (queryId == INT64CONST(0))
1295 return;
1296
1297 /*
1298 * Confine our attention to the relevant part of the string, if the query
1299 * is a portion of a multi-statement source string, and update query
1300 * location and length if needed.
1301 */
1302 query = CleanQuerytext(query, &query_location, &query_len);
1303
1304 /* Set up key for hashtable search */
1305
1306 /* clear padding */
1307 memset(&key, 0, sizeof(pgssHashKey));
1308
1309 key.userid = GetUserId();
1310 key.dbid = MyDatabaseId;
1311 key.queryid = queryId;
1312 key.toplevel = (nesting_level == 0);
1313
1314 /* Lookup the hash table entry with shared lock. */
1316
1317 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1318
1319 /* Create new entry, if not present */
1320 if (!entry)
1321 {
1322 Size query_offset;
1323 int gc_count;
1324 bool stored;
1325 bool do_gc;
1326
1327 /*
1328 * Create a new, normalized query string if caller asked. We don't
1329 * need to hold the lock while doing this work. (Note: in any case,
1330 * it's possible that someone else creates a duplicate hashtable entry
1331 * in the interval where we don't hold the lock below. That case is
1332 * handled by entry_alloc.)
1333 */
1334 if (jstate)
1335 {
1339 &query_len);
1341 }
1342
1343 /* Append new query text to file with only shared lock held */
1344 stored = qtext_store(norm_query ? norm_query : query, query_len,
1345 &query_offset, &gc_count);
1346
1347 /*
1348 * Determine whether we need to garbage collect external query texts
1349 * while the shared lock is still held. This micro-optimization
1350 * avoids taking the time to decide this while holding exclusive lock.
1351 */
1353
1354 /* Need exclusive lock to make a new hashtable entry - promote */
1357
1358 /*
1359 * A garbage collection may have occurred while we weren't holding the
1360 * lock. In the unlikely event that this happens, the query text we
1361 * stored above will have been garbage collected, so write it again.
1362 * This should be infrequent enough that doing it while holding
1363 * exclusive lock isn't a performance problem.
1364 */
1365 if (!stored || pgss->gc_count != gc_count)
1366 stored = qtext_store(norm_query ? norm_query : query, query_len,
1367 &query_offset, NULL);
1368
1369 /* If we failed to write to the text file, give up */
1370 if (!stored)
1371 goto done;
1372
1373 /* OK to create a new hashtable entry */
1374 entry = entry_alloc(&key, query_offset, query_len, encoding,
1375 jstate != NULL);
1376
1377 /* If needed, perform garbage collection while exclusive lock held */
1378 if (do_gc)
1379 gc_qtexts();
1380 }
1381
1382 /* Increment the counts, except when jstate is not NULL */
1383 if (!jstate)
1384 {
1385 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1386
1387 /*
1388 * Grab the spinlock while updating the counters (see comment about
1389 * locking rules at the head of the file)
1390 */
1391 SpinLockAcquire(&entry->mutex);
1392
1393 /* "Unstick" entry if it was previously sticky */
1394 if (IS_STICKY(entry->counters))
1395 entry->counters.usage = USAGE_INIT;
1396
1397 entry->counters.calls[kind] += 1;
1398 entry->counters.total_time[kind] += total_time;
1399
1400 if (entry->counters.calls[kind] == 1)
1401 {
1402 entry->counters.min_time[kind] = total_time;
1403 entry->counters.max_time[kind] = total_time;
1404 entry->counters.mean_time[kind] = total_time;
1405 }
1406 else
1407 {
1408 /*
1409 * Welford's method for accurately computing variance. See
1410 * <http://www.johndcook.com/blog/standard_deviation/>
1411 */
1412 double old_mean = entry->counters.mean_time[kind];
1413
1414 entry->counters.mean_time[kind] +=
1415 (total_time - old_mean) / entry->counters.calls[kind];
1416 entry->counters.sum_var_time[kind] +=
1417 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1418
1419 /*
1420 * Calculate min and max time. min = 0 and max = 0 means that the
1421 * min/max statistics were reset
1422 */
1423 if (entry->counters.min_time[kind] == 0
1424 && entry->counters.max_time[kind] == 0)
1425 {
1426 entry->counters.min_time[kind] = total_time;
1427 entry->counters.max_time[kind] = total_time;
1428 }
1429 else
1430 {
1431 if (entry->counters.min_time[kind] > total_time)
1432 entry->counters.min_time[kind] = total_time;
1433 if (entry->counters.max_time[kind] < total_time)
1434 entry->counters.max_time[kind] = total_time;
1435 }
1436 }
1437 entry->counters.rows += rows;
1438 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1439 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1442 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1443 entry->counters.local_blks_read += bufusage->local_blks_read;
1446 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1447 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1454 entry->counters.usage += USAGE_EXEC(total_time);
1455 entry->counters.wal_records += walusage->wal_records;
1456 entry->counters.wal_fpi += walusage->wal_fpi;
1457 entry->counters.wal_bytes += walusage->wal_bytes;
1458 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1459 if (jitusage)
1460 {
1461 entry->counters.jit_functions += jitusage->created_functions;
1462 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1463
1464 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1465 entry->counters.jit_deform_count++;
1466 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1467
1468 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1470 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1471
1472 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1474 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1475
1476 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1478 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1479 }
1480
1481 /* parallel worker counters */
1482 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1483 entry->counters.parallel_workers_launched += parallel_workers_launched;
1484
1485 /* plan cache counters */
1486 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1488 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1489 entry->counters.custom_plan_calls++;
1490
1491 SpinLockRelease(&entry->mutex);
1492 }
1493
1494done:
1496
1497 /* We postpone this clean-up until we're out of the lock */
1498 if (norm_query)
1500}

References Assert, Counters::calls, CleanQuerytext(), pgssEntry::counters, Counters::custom_plan_calls, encoding, entry_alloc(), fb(), pgssSharedState::gc_count, gc_qtexts(), generate_normalized_query(), Counters::generic_plan_calls, GetDatabaseEncoding(), GetUserId(), HASH_FIND, hash_search(), INSTR_TIME_GET_MILLISEC, INT64CONST, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, Counters::local_blk_read_time, BufferUsage::local_blk_read_time, Counters::local_blk_write_time, BufferUsage::local_blk_write_time, Counters::local_blks_dirtied, BufferUsage::local_blks_dirtied, Counters::local_blks_hit, BufferUsage::local_blks_hit, Counters::local_blks_read, BufferUsage::local_blks_read, Counters::local_blks_written, BufferUsage::local_blks_written, pgssSharedState::lock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::mutex, MyDatabaseId, need_gc_qtexts(), nesting_level, Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pgss, PGSS_EXEC, pgss_hash, PGSS_PLAN, PLAN_STMT_CACHE_CUSTOM, PLAN_STMT_CACHE_GENERIC, qtext_store(), Counters::rows, Counters::shared_blk_read_time, BufferUsage::shared_blk_read_time, Counters::shared_blk_write_time, BufferUsage::shared_blk_write_time, Counters::shared_blks_dirtied, BufferUsage::shared_blks_dirtied, Counters::shared_blks_hit, BufferUsage::shared_blks_hit, Counters::shared_blks_read, BufferUsage::shared_blks_read, Counters::shared_blks_written, BufferUsage::shared_blks_written, SpinLockAcquire(), SpinLockRelease(), Counters::sum_var_time, Counters::temp_blk_read_time, BufferUsage::temp_blk_read_time, Counters::temp_blk_write_time, 
BufferUsage::temp_blk_write_time, Counters::temp_blks_read, BufferUsage::temp_blks_read, Counters::temp_blks_written, BufferUsage::temp_blks_written, Counters::total_time, Counters::usage, USAGE_EXEC, USAGE_INIT, Counters::wal_buffers_full, WalUsage::wal_buffers_full, Counters::wal_bytes, WalUsage::wal_bytes, Counters::wal_fpi, WalUsage::wal_fpi, Counters::wal_records, and WalUsage::wal_records.

Referenced by pgss_ExecutorEnd(), pgss_planner(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ qtext_fetch()

static char * qtext_fetch ( Size  query_offset,
int  query_len,
char * buffer,
Size  buffer_size 
)
static

Definition at line 2402 of file pg_stat_statements.c.

2404{
2405 /* File read failed? */
2406 if (buffer == NULL)
2407 return NULL;
2408 /* Bogus offset/length? */
2409 if (query_len < 0 ||
2410 query_offset + query_len >= buffer_size)
2411 return NULL;
2412 /* As a further sanity check, make sure there's a trailing null */
2413 if (buffer[query_offset + query_len] != '\0')
2414 return NULL;
2415 /* Looks OK */
2416 return buffer + query_offset;
2417}

References fb().

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_load_file()

static char * qtext_load_file ( Size * buffer_size)
static

Definition at line 2309 of file pg_stat_statements.c.

2310{
2311 char *buf;
2312 int fd;
2313 struct stat stat;
2314 Size nread;
2315
2317 if (fd < 0)
2318 {
2319 if (errno != ENOENT)
2320 ereport(LOG,
2322 errmsg("could not read file \"%s\": %m",
2323 PGSS_TEXT_FILE)));
2324 return NULL;
2325 }
2326
2327 /* Get file length */
2328 if (fstat(fd, &stat))
2329 {
2330 ereport(LOG,
2332 errmsg("could not stat file \"%s\": %m",
2333 PGSS_TEXT_FILE)));
2335 return NULL;
2336 }
2337
2338 /* Allocate buffer; beware that off_t might be wider than size_t */
2341 else
2342 buf = NULL;
2343 if (buf == NULL)
2344 {
2345 ereport(LOG,
2347 errmsg("out of memory"),
2348 errdetail("Could not allocate enough memory to read file \"%s\".",
2349 PGSS_TEXT_FILE)));
2351 return NULL;
2352 }
2353
2354 /*
2355 * OK, slurp in the file. Windows fails if we try to read more than
2356 * INT_MAX bytes at once, and other platforms might not like that either,
2357 * so read a very large file in 1GB segments.
2358 */
2359 nread = 0;
2360 while (nread < stat.st_size)
2361 {
2362 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2363
2364 /*
2365 * If we get a short read and errno doesn't get set, the reason is
2366 * probably that garbage collection truncated the file since we did
2367 * the fstat(), so we don't log a complaint --- but we don't return
2368 * the data, either, since it's most likely corrupt due to concurrent
2369 * writes from garbage collection.
2370 */
2371 errno = 0;
2372 if (read(fd, buf + nread, toread) != toread)
2373 {
2374 if (errno)
2375 ereport(LOG,
2377 errmsg("could not read file \"%s\": %m",
2378 PGSS_TEXT_FILE)));
2379 pfree(buf);
2381 return NULL;
2382 }
2383 nread += toread;
2384 }
2385
2386 if (CloseTransientFile(fd) != 0)
2387 ereport(LOG,
2389 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2390
2391 *buffer_size = nread;
2392 return buf;
2393}

References buf, CloseTransientFile(), ereport, errcode(), errcode_for_file_access(), errdetail(), errmsg, fb(), fd(), fstat, LOG, MaxAllocHugeSize, MCXT_ALLOC_HUGE, MCXT_ALLOC_NO_OOM, Min, OpenTransientFile(), palloc_extended(), pfree(), PG_BINARY, PGSS_TEXT_FILE, read, and stat::st_size.

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_store()

static bool qtext_store ( const char * query,
int  query_len,
Size * query_offset,
int * gc_count 
)
static

Definition at line 2229 of file pg_stat_statements.c.

2231{
2232 Size off;
2233 int fd;
2234
2235 /*
2236 * We use a spinlock to protect extent/n_writers/gc_count, so that
2237 * multiple processes may execute this function concurrently.
2238 */
2240 off = pgss->extent;
2241 pgss->extent += query_len + 1;
2242 pgss->n_writers++;
2243 if (gc_count)
2244 *gc_count = pgss->gc_count;
2246
2247 *query_offset = off;
2248
2249 /*
2250 * Don't allow the file to grow larger than what qtext_load_file can
2251 * (theoretically) handle. This has been seen to be reachable on 32-bit
2252 * platforms.
2253 */
2254 if (unlikely(query_len >= MaxAllocHugeSize - off))
2255 {
2256 errno = EFBIG; /* not quite right, but it'll do */
2257 fd = -1;
2258 goto error;
2259 }
2260
2261 /* Now write the data into the successfully-reserved part of the file */
2263 if (fd < 0)
2264 goto error;
2265
2266 if (pg_pwrite(fd, query, query_len, off) != query_len)
2267 goto error;
2268 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2269 goto error;
2270
2272
2273 /* Mark our write complete */
2275 pgss->n_writers--;
2277
2278 return true;
2279
2280error:
2281 ereport(LOG,
2283 errmsg("could not write file \"%s\": %m",
2284 PGSS_TEXT_FILE)));
2285
2286 if (fd >= 0)
2288
2289 /* Mark our write complete */
2291 pgss->n_writers--;
2293
2294 return false;
2295}

References CloseTransientFile(), ereport, errcode_for_file_access(), errmsg, error(), pgssSharedState::extent, fb(), fd(), pgssSharedState::gc_count, LOG, MaxAllocHugeSize, pgssSharedState::mutex, pgssSharedState::n_writers, OpenTransientFile(), PG_BINARY, pg_pwrite, pgss, PGSS_TEXT_FILE, SpinLockAcquire(), SpinLockRelease(), and unlikely.

Referenced by pgss_store().

Variable Documentation

◆ nesting_level

◆ pgss

◆ PGSS_FILE_HEADER

const uint32 PGSS_FILE_HEADER = 0x20250731
static

Definition at line 88 of file pg_stat_statements.c.

Referenced by pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_hash

◆ pgss_max

int pgss_max = 5000
static

Definition at line 303 of file pg_stat_statements.c.

Referenced by _PG_init(), entry_alloc(), need_gc_qtexts(), and pgss_shmem_request().

◆ PGSS_PG_MAJOR_VERSION

const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
static

Definition at line 91 of file pg_stat_statements.c.

Referenced by pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_save

bool pgss_save = true
static

Definition at line 308 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_shmem_callbacks

const ShmemCallbacks pgss_shmem_callbacks
static
Initial value:
= {
.request_fn = pgss_shmem_request,
.init_fn = pgss_shmem_init,
}

Definition at line 267 of file pg_stat_statements.c.

267 {
268 .request_fn = pgss_shmem_request,
269 .init_fn = pgss_shmem_init,
270};

Referenced by _PG_init().

◆ pgss_track

int pgss_track = PGSS_TRACK_TOP
static

Definition at line 304 of file pg_stat_statements.c.

Referenced by _PG_init().

◆ pgss_track_planning

bool pgss_track_planning = false
static

Definition at line 306 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ pgss_track_utility

bool pgss_track_utility = true
static

Definition at line 305 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ prev_ExecutorEnd

ExecutorEnd_hook_type prev_ExecutorEnd = NULL
static

Definition at line 283 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorEnd().

◆ prev_ExecutorFinish

ExecutorFinish_hook_type prev_ExecutorFinish = NULL
static

Definition at line 282 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorFinish().

◆ prev_ExecutorRun

ExecutorRun_hook_type prev_ExecutorRun = NULL
static

Definition at line 281 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorRun().

◆ prev_ExecutorStart

ExecutorStart_hook_type prev_ExecutorStart = NULL
static

Definition at line 280 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorStart().

◆ prev_planner_hook

planner_hook_type prev_planner_hook = NULL
static

Definition at line 279 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ prev_post_parse_analyze_hook

post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
static

Definition at line 278 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_post_parse_analyze().

◆ prev_ProcessUtility

ProcessUtility_hook_type prev_ProcessUtility = NULL
static

Definition at line 284 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ProcessUtility().

◆ track_options

const struct config_enum_entry track_options[]
static
Initial value:
=
{
{"none", PGSS_TRACK_NONE, false},
{"top", PGSS_TRACK_TOP, false},
{"all", PGSS_TRACK_ALL, false},
{NULL, 0, false}
}

Definition at line 295 of file pg_stat_statements.c.

296{
297 {"none", PGSS_TRACK_NONE, false},
298 {"top", PGSS_TRACK_TOP, false},
299 {"all", PGSS_TRACK_ALL, false},
300 {NULL, 0, false}
301};

Referenced by _PG_init().