PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_stat_statements.c File Reference
#include "postgres.h"
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/htup_details.h"
#include "access/parallel.h"
#include "catalog/pg_authid.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "optimizer/planner.h"
#include "parser/analyze.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "utils/tuplestore.h"
Include dependency graph for pg_stat_statements.c:

Go to the source code of this file.

Data Structures

struct  pgssHashKey
 
struct  Counters
 
struct  pgssGlobalStats
 
struct  pgssEntry
 
struct  pgssSharedState
 

Macros

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
 
#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"
 
#define USAGE_EXEC(duration)   (1.0)
 
#define USAGE_INIT   (1.0) /* including initial planning */
 
#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */
 
#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */
 
#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */
 
#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */
 
#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */
 
#define IS_STICKY(c)   ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
#define PGSS_NUMKIND   (PGSS_EXEC + 1)
 
#define pgss_enabled(level)
 
#define record_gc_qtexts()
 
#define PG_STAT_STATEMENTS_COLS_V1_0   14
 
#define PG_STAT_STATEMENTS_COLS_V1_1   18
 
#define PG_STAT_STATEMENTS_COLS_V1_2   19
 
#define PG_STAT_STATEMENTS_COLS_V1_3   23
 
#define PG_STAT_STATEMENTS_COLS_V1_8   32
 
#define PG_STAT_STATEMENTS_COLS_V1_9   33
 
#define PG_STAT_STATEMENTS_COLS_V1_10   43
 
#define PG_STAT_STATEMENTS_COLS_V1_11   49
 
#define PG_STAT_STATEMENTS_COLS_V1_12   52
 
#define PG_STAT_STATEMENTS_COLS_V1_13   54
 
#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */
 
#define PG_STAT_STATEMENTS_INFO_COLS   2
 
#define SINGLE_ENTRY_RESET(e)
 

Typedefs

typedef enum pgssVersion pgssVersion
 
typedef enum pgssStoreKind pgssStoreKind
 
typedef struct pgssHashKey pgssHashKey
 
typedef struct Counters Counters
 
typedef struct pgssGlobalStats pgssGlobalStats
 
typedef struct pgssEntry pgssEntry
 
typedef struct pgssSharedState pgssSharedState
 

Enumerations

enum  pgssVersion {
  PGSS_V1_0 = 0 , PGSS_V1_1 , PGSS_V1_2 , PGSS_V1_3 ,
  PGSS_V1_8 , PGSS_V1_9 , PGSS_V1_10 , PGSS_V1_11 ,
  PGSS_V1_12 , PGSS_V1_13
}
 
enum  pgssStoreKind { PGSS_INVALID = -1 , PGSS_PLAN = 0 , PGSS_EXEC }
 
enum  PGSSTrackLevel { PGSS_TRACK_NONE , PGSS_TRACK_TOP , PGSS_TRACK_ALL }
 

Functions

 PG_MODULE_MAGIC_EXT (.name="pg_stat_statements",.version=PG_VERSION)
 
static void pgss_shmem_request (void *arg)
 
static void pgss_shmem_init (void *arg)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_7)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_reset_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_2)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_3)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_8)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_9)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_10)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_11)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_12)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_1_13)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements)
 
 PG_FUNCTION_INFO_V1 (pg_stat_statements_info)
 
static void pgss_shmem_shutdown (int code, Datum arg)
 
static void pgss_post_parse_analyze (ParseState *pstate, Query *query, const JumbleState *jstate)
 
static PlannedStmt * pgss_planner (Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
 
static void pgss_ExecutorStart (QueryDesc *queryDesc, int eflags)
 
static void pgss_ExecutorRun (QueryDesc *queryDesc, ScanDirection direction, uint64 count)
 
static void pgss_ExecutorFinish (QueryDesc *queryDesc)
 
static void pgss_ExecutorEnd (QueryDesc *queryDesc)
 
static void pgss_ProcessUtility (PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
 
static void pgss_store (const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
 
static void pg_stat_statements_internal (FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
 
static pgssEntry * entry_alloc (pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
 
static void entry_dealloc (void)
 
static bool qtext_store (const char *query, int query_len, Size *query_offset, int *gc_count)
 
static char * qtext_load_file (Size *buffer_size)
 
static char * qtext_fetch (Size query_offset, int query_len, char *buffer, Size buffer_size)
 
static bool need_gc_qtexts (void)
 
static void gc_qtexts (void)
 
static TimestampTz entry_reset (Oid userid, Oid dbid, int64 queryid, bool minmax_only)
 
static char * generate_normalized_query (const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
 
void _PG_init (void)
 
Datum pg_stat_statements_reset_1_7 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_reset (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_13 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_12 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_11 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_10 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_9 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_8 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_3 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_1_2 (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements (PG_FUNCTION_ARGS)
 
Datum pg_stat_statements_info (PG_FUNCTION_ARGS)
 
static int entry_cmp (const void *lhs, const void *rhs)
 

Variables

static const uint32 PGSS_FILE_HEADER = 0x20250731
 
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
 
static pgssSharedState * pgss
 
static HTAB * pgss_hash
 
static const ShmemCallbacks pgss_shmem_callbacks
 
static int nesting_level = 0
 
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
 
static planner_hook_type prev_planner_hook = NULL
 
static ExecutorStart_hook_type prev_ExecutorStart = NULL
 
static ExecutorRun_hook_type prev_ExecutorRun = NULL
 
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL
 
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL
 
static ProcessUtility_hook_type prev_ProcessUtility = NULL
 
static const struct config_enum_entry track_options []
 
static int pgss_max = 5000
 
static int pgss_track = PGSS_TRACK_TOP
 
static bool pgss_track_utility = true
 
static bool pgss_track_planning = false
 
static bool pgss_save = true
 

Macro Definition Documentation

◆ ASSUMED_LENGTH_INIT

#define ASSUMED_LENGTH_INIT   1024 /* initial assumed mean query length */

Definition at line 97 of file pg_stat_statements.c.

◆ ASSUMED_MEDIAN_INIT

#define ASSUMED_MEDIAN_INIT   (10.0) /* initial assumed median usage */

Definition at line 96 of file pg_stat_statements.c.

◆ IS_STICKY

#define IS_STICKY (   c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)

Definition at line 101 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS

#define PG_STAT_STATEMENTS_COLS   54 /* maximum of above */

Definition at line 1561 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_0

#define PG_STAT_STATEMENTS_COLS_V1_0   14

Definition at line 1551 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_1

#define PG_STAT_STATEMENTS_COLS_V1_1   18

Definition at line 1552 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_10

#define PG_STAT_STATEMENTS_COLS_V1_10   43

Definition at line 1557 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_11

#define PG_STAT_STATEMENTS_COLS_V1_11   49

Definition at line 1558 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_12

#define PG_STAT_STATEMENTS_COLS_V1_12   52

Definition at line 1559 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_13

#define PG_STAT_STATEMENTS_COLS_V1_13   54

Definition at line 1560 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_2

#define PG_STAT_STATEMENTS_COLS_V1_2   19

Definition at line 1553 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_3

#define PG_STAT_STATEMENTS_COLS_V1_3   23

Definition at line 1554 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_8

#define PG_STAT_STATEMENTS_COLS_V1_8   32

Definition at line 1555 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_COLS_V1_9

#define PG_STAT_STATEMENTS_COLS_V1_9   33

Definition at line 1556 of file pg_stat_statements.c.

◆ PG_STAT_STATEMENTS_INFO_COLS

#define PG_STAT_STATEMENTS_INFO_COLS   2

Definition at line 2031 of file pg_stat_statements.c.

◆ PGSS_DUMP_FILE

#define PGSS_DUMP_FILE   PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"

Definition at line 80 of file pg_stat_statements.c.

◆ pgss_enabled

#define pgss_enabled (   level)
Value:
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
(pgss_track == PGSS_TRACK_TOP && (level) == 0)))
#define IsParallelWorker()
Definition parallel.h:62
static int pgss_track
@ PGSS_TRACK_ALL
@ PGSS_TRACK_TOP

Definition at line 310 of file pg_stat_statements.c.

315 { \
317 pgss->gc_count++; \
319 } while(0)
320
321/*---- Function declarations ----*/
322
336
337static void pgss_shmem_shutdown(int code, Datum arg);
338static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
339 const JumbleState *jstate);
340static PlannedStmt *pgss_planner(Query *parse,
341 const char *query_string,
342 int cursorOptions,
343 ParamListInfo boundParams,
344 ExplainState *es);
345static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
346static void pgss_ExecutorRun(QueryDesc *queryDesc,
347 ScanDirection direction,
348 uint64 count);
349static void pgss_ExecutorFinish(QueryDesc *queryDesc);
350static void pgss_ExecutorEnd(QueryDesc *queryDesc);
351static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
352 bool readOnlyTree,
353 ProcessUtilityContext context, ParamListInfo params,
354 QueryEnvironment *queryEnv,
355 DestReceiver *dest, QueryCompletion *qc);
356static void pgss_store(const char *query, int64 queryId,
357 int query_location, int query_len,
358 pgssStoreKind kind,
359 double total_time, uint64 rows,
360 const BufferUsage *bufusage,
361 const WalUsage *walusage,
362 const struct JitInstrumentation *jitusage,
363 const JumbleState *jstate,
364 int parallel_workers_to_launch,
365 int parallel_workers_launched,
366 PlannedStmtOrigin planOrigin);
368 pgssVersion api_version,
369 bool showtext);
370static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
371 int encoding, bool sticky);
372static void entry_dealloc(void);
373static bool qtext_store(const char *query, int query_len,
374 Size *query_offset, int *gc_count);
375static char *qtext_load_file(Size *buffer_size);
376static char *qtext_fetch(Size query_offset, int query_len,
377 char *buffer, Size buffer_size);
378static bool need_gc_qtexts(void);
379static void gc_qtexts(void);
380static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
382 const char *query,
383 int query_loc, int *query_len_p);
384
385/*
386 * Module load callback
387 */
388void
389_PG_init(void)
390{
391 /*
392 * In order to create our shared memory area, we have to be loaded via
393 * shared_preload_libraries. If not, fall out without hooking into any of
394 * the main system. (We don't throw error here because it seems useful to
395 * allow the pg_stat_statements functions to be created even when the
396 * module isn't active. The functions must protect themselves against
397 * being called then, however.)
398 */
400 return;
401
402 /*
403 * Inform the postmaster that we want to enable query_id calculation if
404 * compute_query_id is set to auto.
405 */
407
408 /*
409 * Define (or redefine) custom GUC variables.
410 */
411 DefineCustomIntVariable("pg_stat_statements.max",
412 "Sets the maximum number of statements tracked by pg_stat_statements.",
413 NULL,
414 &pgss_max,
415 5000,
416 100,
417 INT_MAX / 2,
419 0,
420 NULL,
421 NULL,
422 NULL);
423
424 DefineCustomEnumVariable("pg_stat_statements.track",
425 "Selects which statements are tracked by pg_stat_statements.",
426 NULL,
427 &pgss_track,
430 PGC_SUSET,
431 0,
432 NULL,
433 NULL,
434 NULL);
435
436 DefineCustomBoolVariable("pg_stat_statements.track_utility",
437 "Selects whether utility commands are tracked by pg_stat_statements.",
438 NULL,
440 true,
441 PGC_SUSET,
442 0,
443 NULL,
444 NULL,
445 NULL);
446
447 DefineCustomBoolVariable("pg_stat_statements.track_planning",
448 "Selects whether planning duration is tracked by pg_stat_statements.",
449 NULL,
451 false,
452 PGC_SUSET,
453 0,
454 NULL,
455 NULL,
456 NULL);
457
458 DefineCustomBoolVariable("pg_stat_statements.save",
459 "Save pg_stat_statements statistics across server shutdowns.",
460 NULL,
461 &pgss_save,
462 true,
464 0,
465 NULL,
466 NULL,
467 NULL);
468
469 MarkGUCPrefixReserved("pg_stat_statements");
470
471 /*
472 * Register our shared memory needs.
473 */
475
476 /*
477 * Install hooks.
478 */
493}
494
495/*
496 * shmem request callback: Request shared memory resources.
497 *
498 * This is called at postmaster startup. Note that the shared memory isn't
499 * allocated here yet, this merely registers our needs.
500 *
501 * In EXEC_BACKEND mode, this is also called in each backend, to re-attach to
502 * the shared memory area that was already initialized.
503 */
504static void
506{
507 ShmemRequestHash(.name = "pg_stat_statements hash",
508 .nelems = pgss_max,
509 .hash_info.keysize = sizeof(pgssHashKey),
510 .hash_info.entrysize = sizeof(pgssEntry),
511 .hash_flags = HASH_ELEM | HASH_BLOBS,
512 .ptr = &pgss_hash,
513 );
514 ShmemRequestStruct(.name = "pg_stat_statements",
515 .size = sizeof(pgssSharedState),
516 .ptr = (void **) &pgss,
517 );
518}
519
520/*
521 * shmem init callback: Initialize our shared memory data structures at
522 * postmaster startup.
523 *
524 * Load any pre-existing statistics from file. Also create and load the
525 * query-texts file, which is expected to exist (even if empty) while the
526 * module is enabled.
527 */
528static void
529pgss_shmem_init(void *arg)
530{
531 int tranche_id;
532 FILE *file = NULL;
533 FILE *qfile = NULL;
534 uint32 header;
535 int32 num;
536 int32 pgver;
537 int32 i;
538 int buffer_size;
539 char *buffer = NULL;
540
541 /*
542 * We already checked that we're loaded from shared_preload_libraries in
543 * _PG_init(), so we should not get here after postmaster startup.
544 */
546
547 /*
548 * Initialize the shmem area with no statistics.
549 */
550 tranche_id = LWLockNewTrancheId("pg_stat_statements");
551 LWLockInitialize(&pgss->lock.lock, tranche_id);
555 pgss->extent = 0;
556 pgss->n_writers = 0;
557 pgss->gc_count = 0;
558 pgss->stats.dealloc = 0;
560
561 /* The hash table must've also been initialized by now */
563
564 /*
565 * Set up a shmem exit hook to dump the statistics to disk on postmaster
566 * (or standalone backend) exit.
567 */
569
570 /*
571 * Load any pre-existing statistics from file.
572 *
573 * Note: we don't bother with locks here, because there should be no other
574 * processes running when this code is reached.
575 */
576
577 /* Unlink query text file possibly left over from crash */
579
580 /* Allocate new query text temp file */
582 if (qfile == NULL)
583 goto write_error;
584
585 /*
586 * If we were told not to load old statistics, we're done. (Note we do
587 * not try to unlink any old dump file in this case. This seems a bit
588 * questionable but it's the historical behavior.)
589 */
590 if (!pgss_save)
591 {
593 return;
594 }
595
596 /*
597 * Attempt to load old statistics from the dump file.
598 */
600 if (file == NULL)
601 {
602 if (errno != ENOENT)
603 goto read_error;
604 /* No existing persisted stats file, so we're done */
606 return;
607 }
608
609 buffer_size = 2048;
610 buffer = (char *) palloc(buffer_size);
611
612 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
613 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
614 fread(&num, sizeof(int32), 1, file) != 1)
615 goto read_error;
616
617 if (header != PGSS_FILE_HEADER ||
619 goto data_error;
620
621 for (i = 0; i < num; i++)
622 {
624 pgssEntry *entry;
625 Size query_offset;
626
627 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
628 goto read_error;
629
630 /* Encoding is the only field we can easily sanity-check */
631 if (!PG_VALID_BE_ENCODING(temp.encoding))
632 goto data_error;
633
634 /* Resize buffer as needed */
635 if (temp.query_len >= buffer_size)
636 {
637 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
638 buffer = repalloc(buffer, buffer_size);
639 }
640
641 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
642 goto read_error;
643
644 /* Should have a trailing null, but let's make sure */
645 buffer[temp.query_len] = '\0';
646
647 /* Skip loading "sticky" entries */
648 if (IS_STICKY(temp.counters))
649 continue;
650
651 /* Store the query text */
652 query_offset = pgss->extent;
653 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
654 goto write_error;
655 pgss->extent += temp.query_len + 1;
656
657 /* make the hashtable entry (discards old entries if too many) */
658 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
659 temp.encoding,
660 false);
661
662 /* copy in the actual stats */
663 entry->counters = temp.counters;
664 entry->stats_since = temp.stats_since;
665 entry->minmax_stats_since = temp.minmax_stats_since;
666 }
667
668 /* Read global statistics for pg_stat_statements */
669 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
670 goto read_error;
671
672 pfree(buffer);
673 FreeFile(file);
675
676 /*
677 * Remove the persisted stats file so it's not included in
678 * backups/replication standbys, etc. A new file will be written on next
679 * shutdown.
680 *
681 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
682 * because we remove that file on startup; it acts inversely to
683 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
684 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
685 * when the server is not running. Leaving the file creates no danger of
686 * a newly restored database having a spurious record of execution costs,
687 * which is what we're really concerned about here.
688 */
690
691 return;
692
694 ereport(LOG,
696 errmsg("could not read file \"%s\": %m",
698 goto fail;
700 ereport(LOG,
702 errmsg("ignoring invalid data in file \"%s\"",
704 goto fail;
706 ereport(LOG,
708 errmsg("could not write file \"%s\": %m",
710fail:
711 if (buffer)
712 pfree(buffer);
713 if (file)
714 FreeFile(file);
715 if (qfile)
717 /* If possible, throw away the bogus file; ignore any error */
719
720 /*
721 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
722 * server is running with pg_stat_statements enabled
723 */
724}
725
726/*
727 * shmem_shutdown hook: Dump statistics into file.
728 *
729 * Note: we don't bother with acquiring lock, because there should be no
730 * other processes running when this is called.
731 */
732static void
734{
735 FILE *file;
736 char *qbuffer = NULL;
737 Size qbuffer_size = 0;
739 int32 num_entries;
740 pgssEntry *entry;
741
742 /* Don't try to dump during a crash. */
743 if (code)
744 return;
745
746 /* Safety check ... shouldn't get here unless shmem is set up. */
747 if (!pgss || !pgss_hash)
748 return;
749
750 /* Don't dump if told not to. */
751 if (!pgss_save)
752 return;
753
755 if (file == NULL)
756 goto error;
757
758 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
759 goto error;
760 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
761 goto error;
762 num_entries = hash_get_num_entries(pgss_hash);
763 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
764 goto error;
765
767 if (qbuffer == NULL)
768 goto error;
769
770 /*
771 * When serializing to disk, we store query texts immediately after their
772 * entry data. Any orphaned query texts are thereby excluded.
773 */
775 while ((entry = hash_seq_search(&hash_seq)) != NULL)
776 {
777 int len = entry->query_len;
778 char *qstr = qtext_fetch(entry->query_offset, len,
780
781 if (qstr == NULL)
782 continue; /* Ignore any entries with bogus texts */
783
784 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
785 fwrite(qstr, 1, len + 1, file) != len + 1)
786 {
787 /* note: we assume hash_seq_term won't change errno */
789 goto error;
790 }
791 }
792
793 /* Dump global statistics for pg_stat_statements */
794 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
795 goto error;
796
797 pfree(qbuffer);
798 qbuffer = NULL;
799
800 if (FreeFile(file))
801 {
802 file = NULL;
803 goto error;
804 }
805
806 /*
807 * Rename file into place, so we atomically replace any old one.
808 */
810
811 /* Unlink query-texts file; it's not needed while shutdown */
813
814 return;
815
816error:
817 ereport(LOG,
819 errmsg("could not write file \"%s\": %m",
820 PGSS_DUMP_FILE ".tmp")));
821 if (qbuffer)
822 pfree(qbuffer);
823 if (file)
824 FreeFile(file);
825 unlink(PGSS_DUMP_FILE ".tmp");
827}
828
829/*
830 * Post-parse-analysis hook: mark query with a queryId
831 */
832static void
834{
836 prev_post_parse_analyze_hook(pstate, query, jstate);
837
838 /* Safety check... */
840 return;
841
842 /*
843 * If it's EXECUTE, clear the queryId so that stats will accumulate for
844 * the underlying PREPARE. But don't do this if we're not tracking
845 * utility statements, to avoid messing up another extension that might be
846 * tracking them.
847 */
848 if (query->utilityStmt)
849 {
851 {
852 query->queryId = INT64CONST(0);
853 return;
854 }
855 }
856
857 /*
858 * If query jumbling were able to identify any ignorable constants, we
859 * immediately create a hash table entry for the query, so that we can
860 * record the normalized form of the query string. If there were no such
861 * constants, the normalized string would be the same as the query text
862 * anyway, so there's no need for an early entry.
863 */
864 if (jstate && jstate->clocations_count > 0)
865 pgss_store(pstate->p_sourcetext,
866 query->queryId,
867 query->stmt_location,
868 query->stmt_len,
870 0,
871 0,
872 NULL,
873 NULL,
874 NULL,
875 jstate,
876 0,
877 0,
879}
880
881/*
882 * Planner hook: forward to regular planner, but measure planning time
883 * if needed.
884 */
885static PlannedStmt *
886pgss_planner(Query *parse,
887 const char *query_string,
888 int cursorOptions,
889 ParamListInfo boundParams,
890 ExplainState *es)
891{
893
894 /*
895 * We can't process the query if no query_string is provided, as
896 * pgss_store needs it. We also ignore query without queryid, as it would
897 * be treated as a utility statement, which may not be the case.
898 */
900 && pgss_track_planning && query_string
901 && parse->queryId != INT64CONST(0))
902 {
905 BufferUsage bufusage_start,
906 bufusage;
907 WalUsage walusage_start,
908 walusage;
909
910 /* We need to track buffer usage as the planner can access them. */
911 bufusage_start = pgBufferUsage;
912
913 /*
914 * Similarly the planner could write some WAL records in some cases
915 * (e.g. setting a hint bit with those being WAL-logged)
916 */
917 walusage_start = pgWalUsage;
919
921 PG_TRY();
922 {
924 result = prev_planner_hook(parse, query_string, cursorOptions,
925 boundParams, es);
926 else
927 result = standard_planner(parse, query_string, cursorOptions,
928 boundParams, es);
929 }
930 PG_FINALLY();
931 {
933 }
934 PG_END_TRY();
935
938
939 /* calc differences of buffer counters. */
940 memset(&bufusage, 0, sizeof(BufferUsage));
941 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
942
943 /* calc differences of WAL counters. */
944 memset(&walusage, 0, sizeof(WalUsage));
945 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
946
947 pgss_store(query_string,
948 parse->queryId,
949 parse->stmt_location,
950 parse->stmt_len,
951 PGSS_PLAN,
953 0,
954 &bufusage,
955 &walusage,
956 NULL,
957 NULL,
958 0,
959 0,
960 result->planOrigin);
961 }
962 else
963 {
964 /*
965 * Even though we're not tracking plan time for this statement, we
966 * must still increment the nesting level, to ensure that functions
967 * evaluated during planning are not seen as top-level calls.
968 */
970 PG_TRY();
971 {
973 result = prev_planner_hook(parse, query_string, cursorOptions,
974 boundParams, es);
975 else
976 result = standard_planner(parse, query_string, cursorOptions,
977 boundParams, es);
978 }
979 PG_FINALLY();
980 {
982 }
983 PG_END_TRY();
984 }
985
986 return result;
987}
988
989/*
990 * ExecutorStart hook: start up tracking if needed
991 */
992static void
993pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
994{
995 /*
996 * If query has queryId zero, don't track it. This prevents double
997 * counting of optimizable statements that are directly contained in
998 * utility statements.
999 */
1000 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1001 {
1002 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1003 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1004 }
1005
1007 prev_ExecutorStart(queryDesc, eflags);
1008 else
1009 standard_ExecutorStart(queryDesc, eflags);
1010}
1011
1012/*
1013 * ExecutorRun hook: all we need do is track nesting depth
1014 */
1015static void
1016pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1017{
1018 nesting_level++;
1019 PG_TRY();
1020 {
1021 if (prev_ExecutorRun)
1022 prev_ExecutorRun(queryDesc, direction, count);
1023 else
1024 standard_ExecutorRun(queryDesc, direction, count);
1025 }
1026 PG_FINALLY();
1027 {
1028 nesting_level--;
1029 }
1030 PG_END_TRY();
1031}
1032
1033/*
1034 * ExecutorFinish hook: all we need do is track nesting depth
1035 */
1036static void
1038{
1039 nesting_level++;
1040 PG_TRY();
1041 {
1043 prev_ExecutorFinish(queryDesc);
1044 else
1045 standard_ExecutorFinish(queryDesc);
1046 }
1047 PG_FINALLY();
1048 {
1049 nesting_level--;
1050 }
1051 PG_END_TRY();
1052}
1053
1054/*
1055 * ExecutorEnd hook: store results if needed
1056 */
1057static void
1058pgss_ExecutorEnd(QueryDesc *queryDesc)
1059{
1060 int64 queryId = queryDesc->plannedstmt->queryId;
1061
1062 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1064 {
1065 pgss_store(queryDesc->sourceText,
1066 queryId,
1067 queryDesc->plannedstmt->stmt_location,
1068 queryDesc->plannedstmt->stmt_len,
1069 PGSS_EXEC,
1071 queryDesc->estate->es_total_processed,
1072 &queryDesc->query_instr->bufusage,
1073 &queryDesc->query_instr->walusage,
1074 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1075 NULL,
1076 queryDesc->estate->es_parallel_workers_to_launch,
1077 queryDesc->estate->es_parallel_workers_launched,
1078 queryDesc->plannedstmt->planOrigin);
1079 }
1080
1081 if (prev_ExecutorEnd)
1082 prev_ExecutorEnd(queryDesc);
1083 else
1084 standard_ExecutorEnd(queryDesc);
1085}
1086
1087/*
1088 * ProcessUtility hook
1089 */
1090static void
1091pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1092 bool readOnlyTree,
1093 ProcessUtilityContext context,
1094 ParamListInfo params, QueryEnvironment *queryEnv,
1095 DestReceiver *dest, QueryCompletion *qc)
1096{
1097 Node *parsetree = pstmt->utilityStmt;
1098 int64 saved_queryId = pstmt->queryId;
1100 int saved_stmt_len = pstmt->stmt_len;
1103
1104 /*
1105 * Force utility statements to get queryId zero. We do this even in cases
1106 * where the statement contains an optimizable statement for which a
1107 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1108 * cases, runtime control will first go through ProcessUtility and then
1109 * the executor, and we don't want the executor hooks to do anything,
1110 * since we are already measuring the statement's costs at the utility
1111 * level.
1112 *
1113 * Note that this is only done if pg_stat_statements is enabled and
1114 * configured to track utility statements, in the unlikely possibility
1115 * that user configured another extension to handle utility statements
1116 * only.
1117 */
1118 if (enabled)
1119 pstmt->queryId = INT64CONST(0);
1120
1121 /*
1122 * If it's an EXECUTE statement, we don't track it and don't increment the
1123 * nesting level. This allows the cycles to be charged to the underlying
1124 * PREPARE instead (by the Executor hooks), which is much more useful.
1125 *
1126 * We also don't track execution of PREPARE. If we did, we would get one
1127 * hash table entry for the PREPARE (with hash calculated from the query
1128 * string), and then a different one with the same query string (but hash
1129 * calculated from the query tree) would be used to accumulate costs of
1130 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1131 * actually run the planner (only parse+rewrite), its costs are generally
1132 * pretty negligible and it seems okay to just ignore it.
1133 */
1134 if (enabled &&
1135 !IsA(parsetree, ExecuteStmt) &&
1136 !IsA(parsetree, PrepareStmt))
1137 {
1140 uint64 rows;
1141 BufferUsage bufusage_start,
1142 bufusage;
1143 WalUsage walusage_start,
1144 walusage;
1145
1146 bufusage_start = pgBufferUsage;
1147 walusage_start = pgWalUsage;
1149
1150 nesting_level++;
1151 PG_TRY();
1152 {
1154 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1155 context, params, queryEnv,
1156 dest, qc);
1157 else
1158 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1159 context, params, queryEnv,
1160 dest, qc);
1161 }
1162 PG_FINALLY();
1163 {
1164 nesting_level--;
1165 }
1166 PG_END_TRY();
1167
1168 /*
1169 * CAUTION: do not access the *pstmt data structure again below here.
1170 * If it was a ROLLBACK or similar, that data structure may have been
1171 * freed. We must copy everything we still need into local variables,
1172 * which we did above.
1173 *
1174 * For the same reason, we can't risk restoring pstmt->queryId to its
1175 * former value, which'd otherwise be a good idea.
1176 */
1177 pstmt = NULL;
1178
1181
1182 /*
1183 * Track the total number of rows retrieved or affected by the utility
1184 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1185 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1186 */
1187 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1188 qc->commandTag == CMDTAG_FETCH ||
1189 qc->commandTag == CMDTAG_SELECT ||
1191 qc->nprocessed : 0;
1192
1193 /* calc differences of buffer counters. */
1194 memset(&bufusage, 0, sizeof(BufferUsage));
1195 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1196
1197 /* calc differences of WAL counters. */
1198 memset(&walusage, 0, sizeof(WalUsage));
1199 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1200
1201 pgss_store(queryString,
1205 PGSS_EXEC,
1207 rows,
1208 &bufusage,
1209 &walusage,
1210 NULL,
1211 NULL,
1212 0,
1213 0,
1215 }
1216 else
1217 {
1218 /*
1219 * Even though we're not tracking execution time for this statement,
1220 * we must still increment the nesting level, to ensure that functions
1221 * evaluated within it are not seen as top-level calls. But don't do
1222 * so for EXECUTE; that way, when control reaches pgss_planner or
1223 * pgss_ExecutorStart, we will treat the costs as top-level if
1224 * appropriate. Likewise, don't bump for PREPARE, so that parse
1225 * analysis will treat the statement as top-level if appropriate.
1226 *
1227 * To be absolutely certain we don't mess up the nesting level,
1228 * evaluate the bump_level condition just once.
1229 */
1230 bool bump_level =
1231 !IsA(parsetree, ExecuteStmt) &&
1232 !IsA(parsetree, PrepareStmt);
1233
1234 if (bump_level)
1235 nesting_level++;
1236 PG_TRY();
1237 {
1239 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1240 context, params, queryEnv,
1241 dest, qc);
1242 else
1243 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1244 context, params, queryEnv,
1245 dest, qc);
1246 }
1247 PG_FINALLY();
1248 {
1249 if (bump_level)
1250 nesting_level--;
1251 }
1252 PG_END_TRY();
1253 }
1254}
1255
1256/*
1257 * Store some statistics for a statement.
1258 *
1259 * If jstate is not NULL then we're trying to create an entry for which
1260 * we have no statistics as yet; we just want to record the normalized
1261 * query string. total_time, rows, bufusage and walusage are ignored in this
1262 * case.
1263 *
1264 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1265 * for the arrays in the Counters field.
1266 */
1267static void
1268pgss_store(const char *query, int64 queryId,
1269 int query_location, int query_len,
1270 pgssStoreKind kind,
1271 double total_time, uint64 rows,
1272 const BufferUsage *bufusage,
1273 const WalUsage *walusage,
1274 const struct JitInstrumentation *jitusage,
1275 const JumbleState *jstate,
1276 int parallel_workers_to_launch,
1277 int parallel_workers_launched,
1278 PlannedStmtOrigin planOrigin)
1279{
1281 pgssEntry *entry;
1282 char *norm_query = NULL;
1284
1285 Assert(query != NULL);
1286
1287 /* Safety check... */
1288 if (!pgss || !pgss_hash)
1289 return;
1290
1291 /*
1292 * Nothing to do if compute_query_id isn't enabled and no other module
1293 * computed a query identifier.
1294 */
1295 if (queryId == INT64CONST(0))
1296 return;
1297
1298 /*
1299 * Confine our attention to the relevant part of the string, if the query
1300 * is a portion of a multi-statement source string, and update query
1301 * location and length if needed.
1302 */
1303 query = CleanQuerytext(query, &query_location, &query_len);
1304
1305 /* Set up key for hashtable search */
1306
1307 /* clear padding */
1308 memset(&key, 0, sizeof(pgssHashKey));
1309
1310 key.userid = GetUserId();
1311 key.dbid = MyDatabaseId;
1312 key.queryid = queryId;
1313 key.toplevel = (nesting_level == 0);
1314
1315 /* Lookup the hash table entry with shared lock. */
1317
1318 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1319
1320 /* Create new entry, if not present */
1321 if (!entry)
1322 {
1323 Size query_offset;
1324 int gc_count;
1325 bool stored;
1326 bool do_gc;
1327
1328 /*
1329 * Create a new, normalized query string if caller asked. We don't
1330 * need to hold the lock while doing this work. (Note: in any case,
1331 * it's possible that someone else creates a duplicate hashtable entry
1332 * in the interval where we don't hold the lock below. That case is
1333 * handled by entry_alloc.)
1334 */
1335 if (jstate)
1336 {
1340 &query_len);
1342 }
1343
1344 /* Append new query text to file with only shared lock held */
1345 stored = qtext_store(norm_query ? norm_query : query, query_len,
1346 &query_offset, &gc_count);
1347
1348 /*
1349 * Determine whether we need to garbage collect external query texts
1350 * while the shared lock is still held. This micro-optimization
1351 * avoids taking the time to decide this while holding exclusive lock.
1352 */
1354
1355 /* Need exclusive lock to make a new hashtable entry - promote */
1358
1359 /*
1360 * A garbage collection may have occurred while we weren't holding the
1361 * lock. In the unlikely event that this happens, the query text we
1362 * stored above will have been garbage collected, so write it again.
1363 * This should be infrequent enough that doing it while holding
1364 * exclusive lock isn't a performance problem.
1365 */
1366 if (!stored || pgss->gc_count != gc_count)
1367 stored = qtext_store(norm_query ? norm_query : query, query_len,
1368 &query_offset, NULL);
1369
1370 /* If we failed to write to the text file, give up */
1371 if (!stored)
1372 goto done;
1373
1374 /* OK to create a new hashtable entry */
1375 entry = entry_alloc(&key, query_offset, query_len, encoding,
1376 jstate != NULL);
1377
1378 /* If needed, perform garbage collection while exclusive lock held */
1379 if (do_gc)
1380 gc_qtexts();
1381 }
1382
1383 /* Increment the counts, except when jstate is not NULL */
1384 if (!jstate)
1385 {
1386 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1387
1388 /*
1389 * Grab the spinlock while updating the counters (see comment about
1390 * locking rules at the head of the file)
1391 */
1392 SpinLockAcquire(&entry->mutex);
1393
1394 /* "Unstick" entry if it was previously sticky */
1395 if (IS_STICKY(entry->counters))
1396 entry->counters.usage = USAGE_INIT;
1397
1398 entry->counters.calls[kind] += 1;
1399 entry->counters.total_time[kind] += total_time;
1400
1401 if (entry->counters.calls[kind] == 1)
1402 {
1403 entry->counters.min_time[kind] = total_time;
1404 entry->counters.max_time[kind] = total_time;
1405 entry->counters.mean_time[kind] = total_time;
1406 }
1407 else
1408 {
1409 /*
1410 * Welford's method for accurately computing variance. See
1411 * <http://www.johndcook.com/blog/standard_deviation/>
1412 */
1413 double old_mean = entry->counters.mean_time[kind];
1414
1415 entry->counters.mean_time[kind] +=
1416 (total_time - old_mean) / entry->counters.calls[kind];
1417 entry->counters.sum_var_time[kind] +=
1418 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1419
1420 /*
1421 * Calculate min and max time. min = 0 and max = 0 means that the
1422 * min/max statistics were reset
1423 */
1424 if (entry->counters.min_time[kind] == 0
1425 && entry->counters.max_time[kind] == 0)
1426 {
1427 entry->counters.min_time[kind] = total_time;
1428 entry->counters.max_time[kind] = total_time;
1429 }
1430 else
1431 {
1432 if (entry->counters.min_time[kind] > total_time)
1433 entry->counters.min_time[kind] = total_time;
1434 if (entry->counters.max_time[kind] < total_time)
1435 entry->counters.max_time[kind] = total_time;
1436 }
1437 }
1438 entry->counters.rows += rows;
1439 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1440 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1443 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1444 entry->counters.local_blks_read += bufusage->local_blks_read;
1447 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1448 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1455 entry->counters.usage += USAGE_EXEC(total_time);
1456 entry->counters.wal_records += walusage->wal_records;
1457 entry->counters.wal_fpi += walusage->wal_fpi;
1458 entry->counters.wal_bytes += walusage->wal_bytes;
1459 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1460 if (jitusage)
1461 {
1462 entry->counters.jit_functions += jitusage->created_functions;
1463 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1464
1465 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1466 entry->counters.jit_deform_count++;
1467 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1468
1469 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1471 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1472
1473 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1475 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1476
1477 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1479 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1480 }
1481
1482 /* parallel worker counters */
1483 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1484 entry->counters.parallel_workers_launched += parallel_workers_launched;
1485
1486 /* plan cache counters */
1487 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1489 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1490 entry->counters.custom_plan_calls++;
1491
1492 SpinLockRelease(&entry->mutex);
1493 }
1494
1495done:
1497
1498 /* We postpone this clean-up until we're out of the lock */
1499 if (norm_query)
1501}
1502
1503/*
1504 * Reset statement statistics corresponding to userid, dbid, and queryid.
1505 */
1506Datum
1508{
1509 Oid userid;
1510 Oid dbid;
1511 int64 queryid;
1512
1513 userid = PG_GETARG_OID(0);
1514 dbid = PG_GETARG_OID(1);
1515 queryid = PG_GETARG_INT64(2);
1516
1517 entry_reset(userid, dbid, queryid, false);
1518
1520}
1521
1522Datum
1524{
1525 Oid userid;
1526 Oid dbid;
1527 int64 queryid;
1528 bool minmax_only;
1529
1530 userid = PG_GETARG_OID(0);
1531 dbid = PG_GETARG_OID(1);
1532 queryid = PG_GETARG_INT64(2);
1534
1535 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1536}
1537
1538/*
1539 * Reset statement statistics.
1540 */
1541Datum
1543{
1544 entry_reset(0, 0, 0, false);
1545
1547}
1548
/*
 * Number of output arguments (columns) for various API versions.
 *
 * Each constant is the exact column count that the matching SQL declaration
 * of pg_stat_statements() exposes; pg_stat_statements_internal() switches on
 * the result descriptor's natts to cross-check the requested API version.
 */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS_V1_3	23
#define PG_STAT_STATEMENTS_COLS_V1_8	32
#define PG_STAT_STATEMENTS_COLS_V1_9	33
#define PG_STAT_STATEMENTS_COLS_V1_10	43
#define PG_STAT_STATEMENTS_COLS_V1_11	49
#define PG_STAT_STATEMENTS_COLS_V1_12	52
#define PG_STAT_STATEMENTS_COLS_V1_13	54

/*
 * Maximum of the above.  This sizes the per-row output arrays, so it is
 * derived from the newest version's count instead of repeating the literal:
 * when a new API version is added, point this at the new constant.
 */
#define PG_STAT_STATEMENTS_COLS			PG_STAT_STATEMENTS_COLS_V1_13
1561
1562/*
1563 * Retrieve statement statistics.
1564 *
1565 * The SQL API of this function has changed multiple times, and will likely
1566 * do so again in future. To support the case where a newer version of this
1567 * loadable module is being used with an old SQL declaration of the function,
1568 * we continue to support the older API versions. For 1.2 and later, the
1569 * expected API version is identified by embedding it in the C name of the
1570 * function. Unfortunately we weren't bright enough to do that for 1.1.
1571 */
1572Datum
1574{
1575 bool showtext = PG_GETARG_BOOL(0);
1576
1578
1579 return (Datum) 0;
1580}
1581
1582Datum
1584{
1585 bool showtext = PG_GETARG_BOOL(0);
1586
1588
1589 return (Datum) 0;
1590}
1591
1592Datum
1594{
1595 bool showtext = PG_GETARG_BOOL(0);
1596
1598
1599 return (Datum) 0;
1600}
1601
1602Datum
1604{
1605 bool showtext = PG_GETARG_BOOL(0);
1606
1608
1609 return (Datum) 0;
1610}
1611
1612Datum
1614{
1615 bool showtext = PG_GETARG_BOOL(0);
1616
1618
1619 return (Datum) 0;
1620}
1621
1622Datum
1624{
1625 bool showtext = PG_GETARG_BOOL(0);
1626
1628
1629 return (Datum) 0;
1630}
1631
1632Datum
1634{
1635 bool showtext = PG_GETARG_BOOL(0);
1636
1638
1639 return (Datum) 0;
1640}
1641
1642Datum
1644{
1645 bool showtext = PG_GETARG_BOOL(0);
1646
1648
1649 return (Datum) 0;
1650}
1651
1652/*
1653 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1654 * This can be removed someday, perhaps.
1655 */
1656Datum
1658{
1659 /* If it's really API 1.1, we'll figure that out below */
1661
1662 return (Datum) 0;
1663}
1664
1665/* Common code for all versions of pg_stat_statements() */
1666static void
1668 pgssVersion api_version,
1669 bool showtext)
1670{
1672 Oid userid = GetUserId();
1673 bool is_allowed_role = false;
1674 char *qbuffer = NULL;
1675 Size qbuffer_size = 0;
1676 Size extent = 0;
1677 int gc_count = 0;
1679 pgssEntry *entry;
1680
1681 /*
1682 * Superusers or roles with the privileges of pg_read_all_stats members
1683 * are allowed
1684 */
1686
1687 /* hash table must exist already */
1688 if (!pgss || !pgss_hash)
1689 ereport(ERROR,
1691 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1692
1693 InitMaterializedSRF(fcinfo, 0);
1694
1695 /*
1696 * Check we have the expected number of output arguments. Aside from
1697 * being a good safety check, we need a kluge here to detect API version
1698 * 1.1, which was wedged into the code in an ill-considered way.
1699 */
1700 switch (rsinfo->setDesc->natts)
1701 {
1703 if (api_version != PGSS_V1_0)
1704 elog(ERROR, "incorrect number of output arguments");
1705 break;
1707 /* pg_stat_statements() should have told us 1.0 */
1708 if (api_version != PGSS_V1_0)
1709 elog(ERROR, "incorrect number of output arguments");
1710 api_version = PGSS_V1_1;
1711 break;
1713 if (api_version != PGSS_V1_2)
1714 elog(ERROR, "incorrect number of output arguments");
1715 break;
1717 if (api_version != PGSS_V1_3)
1718 elog(ERROR, "incorrect number of output arguments");
1719 break;
1721 if (api_version != PGSS_V1_8)
1722 elog(ERROR, "incorrect number of output arguments");
1723 break;
1725 if (api_version != PGSS_V1_9)
1726 elog(ERROR, "incorrect number of output arguments");
1727 break;
1729 if (api_version != PGSS_V1_10)
1730 elog(ERROR, "incorrect number of output arguments");
1731 break;
1733 if (api_version != PGSS_V1_11)
1734 elog(ERROR, "incorrect number of output arguments");
1735 break;
1737 if (api_version != PGSS_V1_12)
1738 elog(ERROR, "incorrect number of output arguments");
1739 break;
1741 if (api_version != PGSS_V1_13)
1742 elog(ERROR, "incorrect number of output arguments");
1743 break;
1744 default:
1745 elog(ERROR, "incorrect number of output arguments");
1746 }
1747
1748 /*
1749 * We'd like to load the query text file (if needed) while not holding any
1750 * lock on pgss->lock. In the worst case we'll have to do this again
1751 * after we have the lock, but it's unlikely enough to make this a win
1752 * despite occasional duplicated work. We need to reload if anybody
1753 * writes to the file (either a retail qtext_store(), or a garbage
1754 * collection) between this point and where we've gotten shared lock. If
1755 * a qtext_store is actually in progress when we look, we might as well
1756 * skip the speculative load entirely.
1757 */
1758 if (showtext)
1759 {
1760 int n_writers;
1761
1762 /* Take the mutex so we can examine variables */
1764 extent = pgss->extent;
1765 n_writers = pgss->n_writers;
1766 gc_count = pgss->gc_count;
1768
1769 /* No point in loading file now if there are active writers */
1770 if (n_writers == 0)
1772 }
1773
1774 /*
1775 * Get shared lock, load or reload the query text file if we must, and
1776 * iterate over the hashtable entries.
1777 *
1778 * With a large hash table, we might be holding the lock rather longer
1779 * than one could wish. However, this only blocks creation of new hash
1780 * table entries, and the larger the hash table the less likely that is to
1781 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1782 * we need to partition the hash table to limit the time spent holding any
1783 * one lock.
1784 */
1786
1787 if (showtext)
1788 {
1789 /*
1790 * Here it is safe to examine extent and gc_count without taking the
1791 * mutex. Note that although other processes might change
1792 * pgss->extent just after we look at it, the strings they then write
1793 * into the file cannot yet be referenced in the hashtable, so we
1794 * don't care whether we see them or not.
1795 *
1796 * If qtext_load_file fails, we just press on; we'll return NULL for
1797 * every query text.
1798 */
1799 if (qbuffer == NULL ||
1800 pgss->extent != extent ||
1801 pgss->gc_count != gc_count)
1802 {
1803 if (qbuffer)
1804 pfree(qbuffer);
1806 }
1807 }
1808
1810 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1811 {
1813 bool nulls[PG_STAT_STATEMENTS_COLS];
1814 int i = 0;
1815 Counters tmp;
1816 double stddev;
1817 int64 queryid = entry->key.queryid;
1818 TimestampTz stats_since;
1819 TimestampTz minmax_stats_since;
1820
1821 memset(values, 0, sizeof(values));
1822 memset(nulls, 0, sizeof(nulls));
1823
1824 values[i++] = ObjectIdGetDatum(entry->key.userid);
1825 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1826 if (api_version >= PGSS_V1_9)
1827 values[i++] = BoolGetDatum(entry->key.toplevel);
1828
1829 if (is_allowed_role || entry->key.userid == userid)
1830 {
1831 if (api_version >= PGSS_V1_2)
1832 values[i++] = Int64GetDatumFast(queryid);
1833
1834 if (showtext)
1835 {
1836 char *qstr = qtext_fetch(entry->query_offset,
1837 entry->query_len,
1838 qbuffer,
1839 qbuffer_size);
1840
1841 if (qstr)
1842 {
1843 char *enc;
1844
1846 entry->query_len,
1847 entry->encoding);
1848
1850
1851 if (enc != qstr)
1852 pfree(enc);
1853 }
1854 else
1855 {
1856 /* Just return a null if we fail to find the text */
1857 nulls[i++] = true;
1858 }
1859 }
1860 else
1861 {
1862 /* Query text not requested */
1863 nulls[i++] = true;
1864 }
1865 }
1866 else
1867 {
1868 /* Don't show queryid */
1869 if (api_version >= PGSS_V1_2)
1870 nulls[i++] = true;
1871
1872 /*
1873 * Don't show query text, but hint as to the reason for not doing
1874 * so if it was requested
1875 */
1876 if (showtext)
1877 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1878 else
1879 nulls[i++] = true;
1880 }
1881
1882 /* copy counters to a local variable to keep locking time short */
1883 SpinLockAcquire(&entry->mutex);
1884 tmp = entry->counters;
1885 SpinLockRelease(&entry->mutex);
1886
1887 /*
1888 * The spinlock is not required when reading these two as they are
1889 * always updated when holding pgss->lock exclusively.
1890 */
1891 stats_since = entry->stats_since;
1892 minmax_stats_since = entry->minmax_stats_since;
1893
1894 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1895 if (IS_STICKY(tmp))
1896 continue;
1897
1898 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1899 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1900 {
1901 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1902 {
1903 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1904 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1905 }
1906
1907 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1908 api_version >= PGSS_V1_8)
1909 {
1910 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1911 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1912 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1913
1914 /*
1915 * Note we are calculating the population variance here, not
1916 * the sample variance, as we have data for the whole
1917 * population, so Bessel's correction is not used, and we
1918 * don't divide by tmp.calls - 1.
1919 */
1920 if (tmp.calls[kind] > 1)
1921 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1922 else
1923 stddev = 0.0;
1924 values[i++] = Float8GetDatumFast(stddev);
1925 }
1926 }
1927 values[i++] = Int64GetDatumFast(tmp.rows);
1930 if (api_version >= PGSS_V1_1)
1935 if (api_version >= PGSS_V1_1)
1940 if (api_version >= PGSS_V1_1)
1941 {
1944 }
1945 if (api_version >= PGSS_V1_11)
1946 {
1949 }
1950 if (api_version >= PGSS_V1_10)
1951 {
1954 }
1955 if (api_version >= PGSS_V1_8)
1956 {
1957 char buf[256];
1958 Datum wal_bytes;
1959
1962
1963 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1964
1965 /* Convert to numeric. */
1966 wal_bytes = DirectFunctionCall3(numeric_in,
1969 Int32GetDatum(-1));
1970 values[i++] = wal_bytes;
1971 }
1972 if (api_version >= PGSS_V1_12)
1973 {
1975 }
1976 if (api_version >= PGSS_V1_10)
1977 {
1986 }
1987 if (api_version >= PGSS_V1_11)
1988 {
1991 }
1992 if (api_version >= PGSS_V1_12)
1993 {
1996 }
1997 if (api_version >= PGSS_V1_13)
1998 {
2001 }
2002 if (api_version >= PGSS_V1_11)
2003 {
2004 values[i++] = TimestampTzGetDatum(stats_since);
2005 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2006 }
2007
2008 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2009 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2010 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2011 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2012 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2013 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2014 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2015 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2016 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2017 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2018 -1 /* fail if you forget to update this assert */ ));
2019
2020 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2021 }
2022
2024
2025 if (qbuffer)
2026 pfree(qbuffer);
2027}
2028
2029/* Number of output arguments (columns) for pg_stat_statements_info */
2030#define PG_STAT_STATEMENTS_INFO_COLS 2
2031
2032/*
2033 * Return statistics of pg_stat_statements.
2034 */
2035Datum
2037{
2038 pgssGlobalStats stats;
2039 TupleDesc tupdesc;
2041 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2042
2043 if (!pgss || !pgss_hash)
2044 ereport(ERROR,
2046 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2047
2048 /* Build a tuple descriptor for our result type */
2049 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2050 elog(ERROR, "return type must be a row type");
2051
2052 /* Read global statistics for pg_stat_statements */
2054 stats = pgss->stats;
2056
2057 values[0] = Int64GetDatum(stats.dealloc);
2059
2061}
2062
2063/*
2064 * Allocate a new hashtable entry.
2065 * caller must hold an exclusive lock on pgss->lock
2066 *
2067 * "query" need not be null-terminated; we rely on query_len instead
2068 *
2069 * If "sticky" is true, make the new entry artificially sticky so that it will
2070 * probably still be there when the query finishes execution. We do this by
2071 * giving it a median usage value rather than the normal value. (Strictly
2072 * speaking, query strings are normalized on a best effort basis, though it
2073 * would be difficult to demonstrate this even under artificial conditions.)
2074 *
2075 * Note: despite needing exclusive lock, it's not an error for the target
2076 * entry to already exist. This is because pgss_store releases and
2077 * reacquires lock after failing to find a match; so someone else could
2078 * have made the entry while we waited to get exclusive lock.
2079 */
2080static pgssEntry *
2081entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2082 bool sticky)
2083{
2084 pgssEntry *entry;
2085 bool found;
2086
2087 /* Make space if needed */
2089 entry_dealloc();
2090
2091 /* Find or create an entry with desired hash code */
2092 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2093
2094 if (!found)
2095 {
2096 /* New entry, initialize it */
2097
2098 /* reset the statistics */
2099 memset(&entry->counters, 0, sizeof(Counters));
2100 /* set the appropriate initial usage count */
2102 /* re-initialize the mutex each time ... we assume no one using it */
2103 SpinLockInit(&entry->mutex);
2104 /* ... and don't forget the query text metadata */
2105 Assert(query_len >= 0);
2106 entry->query_offset = query_offset;
2107 entry->query_len = query_len;
2108 entry->encoding = encoding;
2110 entry->minmax_stats_since = entry->stats_since;
2111 }
2112
2113 return entry;
2114}
2115
2116/*
2117 * qsort comparator for sorting into increasing usage order
2118 */
2119static int
2120entry_cmp(const void *lhs, const void *rhs)
2121{
2122 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2123 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2124
2125 if (l_usage < r_usage)
2126 return -1;
2127 else if (l_usage > r_usage)
2128 return +1;
2129 else
2130 return 0;
2131}
2132
2133/*
2134 * Deallocate least-used entries.
2135 *
2136 * Caller must hold an exclusive lock on pgss->lock.
2137 */
2138static void
2139entry_dealloc(void)
2140{
2142 pgssEntry **entries;
2143 pgssEntry *entry;
2144 int nvictims;
2145 int i;
2147 int nvalidtexts;
2148
2149 /*
2150 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2151 * While we're scanning the table, apply the decay factor to the usage
2152 * values, and update the mean query length.
2153 *
2154 * Note that the mean query length is almost immediately obsolete, since
2155 * we compute it before not after discarding the least-used entries.
2156 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2157 * making two passes to get a more current result. Likewise, the new
2158 * cur_median_usage includes the entries we're about to zap.
2159 */
2160
2161 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2162
2163 i = 0;
2164 tottextlen = 0;
2165 nvalidtexts = 0;
2166
2168 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2169 {
2170 entries[i++] = entry;
2171 /* "Sticky" entries get a different usage decay rate. */
2172 if (IS_STICKY(entry->counters))
2174 else
2176 /* In the mean length computation, ignore dropped texts. */
2177 if (entry->query_len >= 0)
2178 {
2179 tottextlen += entry->query_len + 1;
2180 nvalidtexts++;
2181 }
2182 }
2183
2184 /* Sort into increasing order by usage */
2185 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2186
2187 /* Record the (approximate) median usage */
2188 if (i > 0)
2189 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2190 /* Record the mean query length */
2191 if (nvalidtexts > 0)
2193 else
2195
2196 /* Now zap an appropriate fraction of lowest-usage entries */
2197 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2198 nvictims = Min(nvictims, i);
2199
2200 for (i = 0; i < nvictims; i++)
2201 {
2202 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2203 }
2204
2205 pfree(entries);
2206
2207 /* Increment the number of times entries are deallocated */
2209 pgss->stats.dealloc += 1;
2211}
2212
2213/*
2214 * Given a query string (not necessarily null-terminated), allocate a new
2215 * entry in the external query text file and store the string there.
2216 *
2217 * If successful, returns true, and stores the new entry's offset in the file
2218 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2219 * number of garbage collections that have occurred so far.
2220 *
2221 * On failure, returns false.
2222 *
2223 * At least a shared lock on pgss->lock must be held by the caller, so as
2224 * to prevent a concurrent garbage collection. Share-lock-holding callers
2225 * should pass a gc_count pointer to obtain the number of garbage collections,
2226 * so that they can recheck the count after obtaining exclusive lock to
2227 * detect whether a garbage collection occurred (and removed this entry).
2228 */
2229static bool
2230qtext_store(const char *query, int query_len,
2231 Size *query_offset, int *gc_count)
2232{
2233 Size off;
2234 int fd;
2235
2236 /*
2237 * We use a spinlock to protect extent/n_writers/gc_count, so that
2238 * multiple processes may execute this function concurrently.
2239 */
2241 off = pgss->extent;
2242 pgss->extent += query_len + 1;
2243 pgss->n_writers++;
2244 if (gc_count)
2245 *gc_count = pgss->gc_count;
2247
2248 *query_offset = off;
2249
2250 /*
2251 * Don't allow the file to grow larger than what qtext_load_file can
2252 * (theoretically) handle. This has been seen to be reachable on 32-bit
2253 * platforms.
2254 */
2255 if (unlikely(query_len >= MaxAllocHugeSize - off))
2256 {
2257 errno = EFBIG; /* not quite right, but it'll do */
2258 fd = -1;
2259 goto error;
2260 }
2261
2262 /* Now write the data into the successfully-reserved part of the file */
2264 if (fd < 0)
2265 goto error;
2266
2267 if (pg_pwrite(fd, query, query_len, off) != query_len)
2268 goto error;
2269 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2270 goto error;
2271
2273
2274 /* Mark our write complete */
2276 pgss->n_writers--;
2278
2279 return true;
2280
2281error:
2282 ereport(LOG,
2284 errmsg("could not write file \"%s\": %m",
2285 PGSS_TEXT_FILE)));
2286
2287 if (fd >= 0)
2289
2290 /* Mark our write complete */
2292 pgss->n_writers--;
2294
2295 return false;
2296}
2297
2298/*
2299 * Read the external query text file into a palloc'd buffer.
2300 *
2301 * Returns NULL (without throwing an error) if unable to read, eg
2302 * file not there or insufficient memory.
2303 *
2304 * On success, the buffer size is also returned into *buffer_size.
2305 *
2306 * This can be called without any lock on pgss->lock, but in that case
2307 * the caller is responsible for verifying that the result is sane.
2308 */
2309static char *
2310qtext_load_file(Size *buffer_size)
2311{
2312 char *buf;
2313 int fd;
2314 struct stat stat;
2315 Size nread;
2316
2318 if (fd < 0)
2319 {
2320 if (errno != ENOENT)
2321 ereport(LOG,
2323 errmsg("could not read file \"%s\": %m",
2324 PGSS_TEXT_FILE)));
2325 return NULL;
2326 }
2327
2328 /* Get file length */
2329 if (fstat(fd, &stat))
2330 {
2331 ereport(LOG,
2333 errmsg("could not stat file \"%s\": %m",
2334 PGSS_TEXT_FILE)));
2336 return NULL;
2337 }
2338
2339 /* Allocate buffer; beware that off_t might be wider than size_t */
2342 else
2343 buf = NULL;
2344 if (buf == NULL)
2345 {
2346 ereport(LOG,
2348 errmsg("out of memory"),
2349 errdetail("Could not allocate enough memory to read file \"%s\".",
2350 PGSS_TEXT_FILE)));
2352 return NULL;
2353 }
2354
2355 /*
2356 * OK, slurp in the file. Windows fails if we try to read more than
2357 * INT_MAX bytes at once, and other platforms might not like that either,
2358 * so read a very large file in 1GB segments.
2359 */
2360 nread = 0;
2361 while (nread < stat.st_size)
2362 {
2363 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2364
2365 /*
2366 * If we get a short read and errno doesn't get set, the reason is
2367 * probably that garbage collection truncated the file since we did
2368 * the fstat(), so we don't log a complaint --- but we don't return
2369 * the data, either, since it's most likely corrupt due to concurrent
2370 * writes from garbage collection.
2371 */
2372 errno = 0;
2373 if (read(fd, buf + nread, toread) != toread)
2374 {
2375 if (errno)
2376 ereport(LOG,
2378 errmsg("could not read file \"%s\": %m",
2379 PGSS_TEXT_FILE)));
2380 pfree(buf);
2382 return NULL;
2383 }
2384 nread += toread;
2385 }
2386
2387 if (CloseTransientFile(fd) != 0)
2388 ereport(LOG,
2390 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2391
2392 *buffer_size = nread;
2393 return buf;
2394}
2395
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len: position and length (excluding the terminating
 * null) of the wanted text within the image.
 * buffer/buffer_size: the file image and its size in bytes; buffer may be
 * NULL if the load failed.
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its trailing null must fit inside
	 * the buffer.  Compare the length against the space remaining after the
	 * offset instead of forming query_offset + query_len: that sum is done
	 * in unsigned Size arithmetic and would wrap around for a corrupt
	 * offset, letting an out-of-range entry slip past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2419
2420/*
2421 * Do we need to garbage-collect the external query text file?
2422 *
2423 * Caller should hold at least a shared lock on pgss->lock.
 *
 * Returns false while the file extent is below 512 bytes per possible entry
 * (512 * pgss_max), or below twice the space implied by the mean query
 * length (mean_query_len * pgss_max * 2); returns true otherwise.
 *
 * NOTE(review): this listing skips source lines 2431 and 2433, which sit
 * immediately around the read of pgss->extent below.  Presumably they take
 * and release a lock for that read -- confirm against the upstream file.
2424 */
2425static bool
2426need_gc_qtexts(void)
2427{
2428 Size extent;
2429
2430 /* Read shared extent pointer */
2432 extent = pgss->extent;
2434
2435 /*
2436 * Don't proceed if file does not exceed 512 bytes per possible entry.
2437 *
2438 * Here and in the next test, 32-bit machines have overflow hazards if
2439 * pgss_max and/or mean_query_len are large. Force the multiplications
2440 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2441 */
2442 if ((uint64) extent < (uint64) 512 * pgss_max)
2443 return false;
2444
2445 /*
2446 * Don't proceed if file is less than about 50% bloat. Nothing can or
2447 * should be done in the event of unusually large query texts accounting
2448 * for file's large size. We go to the trouble of maintaining the mean
2449 * query length in order to prevent garbage collection from thrashing
2450 * uselessly.
2451 */
2452 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2453 return false;
2454
2455 return true;
2456}
2457
2458/*
2459 * Garbage-collect orphaned query texts in external file.
2460 *
2461 * This won't be called often in the typical case, since it's likely that
2462 * there won't be too much churn, and besides, a similar compaction process
2463 * occurs when serializing to disk at shutdown or as part of resetting.
2464 * Despite this, it seems prudent to plan for the edge case where the file
2465 * becomes unreasonably large, with no other method of compaction likely to
2466 * occur in the foreseeable future.
2467 *
2468 * The caller must hold an exclusive lock on pgss->lock.
2469 *
2470 * At the first sign of trouble we unlink the query text file to get a clean
2471 * slate (although existing statistics are retained), rather than risk
2472 * thrashing by allowing the same problem case to recur indefinitely.
 *
 * Outline of the visible code: return at once if need_gc_qtexts() says no.
 * Otherwise, for each hashtable entry, locate its text in the old file image
 * with qtext_fetch(); if that fails the entry's text is dropped (offset 0,
 * length -1), else the text and its terminating null are written to the file
 * and the entry's query_offset is set to the new running extent.  Afterwards
 * the file is truncated to the new extent, and pgss->extent and (when at
 * least one text was kept) pgss->mean_query_len are updated.  On failure
 * (label gc_fail) resources are released, every entry's text is marked
 * invalid and pgss->extent is reset to 0.
 *
 * NOTE(review): this listing has dropped a number of source lines, so the
 * function is not complete as shown.  Skipped line numbers: 2478, 2480, 2500,
 * 2510, 2514, 2523, 2544, 2547, 2562, 2569, 2589, 2600, 2615, 2625, 2626,
 * 2629, 2639 and 2652.  In particular the declarations of qbuffer_size and
 * hash_seq, the statements that assign qbuffer and qfile, and the statement
 * belonging to the final "else" are not visible.  Consult the upstream file
 * before relying on any detail here.
2473 */
2474static void
2475gc_qtexts(void)
2476{
2477 char *qbuffer;
2479 FILE *qfile = NULL;
2481 pgssEntry *entry;
2482 Size extent;
2483 int nentries;
2484
2485 /*
2486 * When called from pgss_store, some other session might have proceeded
2487 * with garbage collection in the no-lock-held interim of lock strength
2488 * escalation. Check once more that this is actually necessary.
2489 */
2490 if (!need_gc_qtexts())
2491 return;
2492
2493 /*
2494 * Load the old texts file. If we fail (out of memory, for instance),
2495 * invalidate query texts. Hopefully this is rare. It might seem better
2496 * to leave things alone on an OOM failure, but the problem is that the
2497 * file is only going to get bigger; hoping for a future non-OOM result is
2498 * risky and can easily lead to complete denial of service.
2499 */
/* NOTE(review): line 2500, which presumably loads the file into qbuffer, is missing from this listing. */
2501 if (qbuffer == NULL)
2502 goto gc_fail;
2503
2504 /*
2505 * We overwrite the query texts file in place, so as to reduce the risk of
2506 * an out-of-disk-space failure. Since the file is guaranteed not to get
2507 * larger, this should always work on traditional filesystems; though we
2508 * could still lose on copy-on-write filesystems.
2509 */
/* NOTE(review): line 2510, which presumably opens qfile, is missing from this listing. */
2511 if (qfile == NULL)
2512 {
2513 ereport(LOG,
2515 errmsg("could not write file \"%s\": %m",
2516 PGSS_TEXT_FILE)));
2517 goto gc_fail;
2518 }
2519
2520 extent = 0;
2521 nentries = 0;
2522
/* NOTE(review): line 2523 (presumably the scan initialization for hash_seq) is missing from this listing. */
2524 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2525 {
2526 int query_len = entry->query_len;
2527 char *qry = qtext_fetch(entry->query_offset,
2528 query_len,
2529 qbuffer,
2530 qbuffer_size);
2531
2532 if (qry == NULL)
2533 {
2534 /* Trouble ... drop the text */
2535 entry->query_offset = 0;
2536 entry->query_len = -1;
2537 /* entry will not be counted in mean query length computation */
2538 continue;
2539 }
2540
/* Write the text together with its terminating null, hence query_len + 1. */
2541 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2542 {
2543 ereport(LOG,
2545 errmsg("could not write file \"%s\": %m",
2546 PGSS_TEXT_FILE)));
2548 goto gc_fail;
2549 }
2550
/* The entry now lives at the current end of the rewritten file. */
2551 entry->query_offset = extent;
2552 extent += query_len + 1;
2553 nentries++;
2554 }
2555
2556 /*
2557 * Truncate away any now-unused space. If this fails for some odd reason,
2558 * we log it, but there's no need to fail.
2559 */
2560 if (ftruncate(fileno(qfile), extent) != 0)
2561 ereport(LOG,
2563 errmsg("could not truncate file \"%s\": %m",
2564 PGSS_TEXT_FILE)));
2565
2566 if (FreeFile(qfile))
2567 {
2568 ereport(LOG,
2570 errmsg("could not write file \"%s\": %m",
2571 PGSS_TEXT_FILE)));
/* qfile is cleared so that the gc_fail path does not call FreeFile() on it again. */
2572 qfile = NULL;
2573 goto gc_fail;
2574 }
2575
2576 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2577 pgss->extent, extent);
2578
2579 /* Reset the shared extent pointer */
2580 pgss->extent = extent;
2581
2582 /*
2583 * Also update the mean query length, to be sure that need_gc_qtexts()
2584 * won't still think we have a problem.
2585 */
2586 if (nentries > 0)
2587 pgss->mean_query_len = extent / nentries;
2588 else
/* NOTE(review): the statement for this "else" (line 2589) is missing from this listing. */
2590
2591 pfree(qbuffer);
2592
2593 /*
2594 * OK, count a garbage collection cycle. (Note: even though we have
2595 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2596 * other processes may examine gc_count while holding only the mutex.
2597 * Also, we have to advance the count *after* we've rewritten the file,
2598 * else other processes might not realize they read a stale file.)
2599 */
/* NOTE(review): the statement the comment above describes (line 2600) is missing from this listing. */
2601
2602 return;
2603
2604gc_fail:
2605 /* clean up resources */
2606 if (qfile)
2607 FreeFile(qfile);
2608 if (qbuffer)
2609 pfree(qbuffer);
2610
2611 /*
2612 * Since the contents of the external file are now uncertain, mark all
2613 * hashtable entries as having invalid texts.
2614 */
2616 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2617 {
2618 entry->query_offset = 0;
2619 entry->query_len = -1;
2620 }
2621
2622 /*
2623 * Destroy the query text file and create a new, empty one
2624 */
/* NOTE(review): lines 2625-2626, which presumably remove and reopen the file, are missing from this listing. */
2627 if (qfile == NULL)
2628 ereport(LOG,
2630 errmsg("could not recreate file \"%s\": %m",
2631 PGSS_TEXT_FILE)));
2632 else
2633 FreeFile(qfile);
2634
2635 /* Reset the shared extent pointer */
2636 pgss->extent = 0;
2637
2638 /* Reset mean_query_len to match the new state */
2640
2641 /*
2642 * Bump the GC count even though we failed.
2643 *
2644 * This is needed to make concurrent readers of file without any lock on
2645 * pgss->lock notice existence of new version of file. Once readers
2646 * subsequently observe a change in GC count with pgss->lock held, that
2647 * forces a safe reopen of file. Writers also require that we bump here,
2648 * of course. (As required by locking protocol, readers and writers don't
2649 * trust earlier file contents until gc_count is found unchanged after
2650 * pgss->lock acquired in shared or exclusive mode respectively.)
2651 */
2653}
2654
/*
 * SINGLE_ENTRY_RESET(e)
 *
 * Apply a reset to one hashtable entry "e"; does nothing when "e" is NULL.
 *
 * The macro expands in the caller's scope and relies on these names being
 * visible there: minmax_only, stats_reset, num_remove and pgss_hash.
 *
 * - If minmax_only is true, only the min/max times of every kind are zeroed
 *   and minmax_stats_since is set to stats_reset; the entry is kept.
 * - Otherwise the entry's key is removed from pgss_hash and num_remove is
 *   incremented.
 *
 * The body is wrapped in do { ... } while (0) so that the macro, followed by
 * a semicolon, forms exactly one statement and can be used safely as the
 * unbraced arm of an if/else.  "e" is parenthesized but is evaluated more
 * than once, so pass an expression without side effects.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2673
2674/*
2675 * Reset entries corresponding to parameters passed.
 *
 * userid, dbid and queryid select the entries; a zero value acts as a
 * wildcard for that field.  When all three are nonzero, the two possible
 * entries (toplevel false, then toplevel true) are looked up directly; when
 * only some are nonzero, the whole hashtable is scanned and matching entries
 * are reset; when all are zero, every entry is reset.  What "reset" means
 * for one entry is decided by SINGLE_ENTRY_RESET using minmax_only.
 *
 * If the number of removed entries equals the number of entries counted at
 * the start, the global statistics are reset as well and the query text
 * file is recreated empty.
 *
 * Returns the timestamp taken at the start of the reset.  Raises an ERROR
 * if pgss or pgss_hash is not set up.
 *
 * NOTE(review): this listing has dropped a number of source lines, so the
 * function is not complete as shown.  Skipped line numbers: 2680, 2685,
 * 2690, 2693, 2724, 2738, 2753, 2756, 2762, 2766, 2775, 2784, 2786 and 2787.
 * In particular the declarations of hash_seq and key, the statement that
 * assigns qfile, and the release_lock label targeted by the goto below are
 * not visible; presumably lock acquisition/release is among the missing
 * lines too.  Consult the upstream file before relying on any detail here.
2676 */
2677static TimestampTz
2678entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2679{
2681 pgssEntry *entry;
2682 FILE *qfile;
2683 int64 num_entries;
2684 int64 num_remove = 0;
2686 TimestampTz stats_reset;
2687
2688 if (!pgss || !pgss_hash)
2689 ereport(ERROR,
2691 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2692
/* Remember the entry count up front; it is compared with num_remove later. */
2694 num_entries = hash_get_num_entries(pgss_hash);
2695
2696 stats_reset = GetCurrentTimestamp();
2697
2698 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2699 {
2700 /* If all the parameters are available, use the fast path. */
2701 memset(&key, 0, sizeof(pgssHashKey));
2702 key.userid = userid;
2703 key.dbid = dbid;
2704 key.queryid = queryid;
2705
2706 /*
2707 * Reset the entry if it exists, starting with the non-top-level
2708 * entry.
2709 */
2710 key.toplevel = false;
2711 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2712
2713 SINGLE_ENTRY_RESET(entry);
2714
2715 /* Also reset the top-level entry if it exists. */
2716 key.toplevel = true;
2717 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2718
2719 SINGLE_ENTRY_RESET(entry);
2720 }
2721 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2722 {
2723 /* Reset entries corresponding to valid parameters. */
2725 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2726 {
2727 if ((!userid || entry->key.userid == userid) &&
2728 (!dbid || entry->key.dbid == dbid) &&
2729 (!queryid || entry->key.queryid == queryid))
2730 {
2731 SINGLE_ENTRY_RESET(entry);
2732 }
2733 }
2734 }
2735 else
2736 {
2737 /* Reset all entries. */
2739 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2740 {
2741 SINGLE_ENTRY_RESET(entry);
2742 }
2743 }
2744
2745 /* All entries are removed? */
2746 if (num_entries != num_remove)
2747 goto release_lock;
2748
2749 /*
2750 * Reset global statistics for pg_stat_statements since all entries are
2751 * removed.
2752 */
2754 pgss->stats.dealloc = 0;
2755 pgss->stats.stats_reset = stats_reset;
2757
2758 /*
2759 * Write new empty query file, perhaps even creating a new one to recover
2760 * if the file was missing.
2761 */
/* NOTE(review): line 2762, which presumably opens qfile, is missing from this listing. */
2763 if (qfile == NULL)
2764 {
2765 ereport(LOG,
2767 errmsg("could not create file \"%s\": %m",
2768 PGSS_TEXT_FILE)));
2769 goto done;
2770 }
2771
2772 /* If ftruncate fails, log it, but it's not a fatal problem */
2773 if (ftruncate(fileno(qfile), 0) != 0)
2774 ereport(LOG,
2776 errmsg("could not truncate file \"%s\": %m",
2777 PGSS_TEXT_FILE)));
2778
2779 FreeFile(qfile);
2780
2781done:
2782 pgss->extent = 0;
2783 /* This counts as a query text garbage collection for our purposes */
/* NOTE(review): lines 2784, 2786 and 2787 are missing here; the release_lock label is presumably among them. */
2785
2788
2789 return stats_reset;
2790}
2791
2792/*
2793 * Generate a normalized version of the query string that will be used to
2794 * represent all similar queries.
2795 *
2796 * Note that the normalized representation may well vary depending on
2797 * just which "equivalent" query is used to create the hashtable entry.
2798 * We assume this is OK.
2799 *
2800 * If query_loc > 0, then "query" has been advanced by that much compared to
2801 * the original string start, so we need to translate the provided locations
2802 * to compensate. (This lets us avoid re-scanning statements before the one
2803 * of interest, so it's worth doing.)
2804 *
2805 * *query_len_p contains the input string length, and is updated with
2806 * the result string length on exit. The resulting string might be longer
2807 * or shorter depending on what happens with replacement of constants.
2808 *
2809 * Returns a palloc'd string.
 *
 * NOTE(review): this listing has dropped a number of source lines, so the
 * function is not complete as shown.  Skipped line numbers: 2824, 2831,
 * 2843, 2872, 2874, 2875, 2882, 2885, 2890, 2904, 2905, 2907 and 2910.  In
 * particular the declaration and assignment of locs, the allocation of
 * norm_query, every statement that copies text into norm_query, the start
 * of the call whose trailing arguments appear at lines 2883-2884, and the
 * store back through query_len_p are not visible.  Consult the upstream
 * file before relying on any detail here.
2810 */
2811static char *
2812generate_normalized_query(const JumbleState *jstate, const char *query,
2813 int query_loc, int *query_len_p)
2814{
2815 char *norm_query;
2816 int query_len = *query_len_p;
2817 int norm_query_buflen, /* Space allowed for norm_query */
2818 len_to_wrt, /* Length (in bytes) to write */
2819 quer_loc = 0, /* Source query byte location */
2820 n_quer_loc = 0, /* Normalized query byte location */
2821 last_off = 0, /* Offset from start for previous tok */
2822 last_tok_len = 0; /* Length (in bytes) of that tok */
2823 int num_constants_replaced = 0;
2825
2826 /*
2827 * Determine constants' lengths (core system only gives us locations), and
2828 * return a sorted copy of jstate's LocationLen data with lengths filled
2829 * in.
2830 */
/* NOTE(review): the statement the comment above describes (line 2831) is missing from this listing. */
2832
2833 /*
2834 * Allow for $n symbols to be longer than the constants they replace.
2835 * Constants must take at least one byte in text form, while a $n symbol
2836 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2837 * could refine that limit based on the max value of n for the current
2838 * query, but it hardly seems worth any extra effort to do so.
2839 */
/* 10 extra bytes per constant: an 11-byte symbol replacing at least 1 byte. */
2840 norm_query_buflen = query_len + jstate->clocations_count * 10;
2841
2842 /* Allocate result buffer */
2844
2845 for (int i = 0; i < jstate->clocations_count; i++)
2846 {
2847 int off, /* Offset from start for cur tok */
2848 tok_len; /* Length (in bytes) of that tok */
2849
2850 /*
2851 * If we have an external param at this location, but no lists are
2852 * being squashed across the query, then we skip here; this will make
2853 * us print the characters found in the original query that represent
2854 * the parameter in the next iteration (or after the loop is done),
2855 * which is a bit odd but seems to work okay in most cases.
2856 */
2857 if (locs[i].extern_param && !jstate->has_squashed_lists)
2858 continue;
2859
2860 off = locs[i].location;
2861
2862 /* Adjust recorded location if we're dealing with partial string */
2863 off -= query_loc;
2864
2865 tok_len = locs[i].length;
2866
2867 if (tok_len < 0)
2868 continue; /* ignore any duplicates */
2869
2870 /* Copy next chunk (what precedes the next constant) */
2871 len_to_wrt = off - last_off;
2873 Assert(len_to_wrt >= 0);
2876
2877 /*
2878 * And insert a param symbol in place of the constant token; and, if
2879 * we have a squashable list, insert a placeholder comment starting
2880 * from the list's second value.
2881 */
2883 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2884 locs[i].squashed ? " /*, ... */" : "");
2886
2887 /* move forward */
2888 quer_loc = off + tok_len;
2889 last_off = off;
2891 }
2892
2893 /* Clean up, if needed */
2894 if (locs)
2895 pfree(locs);
2896
2897 /*
2898 * We've copied up until the last ignorable constant. Copy over the
2899 * remaining bytes of the original query string.
2900 */
2901 len_to_wrt = query_len - quer_loc;
2902
2903 Assert(len_to_wrt >= 0);
2906
2908 norm_query[n_quer_loc] = '\0';
2909
2911 return norm_query;
2912}
bool has_privs_of_role(Oid member, Oid role)
Definition acl.c:5314
Datum numeric_in(PG_FUNCTION_ARGS)
Definition numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1649
static Datum values[MAXATTR]
Definition bootstrap.c:190
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define INT64CONST(x)
Definition c.h:630
#define Min(x, y)
Definition c.h:1091
#define PG_BINARY_R
Definition c.h:1376
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
#define PG_BINARY
Definition c.h:1374
#define UINT64_FORMAT
Definition c.h:635
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define PG_BINARY_W
Definition c.h:1377
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition dynahash.c:1444
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1273
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
Datum arg
Definition elog.c:1323
int errcode_for_file_access(void)
Definition elog.c:898
int errcode(int sqlerrcode)
Definition elog.c:875
#define LOG
Definition elog.h:32
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define PG_TRY(...)
Definition elog.h:374
#define PG_END_TRY(...)
Definition elog.h:399
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define PG_FINALLY(...)
Definition elog.h:391
#define ereport(elevel,...)
Definition elog.h:152
ExecutorEnd_hook_type ExecutorEnd_hook
Definition execMain.c:73
ExecutorFinish_hook_type ExecutorFinish_hook
Definition execMain.c:72
ExecutorStart_hook_type ExecutorStart_hook
Definition execMain.c:70
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition execMain.c:143
ExecutorRun_hook_type ExecutorRun_hook
Definition execMain.c:71
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition execMain.c:318
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition execMain.c:486
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition execMain.c:426
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
int CloseTransientFile(int fd)
Definition fd.c:2855
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define MCXT_ALLOC_HUGE
Definition fe_memutils.h:28
#define MCXT_ALLOC_NO_OOM
Definition fe_memutils.h:29
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:692
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
bool IsUnderPostmaster
Definition globals.c:122
Oid MyDatabaseId
Definition globals.c:96
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5152
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5049
void MarkGUCPrefixReserved(const char *className)
Definition guc.c:5186
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition guc.c:5073
@ PGC_SUSET
Definition guc.h:78
@ PGC_POSTMASTER
Definition guc.h:74
@ PGC_SIGHUP
Definition guc.h:75
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_BLOBS
Definition hsearch.h:92
void parse(int)
Definition parse.c:49
static char * encoding
Definition initdb.c:139
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:434
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:444
#define INSTR_TIME_GET_MILLISEC(t)
Definition instr_time.h:459
WalUsage pgWalUsage
Definition instrument.c:27
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:367
BufferUsage pgBufferUsage
Definition instrument.c:25
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:327
@ INSTRUMENT_ALL
Definition instrument.h:68
#define read(a, b, c)
Definition win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
int LWLockNewTrancheId(const char *name)
Definition lwlock.c:562
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
int GetDatabaseEncoding(void)
Definition mbutils.c:1389
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1635
void pfree(void *pointer)
Definition mcxt.c:1619
void * palloc(Size size)
Definition mcxt.c:1390
void * palloc_extended(Size size, int flags)
Definition mcxt.c:1442
#define MaxAllocHugeSize
Definition memutils.h:45
Oid GetUserId(void)
Definition miscinit.c:470
bool process_shared_preload_libraries_in_progress
Definition miscinit.c:1788
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static char * errmsg
post_parse_analyze_hook_type post_parse_analyze_hook
Definition analyze.c:74
const void size_t len
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
static ExecutorRun_hook_type prev_ExecutorRun
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * generate_normalized_query(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
static const ShmemCallbacks pgss_shmem_callbacks
#define PG_STAT_STATEMENTS_COLS_V1_3
#define PGSS_NUMKIND
static bool pgss_save
static int nesting_level
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
static void pgss_shmem_init(void *arg)
static void pgss_shmem_request(void *arg)
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:134
static int duration
Definition pgbench.c:175
planner_hook_type planner_hook
Definition planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition planner.c:351
PlannedStmtOrigin
Definition plannodes.h:36
@ PLAN_STMT_UNKNOWN
Definition plannodes.h:37
@ PLAN_STMT_CACHE_CUSTOM
Definition plannodes.h:41
@ PLAN_STMT_CACHE_GENERIC
Definition plannodes.h:40
#define sprintf
Definition port.h:263
#define pg_pwrite
Definition port.h:249
#define snprintf
Definition port.h:261
#define qsort(a, b, c, d)
Definition port.h:496
static Datum Int64GetDatum(int64 X)
Definition postgres.h:426
#define Int64GetDatumFast(X)
Definition postgres.h:538
#define Float8GetDatumFast(X)
Definition postgres.h:540
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition postgres.h:383
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
LocationLen * ComputeConstantLengths(const JumbleState *jstate, const char *query, int query_loc)
ScanDirection
Definition sdir.h:25
void RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
Definition shmem.c:873
#define ShmemRequestHash(...)
Definition shmem.h:179
#define ShmemRequestStruct(...)
Definition shmem.h:176
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
static void error(void)
instr_time local_blk_read_time
Definition instrument.h:38
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
instr_time temp_blk_write_time
Definition instrument.h:41
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
int64 local_blks_written
Definition instrument.h:33
instr_time temp_blk_read_time
Definition instrument.h:40
instr_time local_blk_write_time
Definition instrument.h:39
int64 temp_blks_read
Definition instrument.h:34
int64 shared_blks_read
Definition instrument.h:27
int64 shared_blks_written
Definition instrument.h:29
int64 temp_blks_written
Definition instrument.h:35
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
double jit_emission_time
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
double jit_deform_time
int64 parallel_workers_to_launch
double shared_blk_read_time
double jit_inlining_time
struct JitContext * es_jit
Definition execnodes.h:800
uint64 es_total_processed
Definition execnodes.h:752
WalUsage walusage
Definition instrument.h:90
instr_time total
Definition instrument.h:88
BufferUsage bufusage
Definition instrument.h:89
JitInstrumentation instr
Definition jit.h:62
Definition nodes.h:135
const char * p_sourcetext
Definition parse_node.h:214
int64 queryId
Definition plannodes.h:69
ParseLoc stmt_len
Definition plannodes.h:171
PlannedStmtOrigin planOrigin
Definition plannodes.h:75
ParseLoc stmt_location
Definition plannodes.h:169
Node * utilityStmt
Definition plannodes.h:153
uint64 nprocessed
Definition cmdtag.h:32
CommandTag commandTag
Definition cmdtag.h:31
const char * sourceText
Definition execdesc.h:38
EState * estate
Definition execdesc.h:50
PlannedStmt * plannedstmt
Definition execdesc.h:37
int query_instr_options
Definition execdesc.h:45
struct Instrumentation * query_instr
Definition execdesc.h:57
Node * utilityStmt
Definition parsenodes.h:141
ParseLoc stmt_location
Definition parsenodes.h:258
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
int64 wal_records
Definition instrument.h:53
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
pgssGlobalStats stats
__int64 st_size
Definition win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
LWLock lock
Definition lwlock.h:70
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition utility.c:548
ProcessUtility_hook_type ProcessUtility_hook
Definition utility.c:72
ProcessUtilityContext
Definition utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition timestamp.h:68
const char * name
#define fstat
Definition win32_port.h:73

◆ PGSS_NUMKIND

#define PGSS_NUMKIND   (PGSS_EXEC + 1)

Definition at line 133 of file pg_stat_statements.c.

◆ PGSS_TEXT_FILE

#define PGSS_TEXT_FILE   PG_STAT_TMP_DIR "/pgss_query_texts.stat"

Definition at line 85 of file pg_stat_statements.c.

◆ record_gc_qtexts

#define record_gc_qtexts ( )
Value:

Definition at line 315 of file pg_stat_statements.c.

316 { \
318 pgss->gc_count++; \
320 } while(0)

◆ SINGLE_ENTRY_RESET

#define SINGLE_ENTRY_RESET (   e)
Value:
if (e) { \
/* When requested reset only min/max statistics of an entry */ \
for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
{ \
e->counters.max_time[kind] = 0; \
e->counters.min_time[kind] = 0; \
} \
e->minmax_stats_since = stats_reset; \
} \
else \
{ \
/* Remove the key otherwise */ \
} \
}
e

Definition at line 2656 of file pg_stat_statements.c.

2657 { \
2658 if (minmax_only) { \
2659 /* When requested reset only min/max statistics of an entry */ \
2660 for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
2661 { \
2662 e->counters.max_time[kind] = 0; \
2663 e->counters.min_time[kind] = 0; \
2664 } \
2665 e->minmax_stats_since = stats_reset; \
2666 } \
2667 else \
2668 { \
2669 /* Remove the key otherwise */ \
2671 num_remove++; \
2672 } \
2673}

◆ STICKY_DECREASE_FACTOR

#define STICKY_DECREASE_FACTOR   (0.50) /* factor for sticky entries */

Definition at line 99 of file pg_stat_statements.c.

◆ USAGE_DEALLOC_PERCENT

#define USAGE_DEALLOC_PERCENT   5 /* free this % of entries at once */

Definition at line 100 of file pg_stat_statements.c.

◆ USAGE_DECREASE_FACTOR

#define USAGE_DECREASE_FACTOR   (0.99) /* decreased every entry_dealloc */

Definition at line 98 of file pg_stat_statements.c.

◆ USAGE_EXEC

#define USAGE_EXEC (   duration)    (1.0)

Definition at line 94 of file pg_stat_statements.c.

◆ USAGE_INIT

#define USAGE_INIT   (1.0) /* including initial planning */

Definition at line 95 of file pg_stat_statements.c.

Typedef Documentation

◆ Counters

◆ pgssEntry

◆ pgssGlobalStats

◆ pgssHashKey

◆ pgssSharedState

◆ pgssStoreKind

◆ pgssVersion

Enumeration Type Documentation

◆ pgssStoreKind

Enumerator
PGSS_INVALID 
PGSS_PLAN 
PGSS_EXEC 

Definition at line 120 of file pg_stat_statements.c.

121{
122 PGSS_INVALID = -1,
123
124 /*
125 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
126 * reference the underlying values in the arrays in the Counters struct,
127 * and this order is required in pg_stat_statements_internal().
128 */
129 PGSS_PLAN = 0,
130 PGSS_EXEC,
131} pgssStoreKind;

◆ PGSSTrackLevel

Enumerator
PGSS_TRACK_NONE 
PGSS_TRACK_TOP 
PGSS_TRACK_ALL 

Definition at line 288 of file pg_stat_statements.c.

289{
290 PGSS_TRACK_NONE, /* track no statements */
291 PGSS_TRACK_TOP, /* only top level statements */
292 PGSS_TRACK_ALL, /* all statements, including nested ones */
293} PGSSTrackLevel;

◆ pgssVersion

Enumerator
PGSS_V1_0 
PGSS_V1_1 
PGSS_V1_2 
PGSS_V1_3 
PGSS_V1_8 
PGSS_V1_9 
PGSS_V1_10 
PGSS_V1_11 
PGSS_V1_12 
PGSS_V1_13 

Definition at line 106 of file pg_stat_statements.c.

107{
108 PGSS_V1_0 = 0,
109 PGSS_V1_1,
110 PGSS_V1_2,
111 PGSS_V1_3,
112 PGSS_V1_8,
113 PGSS_V1_9,
114 PGSS_V1_10,
115 PGSS_V1_11,
116 PGSS_V1_12,
117 PGSS_V1_13,
118} pgssVersion;

Function Documentation

◆ _PG_init()

void _PG_init ( void  )

Definition at line 390 of file pg_stat_statements.c.

391{
392 /*
393 * In order to create our shared memory area, we have to be loaded via
394 * shared_preload_libraries. If not, fall out without hooking into any of
395 * the main system. (We don't throw error here because it seems useful to
396 * allow the pg_stat_statements functions to be created even when the
397 * module isn't active. The functions must protect themselves against
398 * being called then, however.)
399 */
400 if (!process_shared_preload_libraries_in_progress)
401 return;
402
403 /*
404 * Inform the postmaster that we want to enable query_id calculation if
405 * compute_query_id is set to auto.
406 */
407 EnableQueryId();
408
409 /*
410 * Define (or redefine) custom GUC variables.
411 */
412 DefineCustomIntVariable("pg_stat_statements.max",
413 "Sets the maximum number of statements tracked by pg_stat_statements.",
414 NULL,
415 &pgss_max,
416 5000,
417 100,
418 INT_MAX / 2,
420 0,
421 NULL,
422 NULL,
423 NULL);
424
425 DefineCustomEnumVariable("pg_stat_statements.track",
426 "Selects which statements are tracked by pg_stat_statements.",
427 NULL,
428 &pgss_track,
431 PGC_SUSET,
432 0,
433 NULL,
434 NULL,
435 NULL);
436
437 DefineCustomBoolVariable("pg_stat_statements.track_utility",
438 "Selects whether utility commands are tracked by pg_stat_statements.",
439 NULL,
441 true,
442 PGC_SUSET,
443 0,
444 NULL,
445 NULL,
446 NULL);
447
448 DefineCustomBoolVariable("pg_stat_statements.track_planning",
449 "Selects whether planning duration is tracked by pg_stat_statements.",
450 NULL,
452 false,
453 PGC_SUSET,
454 0,
455 NULL,
456 NULL,
457 NULL);
458
459 DefineCustomBoolVariable("pg_stat_statements.save",
460 "Save pg_stat_statements statistics across server shutdowns.",
461 NULL,
462 &pgss_save,
463 true,
465 0,
466 NULL,
467 NULL,
468 NULL);
469
470 MarkGUCPrefixReserved("pg_stat_statements");
471
472 /*
473 * Register our shared memory needs.
474 */
476
477 /*
478 * Install hooks.
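479 */
480 prev_post_parse_analyze_hook = post_parse_analyze_hook;
481 post_parse_analyze_hook = pgss_post_parse_analyze;
482 prev_planner_hook = planner_hook;
483 planner_hook = pgss_planner;
484 prev_ExecutorStart = ExecutorStart_hook;
485 ExecutorStart_hook = pgss_ExecutorStart;
486 prev_ExecutorRun = ExecutorRun_hook;
487 ExecutorRun_hook = pgss_ExecutorRun;
488 prev_ExecutorFinish = ExecutorFinish_hook;
489 ExecutorFinish_hook = pgss_ExecutorFinish;
490 prev_ExecutorEnd = ExecutorEnd_hook;
491 ExecutorEnd_hook = pgss_ExecutorEnd;
492 prev_ProcessUtility = ProcessUtility_hook;
493 ProcessUtility_hook = pgss_ProcessUtility;
494}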

References DefineCustomBoolVariable(), DefineCustomEnumVariable(), DefineCustomIntVariable(), EnableQueryId(), ExecutorEnd_hook, ExecutorFinish_hook, ExecutorRun_hook, ExecutorStart_hook, fb(), MarkGUCPrefixReserved(), PGC_POSTMASTER, PGC_SIGHUP, PGC_SUSET, pgss_ExecutorEnd(), pgss_ExecutorFinish(), pgss_ExecutorRun(), pgss_ExecutorStart(), pgss_max, pgss_planner(), pgss_post_parse_analyze(), pgss_ProcessUtility(), pgss_save, pgss_shmem_callbacks, pgss_track, pgss_track_planning, PGSS_TRACK_TOP, pgss_track_utility, planner_hook, post_parse_analyze_hook, prev_ExecutorEnd, prev_ExecutorFinish, prev_ExecutorRun, prev_ExecutorStart, prev_planner_hook, prev_post_parse_analyze_hook, prev_ProcessUtility, process_shared_preload_libraries_in_progress, ProcessUtility_hook, RegisterShmemCallbacks(), and track_options.

◆ entry_alloc()

static pgssEntry * entry_alloc ( pgssHashKey * key,
Size  query_offset,
int  query_len,
int  encoding,
bool  sticky 
)
static

Definition at line 2082 of file pg_stat_statements.c.

2084{
2085 pgssEntry *entry;
2086 bool found;
2087
2088 /* Make space if needed */
2089 while (hash_get_num_entries(pgss_hash) >= pgss_max)
2090 entry_dealloc();
2091
2092 /* Find or create an entry with desired hash code */
2093 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2094
2095 if (!found)
2096 {
2097 /* New entry, initialize it */
2098
2099 /* reset the statistics */
2100 memset(&entry->counters, 0, sizeof(Counters));
2101 /* set the appropriate initial usage count */
2103 /* re-initialize the mutex each time ... we assume no one using it */
2104 SpinLockInit(&entry->mutex);
2105 /* ... and don't forget the query text metadata */
2106 Assert(query_len >= 0);
2107 entry->query_offset = query_offset;
2108 entry->query_len = query_len;
2109 entry->encoding = encoding;
2111 entry->minmax_stats_since = entry->stats_since;
2112 }
2113
2114 return entry;
2115}

References Assert, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssEntry::encoding, encoding, entry_dealloc(), fb(), GetCurrentTimestamp(), HASH_ENTER, hash_get_num_entries(), hash_search(), pgssEntry::minmax_stats_since, pgssEntry::mutex, pgss, pgss_hash, pgss_max, pgssEntry::query_len, pgssEntry::query_offset, SpinLockInit(), pgssEntry::stats_since, Counters::usage, and USAGE_INIT.

Referenced by pgss_shmem_init(), and pgss_store().

◆ entry_cmp()

static int entry_cmp ( const void * lhs,
const void * rhs 
)
static

Definition at line 2121 of file pg_stat_statements.c.

2122{
2123 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2124 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2125
2126 if (l_usage < r_usage)
2127 return -1;
2128 else if (l_usage > r_usage)
2129 return +1;
2130 else
2131 return 0;
2132}

References pgssEntry::counters, fb(), and Counters::usage.

Referenced by entry_dealloc().

◆ entry_dealloc()

static void entry_dealloc ( void  )
static

Definition at line 2140 of file pg_stat_statements.c.

2141{
2143 pgssEntry **entries;
2144 pgssEntry *entry;
2145 int nvictims;
2146 int i;
2148 int nvalidtexts;
2149
2150 /*
2151 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2152 * While we're scanning the table, apply the decay factor to the usage
2153 * values, and update the mean query length.
2154 *
2155 * Note that the mean query length is almost immediately obsolete, since
2156 * we compute it before not after discarding the least-used entries.
2157 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2158 * making two passes to get a more current result. Likewise, the new
2159 * cur_median_usage includes the entries we're about to zap.
2160 */
2161
2162 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2163
2164 i = 0;
2165 tottextlen = 0;
2166 nvalidtexts = 0;
2167
2169 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2170 {
2171 entries[i++] = entry;
2172 /* "Sticky" entries get a different usage decay rate. */
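2173 if (IS_STICKY(entry->counters))
2174 entry->counters.usage *= STICKY_DECREASE_FACTOR;
2175 else
2176 entry->counters.usage *= USAGE_DECREASE_FACTOR;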
2177 /* In the mean length computation, ignore dropped texts. */
2178 if (entry->query_len >= 0)
2179 {
2180 tottextlen += entry->query_len + 1;
2181 nvalidtexts++;
2182 }
2183 }
2184
2185 /* Sort into increasing order by usage */
2186 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2187
2188 /* Record the (approximate) median usage */
2189 if (i > 0)
2190 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2191 /* Record the mean query length */
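2192 if (nvalidtexts > 0)
2193 pgss->mean_query_len = tottextlen / nvalidtexts;
2194 else
2195 pgss->mean_query_len = ASSUMED_LENGTH_INIT;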
2196
2197 /* Now zap an appropriate fraction of lowest-usage entries */
2198 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2199 nvictims = Min(nvictims, i);
2200
2201 for (i = 0; i < nvictims; i++)
2202 {
2203 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2204 }
2205
2206 pfree(entries);
2207
2208 /* Increment the number of times entries are deallocated */
2210 pgss->stats.dealloc += 1;
2212}

References ASSUMED_LENGTH_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_cmp(), fb(), hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), i, IS_STICKY, Max, pgssSharedState::mean_query_len, Min, pgssSharedState::mutex, palloc(), pfree(), pgss, pgss_hash, qsort, pgssEntry::query_len, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, STICKY_DECREASE_FACTOR, Counters::usage, USAGE_DEALLOC_PERCENT, and USAGE_DECREASE_FACTOR.

Referenced by entry_alloc().

◆ entry_reset()

static TimestampTz entry_reset ( Oid  userid,
Oid  dbid,
int64  queryid,
bool  minmax_only 
)
static

Definition at line 2679 of file pg_stat_statements.c.

2680{
2682 pgssEntry *entry;
2683 FILE *qfile;
2684 int64 num_entries;
2685 int64 num_remove = 0;
2687 TimestampTz stats_reset;
2688
2689 if (!pgss || !pgss_hash)
2690 ereport(ERROR,
2692 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2693
2695 num_entries = hash_get_num_entries(pgss_hash);
2696
2697 stats_reset = GetCurrentTimestamp();
2698
2699 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2700 {
2701 /* If all the parameters are available, use the fast path. */
2702 memset(&key, 0, sizeof(pgssHashKey));
2703 key.userid = userid;
2704 key.dbid = dbid;
2705 key.queryid = queryid;
2706
2707 /*
2708 * Reset the entry if it exists, starting with the non-top-level
2709 * entry.
2710 */
2711 key.toplevel = false;
2712 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2713
2714 SINGLE_ENTRY_RESET(entry);
2715
2716 /* Also reset the top-level entry if it exists. */
2717 key.toplevel = true;
2718 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2719
2720 SINGLE_ENTRY_RESET(entry);
2721 }
2722 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2723 {
2724 /* Reset entries corresponding to valid parameters. */
2726 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2727 {
2728 if ((!userid || entry->key.userid == userid) &&
2729 (!dbid || entry->key.dbid == dbid) &&
2730 (!queryid || entry->key.queryid == queryid))
2731 {
2732 SINGLE_ENTRY_RESET(entry);
2733 }
2734 }
2735 }
2736 else
2737 {
2738 /* Reset all entries. */
2740 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2741 {
2742 SINGLE_ENTRY_RESET(entry);
2743 }
2744 }
2745
2746 /* All entries are removed? */
2747 if (num_entries != num_remove)
2748 goto release_lock;
2749
2750 /*
2751 * Reset global statistics for pg_stat_statements since all entries are
2752 * removed.
2753 */
2755 pgss->stats.dealloc = 0;
2756 pgss->stats.stats_reset = stats_reset;
2758
2759 /*
2760 * Write new empty query file, perhaps even creating a new one to recover
2761 * if the file was missing.
2762 */
2764 if (qfile == NULL)
2765 {
2766 ereport(LOG,
2768 errmsg("could not create file \"%s\": %m",
2769 PGSS_TEXT_FILE)));
2770 goto done;
2771 }
2772
2773 /* If ftruncate fails, log it, but it's not a fatal problem */
2774 if (ftruncate(fileno(qfile), 0) != 0)
2775 ereport(LOG,
2777 errmsg("could not truncate file \"%s\": %m",
2778 PGSS_TEXT_FILE)));
2779
2780 FreeFile(qfile);
2781
2782done:
2783 pgss->extent = 0;
2784 /* This counts as a query text garbage collection for our purposes */
2786
2789
2790 return stats_reset;
2791}

References AllocateFile(), pgssHashKey::dbid, pgssGlobalStats::dealloc, ereport, errcode(), errcode_for_file_access(), errmsg, ERROR, pgssSharedState::extent, fb(), FreeFile(), GetCurrentTimestamp(), HASH_FIND, hash_get_num_entries(), hash_search(), hash_seq_init(), hash_seq_search(), INT64CONST, pgssEntry::key, pgssSharedState::lock, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pgssSharedState::mutex, PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, pgssHashKey::queryid, record_gc_qtexts, SINGLE_ENTRY_RESET, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssHashKey::userid.

Referenced by pg_stat_statements_reset(), pg_stat_statements_reset_1_11(), and pg_stat_statements_reset_1_7().

◆ gc_qtexts()

static void gc_qtexts ( void  )
static

Definition at line 2476 of file pg_stat_statements.c.

2477{
2478 char *qbuffer;
2480 FILE *qfile = NULL;
2482 pgssEntry *entry;
2483 Size extent;
2484 int nentries;
2485
2486 /*
2487 * When called from pgss_store, some other session might have proceeded
2488 * with garbage collection in the no-lock-held interim of lock strength
2489 * escalation. Check once more that this is actually necessary.
2490 */
2491 if (!need_gc_qtexts())
2492 return;
2493
2494 /*
2495 * Load the old texts file. If we fail (out of memory, for instance),
2496 * invalidate query texts. Hopefully this is rare. It might seem better
2497 * to leave things alone on an OOM failure, but the problem is that the
2498 * file is only going to get bigger; hoping for a future non-OOM result is
2499 * risky and can easily lead to complete denial of service.
2500 */
2502 if (qbuffer == NULL)
2503 goto gc_fail;
2504
2505 /*
2506 * We overwrite the query texts file in place, so as to reduce the risk of
2507 * an out-of-disk-space failure. Since the file is guaranteed not to get
2508 * larger, this should always work on traditional filesystems; though we
2509 * could still lose on copy-on-write filesystems.
2510 */
2512 if (qfile == NULL)
2513 {
2514 ereport(LOG,
2516 errmsg("could not write file \"%s\": %m",
2517 PGSS_TEXT_FILE)));
2518 goto gc_fail;
2519 }
2520
2521 extent = 0;
2522 nentries = 0;
2523
2525 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2526 {
2527 int query_len = entry->query_len;
2528 char *qry = qtext_fetch(entry->query_offset,
2529 query_len,
2530 qbuffer,
2531 qbuffer_size);
2532
2533 if (qry == NULL)
2534 {
2535 /* Trouble ... drop the text */
2536 entry->query_offset = 0;
2537 entry->query_len = -1;
2538 /* entry will not be counted in mean query length computation */
2539 continue;
2540 }
2541
2542 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2543 {
2544 ereport(LOG,
2546 errmsg("could not write file \"%s\": %m",
2547 PGSS_TEXT_FILE)));
2549 goto gc_fail;
2550 }
2551
2552 entry->query_offset = extent;
2553 extent += query_len + 1;
2554 nentries++;
2555 }
2556
2557 /*
2558 * Truncate away any now-unused space. If this fails for some odd reason,
2559 * we log it, but there's no need to fail.
2560 */
2561 if (ftruncate(fileno(qfile), extent) != 0)
2562 ereport(LOG,
2564 errmsg("could not truncate file \"%s\": %m",
2565 PGSS_TEXT_FILE)));
2566
2567 if (FreeFile(qfile))
2568 {
2569 ereport(LOG,
2571 errmsg("could not write file \"%s\": %m",
2572 PGSS_TEXT_FILE)));
2573 qfile = NULL;
2574 goto gc_fail;
2575 }
2576
2577 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2578 pgss->extent, extent);
2579
2580 /* Reset the shared extent pointer */
2581 pgss->extent = extent;
2582
2583 /*
2584 * Also update the mean query length, to be sure that need_gc_qtexts()
2585 * won't still think we have a problem.
2586 */
2587 if (nentries > 0)
2588 pgss->mean_query_len = extent / nentries;
2589 else
2591
2592 pfree(qbuffer);
2593
2594 /*
2595 * OK, count a garbage collection cycle. (Note: even though we have
2596 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2597 * other processes may examine gc_count while holding only the mutex.
2598 * Also, we have to advance the count *after* we've rewritten the file,
2599 * else other processes might not realize they read a stale file.)
2600 */
2602
2603 return;
2604
2605gc_fail:
2606 /* clean up resources */
2607 if (qfile)
2608 FreeFile(qfile);
2609 if (qbuffer)
2610 pfree(qbuffer);
2611
2612 /*
2613 * Since the contents of the external file are now uncertain, mark all
2614 * hashtable entries as having invalid texts.
2615 */
2617 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2618 {
2619 entry->query_offset = 0;
2620 entry->query_len = -1;
2621 }
2622
2623 /*
2624 * Destroy the query text file and create a new, empty one
2625 */
2628 if (qfile == NULL)
2629 ereport(LOG,
2631 errmsg("could not recreate file \"%s\": %m",
2632 PGSS_TEXT_FILE)));
2633 else
2634 FreeFile(qfile);
2635
2636 /* Reset the shared extent pointer */
2637 pgss->extent = 0;
2638
2639 /* Reset mean_query_len to match the new state */
2641
2642 /*
2643 * Bump the GC count even though we failed.
2644 *
2645 * This is needed to make concurrent readers of file without any lock on
2646 * pgss->lock notice existence of new version of file. Once readers
2647 * subsequently observe a change in GC count with pgss->lock held, that
2648 * forces a safe reopen of file. Writers also require that we bump here,
2649 * of course. (As required by locking protocol, readers and writers don't
2650 * trust earlier file contents until gc_count is found unchanged after
2651 * pgss->lock acquired in shared or exclusive mode respectively.)
2652 */
2654}

References AllocateFile(), ASSUMED_LENGTH_INIT, DEBUG1, elog, ereport, errcode_for_file_access(), errmsg, pgssSharedState::extent, fb(), FreeFile(), hash_seq_init(), hash_seq_search(), hash_seq_term(), LOG, pgssSharedState::mean_query_len, need_gc_qtexts(), pfree(), PG_BINARY_W, pgss, pgss_hash, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and record_gc_qtexts.

Referenced by pgss_store().

◆ generate_normalized_query()

static char * generate_normalized_query ( const JumbleState * jstate,
const char * query,
int  query_loc,
int * query_len_p 
)
static

Definition at line 2813 of file pg_stat_statements.c.

2815{
2816 char *norm_query;
2817 int query_len = *query_len_p;
2818 int norm_query_buflen, /* Space allowed for norm_query */
2819 len_to_wrt, /* Length (in bytes) to write */
2820 quer_loc = 0, /* Source query byte location */
2821 n_quer_loc = 0, /* Normalized query byte location */
2822 last_off = 0, /* Offset from start for previous tok */
2823 last_tok_len = 0; /* Length (in bytes) of that tok */
2824 int num_constants_replaced = 0;
2826
2827 /*
2828 * Determine constants' lengths (core system only gives us locations), and
2829 * return a sorted copy of jstate's LocationLen data with lengths filled
2830 * in.
2831 */
2833
2834 /*
2835 * Allow for $n symbols to be longer than the constants they replace.
2836 * Constants must take at least one byte in text form, while a $n symbol
2837 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2838 * could refine that limit based on the max value of n for the current
2839 * query, but it hardly seems worth any extra effort to do so.
2840 */
2841 norm_query_buflen = query_len + jstate->clocations_count * 10;
2842
2843 /* Allocate result buffer */
2845
2846 for (int i = 0; i < jstate->clocations_count; i++)
2847 {
2848 int off, /* Offset from start for cur tok */
2849 tok_len; /* Length (in bytes) of that tok */
2850
2851 /*
2852 * If we have an external param at this location, but no lists are
2853 * being squashed across the query, then we skip here; this will make
2854 * us print the characters found in the original query that represent
2855 * the parameter in the next iteration (or after the loop is done),
2856 * which is a bit odd but seems to work okay in most cases.
2857 */
2858 if (locs[i].extern_param && !jstate->has_squashed_lists)
2859 continue;
2860
2861 off = locs[i].location;
2862
2863 /* Adjust recorded location if we're dealing with partial string */
2864 off -= query_loc;
2865
2866 tok_len = locs[i].length;
2867
2868 if (tok_len < 0)
2869 continue; /* ignore any duplicates */
2870
2871 /* Copy next chunk (what precedes the next constant) */
2872 len_to_wrt = off - last_off;
2874 Assert(len_to_wrt >= 0);
2877
2878 /*
2879 * And insert a param symbol in place of the constant token; and, if
2880 * we have a squashable list, insert a placeholder comment starting
2881 * from the list's second value.
2882 */
2884 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2885 locs[i].squashed ? " /*, ... */" : "");
2887
2888 /* move forward */
2889 quer_loc = off + tok_len;
2890 last_off = off;
2892 }
2893
2894 /* Clean up, if needed */
2895 if (locs)
2896 pfree(locs);
2897
2898 /*
2899 * We've copied up until the last ignorable constant. Copy over the
2900 * remaining bytes of the original query string.
2901 */
2902 len_to_wrt = query_len - quer_loc;
2903
2904 Assert(len_to_wrt >= 0);
2907
2909 norm_query[n_quer_loc] = '\0';
2910
2912 return norm_query;
2913}

References Assert, ComputeConstantLengths(), fb(), i, memcpy(), palloc(), pfree(), and sprintf.

Referenced by pgss_store().

◆ need_gc_qtexts()

static bool need_gc_qtexts ( void  )
static

Definition at line 2427 of file pg_stat_statements.c.

2428{
2429 Size extent;
2430
2431 /* Read shared extent pointer */
2432 SpinLockAcquire(&pgss->mutex);
2433 extent = pgss->extent;
2434 SpinLockRelease(&pgss->mutex);
2435
2436 /*
2437 * Don't proceed if file does not exceed 512 bytes per possible entry.
2438 *
2439 * Here and in the next test, 32-bit machines have overflow hazards if
2440 * pgss_max and/or mean_query_len are large. Force the multiplications
2441 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2442 */
2443 if ((uint64) extent < (uint64) 512 * pgss_max)
2444 return false;
2445
2446 /*
2447 * Don't proceed if file is less than about 50% bloat. Nothing can or
2448 * should be done in the event of unusually large query texts accounting
2449 * for file's large size. We go to the trouble of maintaining the mean
2450 * query length in order to prevent garbage collection from thrashing
2451 * uselessly.
2452 */
2453 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2454 return false;
2455
2456 return true;
2457}

References pgssSharedState::extent, pgssSharedState::mean_query_len, pgssSharedState::mutex, pgss, pgss_max, SpinLockAcquire(), and SpinLockRelease().

Referenced by gc_qtexts(), and pgss_store().

◆ PG_FUNCTION_INFO_V1() [1/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements  )

◆ PG_FUNCTION_INFO_V1() [2/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_10  )

◆ PG_FUNCTION_INFO_V1() [3/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_11  )

◆ PG_FUNCTION_INFO_V1() [4/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_12  )

◆ PG_FUNCTION_INFO_V1() [5/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_13  )

◆ PG_FUNCTION_INFO_V1() [6/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_2  )

◆ PG_FUNCTION_INFO_V1() [7/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_3  )

◆ PG_FUNCTION_INFO_V1() [8/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_8  )

◆ PG_FUNCTION_INFO_V1() [9/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_1_9  )

◆ PG_FUNCTION_INFO_V1() [10/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_info  )

◆ PG_FUNCTION_INFO_V1() [11/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset  )

◆ PG_FUNCTION_INFO_V1() [12/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_11  )

◆ PG_FUNCTION_INFO_V1() [13/13]

PG_FUNCTION_INFO_V1 ( pg_stat_statements_reset_1_7  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "pg_stat_statements",
version = PG_VERSION 
)

◆ pg_stat_statements()

Datum pg_stat_statements ( PG_FUNCTION_ARGS  )

Definition at line 1658 of file pg_stat_statements.c.

1659{
1660 /* If it's really API 1.1, we'll figure that out below */
1661 pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
1662
1663 return (Datum) 0;
1664}

References pg_stat_statements_internal(), and PGSS_V1_0.

◆ pg_stat_statements_1_10()

Datum pg_stat_statements_1_10 ( PG_FUNCTION_ARGS  )

Definition at line 1604 of file pg_stat_statements.c.

1605{
1606 bool showtext = PG_GETARG_BOOL(0);
1607
1608 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1609
1610 return (Datum) 0;
1611}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_10.

◆ pg_stat_statements_1_11()

Datum pg_stat_statements_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1594 of file pg_stat_statements.c.

1595{
1596 bool showtext = PG_GETARG_BOOL(0);
1597
1598 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1599
1600 return (Datum) 0;
1601}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_11.

◆ pg_stat_statements_1_12()

Datum pg_stat_statements_1_12 ( PG_FUNCTION_ARGS  )

Definition at line 1584 of file pg_stat_statements.c.

1585{
1586 bool showtext = PG_GETARG_BOOL(0);
1587
1588 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1589
1590 return (Datum) 0;
1591}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_12.

◆ pg_stat_statements_1_13()

Datum pg_stat_statements_1_13 ( PG_FUNCTION_ARGS  )

Definition at line 1574 of file pg_stat_statements.c.

1575{
1576 bool showtext = PG_GETARG_BOOL(0);
1577
1578 pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1579
1580 return (Datum) 0;
1581}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_13.

◆ pg_stat_statements_1_2()

Datum pg_stat_statements_1_2 ( PG_FUNCTION_ARGS  )

Definition at line 1644 of file pg_stat_statements.c.

1645{
1646 bool showtext = PG_GETARG_BOOL(0);
1647
1648 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1649
1650 return (Datum) 0;
1651}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_2.

◆ pg_stat_statements_1_3()

Datum pg_stat_statements_1_3 ( PG_FUNCTION_ARGS  )

Definition at line 1634 of file pg_stat_statements.c.

1635{
1636 bool showtext = PG_GETARG_BOOL(0);
1637
1638 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1639
1640 return (Datum) 0;
1641}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_3.

◆ pg_stat_statements_1_8()

Datum pg_stat_statements_1_8 ( PG_FUNCTION_ARGS  )

Definition at line 1624 of file pg_stat_statements.c.

1625{
1626 bool showtext = PG_GETARG_BOOL(0);
1627
1628 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1629
1630 return (Datum) 0;
1631}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_8.

◆ pg_stat_statements_1_9()

Datum pg_stat_statements_1_9 ( PG_FUNCTION_ARGS  )

Definition at line 1614 of file pg_stat_statements.c.

1615{
1616 bool showtext = PG_GETARG_BOOL(0);
1617
1618 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1619
1620 return (Datum) 0;
1621}

References fb(), PG_GETARG_BOOL, pg_stat_statements_internal(), and PGSS_V1_9.

◆ pg_stat_statements_info()

Datum pg_stat_statements_info ( PG_FUNCTION_ARGS  )

Definition at line 2037 of file pg_stat_statements.c.

2038{
2039 pgssGlobalStats stats;
2040 TupleDesc tupdesc;
2042 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2043
2044 if (!pgss || !pgss_hash)
2045 ereport(ERROR,
2047 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2048
2049 /* Build a tuple descriptor for our result type */
2050 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2051 elog(ERROR, "return type must be a row type");
2052
2053 /* Read global statistics for pg_stat_statements */
2055 stats = pgss->stats;
2057
2058 values[0] = Int64GetDatum(stats.dealloc);
2060
2062}

References pgssGlobalStats::dealloc, elog, ereport, errcode(), errmsg, ERROR, fb(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int64GetDatum(), pgssSharedState::mutex, PG_RETURN_DATUM, PG_STAT_STATEMENTS_INFO_COLS, pgss, pgss_hash, SpinLockAcquire(), SpinLockRelease(), pgssSharedState::stats, pgssGlobalStats::stats_reset, TimestampTzGetDatum(), TYPEFUNC_COMPOSITE, and values.

◆ pg_stat_statements_internal()

static void pg_stat_statements_internal ( FunctionCallInfo  fcinfo,
pgssVersion  api_version,
bool  showtext 
)
static

Definition at line 1668 of file pg_stat_statements.c.

1671{
1673 Oid userid = GetUserId();
1674 bool is_allowed_role = false;
1675 char *qbuffer = NULL;
1676 Size qbuffer_size = 0;
1677 Size extent = 0;
1678 int gc_count = 0;
1680 pgssEntry *entry;
1681
1682 /*
1683 * Superusers or roles with the privileges of pg_read_all_stats members
1684 * are allowed
1685 */
1687
1688 /* hash table must exist already */
1689 if (!pgss || !pgss_hash)
1690 ereport(ERROR,
1692 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1693
1694 InitMaterializedSRF(fcinfo, 0);
1695
1696 /*
1697 * Check we have the expected number of output arguments. Aside from
1698 * being a good safety check, we need a kluge here to detect API version
1699 * 1.1, which was wedged into the code in an ill-considered way.
1700 */
1701 switch (rsinfo->setDesc->natts)
1702 {
1704 if (api_version != PGSS_V1_0)
1705 elog(ERROR, "incorrect number of output arguments");
1706 break;
1708 /* pg_stat_statements() should have told us 1.0 */
1709 if (api_version != PGSS_V1_0)
1710 elog(ERROR, "incorrect number of output arguments");
1711 api_version = PGSS_V1_1;
1712 break;
1714 if (api_version != PGSS_V1_2)
1715 elog(ERROR, "incorrect number of output arguments");
1716 break;
1718 if (api_version != PGSS_V1_3)
1719 elog(ERROR, "incorrect number of output arguments");
1720 break;
1722 if (api_version != PGSS_V1_8)
1723 elog(ERROR, "incorrect number of output arguments");
1724 break;
1726 if (api_version != PGSS_V1_9)
1727 elog(ERROR, "incorrect number of output arguments");
1728 break;
1730 if (api_version != PGSS_V1_10)
1731 elog(ERROR, "incorrect number of output arguments");
1732 break;
1734 if (api_version != PGSS_V1_11)
1735 elog(ERROR, "incorrect number of output arguments");
1736 break;
1738 if (api_version != PGSS_V1_12)
1739 elog(ERROR, "incorrect number of output arguments");
1740 break;
1742 if (api_version != PGSS_V1_13)
1743 elog(ERROR, "incorrect number of output arguments");
1744 break;
1745 default:
1746 elog(ERROR, "incorrect number of output arguments");
1747 }
1748
1749 /*
1750 * We'd like to load the query text file (if needed) while not holding any
1751 * lock on pgss->lock. In the worst case we'll have to do this again
1752 * after we have the lock, but it's unlikely enough to make this a win
1753 * despite occasional duplicated work. We need to reload if anybody
1754 * writes to the file (either a retail qtext_store(), or a garbage
1755 * collection) between this point and where we've gotten shared lock. If
1756 * a qtext_store is actually in progress when we look, we might as well
1757 * skip the speculative load entirely.
1758 */
1759 if (showtext)
1760 {
1761 int n_writers;
1762
1763 /* Take the mutex so we can examine variables */
1765 extent = pgss->extent;
1766 n_writers = pgss->n_writers;
1767 gc_count = pgss->gc_count;
1769
1770 /* No point in loading file now if there are active writers */
1771 if (n_writers == 0)
1773 }
1774
1775 /*
1776 * Get shared lock, load or reload the query text file if we must, and
1777 * iterate over the hashtable entries.
1778 *
1779 * With a large hash table, we might be holding the lock rather longer
1780 * than one could wish. However, this only blocks creation of new hash
1781 * table entries, and the larger the hash table the less likely that is to
1782 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1783 * we need to partition the hash table to limit the time spent holding any
1784 * one lock.
1785 */
1787
1788 if (showtext)
1789 {
1790 /*
1791 * Here it is safe to examine extent and gc_count without taking the
1792 * mutex. Note that although other processes might change
1793 * pgss->extent just after we look at it, the strings they then write
1794 * into the file cannot yet be referenced in the hashtable, so we
1795 * don't care whether we see them or not.
1796 *
1797 * If qtext_load_file fails, we just press on; we'll return NULL for
1798 * every query text.
1799 */
1800 if (qbuffer == NULL ||
1801 pgss->extent != extent ||
1802 pgss->gc_count != gc_count)
1803 {
1804 if (qbuffer)
1805 pfree(qbuffer);
1807 }
1808 }
1809
1811 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1812 {
1814 bool nulls[PG_STAT_STATEMENTS_COLS];
1815 int i = 0;
1816 Counters tmp;
1817 double stddev;
1818 int64 queryid = entry->key.queryid;
1819 TimestampTz stats_since;
1820 TimestampTz minmax_stats_since;
1821
1822 memset(values, 0, sizeof(values));
1823 memset(nulls, 0, sizeof(nulls));
1824
1825 values[i++] = ObjectIdGetDatum(entry->key.userid);
1826 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1827 if (api_version >= PGSS_V1_9)
1828 values[i++] = BoolGetDatum(entry->key.toplevel);
1829
1830 if (is_allowed_role || entry->key.userid == userid)
1831 {
1832 if (api_version >= PGSS_V1_2)
1833 values[i++] = Int64GetDatumFast(queryid);
1834
1835 if (showtext)
1836 {
1837 char *qstr = qtext_fetch(entry->query_offset,
1838 entry->query_len,
1839 qbuffer,
1840 qbuffer_size);
1841
1842 if (qstr)
1843 {
1844 char *enc;
1845
1847 entry->query_len,
1848 entry->encoding);
1849
1851
1852 if (enc != qstr)
1853 pfree(enc);
1854 }
1855 else
1856 {
1857 /* Just return a null if we fail to find the text */
1858 nulls[i++] = true;
1859 }
1860 }
1861 else
1862 {
1863 /* Query text not requested */
1864 nulls[i++] = true;
1865 }
1866 }
1867 else
1868 {
1869 /* Don't show queryid */
1870 if (api_version >= PGSS_V1_2)
1871 nulls[i++] = true;
1872
1873 /*
1874 * Don't show query text, but hint as to the reason for not doing
1875 * so if it was requested
1876 */
1877 if (showtext)
1878 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1879 else
1880 nulls[i++] = true;
1881 }
1882
1883 /* copy counters to a local variable to keep locking time short */
1884 SpinLockAcquire(&entry->mutex);
1885 tmp = entry->counters;
1886 SpinLockRelease(&entry->mutex);
1887
1888 /*
1889 * The spinlock is not required when reading these two as they are
1890 * always updated when holding pgss->lock exclusively.
1891 */
1892 stats_since = entry->stats_since;
1893 minmax_stats_since = entry->minmax_stats_since;
1894
1895 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1896 if (IS_STICKY(tmp))
1897 continue;
1898
1899 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1900 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1901 {
1902 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1903 {
1904 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1905 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1906 }
1907
1908 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1909 api_version >= PGSS_V1_8)
1910 {
1911 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1912 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1913 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1914
1915 /*
1916 * Note we are calculating the population variance here, not
1917 * the sample variance, as we have data for the whole
1918 * population, so Bessel's correction is not used, and we
1919 * don't divide by tmp.calls - 1.
1920 */
1921 if (tmp.calls[kind] > 1)
1922 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1923 else
1924 stddev = 0.0;
1925 values[i++] = Float8GetDatumFast(stddev);
1926 }
1927 }
1928 values[i++] = Int64GetDatumFast(tmp.rows);
1931 if (api_version >= PGSS_V1_1)
1936 if (api_version >= PGSS_V1_1)
1941 if (api_version >= PGSS_V1_1)
1942 {
1945 }
1946 if (api_version >= PGSS_V1_11)
1947 {
1950 }
1951 if (api_version >= PGSS_V1_10)
1952 {
1955 }
1956 if (api_version >= PGSS_V1_8)
1957 {
1958 char buf[256];
1959 Datum wal_bytes;
1960
1963
1964 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1965
1966 /* Convert to numeric. */
1967 wal_bytes = DirectFunctionCall3(numeric_in,
1970 Int32GetDatum(-1));
1971 values[i++] = wal_bytes;
1972 }
1973 if (api_version >= PGSS_V1_12)
1974 {
1976 }
1977 if (api_version >= PGSS_V1_10)
1978 {
1987 }
1988 if (api_version >= PGSS_V1_11)
1989 {
1992 }
1993 if (api_version >= PGSS_V1_12)
1994 {
1997 }
1998 if (api_version >= PGSS_V1_13)
1999 {
2002 }
2003 if (api_version >= PGSS_V1_11)
2004 {
2005 values[i++] = TimestampTzGetDatum(stats_since);
2006 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2007 }
2008
2009 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2010 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2011 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2012 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2013 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2014 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2015 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2016 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2017 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2018 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2019 -1 /* fail if you forget to update this assert */ ));
2020
2021 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2022 }
2023
2025
2026 if (qbuffer)
2027 pfree(qbuffer);
2028}

References Assert, BoolGetDatum(), buf, Counters::calls, pgssEntry::counters, CStringGetDatum(), CStringGetTextDatum, Counters::custom_plan_calls, pgssHashKey::dbid, DirectFunctionCall3, elog, enc, pgssEntry::encoding, ereport, errcode(), errmsg, ERROR, pgssSharedState::extent, fb(), Float8GetDatumFast, pgssSharedState::gc_count, Counters::generic_plan_calls, GetUserId(), has_privs_of_role(), hash_seq_init(), hash_seq_search(), i, InitMaterializedSRF(), Int32GetDatum(), Int64GetDatumFast, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, pgssEntry::key, Counters::local_blk_read_time, Counters::local_blk_write_time, Counters::local_blks_dirtied, Counters::local_blks_hit, Counters::local_blks_read, Counters::local_blks_written, pgssSharedState::lock, LWLockPadded::lock, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::minmax_stats_since, pgssEntry::mutex, pgssSharedState::mutex, pgssSharedState::n_writers, numeric_in(), ObjectIdGetDatum(), Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pg_any_to_server(), PG_STAT_STATEMENTS_COLS, PG_STAT_STATEMENTS_COLS_V1_0, PG_STAT_STATEMENTS_COLS_V1_1, PG_STAT_STATEMENTS_COLS_V1_10, PG_STAT_STATEMENTS_COLS_V1_11, PG_STAT_STATEMENTS_COLS_V1_12, PG_STAT_STATEMENTS_COLS_V1_13, PG_STAT_STATEMENTS_COLS_V1_2, PG_STAT_STATEMENTS_COLS_V1_3, PG_STAT_STATEMENTS_COLS_V1_8, PG_STAT_STATEMENTS_COLS_V1_9, pgss, PGSS_EXEC, pgss_hash, PGSS_NUMKIND, PGSS_V1_0, PGSS_V1_1, PGSS_V1_10, PGSS_V1_11, PGSS_V1_12, PGSS_V1_13, PGSS_V1_2, PGSS_V1_3, PGSS_V1_8, PGSS_V1_9, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, pgssHashKey::queryid, 
FunctionCallInfoBaseData::resultinfo, Counters::rows, Counters::shared_blk_read_time, Counters::shared_blk_write_time, Counters::shared_blks_dirtied, Counters::shared_blks_hit, Counters::shared_blks_read, Counters::shared_blks_written, snprintf, SpinLockAcquire(), SpinLockRelease(), pgssEntry::stats_since, Counters::sum_var_time, Counters::temp_blk_read_time, Counters::temp_blk_write_time, Counters::temp_blks_read, Counters::temp_blks_written, TimestampTzGetDatum(), pgssHashKey::toplevel, Counters::total_time, tuplestore_putvalues(), UINT64_FORMAT, pgssHashKey::userid, values, Counters::wal_buffers_full, Counters::wal_bytes, Counters::wal_fpi, and Counters::wal_records.

Referenced by pg_stat_statements(), pg_stat_statements_1_10(), pg_stat_statements_1_11(), pg_stat_statements_1_12(), pg_stat_statements_1_13(), pg_stat_statements_1_2(), pg_stat_statements_1_3(), pg_stat_statements_1_8(), and pg_stat_statements_1_9().

◆ pg_stat_statements_reset()

Datum pg_stat_statements_reset ( PG_FUNCTION_ARGS  )

Definition at line 1543 of file pg_stat_statements.c.

1544{
1545 entry_reset(0, 0, 0, false);
1546
1547 PG_RETURN_VOID();
1548}

References entry_reset(), and PG_RETURN_VOID.

◆ pg_stat_statements_reset_1_11()

Datum pg_stat_statements_reset_1_11 ( PG_FUNCTION_ARGS  )

Definition at line 1524 of file pg_stat_statements.c.

1525{
1526 Oid userid;
1527 Oid dbid;
1528 int64 queryid;
1529 bool minmax_only;
1530
1531 userid = PG_GETARG_OID(0);
1532 dbid = PG_GETARG_OID(1);
1533 queryid = PG_GETARG_INT64(2);
1534 minmax_only = PG_GETARG_BOOL(3);
1535
1536 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1537}

References entry_reset(), fb(), PG_GETARG_BOOL, PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_TIMESTAMPTZ.

◆ pg_stat_statements_reset_1_7()

Datum pg_stat_statements_reset_1_7 ( PG_FUNCTION_ARGS  )

Definition at line 1508 of file pg_stat_statements.c.

1509{
1510 Oid userid;
1511 Oid dbid;
1512 int64 queryid;
1513
1514 userid = PG_GETARG_OID(0);
1515 dbid = PG_GETARG_OID(1);
1516 queryid = PG_GETARG_INT64(2);
1517
1518 entry_reset(userid, dbid, queryid, false);
1519
1520 PG_RETURN_VOID();
1521}

References entry_reset(), PG_GETARG_INT64, PG_GETARG_OID, and PG_RETURN_VOID.

◆ pgss_ExecutorEnd()

static void pgss_ExecutorEnd ( QueryDesc * queryDesc)
static

Definition at line 1059 of file pg_stat_statements.c.

1060{
1061 int64 queryId = queryDesc->plannedstmt->queryId;
1062
1063 if (queryId != INT64CONST(0) && queryDesc->query_instr &&
1065 {
1066 pgss_store(queryDesc->sourceText,
1067 queryId,
1068 queryDesc->plannedstmt->stmt_location,
1069 queryDesc->plannedstmt->stmt_len,
1070 PGSS_EXEC,
1072 queryDesc->estate->es_total_processed,
1073 &queryDesc->query_instr->bufusage,
1074 &queryDesc->query_instr->walusage,
1075 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1076 NULL,
1077 queryDesc->estate->es_parallel_workers_to_launch,
1078 queryDesc->estate->es_parallel_workers_launched,
1079 queryDesc->plannedstmt->planOrigin);
1080 }
1081
1082 if (prev_ExecutorEnd)
1083 prev_ExecutorEnd(queryDesc);
1084 else
1085 standard_ExecutorEnd(queryDesc);
1086}

References Instrumentation::bufusage, EState::es_jit, EState::es_parallel_workers_launched, EState::es_parallel_workers_to_launch, EState::es_total_processed, QueryDesc::estate, fb(), JitContext::instr, INSTR_TIME_GET_MILLISEC, INT64CONST, nesting_level, pgss_enabled, PGSS_EXEC, pgss_store(), QueryDesc::plannedstmt, PlannedStmt::planOrigin, prev_ExecutorEnd, QueryDesc::query_instr, PlannedStmt::queryId, QueryDesc::sourceText, standard_ExecutorEnd(), PlannedStmt::stmt_len, PlannedStmt::stmt_location, Instrumentation::total, and Instrumentation::walusage.

Referenced by _PG_init().

◆ pgss_ExecutorFinish()

static void pgss_ExecutorFinish ( QueryDesc * queryDesc)
static

Definition at line 1038 of file pg_stat_statements.c.

1039{
1040 nesting_level++;
1041 PG_TRY();
1042 {
1043 if (prev_ExecutorFinish)
1044 prev_ExecutorFinish(queryDesc);
1045 else
1046 standard_ExecutorFinish(queryDesc);
1047 }
1048 PG_FINALLY();
1049 {
1050 nesting_level--;
1051 }
1052 PG_END_TRY();
1053}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorFinish, and standard_ExecutorFinish().

Referenced by _PG_init().

◆ pgss_ExecutorRun()

static void pgss_ExecutorRun ( QueryDesc * queryDesc,
ScanDirection  direction,
uint64  count 
)
static

Definition at line 1017 of file pg_stat_statements.c.

1018{
1019 nesting_level++;
1020 PG_TRY();
1021 {
1022 if (prev_ExecutorRun)
1023 prev_ExecutorRun(queryDesc, direction, count);
1024 else
1025 standard_ExecutorRun(queryDesc, direction, count);
1026 }
1027 PG_FINALLY();
1028 {
1029 nesting_level--;
1030 }
1031 PG_END_TRY();
1032}

References nesting_level, PG_END_TRY, PG_FINALLY, PG_TRY, prev_ExecutorRun, and standard_ExecutorRun().

Referenced by _PG_init().

◆ pgss_ExecutorStart()

static void pgss_ExecutorStart ( QueryDesc * queryDesc,
int  eflags 
)
static

Definition at line 994 of file pg_stat_statements.c.

995{
996 /*
997 * If query has queryId zero, don't track it. This prevents double
998 * counting of optimizable statements that are directly contained in
999 * utility statements.
1000 */
1001 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1002 {
1003 /* Request all summary instrumentation, i.e. timing, buffers and WAL */
1004 queryDesc->query_instr_options |= INSTRUMENT_ALL;
1005 }
1006
1007 if (prev_ExecutorStart)
1008 prev_ExecutorStart(queryDesc, eflags);
1009 else
1010 standard_ExecutorStart(queryDesc, eflags);
1011}

References INSTRUMENT_ALL, INT64CONST, nesting_level, pgss_enabled, QueryDesc::plannedstmt, prev_ExecutorStart, QueryDesc::query_instr_options, PlannedStmt::queryId, and standard_ExecutorStart().

Referenced by _PG_init().

◆ pgss_planner()

static PlannedStmt * pgss_planner ( Query * parse,
const char * query_string,
int  cursorOptions,
ParamListInfo  boundParams,
ExplainState * es 
)
static

Definition at line 887 of file pg_stat_statements.c.

892{
894
895 /*
896 * We can't process the query if no query_string is provided, as
897 * pgss_store needs it. We also ignore query without queryid, as it would
898 * be treated as a utility statement, which may not be the case.
899 */
901 && pgss_track_planning && query_string
902 && parse->queryId != INT64CONST(0))
903 {
906 BufferUsage bufusage_start,
907 bufusage;
908 WalUsage walusage_start,
909 walusage;
910
911 /* We need to track buffer usage as the planner can access them. */
912 bufusage_start = pgBufferUsage;
913
914 /*
915 * Similarly the planner could write some WAL records in some cases
916 * (e.g. setting a hint bit with those being WAL-logged)
917 */
918 walusage_start = pgWalUsage;
920
922 PG_TRY();
923 {
924 if (prev_planner_hook)
925 result = prev_planner_hook(parse, query_string, cursorOptions,
926 boundParams, es);
927 else
928 result = standard_planner(parse, query_string, cursorOptions,
929 boundParams, es);
930 }
931 PG_FINALLY();
932 {
934 }
935 PG_END_TRY();
936
939
940 /* calc differences of buffer counters. */
941 memset(&bufusage, 0, sizeof(BufferUsage));
942 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943
944 /* calc differences of WAL counters. */
945 memset(&walusage, 0, sizeof(WalUsage));
946 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947
948 pgss_store(query_string,
949 parse->queryId,
950 parse->stmt_location,
951 parse->stmt_len,
952 PGSS_PLAN,
954 0,
955 &bufusage,
956 &walusage,
957 NULL,
958 NULL,
959 0,
960 0,
961 result->planOrigin);
962 }
963 else
964 {
965 /*
966 * Even though we're not tracking plan time for this statement, we
967 * must still increment the nesting level, to ensure that functions
968 * evaluated during planning are not seen as top-level calls.
969 */
971 PG_TRY();
972 {
973 if (prev_planner_hook)
974 result = prev_planner_hook(parse, query_string, cursorOptions,
975 boundParams, es);
976 else
977 result = standard_planner(parse, query_string, cursorOptions,
978 boundParams, es);
979 }
980 PG_FINALLY();
981 {
983 }
984 PG_END_TRY();
985 }
986
987 return result;
988}

References BufferUsageAccumDiff(), duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, nesting_level, parse(), PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_PLAN, pgss_store(), pgss_track_planning, pgWalUsage, prev_planner_hook, result, standard_planner(), start, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_post_parse_analyze()

static void pgss_post_parse_analyze ( ParseState * pstate,
Query * query,
const JumbleState * jstate 
)
static

Definition at line 834 of file pg_stat_statements.c.

835{
836 if (prev_post_parse_analyze_hook)
837 prev_post_parse_analyze_hook(pstate, query, jstate);
838
839 /* Safety check... */
840 if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
841 return;
842
843 /*
844 * If it's EXECUTE, clear the queryId so that stats will accumulate for
845 * the underlying PREPARE. But don't do this if we're not tracking
846 * utility statements, to avoid messing up another extension that might be
847 * tracking them.
848 */
849 if (query->utilityStmt)
850 {
852 {
853 query->queryId = INT64CONST(0);
854 return;
855 }
856 }
857
858 /*
859 * If query jumbling were able to identify any ignorable constants, we
860 * immediately create a hash table entry for the query, so that we can
861 * record the normalized form of the query string. If there were no such
862 * constants, the normalized string would be the same as the query text
863 * anyway, so there's no need for an early entry.
864 */
865 if (jstate && jstate->clocations_count > 0)
866 pgss_store(pstate->p_sourcetext,
867 query->queryId,
868 query->stmt_location,
869 query->stmt_len,
871 0,
872 0,
873 NULL,
874 NULL,
875 NULL,
876 jstate,
877 0,
878 0,
880}

References fb(), INT64CONST, IsA, nesting_level, ParseState::p_sourcetext, pgss, pgss_enabled, pgss_hash, PGSS_INVALID, pgss_store(), pgss_track_utility, PLAN_STMT_UNKNOWN, prev_post_parse_analyze_hook, Query::stmt_location, and Query::utilityStmt.

Referenced by _PG_init().

◆ pgss_ProcessUtility()

static void pgss_ProcessUtility ( PlannedStmt * pstmt,
const char * queryString,
bool  readOnlyTree,
ProcessUtilityContext  context,
ParamListInfo  params,
QueryEnvironment * queryEnv,
DestReceiver * dest,
QueryCompletion * qc 
)
static

Definition at line 1092 of file pg_stat_statements.c.

1097{
1098 Node *parsetree = pstmt->utilityStmt;
1099 int64 saved_queryId = pstmt->queryId;
1101 int saved_stmt_len = pstmt->stmt_len;
1104
1105 /*
1106 * Force utility statements to get queryId zero. We do this even in cases
1107 * where the statement contains an optimizable statement for which a
1108 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1109 * cases, runtime control will first go through ProcessUtility and then
1110 * the executor, and we don't want the executor hooks to do anything,
1111 * since we are already measuring the statement's costs at the utility
1112 * level.
1113 *
1114 * Note that this is only done if pg_stat_statements is enabled and
1115 * configured to track utility statements, in the unlikely possibility
1116 * that user configured another extension to handle utility statements
1117 * only.
1118 */
1119 if (enabled)
1120 pstmt->queryId = INT64CONST(0);
1121
1122 /*
1123 * If it's an EXECUTE statement, we don't track it and don't increment the
1124 * nesting level. This allows the cycles to be charged to the underlying
1125 * PREPARE instead (by the Executor hooks), which is much more useful.
1126 *
1127 * We also don't track execution of PREPARE. If we did, we would get one
1128 * hash table entry for the PREPARE (with hash calculated from the query
1129 * string), and then a different one with the same query string (but hash
1130 * calculated from the query tree) would be used to accumulate costs of
1131 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1132 * actually run the planner (only parse+rewrite), its costs are generally
1133 * pretty negligible and it seems okay to just ignore it.
1134 */
1135 if (enabled &&
1136 !IsA(parsetree, ExecuteStmt) &&
1137 !IsA(parsetree, PrepareStmt))
1138 {
1141 uint64 rows;
1142 BufferUsage bufusage_start,
1143 bufusage;
1144 WalUsage walusage_start,
1145 walusage;
1146
1147 bufusage_start = pgBufferUsage;
1148 walusage_start = pgWalUsage;
1150
1151 nesting_level++;
1152 PG_TRY();
1153 {
1154 if (prev_ProcessUtility)
1155 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1156 context, params, queryEnv,
1157 dest, qc);
1158 else
1159 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1160 context, params, queryEnv,
1161 dest, qc);
1162 }
1163 PG_FINALLY();
1164 {
1165 nesting_level--;
1166 }
1167 PG_END_TRY();
1168
1169 /*
1170 * CAUTION: do not access the *pstmt data structure again below here.
1171 * If it was a ROLLBACK or similar, that data structure may have been
1172 * freed. We must copy everything we still need into local variables,
1173 * which we did above.
1174 *
1175 * For the same reason, we can't risk restoring pstmt->queryId to its
1176 * former value, which'd otherwise be a good idea.
1177 */
1178 pstmt = NULL;
1179
1182
1183 /*
1184 * Track the total number of rows retrieved or affected by the utility
1185 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1186 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1187 */
1188 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1189 qc->commandTag == CMDTAG_FETCH ||
1190 qc->commandTag == CMDTAG_SELECT ||
1192 qc->nprocessed : 0;
1193
1194 /* calc differences of buffer counters. */
1195 memset(&bufusage, 0, sizeof(BufferUsage));
1196 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1197
1198 /* calc differences of WAL counters. */
1199 memset(&walusage, 0, sizeof(WalUsage));
1200 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1201
1202 pgss_store(queryString,
1206 PGSS_EXEC,
1208 rows,
1209 &bufusage,
1210 &walusage,
1211 NULL,
1212 NULL,
1213 0,
1214 0,
1216 }
1217 else
1218 {
1219 /*
1220 * Even though we're not tracking execution time for this statement,
1221 * we must still increment the nesting level, to ensure that functions
1222 * evaluated within it are not seen as top-level calls. But don't do
1223 * so for EXECUTE; that way, when control reaches pgss_planner or
1224 * pgss_ExecutorStart, we will treat the costs as top-level if
1225 * appropriate. Likewise, don't bump for PREPARE, so that parse
1226 * analysis will treat the statement as top-level if appropriate.
1227 *
1228 * To be absolutely certain we don't mess up the nesting level,
1229 * evaluate the bump_level condition just once.
1230 */
1231 bool bump_level =
1232 !IsA(parsetree, ExecuteStmt) &&
1233 !IsA(parsetree, PrepareStmt);
1234
1235 if (bump_level)
1236 nesting_level++;
1237 PG_TRY();
1238 {
1239 if (prev_ProcessUtility)
1240 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1241 context, params, queryEnv,
1242 dest, qc);
1243 else
1244 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1245 context, params, queryEnv,
1246 dest, qc);
1247 }
1248 PG_FINALLY();
1249 {
1250 if (bump_level)
1251 nesting_level--;
1252 }
1253 PG_END_TRY();
1254 }
1255}

References BufferUsageAccumDiff(), QueryCompletion::commandTag, duration, fb(), INSTR_TIME_GET_MILLISEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, INT64CONST, IsA, nesting_level, QueryCompletion::nprocessed, PG_END_TRY, PG_FINALLY, PG_TRY, pgBufferUsage, pgss_enabled, PGSS_EXEC, pgss_store(), pgss_track_utility, pgWalUsage, PlannedStmt::planOrigin, prev_ProcessUtility, PlannedStmt::queryId, standard_ProcessUtility(), start, PlannedStmt::stmt_len, PlannedStmt::stmt_location, PlannedStmt::utilityStmt, and WalUsageAccumDiff().

Referenced by _PG_init().

◆ pgss_shmem_init()

static void pgss_shmem_init ( void * arg)
static

Definition at line 530 of file pg_stat_statements.c.

531{
532 int tranche_id;
533 FILE *file = NULL;
534 FILE *qfile = NULL;
535 uint32 header;
536 int32 num;
537 int32 pgver;
538 int32 i;
539 int buffer_size;
540 char *buffer = NULL;
541
542 /*
543 * We already checked that we're loaded from shared_preload_libraries in
544 * _PG_init(), so we should not get here after postmaster startup.
545 */
547
548 /*
549 * Initialize the shmem area with no statistics.
550 */
551 tranche_id = LWLockNewTrancheId("pg_stat_statements");
552 LWLockInitialize(&pgss->lock.lock, tranche_id);
556 pgss->extent = 0;
557 pgss->n_writers = 0;
558 pgss->gc_count = 0;
559 pgss->stats.dealloc = 0;
561
562 /* The hash table must've also been initialized by now */
564
565 /*
566 * Set up a shmem exit hook to dump the statistics to disk on postmaster
567 * (or standalone backend) exit.
568 */
570
571 /*
572 * Load any pre-existing statistics from file.
573 *
574 * Note: we don't bother with locks here, because there should be no other
575 * processes running when this code is reached.
576 */
577
578 /* Unlink query text file possibly left over from crash */
580
581 /* Allocate new query text temp file */
583 if (qfile == NULL)
584 goto write_error;
585
586 /*
587 * If we were told not to load old statistics, we're done. (Note we do
588 * not try to unlink any old dump file in this case. This seems a bit
589 * questionable but it's the historical behavior.)
590 */
591 if (!pgss_save)
592 {
594 return;
595 }
596
597 /*
598 * Attempt to load old statistics from the dump file.
599 */
601 if (file == NULL)
602 {
603 if (errno != ENOENT)
604 goto read_error;
605 /* No existing persisted stats file, so we're done */
607 return;
608 }
609
610 buffer_size = 2048;
611 buffer = (char *) palloc(buffer_size);
612
613 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
614 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
615 fread(&num, sizeof(int32), 1, file) != 1)
616 goto read_error;
617
618 if (header != PGSS_FILE_HEADER ||
620 goto data_error;
621
622 for (i = 0; i < num; i++)
623 {
625 pgssEntry *entry;
626 Size query_offset;
627
628 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
629 goto read_error;
630
631 /* Encoding is the only field we can easily sanity-check */
632 if (!PG_VALID_BE_ENCODING(temp.encoding))
633 goto data_error;
634
635 /* Resize buffer as needed */
636 if (temp.query_len >= buffer_size)
637 {
638 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
639 buffer = repalloc(buffer, buffer_size);
640 }
641
642 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
643 goto read_error;
644
645 /* Should have a trailing null, but let's make sure */
646 buffer[temp.query_len] = '\0';
647
648 /* Skip loading "sticky" entries */
649 if (IS_STICKY(temp.counters))
650 continue;
651
652 /* Store the query text */
653 query_offset = pgss->extent;
654 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
655 goto write_error;
656 pgss->extent += temp.query_len + 1;
657
658 /* make the hashtable entry (discards old entries if too many) */
659 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
660 temp.encoding,
661 false);
662
663 /* copy in the actual stats */
664 entry->counters = temp.counters;
665 entry->stats_since = temp.stats_since;
666 entry->minmax_stats_since = temp.minmax_stats_since;
667 }
668
669 /* Read global statistics for pg_stat_statements */
670 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
671 goto read_error;
672
673 pfree(buffer);
674 FreeFile(file);
676
677 /*
678 * Remove the persisted stats file so it's not included in
679 * backups/replication standbys, etc. A new file will be written on next
680 * shutdown.
681 *
682 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
683 * because we remove that file on startup; it acts inversely to
684 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
685 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
686 * when the server is not running. Leaving the file creates no danger of
687 * a newly restored database having a spurious record of execution costs,
688 * which is what we're really concerned about here.
689 */
691
692 return;
693
695 ereport(LOG,
697 errmsg("could not read file \"%s\": %m",
699 goto fail;
701 ereport(LOG,
703 errmsg("ignoring invalid data in file \"%s\"",
705 goto fail;
707 ereport(LOG,
709 errmsg("could not write file \"%s\": %m",
711fail:
712 if (buffer)
713 pfree(buffer);
714 if (file)
715 FreeFile(file);
716 if (qfile)
718 /* If possible, throw away the bogus file; ignore any error */
720
721 /*
722 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
723 * server is running with pg_stat_statements enabled
724 */
725}

References AllocateFile(), Assert, ASSUMED_LENGTH_INIT, ASSUMED_MEDIAN_INIT, pgssEntry::counters, pgssSharedState::cur_median_usage, pgssGlobalStats::dealloc, entry_alloc(), ereport, errcode(), errcode_for_file_access(), errmsg, pgssSharedState::extent, fb(), FreeFile(), pgssSharedState::gc_count, GetCurrentTimestamp(), i, IS_STICKY, IsUnderPostmaster, pgssSharedState::lock, LWLockPadded::lock, LOG, LWLockInitialize(), LWLockNewTrancheId(), Max, pgssSharedState::mean_query_len, pgssEntry::minmax_stats_since, pgssSharedState::mutex, pgssSharedState::n_writers, on_shmem_exit(), palloc(), pfree(), PG_BINARY_R, PG_BINARY_W, PG_VALID_BE_ENCODING, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, PGSS_PG_MAJOR_VERSION, pgss_save, pgss_shmem_shutdown(), PGSS_TEXT_FILE, repalloc(), SpinLockInit(), pgssSharedState::stats, pgssGlobalStats::stats_reset, and pgssEntry::stats_since.

◆ pgss_shmem_request()

static void pgss_shmem_request ( void * arg)
static

Definition at line 506 of file pg_stat_statements.c.

507{
508 ShmemRequestHash(.name = "pg_stat_statements hash",
509 .nelems = pgss_max,
510 .hash_info.keysize = sizeof(pgssHashKey),
511 .hash_info.entrysize = sizeof(pgssEntry),
512 .hash_flags = HASH_ELEM | HASH_BLOBS,
513 .ptr = &pgss_hash,
514 );
515 ShmemRequestStruct(.name = "pg_stat_statements",
516 .size = sizeof(pgssSharedState),
517 .ptr = (void **) &pgss,
518 );
519}

References HASH_BLOBS, HASH_ELEM, name, pgss, pgss_hash, pgss_max, ShmemRequestHash, and ShmemRequestStruct.

◆ pgss_shmem_shutdown()

static void pgss_shmem_shutdown ( int  code,
Datum  arg 
)
static

Definition at line 734 of file pg_stat_statements.c.

735{
736 FILE *file;
737 char *qbuffer = NULL;
738 Size qbuffer_size = 0;
739 HASH_SEQ_STATUS hash_seq;
740 int32 num_entries;
741 pgssEntry *entry;
742
743 /* Don't try to dump during a crash. */
744 if (code)
745 return;
746
747 /* Safety check ... shouldn't get here unless shmem is set up. */
748 if (!pgss || !pgss_hash)
749 return;
750
751 /* Don't dump if told not to. */
752 if (!pgss_save)
753 return;
754
755 file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
756 if (file == NULL)
757 goto error;
758
759 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
760 goto error;
761 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
762 goto error;
763 num_entries = hash_get_num_entries(pgss_hash);
764 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
765 goto error;
766
767 qbuffer = qtext_load_file(&qbuffer_size);
768 if (qbuffer == NULL)
769 goto error;
770
771 /*
772 * When serializing to disk, we store query texts immediately after their
773 * entry data. Any orphaned query texts are thereby excluded.
774 */
775 hash_seq_init(&hash_seq, pgss_hash);
776 while ((entry = hash_seq_search(&hash_seq)) != NULL)
777 {
778 int len = entry->query_len;
779 char *qstr = qtext_fetch(entry->query_offset, len,
780 qbuffer, qbuffer_size);
781
782 if (qstr == NULL)
783 continue; /* Ignore any entries with bogus texts */
784
785 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
786 fwrite(qstr, 1, len + 1, file) != len + 1)
787 {
788 /* note: we assume hash_seq_term won't change errno */
789 hash_seq_term(&hash_seq);
790 goto error;
791 }
792 }
793
794 /* Dump global statistics for pg_stat_statements */
795 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
796 goto error;
797
798 pfree(qbuffer);
799 qbuffer = NULL;
800
801 if (FreeFile(file))
802 {
803 file = NULL;
804 goto error;
805 }
806
807 /*
808 * Rename file into place, so we atomically replace any old one.
809 */
810 (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
811
812 /* Unlink query-texts file; it's not needed while shutdown */
813 unlink(PGSS_TEXT_FILE);
814
815 return;
816
817error:
818 ereport(LOG,
819 (errcode_for_file_access(),
820 errmsg("could not write file \"%s\": %m",
821 PGSS_DUMP_FILE ".tmp")));
822 if (qbuffer)
823 pfree(qbuffer);
824 if (file)
825 FreeFile(file);
826 unlink(PGSS_DUMP_FILE ".tmp");
827 unlink(PGSS_TEXT_FILE);
828}

References AllocateFile(), durable_rename(), ereport, errcode_for_file_access(), errmsg, error(), fb(), FreeFile(), hash_get_num_entries(), hash_seq_init(), hash_seq_search(), hash_seq_term(), len, LOG, pfree(), PG_BINARY_W, pgss, PGSS_DUMP_FILE, PGSS_FILE_HEADER, pgss_hash, PGSS_PG_MAJOR_VERSION, pgss_save, PGSS_TEXT_FILE, qtext_fetch(), qtext_load_file(), pgssEntry::query_len, pgssEntry::query_offset, and pgssSharedState::stats.

Referenced by pgss_shmem_init().

◆ pgss_store()

static void pgss_store ( const char * query,
int64  queryId,
int  query_location,
int  query_len,
pgssStoreKind  kind,
double  total_time,
uint64  rows,
const BufferUsage * bufusage,
const WalUsage * walusage,
const struct JitInstrumentation * jitusage,
const JumbleState * jstate,
int  parallel_workers_to_launch,
int  parallel_workers_launched,
PlannedStmtOrigin  planOrigin 
)
static

Definition at line 1269 of file pg_stat_statements.c.

1280{
1282 pgssEntry *entry;
1283 char *norm_query = NULL;
1285
1286 Assert(query != NULL);
1287
1288 /* Safety check... */
1289 if (!pgss || !pgss_hash)
1290 return;
1291
1292 /*
1293 * Nothing to do if compute_query_id isn't enabled and no other module
1294 * computed a query identifier.
1295 */
1296 if (queryId == INT64CONST(0))
1297 return;
1298
1299 /*
1300 * Confine our attention to the relevant part of the string, if the query
1301 * is a portion of a multi-statement source string, and update query
1302 * location and length if needed.
1303 */
1304 query = CleanQuerytext(query, &query_location, &query_len);
1305
1306 /* Set up key for hashtable search */
1307
1308 /* clear padding */
1309 memset(&key, 0, sizeof(pgssHashKey));
1310
1311 key.userid = GetUserId();
1312 key.dbid = MyDatabaseId;
1313 key.queryid = queryId;
1314 key.toplevel = (nesting_level == 0);
1315
1316 /* Lookup the hash table entry with shared lock. */
1318
1319 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1320
1321 /* Create new entry, if not present */
1322 if (!entry)
1323 {
1324 Size query_offset;
1325 int gc_count;
1326 bool stored;
1327 bool do_gc;
1328
1329 /*
1330 * Create a new, normalized query string if caller asked. We don't
1331 * need to hold the lock while doing this work. (Note: in any case,
1332 * it's possible that someone else creates a duplicate hashtable entry
1333 * in the interval where we don't hold the lock below. That case is
1334 * handled by entry_alloc.)
1335 */
1336 if (jstate)
1337 {
1341 &query_len);
1343 }
1344
1345 /* Append new query text to file with only shared lock held */
1346 stored = qtext_store(norm_query ? norm_query : query, query_len,
1347 &query_offset, &gc_count);
1348
1349 /*
1350 * Determine whether we need to garbage collect external query texts
1351 * while the shared lock is still held. This micro-optimization
1352 * avoids taking the time to decide this while holding exclusive lock.
1353 */
1354 do_gc = need_gc_qtexts();
1355
1356 /* Need exclusive lock to make a new hashtable entry - promote */
1359
1360 /*
1361 * A garbage collection may have occurred while we weren't holding the
1362 * lock. In the unlikely event that this happens, the query text we
1363 * stored above will have been garbage collected, so write it again.
1364 * This should be infrequent enough that doing it while holding
1365 * exclusive lock isn't a performance problem.
1366 */
1367 if (!stored || pgss->gc_count != gc_count)
1368 stored = qtext_store(norm_query ? norm_query : query, query_len,
1369 &query_offset, NULL);
1370
1371 /* If we failed to write to the text file, give up */
1372 if (!stored)
1373 goto done;
1374
1375 /* OK to create a new hashtable entry */
1376 entry = entry_alloc(&key, query_offset, query_len, encoding,
1377 jstate != NULL);
1378
1379 /* If needed, perform garbage collection while exclusive lock held */
1380 if (do_gc)
1381 gc_qtexts();
1382 }
1383
1384 /* Increment the counts, except when jstate is not NULL */
1385 if (!jstate)
1386 {
1387 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1388
1389 /*
1390 * Grab the spinlock while updating the counters (see comment about
1391 * locking rules at the head of the file)
1392 */
1393 SpinLockAcquire(&entry->mutex);
1394
1395 /* "Unstick" entry if it was previously sticky */
1396 if (IS_STICKY(entry->counters))
1397 entry->counters.usage = USAGE_INIT;
1398
1399 entry->counters.calls[kind] += 1;
1400 entry->counters.total_time[kind] += total_time;
1401
1402 if (entry->counters.calls[kind] == 1)
1403 {
1404 entry->counters.min_time[kind] = total_time;
1405 entry->counters.max_time[kind] = total_time;
1406 entry->counters.mean_time[kind] = total_time;
1407 }
1408 else
1409 {
1410 /*
1411 * Welford's method for accurately computing variance. See
1412 * <http://www.johndcook.com/blog/standard_deviation/>
1413 */
1414 double old_mean = entry->counters.mean_time[kind];
1415
1416 entry->counters.mean_time[kind] +=
1417 (total_time - old_mean) / entry->counters.calls[kind];
1418 entry->counters.sum_var_time[kind] +=
1419 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1420
1421 /*
1422 * Calculate min and max time. min = 0 and max = 0 means that the
1423 * min/max statistics were reset
1424 */
1425 if (entry->counters.min_time[kind] == 0
1426 && entry->counters.max_time[kind] == 0)
1427 {
1428 entry->counters.min_time[kind] = total_time;
1429 entry->counters.max_time[kind] = total_time;
1430 }
1431 else
1432 {
1433 if (entry->counters.min_time[kind] > total_time)
1434 entry->counters.min_time[kind] = total_time;
1435 if (entry->counters.max_time[kind] < total_time)
1436 entry->counters.max_time[kind] = total_time;
1437 }
1438 }
1439 entry->counters.rows += rows;
1440 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
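1441 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1442 entry->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1443 entry->counters.shared_blks_written += bufusage->shared_blks_written;
1444 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1445 entry->counters.local_blks_read += bufusage->local_blks_read;
1446 entry->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1447 entry->counters.local_blks_written += bufusage->local_blks_written;
1448 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1449 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1450 entry->counters.shared_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_read_time);
1451 entry->counters.shared_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->shared_blk_write_time);
1452 entry->counters.local_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_read_time);
1453 entry->counters.local_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->local_blk_write_time);
1454 entry->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
1455 entry->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);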
1456 entry->counters.usage += USAGE_EXEC(total_time);
1457 entry->counters.wal_records += walusage->wal_records;
1458 entry->counters.wal_fpi += walusage->wal_fpi;
1459 entry->counters.wal_bytes += walusage->wal_bytes;
1460 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1461 if (jitusage)
1462 {
1463 entry->counters.jit_functions += jitusage->created_functions;
1464 entry->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
1465
1466 if (INSTR_TIME_GET_MILLISEC(jitusage->deform_counter))
1467 entry->counters.jit_deform_count++;
1468 entry->counters.jit_deform_time += INSTR_TIME_GET_MILLISEC(jitusage->deform_counter);
1469
1470 if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
1471 entry->counters.jit_inlining_count++;
1472 entry->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
1473
1474 if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
1475 entry->counters.jit_optimization_count++;
1476 entry->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
1477
1478 if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
1479 entry->counters.jit_emission_count++;
1480 entry->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
1481 }
1482
1483 /* parallel worker counters */
1484 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1485 entry->counters.parallel_workers_launched += parallel_workers_launched;
1486
1487 /* plan cache counters */
1488 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1489 entry->counters.generic_plan_calls++;
1490 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1491 entry->counters.custom_plan_calls++;
1492
1493 SpinLockRelease(&entry->mutex);
1494 }
1495
1496done:
1498
1499 /* We postpone this clean-up until we're out of the lock */
1500 if (norm_query)
1501 pfree(norm_query);
1502}

References Assert, Counters::calls, CleanQuerytext(), pgssEntry::counters, Counters::custom_plan_calls, encoding, entry_alloc(), fb(), pgssSharedState::gc_count, gc_qtexts(), generate_normalized_query(), Counters::generic_plan_calls, GetDatabaseEncoding(), GetUserId(), HASH_FIND, hash_search(), INSTR_TIME_GET_MILLISEC, INT64CONST, IS_STICKY, Counters::jit_deform_count, Counters::jit_deform_time, Counters::jit_emission_count, Counters::jit_emission_time, Counters::jit_functions, Counters::jit_generation_time, Counters::jit_inlining_count, Counters::jit_inlining_time, Counters::jit_optimization_count, Counters::jit_optimization_time, Counters::local_blk_read_time, BufferUsage::local_blk_read_time, Counters::local_blk_write_time, BufferUsage::local_blk_write_time, Counters::local_blks_dirtied, BufferUsage::local_blks_dirtied, Counters::local_blks_hit, BufferUsage::local_blks_hit, Counters::local_blks_read, BufferUsage::local_blks_read, Counters::local_blks_written, BufferUsage::local_blks_written, pgssSharedState::lock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), Counters::max_time, Counters::mean_time, Counters::min_time, pgssEntry::mutex, MyDatabaseId, need_gc_qtexts(), nesting_level, Counters::parallel_workers_launched, Counters::parallel_workers_to_launch, pfree(), pgss, PGSS_EXEC, pgss_hash, PGSS_PLAN, PLAN_STMT_CACHE_CUSTOM, PLAN_STMT_CACHE_GENERIC, qtext_store(), Counters::rows, Counters::shared_blk_read_time, BufferUsage::shared_blk_read_time, Counters::shared_blk_write_time, BufferUsage::shared_blk_write_time, Counters::shared_blks_dirtied, BufferUsage::shared_blks_dirtied, Counters::shared_blks_hit, BufferUsage::shared_blks_hit, Counters::shared_blks_read, BufferUsage::shared_blks_read, Counters::shared_blks_written, BufferUsage::shared_blks_written, SpinLockAcquire(), SpinLockRelease(), Counters::sum_var_time, Counters::temp_blk_read_time, BufferUsage::temp_blk_read_time, Counters::temp_blk_write_time, 
BufferUsage::temp_blk_write_time, Counters::temp_blks_read, BufferUsage::temp_blks_read, Counters::temp_blks_written, BufferUsage::temp_blks_written, Counters::total_time, Counters::usage, USAGE_EXEC, USAGE_INIT, Counters::wal_buffers_full, WalUsage::wal_buffers_full, Counters::wal_bytes, WalUsage::wal_bytes, Counters::wal_fpi, WalUsage::wal_fpi, Counters::wal_records, and WalUsage::wal_records.

Referenced by pgss_ExecutorEnd(), pgss_planner(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ qtext_fetch()

static char * qtext_fetch ( Size  query_offset,
int  query_len,
char * buffer,
Size  buffer_size 
)
static

Definition at line 2404 of file pg_stat_statements.c.

2406{
2407 /* File read failed? */
2408 if (buffer == NULL)
2409 return NULL;
2410 /* Bogus offset/length? */
2411 if (query_len < 0 ||
2412 query_offset + query_len >= buffer_size)
2413 return NULL;
2414 /* As a further sanity check, make sure there's a trailing null */
2415 if (buffer[query_offset + query_len] != '\0')
2416 return NULL;
2417 /* Looks OK */
2418 return buffer + query_offset;
2419}

References fb().

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_load_file()

static char * qtext_load_file ( Size * buffer_size)
static

Definition at line 2311 of file pg_stat_statements.c.

2312{
2313 char *buf;
2314 int fd;
2315 struct stat stat;
2316 Size nread;
2317
2318 fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
2319 if (fd < 0)
2320 {
2321 if (errno != ENOENT)
2322 ereport(LOG,
2324 errmsg("could not read file \"%s\": %m",
2325 PGSS_TEXT_FILE)));
2326 return NULL;
2327 }
2328
2329 /* Get file length */
2330 if (fstat(fd, &stat))
2331 {
2332 ereport(LOG,
2334 errmsg("could not stat file \"%s\": %m",
2335 PGSS_TEXT_FILE)));
2337 return NULL;
2338 }
2339
2340 /* Allocate buffer; beware that off_t might be wider than size_t */
2343 else
2344 buf = NULL;
2345 if (buf == NULL)
2346 {
2347 ereport(LOG,
2349 errmsg("out of memory"),
2350 errdetail("Could not allocate enough memory to read file \"%s\".",
2351 PGSS_TEXT_FILE)));
2353 return NULL;
2354 }
2355
2356 /*
2357 * OK, slurp in the file. Windows fails if we try to read more than
2358 * INT_MAX bytes at once, and other platforms might not like that either,
2359 * so read a very large file in 1GB segments.
2360 */
2361 nread = 0;
2362 while (nread < stat.st_size)
2363 {
2364 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2365
2366 /*
2367 * If we get a short read and errno doesn't get set, the reason is
2368 * probably that garbage collection truncated the file since we did
2369 * the fstat(), so we don't log a complaint --- but we don't return
2370 * the data, either, since it's most likely corrupt due to concurrent
2371 * writes from garbage collection.
2372 */
2373 errno = 0;
2374 if (read(fd, buf + nread, toread) != toread)
2375 {
2376 if (errno)
2377 ereport(LOG,
2379 errmsg("could not read file \"%s\": %m",
2380 PGSS_TEXT_FILE)));
2381 pfree(buf);
2383 return NULL;
2384 }
2385 nread += toread;
2386 }
2387
2388 if (CloseTransientFile(fd) != 0)
2389 ereport(LOG,
2391 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2392
2393 *buffer_size = nread;
2394 return buf;
2395}

References buf, CloseTransientFile(), ereport, errcode(), errcode_for_file_access(), errdetail(), errmsg, fb(), fd(), fstat, LOG, MaxAllocHugeSize, MCXT_ALLOC_HUGE, MCXT_ALLOC_NO_OOM, Min, OpenTransientFile(), palloc_extended(), pfree(), PG_BINARY, PGSS_TEXT_FILE, read, and stat::st_size.

Referenced by gc_qtexts(), pg_stat_statements_internal(), and pgss_shmem_shutdown().

◆ qtext_store()

static bool qtext_store ( const char * query,
int  query_len,
Size * query_offset,
int * gc_count 
)
static

Definition at line 2231 of file pg_stat_statements.c.

2233{
2234 Size off;
2235 int fd;
2236
2237 /*
2238 * We use a spinlock to protect extent/n_writers/gc_count, so that
2239 * multiple processes may execute this function concurrently.
2240 */
2241 SpinLockAcquire(&pgss->mutex);
2242 off = pgss->extent;
2243 pgss->extent += query_len + 1;
2244 pgss->n_writers++;
2245 if (gc_count)
2246 *gc_count = pgss->gc_count;
2247 SpinLockRelease(&pgss->mutex);
2248
2249 *query_offset = off;
2250
2251 /*
2252 * Don't allow the file to grow larger than what qtext_load_file can
2253 * (theoretically) handle. This has been seen to be reachable on 32-bit
2254 * platforms.
2255 */
2256 if (unlikely(query_len >= MaxAllocHugeSize - off))
2257 {
2258 errno = EFBIG; /* not quite right, but it'll do */
2259 fd = -1;
2260 goto error;
2261 }
2262
2263 /* Now write the data into the successfully-reserved part of the file */
2264 fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2265 if (fd < 0)
2266 goto error;
2267
2268 if (pg_pwrite(fd, query, query_len, off) != query_len)
2269 goto error;
2270 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2271 goto error;
2272
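2273 CloseTransientFile(fd);
2274
2275 /* Mark our write complete */
2276 SpinLockAcquire(&pgss->mutex);
2277 pgss->n_writers--;
2278 SpinLockRelease(&pgss->mutex);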
2279
2280 return true;
2281
2282error:
2283 ereport(LOG,
2285 errmsg("could not write file \"%s\": %m",
2286 PGSS_TEXT_FILE)));
2287
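2288 if (fd >= 0)
2289 CloseTransientFile(fd);
2290
2291 /* Mark our write complete */
2292 SpinLockAcquire(&pgss->mutex);
2293 pgss->n_writers--;
2294 SpinLockRelease(&pgss->mutex);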
2295
2296 return false;
2297}

References CloseTransientFile(), ereport, errcode_for_file_access(), errmsg, error(), pgssSharedState::extent, fb(), fd(), pgssSharedState::gc_count, LOG, MaxAllocHugeSize, pgssSharedState::mutex, pgssSharedState::n_writers, OpenTransientFile(), PG_BINARY, pg_pwrite, pgss, PGSS_TEXT_FILE, SpinLockAcquire(), SpinLockRelease(), and unlikely.

Referenced by pgss_store().

Variable Documentation

◆ nesting_level

◆ pgss

◆ PGSS_FILE_HEADER

const uint32 PGSS_FILE_HEADER = 0x20250731
static

Definition at line 88 of file pg_stat_statements.c.

Referenced by pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_hash

◆ pgss_max

int pgss_max = 5000
static

Definition at line 303 of file pg_stat_statements.c.

Referenced by _PG_init(), entry_alloc(), need_gc_qtexts(), and pgss_shmem_request().

◆ PGSS_PG_MAJOR_VERSION

const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100
static

Definition at line 91 of file pg_stat_statements.c.

Referenced by pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_save

bool pgss_save = true
static

Definition at line 308 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_shmem_init(), and pgss_shmem_shutdown().

◆ pgss_shmem_callbacks

const ShmemCallbacks pgss_shmem_callbacks
static
Initial value:
= {
.request_fn = pgss_shmem_request,
.init_fn = pgss_shmem_init,
}

Definition at line 267 of file pg_stat_statements.c.

267 {
268 .request_fn = pgss_shmem_request,
269 .init_fn = pgss_shmem_init,
270};

Referenced by _PG_init().

◆ pgss_track

int pgss_track = PGSS_TRACK_TOP
static

Definition at line 304 of file pg_stat_statements.c.

Referenced by _PG_init().

◆ pgss_track_planning

bool pgss_track_planning = false
static

Definition at line 306 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ pgss_track_utility

bool pgss_track_utility = true
static

Definition at line 305 of file pg_stat_statements.c.

Referenced by _PG_init(), pgss_post_parse_analyze(), and pgss_ProcessUtility().

◆ prev_ExecutorEnd

ExecutorEnd_hook_type prev_ExecutorEnd = NULL
static

Definition at line 283 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorEnd().

◆ prev_ExecutorFinish

ExecutorFinish_hook_type prev_ExecutorFinish = NULL
static

Definition at line 282 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorFinish().

◆ prev_ExecutorRun

ExecutorRun_hook_type prev_ExecutorRun = NULL
static

Definition at line 281 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorRun().

◆ prev_ExecutorStart

ExecutorStart_hook_type prev_ExecutorStart = NULL
static

Definition at line 280 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ExecutorStart().

◆ prev_planner_hook

planner_hook_type prev_planner_hook = NULL
static

Definition at line 279 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_planner().

◆ prev_post_parse_analyze_hook

post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL
static

Definition at line 278 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_post_parse_analyze().

◆ prev_ProcessUtility

ProcessUtility_hook_type prev_ProcessUtility = NULL
static

Definition at line 284 of file pg_stat_statements.c.

Referenced by _PG_init(), and pgss_ProcessUtility().

◆ track_options

const struct config_enum_entry track_options[]
static
Initial value:
=
{
{"none", PGSS_TRACK_NONE, false},
{"top", PGSS_TRACK_TOP, false},
{"all", PGSS_TRACK_ALL, false},
{NULL, 0, false}
}

Definition at line 295 of file pg_stat_statements.c.

296{
297 {"none", PGSS_TRACK_NONE, false},
298 {"top", PGSS_TRACK_TOP, false},
299 {"all", PGSS_TRACK_ALL, false},
300 {NULL, 0, false}
301};

Referenced by _PG_init().