PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_buffercache_pages.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
Include dependency graph for pg_buffercache_pages.c:

Go to the source code of this file.

Data Structures

struct  BufferCachePagesRec
 
struct  BufferCachePagesContext
 
struct  BufferCacheNumaRec
 
struct  BufferCacheNumaContext
 

Macros

#define NUM_BUFFERCACHE_PAGES_MIN_ELEM   8
 
#define NUM_BUFFERCACHE_PAGES_ELEM   9
 
#define NUM_BUFFERCACHE_SUMMARY_ELEM   5
 
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM   4
 
#define NUM_BUFFERCACHE_EVICT_ELEM   2
 
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM   3
 
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM   3
 
#define NUM_BUFFERCACHE_NUMA_ELEM   3
 

Functions

 PG_MODULE_MAGIC_EXT (.name="pg_buffercache",.version=PG_VERSION)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_pages)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_numa_pages)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_summary)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_usage_counts)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict_relation)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict_all)
 
Datum pg_buffercache_pages (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_numa_pages (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_summary (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_usage_counts (PG_FUNCTION_ARGS)
 
static void pg_buffercache_superuser_check (char *func_name)
 
Datum pg_buffercache_evict (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_evict_relation (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_evict_all (PG_FUNCTION_ARGS)
 

Variables

static bool firstNumaTouch = true
 

Macro Definition Documentation

◆ NUM_BUFFERCACHE_EVICT_ALL_ELEM

#define NUM_BUFFERCACHE_EVICT_ALL_ELEM   3

Definition at line 27 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_EVICT_ELEM

#define NUM_BUFFERCACHE_EVICT_ELEM   2

Definition at line 25 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_EVICT_RELATION_ELEM

#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM   3

Definition at line 26 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_NUMA_ELEM

#define NUM_BUFFERCACHE_NUMA_ELEM   3

Definition at line 29 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_PAGES_ELEM

#define NUM_BUFFERCACHE_PAGES_ELEM   9

Definition at line 22 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_PAGES_MIN_ELEM

#define NUM_BUFFERCACHE_PAGES_MIN_ELEM   8

Definition at line 21 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_SUMMARY_ELEM

#define NUM_BUFFERCACHE_SUMMARY_ELEM   5

Definition at line 23 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_USAGE_COUNTS_ELEM

#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM   4

Definition at line 24 of file pg_buffercache_pages.c.

Function Documentation

◆ pg_buffercache_evict()

Datum pg_buffercache_evict ( PG_FUNCTION_ARGS  )

Definition at line 668 of file pg_buffercache_pages.c.

669{
670 Datum result;
671 TupleDesc tupledesc;
672 HeapTuple tuple;
673 Datum values[NUM_BUFFERCACHE_EVICT_ELEM];
674 bool nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};
675
676 Buffer buf = PG_GETARG_INT32(0);
677 bool buffer_flushed;
678
679 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
680 elog(ERROR, "return type must be a row type");
681
682 pg_buffercache_superuser_check("pg_buffercache_evict");
683
684 if (buf < 1 || buf > NBuffers)
685 elog(ERROR, "bad buffer ID: %d", buf);
686
687 values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
688 values[1] = BoolGetDatum(buffer_flushed);
689
690 tuple = heap_form_tuple(tupledesc, values, nulls);
691 result = HeapTupleGetDatum(tuple);
692
693 PG_RETURN_DATUM(result);
694}
static Datum values[MAXATTR]
Definition: bootstrap.c:153
int Buffer
Definition: buf.h:23
bool EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed)
Definition: bufmgr.c:6654
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
int NBuffers
Definition: globals.c:142
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
static void pg_buffercache_superuser_check(char *func_name)
#define NUM_BUFFERCACHE_EVICT_ELEM
static char * buf
Definition: pg_test_fsync.c:72
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
uint64_t Datum
Definition: postgres.h:70

References BoolGetDatum(), buf, elog, ERROR, EvictUnpinnedBuffer(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), NBuffers, NUM_BUFFERCACHE_EVICT_ELEM, pg_buffercache_superuser_check(), PG_GETARG_INT32, PG_RETURN_DATUM, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_evict_all()

Datum pg_buffercache_evict_all ( PG_FUNCTION_ARGS  )

Definition at line 750 of file pg_buffercache_pages.c.

751{
752 Datum result;
753 TupleDesc tupledesc;
754 HeapTuple tuple;
755 Datum values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
756 bool nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};
757
758 int32 buffers_evicted = 0;
759 int32 buffers_flushed = 0;
760 int32 buffers_skipped = 0;
761
762 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
763 elog(ERROR, "return type must be a row type");
764
765 pg_buffercache_superuser_check("pg_buffercache_evict_all");
766
767 EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
768 &buffers_skipped);
769
770 values[0] = Int32GetDatum(buffers_evicted);
771 values[1] = Int32GetDatum(buffers_flushed);
772 values[2] = Int32GetDatum(buffers_skipped);
773
774 tuple = heap_form_tuple(tupledesc, values, nulls);
775 result = HeapTupleGetDatum(tuple);
776
777 PG_RETURN_DATUM(result);
778}
void EvictAllUnpinnedBuffers(int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
Definition: bufmgr.c:6683
int32_t int32
Definition: c.h:538
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222

References elog, ERROR, EvictAllUnpinnedBuffers(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int32GetDatum(), NUM_BUFFERCACHE_EVICT_ALL_ELEM, pg_buffercache_superuser_check(), PG_RETURN_DATUM, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_evict_relation()

Datum pg_buffercache_evict_relation ( PG_FUNCTION_ARGS  )

Definition at line 700 of file pg_buffercache_pages.c.

701{
702 Datum result;
703 TupleDesc tupledesc;
704 HeapTuple tuple;
705 Datum values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
706 bool nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};
707
708 Oid relOid;
709 Relation rel;
710
711 int32 buffers_evicted = 0;
712 int32 buffers_flushed = 0;
713 int32 buffers_skipped = 0;
714
715 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
716 elog(ERROR, "return type must be a row type");
717
718 pg_buffercache_superuser_check("pg_buffercache_evict_relation");
719
720 relOid = PG_GETARG_OID(0);
721
722 rel = relation_open(relOid, AccessShareLock);
723
724 if (RelationUsesLocalBuffers(rel))
725 ereport(ERROR,
726 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
727 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
728 "pg_buffercache_evict_relation")));
729
730 EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
731 &buffers_skipped);
732
733 relation_close(rel, AccessShareLock);
734
735 values[0] = Int32GetDatum(buffers_evicted);
736 values[1] = Int32GetDatum(buffers_flushed);
737 values[2] = Int32GetDatum(buffers_skipped);
738
739 tuple = heap_form_tuple(tupledesc, values, nulls);
740 result = HeapTupleGetDatum(tuple);
741
742 PG_RETURN_DATUM(result);
743}
void EvictRelUnpinnedBuffers(Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
Definition: bufmgr.c:6733
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereport(elevel,...)
Definition: elog.h:150
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define AccessShareLock
Definition: lockdefs.h:36
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM
unsigned int Oid
Definition: postgres_ext.h:32
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:647
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47

References AccessShareLock, elog, ereport, errcode(), errmsg(), ERROR, EvictRelUnpinnedBuffers(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int32GetDatum(), NUM_BUFFERCACHE_EVICT_RELATION_ELEM, pg_buffercache_superuser_check(), PG_GETARG_OID, PG_RETURN_DATUM, relation_close(), relation_open(), RelationUsesLocalBuffers, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_numa_pages()

Datum pg_buffercache_numa_pages ( PG_FUNCTION_ARGS  )

Definition at line 305 of file pg_buffercache_pages.c.

306{
307 FuncCallContext *funcctx;
308 MemoryContext oldcontext;
309 BufferCacheNumaContext *fctx; /* User function context. */
310 TupleDesc tupledesc;
311 TupleDesc expected_tupledesc;
312 HeapTuple tuple;
313 Datum result;
314
315 if (SRF_IS_FIRSTCALL())
316 {
317 int i,
318 idx;
319 Size os_page_size;
320 void **os_page_ptrs;
321 int *os_page_status;
322 uint64 os_page_count;
323 int pages_per_buffer;
324 int max_entries;
325 char *startptr,
326 *endptr;
327
328 if (pg_numa_init() == -1)
329 elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
330
331 /*
332 * The database block size and OS memory page size are unlikely to be
333 * the same. The block size is 1-32KB, the memory page size depends on
334 * platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB, but
335 * there are also features like THP etc. Moreover, we don't quite know
336 * how the pages and buffers "align" in memory - the buffers may be
337 * shifted in some way, using more memory pages than necessary.
338 *
339 * So we need to be careful about mapping buffers to memory pages. We
340 * calculate the maximum number of pages a buffer might use, so that
341 * we allocate enough space for the entries. And then we count the
342 * actual number of entries as we scan the buffers.
343 *
344 * This information is needed before calling move_pages() for NUMA
345 * node id inquiry.
346 */
347 os_page_size = pg_get_shmem_pagesize();
348
349 /*
350 * The pages and block size is expected to be 2^k, so one divides the
351 * other (we don't know in which direction). This does not say
352 * anything about relative alignment of pages/buffers.
353 */
354 Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
355
356 /*
357 * How many addresses we are going to query? Simply get the page for
358 * the first buffer, and first page after the last buffer, and count
359 * the pages from that.
360 */
361 startptr = (char *) TYPEALIGN_DOWN(os_page_size,
362 BufferGetBlock(1));
363 endptr = (char *) TYPEALIGN(os_page_size,
364 (char *) BufferGetBlock(NBuffers) + BLCKSZ);
365 os_page_count = (endptr - startptr) / os_page_size;
366
367 /* Used to determine the NUMA node for all OS pages at once */
368 os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
369 os_page_status = palloc(sizeof(uint64) * os_page_count);
370
371 /* Fill pointers for all the memory pages. */
372 idx = 0;
373 for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
374 {
375 os_page_ptrs[idx++] = ptr;
376
377 /* Only need to touch memory once per backend process lifetime */
378 if (firstNumaTouch)
379 pg_numa_touch_mem_if_required(ptr);
380 }
381
382 Assert(idx == os_page_count);
383
384 elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
385 "os_page_size=%zu", NBuffers, os_page_count, os_page_size);
386
387 /*
388 * If we ever get 0xff back from kernel inquiry, then we probably have
389 * bug in our buffers to OS page mapping code here.
390 */
391 memset(os_page_status, 0xff, sizeof(int) * os_page_count);
392
393 /* Query NUMA status for all the pointers */
394 if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
395 elog(ERROR, "failed NUMA pages inquiry: %m");
396
397 /* Initialize the multi-call context, load entries about buffers */
398
399 funcctx = SRF_FIRSTCALL_INIT();
400
401 /* Switch context when allocating stuff to be used in later calls */
402 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
403
404 /* Create a user function context for cross-call persistence */
405 fctx = (BufferCacheNumaContext *) palloc(sizeof(BufferCacheNumaContext));
406
407 if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
408 elog(ERROR, "return type must be a row type");
409
410 if (expected_tupledesc->natts != NUM_BUFFERCACHE_NUMA_ELEM)
411 elog(ERROR, "incorrect number of output arguments");
412
413 /* Construct a tuple descriptor for the result rows. */
414 tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
415 TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
416 INT4OID, -1, 0);
417 TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
418 INT8OID, -1, 0);
419 TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
420 INT4OID, -1, 0);
421
422 fctx->tupdesc = BlessTupleDesc(tupledesc);
423
424 /*
425 * Each buffer needs at least one entry, but it might be offset in
426 * some way, and use one extra entry. So we allocate space for the
427 * maximum number of entries we might need, and then count the exact
428 * number as we're walking buffers. That way we can do it in one pass,
429 * without reallocating memory.
430 */
431 pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
432 max_entries = NBuffers * pages_per_buffer;
433
434 /* Allocate entries for BufferCachePagesRec records. */
435 fctx->record = (BufferCacheNumaRec *)
436 MemoryContextAllocHuge(CurrentMemoryContext,
437 sizeof(BufferCacheNumaRec) * max_entries);
438
439 /* Return to original context when allocating transient memory */
440 MemoryContextSwitchTo(oldcontext);
441
442 if (firstNumaTouch)
443 elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");
444
445 /*
446 * Scan through all the buffers, saving the relevant fields in the
447 * fctx->record structure.
448 *
449 * We don't hold the partition locks, so we don't get a consistent
450 * snapshot across all buffers, but we do grab the buffer header
451 * locks, so the information of each buffer is self-consistent.
452 *
453 * This loop touches and stores addresses into os_page_ptrs[] as input
454 * to one big move_pages(2) inquiry system call. Basically we ask for
455 * all memory pages for NBuffers.
456 */
457 startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
458 idx = 0;
459 for (i = 0; i < NBuffers; i++)
460 {
461 char *buffptr = (char *) BufferGetBlock(i + 1);
462 BufferDesc *bufHdr;
463 uint32 bufferid;
464 int32 page_num;
465 char *startptr_buff,
466 *endptr_buff;
467
468 CHECK_FOR_INTERRUPTS();
469
470 bufHdr = GetBufferDescriptor(i);
471
472 /* Lock each buffer header before inspecting. */
473 LockBufHdr(bufHdr);
474 bufferid = BufferDescriptorGetBuffer(bufHdr);
475 UnlockBufHdr(bufHdr);
476
477 /* start of the first page of this buffer */
478 startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
479
480 /* end of the buffer (no need to align to memory page) */
481 endptr_buff = buffptr + BLCKSZ;
482
483 Assert(startptr_buff < endptr_buff);
484
485 /* calculate ID of the first page for this buffer */
486 page_num = (startptr_buff - startptr) / os_page_size;
487
488 /* Add an entry for each OS page overlapping with this buffer. */
489 for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
490 {
491 fctx->record[idx].bufferid = bufferid;
492 fctx->record[idx].page_num = page_num;
493 fctx->record[idx].numa_node = os_page_status[page_num];
494
495 /* advance to the next entry/page */
496 ++idx;
497 ++page_num;
498 }
499 }
500
501 Assert((idx >= os_page_count) && (idx <= max_entries));
502
503 /* Set max calls and remember the user function context. */
504 funcctx->max_calls = idx;
505 funcctx->user_fctx = fctx;
506
507 /* Remember this backend touched the pages */
508 firstNumaTouch = false;
509 }
510
511 funcctx = SRF_PERCALL_SETUP();
512
513 /* Get the saved state */
514 fctx = funcctx->user_fctx;
515
516 if (funcctx->call_cntr < funcctx->max_calls)
517 {
518 uint32 i = funcctx->call_cntr;
519 Datum values[NUM_BUFFERCACHE_NUMA_ELEM];
520 bool nulls[NUM_BUFFERCACHE_NUMA_ELEM];
521
522 values[0] = Int32GetDatum(fctx->record[i].bufferid);
523 nulls[0] = false;
524
525 values[1] = Int64GetDatum(fctx->record[i].page_num);
526 nulls[1] = false;
527
528 values[2] = Int32GetDatum(fctx->record[i].numa_node);
529 nulls[2] = false;
530
531 /* Build and return the tuple. */
532 tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
533 result = HeapTupleGetDatum(tuple);
534
535 SRF_RETURN_NEXT(funcctx, result);
536 }
537 else
538 SRF_RETURN_DONE(funcctx);
539}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
int16 AttrNumber
Definition: attnum.h:21
static void UnlockBufHdr(BufferDesc *desc)
static BufferDesc * GetBufferDescriptor(uint32 id)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:6264
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:392
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:807
#define Max(x, y)
Definition: c.h:1001
#define UINT64_FORMAT
Definition: c.h:561
uint64_t uint64
Definition: c.h:543
uint32_t uint32
Definition: c.h:542
size_t Size
Definition: c.h:614
#define TYPEALIGN_DOWN(ALIGNVAL, LEN)
Definition: c.h:819
#define DEBUG1
Definition: elog.h:30
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2260
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
Assert(PointerIsAligned(start, uint64))
int i
Definition: isn.c:77
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1703
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static bool firstNumaTouch
#define NUM_BUFFERCACHE_NUMA_ELEM
#define pg_numa_touch_mem_if_required(ptr)
Definition: pg_numa.h:37
PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
Definition: pg_numa.c:120
PGDLLIMPORT int pg_numa_init(void)
Definition: pg_numa.c:113
static Datum Int64GetDatum(int64 X)
Definition: postgres.h:403
Size pg_get_shmem_pagesize(void)
Definition: shmem.c:738
BufferCacheNumaRec * record
void * user_fctx
Definition: funcapi.h:82
uint64 max_calls
Definition: funcapi.h:74
uint64 call_cntr
Definition: funcapi.h:65
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:182
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:842

References Assert(), BlessTupleDesc(), BufferDescriptorGetBuffer(), BufferGetBlock(), BufferCacheNumaRec::bufferid, FuncCallContext::call_cntr, CHECK_FOR_INTERRUPTS, CreateTemplateTupleDesc(), CurrentMemoryContext, DEBUG1, elog, ERROR, firstNumaTouch, get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, idx(), Int32GetDatum(), Int64GetDatum(), LockBufHdr(), Max, FuncCallContext::max_calls, MemoryContextAllocHuge(), MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, TupleDescData::natts, NBuffers, NUM_BUFFERCACHE_NUMA_ELEM, BufferCacheNumaRec::numa_node, BufferCacheNumaRec::page_num, palloc(), palloc0(), pg_get_shmem_pagesize(), pg_numa_init(), pg_numa_query_pages(), pg_numa_touch_mem_if_required, BufferCacheNumaContext::record, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, BufferCacheNumaContext::tupdesc, TupleDescInitEntry(), TYPEALIGN, TYPEALIGN_DOWN, TYPEFUNC_COMPOSITE, UINT64_FORMAT, UnlockBufHdr(), FuncCallContext::user_fctx, and values.

◆ pg_buffercache_pages()

Datum pg_buffercache_pages ( PG_FUNCTION_ARGS  )

Definition at line 110 of file pg_buffercache_pages.c.

111{
112 FuncCallContext *funcctx;
113 Datum result;
114 MemoryContext oldcontext;
115 BufferCachePagesContext *fctx; /* User function context. */
116 TupleDesc tupledesc;
117 TupleDesc expected_tupledesc;
118 HeapTuple tuple;
119
120 if (SRF_IS_FIRSTCALL())
121 {
122 int i;
123
124 funcctx = SRF_FIRSTCALL_INIT();
125
126 /* Switch context when allocating stuff to be used in later calls */
127 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
128
129 /* Create a user function context for cross-call persistence */
130 fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
131
132 /*
133 * To smoothly support upgrades from version 1.0 of this extension
134 * transparently handle the (non-)existence of the pinning_backends
135 * column. We unfortunately have to get the result type for that... -
136 * we can't use the result type determined by the function definition
137 * without potentially crashing when somebody uses the old (or even
138 * wrong) function definition though.
139 */
140 if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
141 elog(ERROR, "return type must be a row type");
142
143 if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
144 expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
145 elog(ERROR, "incorrect number of output arguments");
146
147 /* Construct a tuple descriptor for the result rows. */
148 tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
149 TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
150 INT4OID, -1, 0);
151 TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
152 OIDOID, -1, 0);
153 TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
154 OIDOID, -1, 0);
155 TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
156 OIDOID, -1, 0);
157 TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
158 INT2OID, -1, 0);
159 TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
160 INT8OID, -1, 0);
161 TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
162 BOOLOID, -1, 0);
163 TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
164 INT2OID, -1, 0);
165
166 if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
167 TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
168 INT4OID, -1, 0);
169
170 fctx->tupdesc = BlessTupleDesc(tupledesc);
171
172 /* Allocate NBuffers worth of BufferCachePagesRec records. */
173 fctx->record = (BufferCachePagesRec *)
174 MemoryContextAllocHuge(CurrentMemoryContext,
175 sizeof(BufferCachePagesRec) * NBuffers);
176
177 /* Set max calls and remember the user function context. */
178 funcctx->max_calls = NBuffers;
179 funcctx->user_fctx = fctx;
180
181 /* Return to original context when allocating transient memory */
182 MemoryContextSwitchTo(oldcontext);
183
184 /*
185 * Scan through all the buffers, saving the relevant fields in the
186 * fctx->record structure.
187 *
188 * We don't hold the partition locks, so we don't get a consistent
189 * snapshot across all buffers, but we do grab the buffer header
190 * locks, so the information of each buffer is self-consistent.
191 */
192 for (i = 0; i < NBuffers; i++)
193 {
194 BufferDesc *bufHdr;
195 uint32 buf_state;
196
197 CHECK_FOR_INTERRUPTS();
198
199 bufHdr = GetBufferDescriptor(i);
200 /* Lock each buffer header before inspecting. */
201 buf_state = LockBufHdr(bufHdr);
202
203 fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
204 fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
205 fctx->record[i].reltablespace = bufHdr->tag.spcOid;
206 fctx->record[i].reldatabase = bufHdr->tag.dbOid;
207 fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
208 fctx->record[i].blocknum = bufHdr->tag.blockNum;
209 fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
210 fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
211
212 if (buf_state & BM_DIRTY)
213 fctx->record[i].isdirty = true;
214 else
215 fctx->record[i].isdirty = false;
216
217 /* Note if the buffer is valid, and has storage created */
218 if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
219 fctx->record[i].isvalid = true;
220 else
221 fctx->record[i].isvalid = false;
222
223 UnlockBufHdr(bufHdr);
224 }
225 }
226
227 funcctx = SRF_PERCALL_SETUP();
228
229 /* Get the saved state */
230 fctx = funcctx->user_fctx;
231
232 if (funcctx->call_cntr < funcctx->max_calls)
233 {
234 uint32 i = funcctx->call_cntr;
235 Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
236 bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
237
238 values[0] = Int32GetDatum(fctx->record[i].bufferid);
239 nulls[0] = false;
240
241 /*
242 * Set all fields except the bufferid to null if the buffer is unused
243 * or not valid.
244 */
245 if (fctx->record[i].blocknum == InvalidBlockNumber ||
246 fctx->record[i].isvalid == false)
247 {
248 nulls[1] = true;
249 nulls[2] = true;
250 nulls[3] = true;
251 nulls[4] = true;
252 nulls[5] = true;
253 nulls[6] = true;
254 nulls[7] = true;
255 /* unused for v1.0 callers, but the array is always long enough */
256 nulls[8] = true;
257 }
258 else
259 {
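260 values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
261 nulls[1] = false;
262 values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
263 nulls[2] = false;
264 values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
265 nulls[3] = false;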
267 nulls[4] = false;
268 values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
269 nulls[5] = false;
270 values[6] = BoolGetDatum(fctx->record[i].isdirty);
271 nulls[6] = false;
272 values[7] = Int16GetDatum(fctx->record[i].usagecount);
273 nulls[7] = false;
274 /* unused for v1.0 callers, but the array is always long enough */
275 values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
276 nulls[8] = false;
277 }
278
279 /* Build and return the tuple. */
280 tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
281 result = HeapTupleGetDatum(tuple);
282
283 SRF_RETURN_NEXT(funcctx, result);
284 }
285 else
286 SRF_RETURN_DONE(funcctx);
287}
#define InvalidBlockNumber
Definition: block.h:33
#define BM_TAG_VALID
Definition: buf_internals.h:71
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_DIRTY
Definition: buf_internals.h:69
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:60
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:59
#define BM_VALID
Definition: buf_internals.h:70
int64_t int64
Definition: c.h:539
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM
#define NUM_BUFFERCACHE_PAGES_ELEM
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:182
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
BufferCachePagesRec * record
BufferTag tag
BlockNumber blockNum
Oid spcOid

References BlessTupleDesc(), BufferCachePagesRec::blocknum, buftag::blockNum, BM_DIRTY, BM_TAG_VALID, BM_VALID, BoolGetDatum(), BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BufferDescriptorGetBuffer(), BufferCachePagesRec::bufferid, BufTagGetForkNum(), BufTagGetRelNumber(), FuncCallContext::call_cntr, CHECK_FOR_INTERRUPTS, CreateTemplateTupleDesc(), CurrentMemoryContext, buftag::dbOid, elog, ERROR, BufferCachePagesRec::forknum, get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, Int16GetDatum(), Int32GetDatum(), Int64GetDatum(), InvalidBlockNumber, BufferCachePagesRec::isdirty, BufferCachePagesRec::isvalid, LockBufHdr(), FuncCallContext::max_calls, MemoryContextAllocHuge(), MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, TupleDescData::natts, NBuffers, NUM_BUFFERCACHE_PAGES_ELEM, NUM_BUFFERCACHE_PAGES_MIN_ELEM, ObjectIdGetDatum(), palloc(), BufferCachePagesRec::pinning_backends, BufferCachePagesContext::record, BufferCachePagesRec::reldatabase, BufferCachePagesRec::relfilenumber, BufferCachePagesRec::reltablespace, buftag::spcOid, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, BufferDesc::tag, BufferCachePagesContext::tupdesc, TupleDescInitEntry(), TYPEFUNC_COMPOSITE, UnlockBufHdr(), BufferCachePagesRec::usagecount, FuncCallContext::user_fctx, and values.

◆ pg_buffercache_summary()

Datum pg_buffercache_summary ( PG_FUNCTION_ARGS  )

Definition at line 542 of file pg_buffercache_pages.c.

543{
544 Datum result;
545 TupleDesc tupledesc;
546 HeapTuple tuple;
547 Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
548 bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
549
550 int32 buffers_used = 0;
551 int32 buffers_unused = 0;
552 int32 buffers_dirty = 0;
553 int32 buffers_pinned = 0;
554 int64 usagecount_total = 0;
555
556 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
557 elog(ERROR, "return type must be a row type");
558
559 for (int i = 0; i < NBuffers; i++)
560 {
561 BufferDesc *bufHdr;
562 uint32 buf_state;
563
564 CHECK_FOR_INTERRUPTS();
565
566 /*
567 * This function summarizes the state of all headers. Locking the
568 * buffer headers wouldn't provide an improved result as the state of
569 * the buffer can still change after we release the lock and it'd
570 * noticeably increase the cost of the function.
571 */
572 bufHdr = GetBufferDescriptor(i);
573 buf_state = pg_atomic_read_u32(&bufHdr->state);
574
575 if (buf_state & BM_VALID)
576 {
577 buffers_used++;
578 usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
579
580 if (buf_state & BM_DIRTY)
581 buffers_dirty++;
582 }
583 else
584 buffers_unused++;
585
586 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
587 buffers_pinned++;
588 }
589
590 memset(nulls, 0, sizeof(nulls));
591 values[0] = Int32GetDatum(buffers_used);
592 values[1] = Int32GetDatum(buffers_unused);
593 values[2] = Int32GetDatum(buffers_dirty);
594 values[3] = Int32GetDatum(buffers_pinned);
595
596 if (buffers_used != 0)
597 values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
598 else
599 nulls[4] = true;
600
601 /* Build and return the tuple. */
602 tuple = heap_form_tuple(tupledesc, values, nulls);
603 result = HeapTupleGetDatum(tuple);
604
605 PG_RETURN_DATUM(result);
606}
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:237
#define NUM_BUFFERCACHE_SUMMARY_ELEM
static Datum Float8GetDatum(float8 X)
Definition: postgres.h:492
pg_atomic_uint32 state

References BM_DIRTY, BM_VALID, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, CHECK_FOR_INTERRUPTS, elog, ERROR, Float8GetDatum(), get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, Int32GetDatum(), NBuffers, NUM_BUFFERCACHE_SUMMARY_ELEM, pg_atomic_read_u32(), PG_RETURN_DATUM, BufferDesc::state, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_superuser_check()

static void pg_buffercache_superuser_check ( char *  func_name)
static

Definition at line 655 of file pg_buffercache_pages.c.

656{
657 if (!superuser())
658 ereport(ERROR,
659 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
660 errmsg("must be superuser to use %s()",
661 func_name)));
662}
bool superuser(void)
Definition: superuser.c:46

References ereport, errcode(), errmsg(), ERROR, and superuser().

Referenced by pg_buffercache_evict(), pg_buffercache_evict_all(), and pg_buffercache_evict_relation().

◆ pg_buffercache_usage_counts()

Datum pg_buffercache_usage_counts ( PG_FUNCTION_ARGS  )

Definition at line 609 of file pg_buffercache_pages.c.

610{
611 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
612 int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
613 int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
614 int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
615 Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
616 bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
617
618 InitMaterializedSRF(fcinfo, 0);
619
620 for (int i = 0; i < NBuffers; i++)
621 {
622 BufferDesc *bufHdr = GetBufferDescriptor(i);
623 uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
624 int usage_count;
625
626 CHECK_FOR_INTERRUPTS();
627
628 usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
629 usage_counts[usage_count]++;
630
631 if (buf_state & BM_DIRTY)
632 dirty[usage_count]++;
633
634 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
635 pinned[usage_count]++;
636 }
637
638 for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
639 {
640 values[0] = Int32GetDatum(i);
641 values[1] = Int32GetDatum(usage_counts[i]);
642 values[2] = Int32GetDatum(dirty[i]);
643 values[3] = Int32GetDatum(pinned[i]);
644
645 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
646 }
647
648 return (Datum) 0;
649}
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:86
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784

References BM_MAX_USAGE_COUNT.

◆ PG_FUNCTION_INFO_V1() [1/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict  )

◆ PG_FUNCTION_INFO_V1() [2/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict_all  )

◆ PG_FUNCTION_INFO_V1() [3/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict_relation  )

◆ PG_FUNCTION_INFO_V1() [4/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_numa_pages  )

◆ PG_FUNCTION_INFO_V1() [5/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_pages  )

◆ PG_FUNCTION_INFO_V1() [6/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_summary  )

◆ PG_FUNCTION_INFO_V1() [7/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_usage_counts  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "pg_buffercache",
version = PG_VERSION 
)

Variable Documentation

◆ firstNumaTouch

bool firstNumaTouch = true
static

Definition at line 106 of file pg_buffercache_pages.c.

Referenced by pg_buffercache_numa_pages().