/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *    display some contents of the buffer cache
 *
 * contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"


#define NUM_BUFFERCACHE_PAGES_MIN_ELEM      8
#define NUM_BUFFERCACHE_PAGES_ELEM          9
#define NUM_BUFFERCACHE_SUMMARY_ELEM        5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM   4
#define NUM_BUFFERCACHE_EVICT_ELEM          2
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM      3

#define NUM_BUFFERCACHE_NUMA_ELEM           3
PG_MODULE_MAGIC_EXT(
                    .name = "pg_buffercache",
                    .version = PG_VERSION
);

/*
 * Record structure holding the cache data to be exposed.
 */
typedef struct
{
    uint32      bufferid;
    RelFileNumber relfilenumber;
    Oid         reltablespace;
    Oid         reldatabase;
    ForkNumber  forknum;
    BlockNumber blocknum;
    bool        isvalid;
    bool        isdirty;
    uint16      usagecount;

    /*
     * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
     * being pinned by too many backends and each backend will only pin once
     * because of bufmgr.c's PrivateRefCount infrastructure.
     */
    int32       pinning_backends;
} BufferCachePagesRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc   tupdesc;
    BufferCachePagesRec *record;
} BufferCachePagesContext;

/*
 * Record structure holding the NUMA-related cache data to be exposed.
 */
typedef struct
{
    uint32      bufferid;
    int64       page_num;
    int32       numa_node;
} BufferCacheNumaRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc   tupdesc;
    BufferCacheNumaRec *record;
} BufferCacheNumaContext;


/*
 * Function returning data from the shared buffer cache - buffer number,
 * relation node/tablespace/database/blocknum and dirty indicator.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
PG_FUNCTION_INFO_V1(pg_buffercache_evict);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);

/* Only need to touch memory once per backend process lifetime */
static bool firstNumaTouch = true;


Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;  /* User function context. */
    TupleDesc   tupledesc;
    TupleDesc   expected_tupledesc;
    HeapTuple   tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int         i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension,
         * transparently handle the (non-)existence of the pinning_backends
         * column. Unfortunately we have to look up the call's result type
         * for that: we can't simply use the result type determined by the
         * current function definition, since that could crash when somebody
         * still uses the old (or even a wrong) function definition.
         */
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr;
            uint32      buf_state;

            bufHdr = GetBufferDescriptor(i);
            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
            fctx->record[i].reltablespace = bufHdr->tag.spcOid;
            fctx->record[i].reldatabase = bufHdr->tag.dbOid;
            fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr, buf_state);
        }
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32      i = funcctx->call_cntr;
        Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
            /* unused for v1.0 callers, but the array is always long enough */
            nulls[8] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}
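
/*
 * Illustrative use from SQL, assuming the pg_buffercache view that the
 * extension's SQL script defines over this function (names per the standard
 * install): list the relations with the most buffers cached in the current
 * database.
 *
 *   SELECT c.relname, count(*) AS buffers
 *     FROM pg_buffercache b
 *     JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
 *    GROUP BY c.relname
 *    ORDER BY buffers DESC
 *    LIMIT 10;
 */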

/*
 * Inquire about NUMA memory mappings for shared buffers.
 *
 * Returns the NUMA node ID for each memory page used by the buffer cache.
 * Buffers may be smaller or larger than OS memory pages. For each buffer we
 * return one entry for each memory page used by the buffer (if the buffer is
 * smaller than a memory page, it only uses a part of that one page).
 *
 * We expect both sizes (for buffers and memory pages) to be powers of 2, so
 * one is always a multiple of the other.
 *
 * In order to get reliable results we also need to touch memory pages, so
 * that the inquiry about the NUMA memory node doesn't return -2 (which
 * indicates unmapped/unallocated pages).
 */
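/*
 * A worked example with illustrative numbers: with BLCKSZ = 8192 and a 4KB
 * OS page size, an aligned buffer spans exactly two OS pages (two result
 * rows) and a misaligned one spans three. With 2MB huge pages, each buffer
 * fits inside a single OS page and yields one row (or two, if it happens to
 * straddle a page boundary).
 */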
Datum
pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    MemoryContext oldcontext;
    BufferCacheNumaContext *fctx;   /* User function context. */
    TupleDesc   tupledesc;
    TupleDesc   expected_tupledesc;
    HeapTuple   tuple;
    Datum       result;

    if (SRF_IS_FIRSTCALL())
    {
        int         i,
                    idx;
        Size        os_page_size;
        void      **os_page_ptrs;
        int        *os_page_status;
        uint64      os_page_count;
        int         pages_per_buffer;
        int         max_entries;
        volatile uint64 touch pg_attribute_unused();
        char       *startptr,
                   *endptr;

        if (pg_numa_init() == -1)
            elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

        /*
         * The database block size and OS memory page size are unlikely to be
         * the same. The block size is 1-32KB, the memory page size depends
         * on the platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB,
         * but there are also features like THP etc. Moreover, we don't quite
         * know how the pages and buffers "align" in memory - the buffers may
         * be shifted in some way, using more memory pages than necessary.
         *
         * So we need to be careful about mapping buffers to memory pages. We
         * calculate the maximum number of pages a buffer might use, so that
         * we allocate enough space for the entries. And then we count the
         * actual number of entries as we scan the buffers.
         *
         * This information is needed before calling move_pages() for NUMA
         * node id inquiry.
         */
        os_page_size = pg_get_shmem_pagesize();

        /*
         * The page and block sizes are expected to be powers of 2, so one
         * always divides the other (we just don't know in which direction).
         * This does not say anything about the relative alignment of
         * pages/buffers.
         */
        Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));

        /*
         * How many addresses are we going to query? Simply get the page for
         * the first buffer, and the first page after the last buffer, and
         * count the pages from that.
         */
        startptr = (char *) TYPEALIGN_DOWN(os_page_size,
                                           BufferGetBlock(1));
        endptr = (char *) TYPEALIGN(os_page_size,
                                    (char *) BufferGetBlock(NBuffers) + BLCKSZ);
        os_page_count = (endptr - startptr) / os_page_size;

        /* Used to determine the NUMA node for all OS pages at once */
        os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
        os_page_status = palloc(sizeof(uint64) * os_page_count);

        /* Fill pointers for all the memory pages. */
        idx = 0;
        for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
        {
            os_page_ptrs[idx++] = ptr;

            /* Only need to touch memory once per backend process lifetime */
            if (firstNumaTouch)
                pg_numa_touch_mem_if_required(touch, ptr);
        }

        Assert(idx == os_page_count);

        elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
             "os_page_size=%zu", NBuffers, os_page_count, os_page_size);

        /*
         * If we ever get 0xff back from the kernel inquiry, then we probably
         * have a bug in our buffer-to-OS-page mapping code here.
         */
        memset(os_page_status, 0xff, sizeof(int) * os_page_count);

        /* Query NUMA status for all the pointers */
        if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
            elog(ERROR, "failed NUMA pages inquiry: %m");

        /* Initialize the multi-call context, load entries about buffers */

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCacheNumaContext *) palloc(sizeof(BufferCacheNumaContext));

        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts != NUM_BUFFERCACHE_NUMA_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
                           INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /*
         * Each buffer needs at least one entry, but it might be offset in
         * some way, and use one extra entry. So we allocate space for the
         * maximum number of entries we might need, and then count the exact
         * number as we're walking buffers. That way we can do it in one
         * pass, without reallocating memory.
         */
        pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
        max_entries = NBuffers * pages_per_buffer;
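
        /*
         * For example (illustrative numbers): BLCKSZ = 8192 with 4KB OS
         * pages gives pages_per_buffer = 2 + 1 = 3, the extra entry covering
         * a buffer that starts mid-page. With 2MB huge pages the division
         * yields 0, so Max(1, ...) still reserves one entry per buffer plus
         * the alignment spare.
         */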

        /* Allocate entries for BufferCacheNumaRec records. */
        fctx->record = (BufferCacheNumaRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCacheNumaRec) * max_entries);

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        if (firstNumaTouch)
            elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         *
         * This loop touches and stores addresses into os_page_ptrs[] as
         * input to one big move_pages(2) inquiry system call. Basically we
         * ask for all memory pages for NBuffers.
         */
        startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
        idx = 0;
        for (i = 0; i < NBuffers; i++)
        {
            char       *buffptr = (char *) BufferGetBlock(i + 1);
            BufferDesc *bufHdr;
            uint32      buf_state;
            uint32      bufferid;
            int32       page_num;
            char       *startptr_buff,
                       *endptr_buff;

            CHECK_FOR_INTERRUPTS();

            bufHdr = GetBufferDescriptor(i);

            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);
            bufferid = BufferDescriptorGetBuffer(bufHdr);
            UnlockBufHdr(bufHdr, buf_state);

            /* start of the first page of this buffer */
            startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);

            /* end of the buffer (no need to align to memory page) */
            endptr_buff = buffptr + BLCKSZ;

            Assert(startptr_buff < endptr_buff);

            /* calculate ID of the first page for this buffer */
            page_num = (startptr_buff - startptr) / os_page_size;

            /* Add an entry for each OS page overlapping with this buffer. */
            for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
            {
                fctx->record[idx].bufferid = bufferid;
                fctx->record[idx].page_num = page_num;
                fctx->record[idx].numa_node = os_page_status[page_num];

                /* advance to the next entry/page */
                ++idx;
                ++page_num;
            }
        }

        Assert((idx >= os_page_count) && (idx <= max_entries));

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = idx;
        funcctx->user_fctx = fctx;

        /* Remember this backend touched the pages */
        firstNumaTouch = false;
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32      i = funcctx->call_cntr;
        Datum       values[NUM_BUFFERCACHE_NUMA_ELEM];
        bool        nulls[NUM_BUFFERCACHE_NUMA_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        values[1] = Int64GetDatum(fctx->record[i].page_num);
        nulls[1] = false;

        values[2] = Int32GetDatum(fctx->record[i].numa_node);
        nulls[2] = false;

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}
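
/*
 * Illustrative use from SQL, assuming the pg_buffercache_numa view installed
 * by the extension's SQL script: count cache pages per NUMA node.
 *
 *   SELECT numa_node, count(*)
 *     FROM pg_buffercache_numa
 *    GROUP BY numa_node;
 */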

Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
    bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];

    int32       buffers_used = 0;
    int32       buffers_unused = 0;
    int32       buffers_dirty = 0;
    int32       buffers_pinned = 0;
    int64       usagecount_total = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr;
        uint32      buf_state;

        /*
         * This function summarizes the state of all headers. Locking the
         * buffer headers wouldn't provide an improved result as the state of
         * the buffer can still change after we release the lock, and it
         * would noticeably increase the cost of the function.
         */
        bufHdr = GetBufferDescriptor(i);
        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if (buf_state & BM_VALID)
        {
            buffers_used++;
            usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                buffers_dirty++;
        }
        else
            buffers_unused++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            buffers_pinned++;
    }

    memset(nulls, 0, sizeof(nulls));
    values[0] = Int32GetDatum(buffers_used);
    values[1] = Int32GetDatum(buffers_unused);
    values[2] = Int32GetDatum(buffers_dirty);
    values[3] = Int32GetDatum(buffers_pinned);

    if (buffers_used != 0)
        values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
    else
        nulls[4] = true;

    /* Build and return the tuple. */
    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}
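
/*
 * Illustrative call from SQL (the function is exposed directly): one row
 * summarizing the whole buffer cache, with the average usage count NULL when
 * no buffers are in use.
 *
 *   SELECT * FROM pg_buffercache_summary();
 */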

Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
    int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};
    int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};
    Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
    bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};

    InitMaterializedSRF(fcinfo, 0);

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(i);
        uint32      buf_state = pg_atomic_read_u32(&bufHdr->state);
        int         usage_count;

        usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
        usage_counts[usage_count]++;

        if (buf_state & BM_DIRTY)
            dirty[usage_count]++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            pinned[usage_count]++;
    }

    for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
    {
        values[0] = Int32GetDatum(i);
        values[1] = Int32GetDatum(usage_counts[i]);
        values[2] = Int32GetDatum(dirty[i]);
        values[3] = Int32GetDatum(pinned[i]);

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    return (Datum) 0;
}
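
/*
 * Illustrative call from SQL: one row per usage count, from 0 up to
 * BM_MAX_USAGE_COUNT.
 *
 *   SELECT * FROM pg_buffercache_usage_counts();
 */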

/*
 * Helper function to check if the user has superuser privileges.
 */
static void
pg_buffercache_superuser_check(char *func_name)
{
    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to use %s()",
                        func_name)));
}

/*
 * Try to evict a shared buffer.
 */
Datum
pg_buffercache_evict(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_EVICT_ELEM];
    bool        nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};

    Buffer      buf = PG_GETARG_INT32(0);
    bool        buffer_flushed;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict");

    if (buf < 1 || buf > NBuffers)
        elog(ERROR, "bad buffer ID: %d", buf);

    values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
    values[1] = BoolGetDatum(buffer_flushed);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}
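
/*
 * Illustrative call from SQL (superuser only): try to evict buffer 1,
 * reporting whether it was evicted and whether it had to be flushed first.
 *
 *   SELECT * FROM pg_buffercache_evict(1);
 */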

/*
 * Try to evict all shared buffers of the specified relation.
 */
Datum
pg_buffercache_evict_relation(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
    bool        nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};

    Oid         relOid;
    Relation    rel;

    int32       buffers_evicted = 0;
    int32       buffers_flushed = 0;
    int32       buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict_relation");

    relOid = PG_GETARG_OID(0);

    rel = relation_open(relOid, AccessShareLock);

    if (RelationUsesLocalBuffers(rel))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
                        "pg_buffercache_evict_relation")));

    EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
                            &buffers_skipped);

    relation_close(rel, AccessShareLock);

    values[0] = Int32GetDatum(buffers_evicted);
    values[1] = Int32GetDatum(buffers_flushed);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}
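
/*
 * Illustrative call from SQL (superuser only; the argument is assumed to be
 * declared regclass in the extension's SQL script):
 *
 *   SELECT * FROM pg_buffercache_evict_relation('my_table'::regclass);
 */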


/*
 * Try to evict all shared buffers.
 */
Datum
pg_buffercache_evict_all(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
    bool        nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};

    int32       buffers_evicted = 0;
    int32       buffers_flushed = 0;
    int32       buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict_all");

    EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
                            &buffers_skipped);

    values[0] = Int32GetDatum(buffers_evicted);
    values[1] = Int32GetDatum(buffers_flushed);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}
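
/*
 * Illustrative call from SQL (superuser only):
 *
 *   SELECT * FROM pg_buffercache_evict_all();
 */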