PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgstatapprox.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pgstatapprox.c
4 * Bloat estimation functions
5 *
6 * Copyright (c) 2014-2026, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/pgstattuple/pgstatapprox.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "postgres.h"
14
15#include "access/heapam.h"
16#include "access/htup_details.h"
17#include "access/relation.h"
19#include "catalog/pg_am_d.h"
20#include "commands/vacuum.h"
21#include "funcapi.h"
22#include "miscadmin.h"
23#include "storage/bufmgr.h"
24#include "storage/freespace.h"
25#include "storage/procarray.h"
26#include "storage/read_stream.h"
27
30
32
46
47#define NUM_OUTPUT_COLUMNS 10
48
49/*
50 * Struct for statapprox_heap read stream callback.
51 */
61
62/*
63 * Read stream callback for statapprox_heap.
64 *
65 * This callback checks the visibility map for each block. If the block is
66 * all-visible, we can get the free space from the FSM without reading the
67 * actual page, and skip to the next block. Only the blocks that are not
68 * all-visible are returned for actual reading after being locked.
69 */
70static BlockNumber
72 void *callback_private_data,
73 void *per_buffer_data)
74{
76 (StatApproxReadStreamPrivate *) callback_private_data;
77
78 while (p->current_blocknum < p->nblocks)
79 {
80 BlockNumber blkno = p->current_blocknum++;
81 Size freespace;
82
84
85 /*
86 * If the page has only visible tuples, then we can find out the free
87 * space from the FSM and move on without reading the page.
88 */
89 if (VM_ALL_VISIBLE(p->rel, blkno, &p->vmbuffer))
90 {
91 freespace = GetRecordedFreeSpace(p->rel, blkno);
92 p->stat->tuple_len += BLCKSZ - freespace;
93 p->stat->free_space += freespace;
94 continue;
95 }
96
97 /* This block needs to be read */
98 p->scanned++;
99 return blkno;
100 }
101
102 return InvalidBlockNumber;
103}
104
105/*
106 * This function takes an already open relation and scans its pages,
107 * skipping those that have the corresponding visibility map bit set.
108 * For pages we skip, we find the free space from the free space map
109 * and approximate tuple_len on that basis. For the others, we count
110 * the exact number of dead tuples etc.
111 *
112 * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
113 * we do not try to avoid skipping single pages.
114 */
115static void
117{
118 BlockNumber nblocks;
119 BufferAccessStrategy bstrategy;
120 TransactionId OldestXmin;
122 ReadStream *stream;
123
124 OldestXmin = GetOldestNonRemovableTransactionId(rel);
125 bstrategy = GetAccessStrategy(BAS_BULKREAD);
126
127 nblocks = RelationGetNumberOfBlocks(rel);
128
129 /* Initialize read stream private data */
130 p.rel = rel;
131 p.stat = stat;
132 p.current_blocknum = 0;
133 p.nblocks = nblocks;
134 p.scanned = 0;
136
137 /*
138 * Create the read stream. We don't use READ_STREAM_USE_BATCHING because
139 * the callback accesses the visibility map which may need to read VM
140 * pages. While this shouldn't cause deadlocks, we err on the side of
141 * caution.
142 */
144 bstrategy,
145 rel,
148 &p,
149 0);
150
151 for (;;)
152 {
153 Buffer buf;
154 Page page;
155 OffsetNumber offnum,
156 maxoff;
157 BlockNumber blkno;
158
160 if (buf == InvalidBuffer)
161 break;
162
164
165 page = BufferGetPage(buf);
166 blkno = BufferGetBlockNumber(buf);
167
168 stat->free_space += PageGetExactFreeSpace(page);
169
170 if (PageIsNew(page) || PageIsEmpty(page))
171 {
173 continue;
174 }
175
176 /*
177 * Look at each tuple on the page and decide whether it's live or
178 * dead, then count it and its size. Unlike lazy_scan_heap, we can
179 * afford to ignore problems and special cases.
180 */
181 maxoff = PageGetMaxOffsetNumber(page);
182
183 for (offnum = FirstOffsetNumber;
184 offnum <= maxoff;
185 offnum = OffsetNumberNext(offnum))
186 {
187 ItemId itemid;
188 HeapTupleData tuple;
189
190 itemid = PageGetItemId(page, offnum);
191
192 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
193 ItemIdIsDead(itemid))
194 {
195 continue;
196 }
197
198 Assert(ItemIdIsNormal(itemid));
199
200 ItemPointerSet(&(tuple.t_self), blkno, offnum);
201
202 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
203 tuple.t_len = ItemIdGetLength(itemid);
204 tuple.t_tableOid = RelationGetRelid(rel);
205
206 /*
207 * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
208 * as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't
209 * bother distinguishing tuples inserted/deleted by our own
210 * transaction.
211 */
212 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
213 {
214 case HEAPTUPLE_LIVE:
216 stat->tuple_len += tuple.t_len;
217 stat->tuple_count++;
218 break;
219 case HEAPTUPLE_DEAD:
222 stat->dead_tuple_len += tuple.t_len;
223 stat->dead_tuple_count++;
224 break;
225 default:
226 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
227 break;
228 }
229 }
230
232 }
233
234 Assert(p.current_blocknum == nblocks);
235 read_stream_end(stream);
236
237 stat->table_len = (uint64) nblocks * BLCKSZ;
238
239 /*
240 * We don't know how many tuples are in the pages we didn't scan, so
241 * extrapolate the live-tuple count to the whole table in the same way
242 * that VACUUM does. (Like VACUUM, we're not taking a random sample, so
243 * just extrapolating linearly seems unsafe.) There should be no dead
244 * tuples in all-visible pages, so no correction is needed for that, and
245 * we already accounted for the space in those pages, too.
246 */
247 stat->tuple_count = vac_estimate_reltuples(rel, nblocks, p.scanned,
248 stat->tuple_count);
249
250 /* It's not clear if we could get -1 here, but be safe. */
251 stat->tuple_count = Max(stat->tuple_count, 0);
252
253 /*
254 * Calculate percentages if the relation has one or more pages.
255 */
256 if (nblocks != 0)
257 {
258 stat->scanned_percent = 100.0 * p.scanned / nblocks;
259 stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
260 stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
261 stat->free_percent = 100.0 * stat->free_space / stat->table_len;
262 }
263
264 if (BufferIsValid(p.vmbuffer))
265 {
268 }
269}
270
271/*
272 * Returns estimated live/dead tuple statistics for the given relid.
273 *
274 * The superuser() check here must be kept as the library might be upgraded
275 * without the extension being upgraded, meaning that in pre-1.5 installations
276 * these functions could be called by any user.
277 */
278Datum
280{
281 Oid relid = PG_GETARG_OID(0);
282
283 if (!superuser())
286 errmsg("must be superuser to use pgstattuple functions")));
287
289}
290
291/*
292 * As of pgstattuple version 1.5, we no longer need to check if the user
293 * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
294 * Users can then grant access to it based on their policies.
295 *
296 * Otherwise identical to pgstattuple_approx (above).
297 */
298Datum
305
306Datum
308{
309 Relation rel;
310 output_type stat = {0};
311 TupleDesc tupdesc;
312 bool nulls[NUM_OUTPUT_COLUMNS];
314 HeapTuple ret;
315 int i = 0;
316
317 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
318 elog(ERROR, "return type must be a row type");
319
320 if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
321 elog(ERROR, "incorrect number of output arguments");
322
323 rel = relation_open(relid, AccessShareLock);
324
325 /*
326 * Reject attempts to read non-local temporary relations; we would be
327 * likely to get wrong data since we have no visibility into the owning
328 * session's local buffers.
329 */
330 if (RELATION_IS_OTHER_TEMP(rel))
333 errmsg("cannot access temporary tables of other sessions")));
334
335 /*
336 * We support only relation kinds with a visibility map and a free space
337 * map.
338 */
339 if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
340 rel->rd_rel->relkind == RELKIND_MATVIEW ||
341 rel->rd_rel->relkind == RELKIND_TOASTVALUE))
344 errmsg("relation \"%s\" is of wrong relation kind",
347
348 if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
350 errmsg("only heap AM is supported")));
351
352 statapprox_heap(rel, &stat);
353
355
356 memset(nulls, 0, sizeof(nulls));
357
358 values[i++] = Int64GetDatum(stat.table_len);
359 values[i++] = Float8GetDatum(stat.scanned_percent);
360 values[i++] = Int64GetDatum(stat.tuple_count);
361 values[i++] = Int64GetDatum(stat.tuple_len);
362 values[i++] = Float8GetDatum(stat.tuple_percent);
363 values[i++] = Int64GetDatum(stat.dead_tuple_count);
364 values[i++] = Int64GetDatum(stat.dead_tuple_len);
365 values[i++] = Float8GetDatum(stat.dead_tuple_percent);
366 values[i++] = Int64GetDatum(stat.free_space);
367 values[i++] = Float8GetDatum(stat.free_percent);
368
369 ret = heap_form_tuple(tupdesc, values, nulls);
370 return HeapTupleGetDatum(ret);
371}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static Datum values[MAXATTR]
Definition bootstrap.c:188
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5505
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5522
@ BAS_BULKREAD
Definition bufmgr.h:37
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
Size PageGetExactFreeSpace(const PageData *page)
Definition bufpage.c:957
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:249
static bool PageIsNew(const PageData *page)
Definition bufpage.h:259
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
PageData * Page
Definition bufpage.h:81
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
uint64_t uint64
Definition c.h:619
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition freespace.c:244
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:140
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:141
@ HEAPTUPLE_LIVE
Definition heapam.h:139
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:142
@ HEAPTUPLE_DEAD
Definition heapam.h:138
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1037
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
#define AccessShareLock
Definition lockdefs.h:36
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static char * errmsg
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
int errdetail_relkind_not_supported(char relkind)
Definition pg_class.c:24
static char buf[DEFAULT_XLOG_SEG_SIZE]
static void statapprox_heap(Relation rel, output_type *stat)
Datum pgstattuple_approx(PG_FUNCTION_ARGS)
#define NUM_OUTPUT_COLUMNS
Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
Datum pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
static BlockNumber statapprox_heap_read_stream_next(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
uint64_t Datum
Definition postgres.h:70
static Datum Float8GetDatum(float8 X)
Definition postgres.h:502
unsigned int Oid
static int fb(int x)
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition procarray.c:1952
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
#define READ_STREAM_FULL
Definition read_stream.h:43
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:667
@ MAIN_FORKNUM
Definition relpath.h:58
void relation_close(Relation relation, LOCKMODE lockmode)
Definition relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition relation.c:47
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
Form_pg_class rd_rel
Definition rel.h:111
uint64 dead_tuple_count
double free_percent
double tuple_percent
double dead_tuple_percent
uint64 dead_tuple_len
uint64 tuple_count
uint64 free_space
uint64 table_len
uint64 tuple_len
double scanned_percent
bool superuser(void)
Definition superuser.c:47
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition vacuum.c:1330
#define VM_ALL_VISIBLE(r, b, v)
#define stat
Definition win32_port.h:74