PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
btreefuncs.c
Go to the documentation of this file.
1 /*
2  * contrib/pageinspect/btreefuncs.c
3  *
4  *
5  * btreefuncs.c
6  *
7  * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8  *
9  * Permission to use, copy, modify, and distribute this software and
10  * its documentation for any purpose, without fee, and without a
11  * written agreement is hereby granted, provided that the above
12  * copyright notice and this paragraph and the following two
13  * paragraphs appear in all copies.
14  *
15  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19  * OF THE POSSIBILITY OF SUCH DAMAGE.
20  *
21  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26  */
27 
28 #include "postgres.h"
29 
30 #include "pageinspect.h"
31 
32 #include "access/nbtree.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_am.h"
35 #include "funcapi.h"
36 #include "miscadmin.h"
37 #include "utils/builtins.h"
38 #include "utils/rel.h"
39 #include "utils/varlena.h"
40 
41 
45 
/*
 * Relation-kind predicates.  Each dereferences (r)->rd_rel and evaluates its
 * argument exactly once.
 */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * CHECK_RELATION_BLOCK_RANGE(rel, blkno)
 *
 * Report "block number out of range" through elog(ERROR, ...) when blkno is
 * not below RelationGetNumberOfBlocks(rel).
 *
 * note: BlockNumber is unsigned, hence can't be negative, so only the upper
 * bound is checked.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement: "CHECK_RELATION_BLOCK_RANGE(rel, blkno);" then remains
 * valid as the unbraced branch of an if/else.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
53 
54 /* ------------------------------------------------
55  * structure holding the statistics of a single btree page
56  * ------------------------------------------------
57  */
58 typedef struct BTPageStat
59 {
67  char type;
68 
69  /* opaque data */
72  union
73  {
76  } btpo;
79 } BTPageStat;
80 
81 
82 /* -------------------------------------------------
83  * GetBTPageStatistics()
84  *
85  * Collect statistics for a single b-tree page
86  * -------------------------------------------------
87  */
88 static void
90 {
91  Page page = BufferGetPage(buffer);
92  PageHeader phdr = (PageHeader) page;
93  OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
95  int item_size = 0;
96  int off;
97 
98  stat->blkno = blkno;
99 
100  stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
101 
102  stat->dead_items = stat->live_items = 0;
103 
104  stat->page_size = PageGetPageSize(page);
105 
106  /* page type (flags) */
107  if (P_ISDELETED(opaque))
108  {
109  stat->type = 'd';
110  stat->btpo.xact = opaque->btpo.xact;
111  return;
112  }
113  else if (P_IGNORE(opaque))
114  stat->type = 'e';
115  else if (P_ISLEAF(opaque))
116  stat->type = 'l';
117  else if (P_ISROOT(opaque))
118  stat->type = 'r';
119  else
120  stat->type = 'i';
121 
122  /* btpage opaque data */
123  stat->btpo_prev = opaque->btpo_prev;
124  stat->btpo_next = opaque->btpo_next;
125  stat->btpo.level = opaque->btpo.level;
126  stat->btpo_flags = opaque->btpo_flags;
127  stat->btpo_cycleid = opaque->btpo_cycleid;
128 
129  /* count live and dead tuples, and free space */
130  for (off = FirstOffsetNumber; off <= maxoff; off++)
131  {
132  IndexTuple itup;
133 
134  ItemId id = PageGetItemId(page, off);
135 
136  itup = (IndexTuple) PageGetItem(page, id);
137 
138  item_size += IndexTupleSize(itup);
139 
140  if (!ItemIdIsDead(id))
141  stat->live_items++;
142  else
143  stat->dead_items++;
144  }
145  stat->free_size = PageGetFreeSpace(page);
146 
147  if ((stat->live_items + stat->dead_items) > 0)
148  stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
149  else
150  stat->avg_item_size = 0;
151 }
152 
153 /* -----------------------------------------------
154  * bt_page_stats()
155  *
156  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
157  * -----------------------------------------------
158  */
159 Datum
161 {
162  text *relname = PG_GETARG_TEXT_PP(0);
163  uint32 blkno = PG_GETARG_UINT32(1);
164  Buffer buffer;
165  Relation rel;
166  RangeVar *relrv;
167  Datum result;
168  HeapTuple tuple;
170  int j;
171  char *values[11];
172  BTPageStat stat;
173 
174  if (!superuser())
175  ereport(ERROR,
176  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
177  (errmsg("must be superuser to use pageinspect functions"))));
178 
180  rel = relation_openrv(relrv, AccessShareLock);
181 
182  if (!IS_INDEX(rel) || !IS_BTREE(rel))
183  elog(ERROR, "relation \"%s\" is not a btree index",
185 
186  /*
187  * Reject attempts to read non-local temporary relations; we would be
188  * likely to get wrong data since we have no visibility into the owning
189  * session's local buffers.
190  */
191  if (RELATION_IS_OTHER_TEMP(rel))
192  ereport(ERROR,
193  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
194  errmsg("cannot access temporary tables of other sessions")));
195 
196  if (blkno == 0)
197  elog(ERROR, "block 0 is a meta page");
198 
199  CHECK_RELATION_BLOCK_RANGE(rel, blkno);
200 
201  buffer = ReadBuffer(rel, blkno);
202  LockBuffer(buffer, BUFFER_LOCK_SHARE);
203 
204  /* keep compiler quiet */
205  stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
206  stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
207 
208  GetBTPageStatistics(blkno, buffer, &stat);
209 
210  UnlockReleaseBuffer(buffer);
212 
213  /* Build a tuple descriptor for our result type */
214  if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
215  elog(ERROR, "return type must be a row type");
216 
217  j = 0;
218  values[j++] = psprintf("%d", stat.blkno);
219  values[j++] = psprintf("%c", stat.type);
220  values[j++] = psprintf("%d", stat.live_items);
221  values[j++] = psprintf("%d", stat.dead_items);
222  values[j++] = psprintf("%d", stat.avg_item_size);
223  values[j++] = psprintf("%d", stat.page_size);
224  values[j++] = psprintf("%d", stat.free_size);
225  values[j++] = psprintf("%d", stat.btpo_prev);
226  values[j++] = psprintf("%d", stat.btpo_next);
227  values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
228  values[j++] = psprintf("%d", stat.btpo_flags);
229 
231  values);
232 
233  result = HeapTupleGetDatum(tuple);
234 
235  PG_RETURN_DATUM(result);
236 }
237 
238 /*-------------------------------------------------------
239  * bt_page_items()
240  *
241  * Get the set of IndexTupleData items stored in a btree page
242  *
243  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
244  *-------------------------------------------------------
245  */
246 
247 /*
248  * cross-call data structure for SRF
249  */
250 struct user_args
251 {
254 };
255 
256 Datum
258 {
259  text *relname = PG_GETARG_TEXT_PP(0);
260  uint32 blkno = PG_GETARG_UINT32(1);
261  Datum result;
262  char *values[6];
263  HeapTuple tuple;
264  FuncCallContext *fctx;
265  MemoryContext mctx;
266  struct user_args *uargs;
267 
268  if (!superuser())
269  ereport(ERROR,
270  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
271  (errmsg("must be superuser to use pageinspect functions"))));
272 
273  if (SRF_IS_FIRSTCALL())
274  {
275  RangeVar *relrv;
276  Relation rel;
277  Buffer buffer;
278  BTPageOpaque opaque;
280 
281  fctx = SRF_FIRSTCALL_INIT();
282 
284  rel = relation_openrv(relrv, AccessShareLock);
285 
286  if (!IS_INDEX(rel) || !IS_BTREE(rel))
287  elog(ERROR, "relation \"%s\" is not a btree index",
289 
290  /*
291  * Reject attempts to read non-local temporary relations; we would be
292  * likely to get wrong data since we have no visibility into the
293  * owning session's local buffers.
294  */
295  if (RELATION_IS_OTHER_TEMP(rel))
296  ereport(ERROR,
297  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298  errmsg("cannot access temporary tables of other sessions")));
299 
300  if (blkno == 0)
301  elog(ERROR, "block 0 is a meta page");
302 
303  CHECK_RELATION_BLOCK_RANGE(rel, blkno);
304 
305  buffer = ReadBuffer(rel, blkno);
306  LockBuffer(buffer, BUFFER_LOCK_SHARE);
307 
308  /*
309  * We copy the page into local storage to avoid holding pin on the
310  * buffer longer than we must, and possibly failing to release it at
311  * all if the calling query doesn't fetch all rows.
312  */
314 
315  uargs = palloc(sizeof(struct user_args));
316 
317  uargs->page = palloc(BLCKSZ);
318  memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
319 
320  UnlockReleaseBuffer(buffer);
322 
323  uargs->offset = FirstOffsetNumber;
324 
325  opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
326 
327  if (P_ISDELETED(opaque))
328  elog(NOTICE, "page is deleted");
329 
330  fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
331 
332  /* Build a tuple descriptor for our result type */
333  if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
334  elog(ERROR, "return type must be a row type");
335 
336  fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
337 
338  fctx->user_fctx = uargs;
339 
340  MemoryContextSwitchTo(mctx);
341  }
342 
343  fctx = SRF_PERCALL_SETUP();
344  uargs = fctx->user_fctx;
345 
346  if (fctx->call_cntr < fctx->max_calls)
347  {
348  ItemId id;
349  IndexTuple itup;
350  int j;
351  int off;
352  int dlen;
353  char *dump;
354  char *ptr;
355 
356  id = PageGetItemId(uargs->page, uargs->offset);
357 
358  if (!ItemIdIsValid(id))
359  elog(ERROR, "invalid ItemId");
360 
361  itup = (IndexTuple) PageGetItem(uargs->page, id);
362 
363  j = 0;
364  values[j++] = psprintf("%d", uargs->offset);
365  values[j++] = psprintf("(%u,%u)",
367  itup->t_tid.ip_posid);
368  values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
369  values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
370  values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
371 
372  ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
373  dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
374  dump = palloc0(dlen * 3 + 1);
375  values[j] = dump;
376  for (off = 0; off < dlen; off++)
377  {
378  if (off > 0)
379  *dump++ = ' ';
380  sprintf(dump, "%02x", *(ptr + off) & 0xff);
381  dump += 2;
382  }
383 
384  tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
385  result = HeapTupleGetDatum(tuple);
386 
387  uargs->offset = uargs->offset + 1;
388 
389  SRF_RETURN_NEXT(fctx, result);
390  }
391  else
392  {
393  pfree(uargs->page);
394  pfree(uargs);
395  SRF_RETURN_DONE(fctx);
396  }
397 }
398 
399 
400 /* ------------------------------------------------
401  * bt_metap()
402  *
403  * Get a btree's meta-page information
404  *
405  * Usage: SELECT * FROM bt_metap('t1_pkey');
406  * ------------------------------------------------
407  */
408 Datum
410 {
411  text *relname = PG_GETARG_TEXT_PP(0);
412  Datum result;
413  Relation rel;
414  RangeVar *relrv;
415  BTMetaPageData *metad;
417  int j;
418  char *values[6];
419  Buffer buffer;
420  Page page;
421  HeapTuple tuple;
422 
423  if (!superuser())
424  ereport(ERROR,
425  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
426  (errmsg("must be superuser to use pageinspect functions"))));
427 
429  rel = relation_openrv(relrv, AccessShareLock);
430 
431  if (!IS_INDEX(rel) || !IS_BTREE(rel))
432  elog(ERROR, "relation \"%s\" is not a btree index",
434 
435  /*
436  * Reject attempts to read non-local temporary relations; we would be
437  * likely to get wrong data since we have no visibility into the owning
438  * session's local buffers.
439  */
440  if (RELATION_IS_OTHER_TEMP(rel))
441  ereport(ERROR,
442  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
443  errmsg("cannot access temporary tables of other sessions")));
444 
445  buffer = ReadBuffer(rel, 0);
446  LockBuffer(buffer, BUFFER_LOCK_SHARE);
447 
448  page = BufferGetPage(buffer);
449  metad = BTPageGetMeta(page);
450 
451  /* Build a tuple descriptor for our result type */
452  if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
453  elog(ERROR, "return type must be a row type");
454 
455  j = 0;
456  values[j++] = psprintf("%d", metad->btm_magic);
457  values[j++] = psprintf("%d", metad->btm_version);
458  values[j++] = psprintf("%d", metad->btm_root);
459  values[j++] = psprintf("%d", metad->btm_level);
460  values[j++] = psprintf("%d", metad->btm_fastroot);
461  values[j++] = psprintf("%d", metad->btm_fastlevel);
462 
464  values);
465 
466  result = HeapTupleGetDatum(tuple);
467 
468  UnlockReleaseBuffer(buffer);
470 
471  PG_RETURN_DATUM(result);
472 }
uint32 avg_item_size
Definition: btreefuncs.c:66
struct BTPageStat BTPageStat
uint64 call_cntr
Definition: funcapi.h:65
#define PG_GETARG_UINT32(n)
Definition: fmgr.h:235
union BTPageStat::@1 btpo
#define BlockIdGetBlockNumber(blockId)
Definition: block.h:115
Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
Definition: heapam.c:1195
#define IndexInfoFindDataOffset(t_info)
Definition: itup.h:80
BlockNumber btpo_next
Definition: nbtree.h:57
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:211
uint32 dead_items
Definition: btreefuncs.c:62
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno)
Definition: btreefuncs.c:50
Datum bt_page_items(PG_FUNCTION_ARGS)
Definition: btreefuncs.c:257
#define P_IGNORE(opaque)
Definition: nbtree.h:181
Datum bt_page_stats(PG_FUNCTION_ARGS)
Definition: btreefuncs.c:160
uint32 TransactionId
Definition: c.h:397
uint32 btm_version
Definition: nbtree.h:99
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:285
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
uint32 btm_magic
Definition: nbtree.h:98
ItemPointerData t_tid
Definition: itup.h:37
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
uint32 blkno
Definition: btreefuncs.c:60
int errcode(int sqlerrcode)
Definition: elog.c:575
#define IS_BTREE(r)
Definition: btreefuncs.c:47
bool superuser(void)
Definition: superuser.c:47
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: heapam.c:1263
union BTPageOpaqueData::@40 btpo
return result
Definition: formatting.c:1618
uint32 BlockNumber
Definition: block.h:31
char type
Definition: btreefuncs.c:67
#define IndexTupleHasNulls(itup)
Definition: itup.h:72
RangeVar * makeRangeVarFromNameList(List *names)
Definition: namespace.c:2913
#define SizeOfPageHeaderData
Definition: bufpage.h:213
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:289
TransactionId xact
Definition: nbtree.h:61
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:582
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:1115
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
uint16 OffsetNumber
Definition: off.h:24
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:291
BlockNumber btm_fastroot
Definition: nbtree.h:102
unsigned short uint16
Definition: c.h:267
void pfree(void *pointer)
Definition: mcxt.c:950
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
OffsetNumber offset
Definition: btreefuncs.c:253
uint16 BTCycleId
Definition: nbtree.h:26
BlockIdData ip_blkid
Definition: itemptr.h:38
BlockNumber btpo_next
Definition: btreefuncs.c:71
BTCycleId btpo_cycleid
Definition: nbtree.h:64
#define BTPageGetMeta(p)
Definition: nbtree.h:106
BlockNumber btpo_prev
Definition: nbtree.h:56
Datum bt_metap(PG_FUNCTION_ARGS)
Definition: btreefuncs.c:409
#define FirstOffsetNumber
Definition: off.h:27
AttInMetadata * attinmeta
Definition: funcapi.h:99
IndexTupleData * IndexTuple
Definition: itup.h:53
#define PageGetPageSize(page)
Definition: bufpage.h:265
#define RelationGetRelationName(relation)
Definition: rel.h:437
unsigned int uint32
Definition: c.h:268
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define P_ISDELETED(opaque)
Definition: nbtree.h:179
#define P_ISROOT(opaque)
Definition: nbtree.h:178
List * textToQualifiedNameList(text *textval)
Definition: varlena.c:3182
uint32 live_items
Definition: btreefuncs.c:61
TransactionId xact
Definition: btreefuncs.c:75
LocationIndex pd_special
Definition: bufpage.h:156
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
uint32 btm_fastlevel
Definition: nbtree.h:103
uint32 level
Definition: nbtree.h:60
void * palloc0(Size size)
Definition: mcxt.c:878
BlockNumber btpo_prev
Definition: btreefuncs.c:70
uintptr_t Datum
Definition: postgres.h:372
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:313
uint32 free_size
Definition: btreefuncs.c:65
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:1068
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
BlockNumber btm_root
Definition: nbtree.h:100
Page page
Definition: btreefuncs.c:252
#define ItemIdIsValid(itemId)
Definition: itemid.h:85
#define NOTICE
Definition: elog.h:37
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define NULL
Definition: c.h:229
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:534
uint32 max_avail
Definition: btreefuncs.c:64
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:207
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:109
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
#define InvalidBlockNumber
Definition: block.h:33
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:222
BTCycleId btpo_cycleid
Definition: btreefuncs.c:78
uint16 btpo_flags
Definition: btreefuncs.c:77
#define IndexTupleHasVarwidths(itup)
Definition: itup.h:73
static void GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
Definition: btreefuncs.c:89
static Datum values[MAXATTR]
Definition: bootstrap.c:162
PG_FUNCTION_INFO_V1(bt_metap)
void * user_fctx
Definition: funcapi.h:90
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
uint32 btm_level
Definition: nbtree.h:101
uint32 page_size
Definition: btreefuncs.c:63
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define elog
Definition: elog.h:219
unsigned short t_info
Definition: itup.h:49
uint32 level
Definition: btreefuncs.c:74
uint16 btpo_flags
Definition: nbtree.h:63
int Buffer
Definition: buf.h:23
OffsetNumber ip_posid
Definition: itemptr.h:39
uint64 max_calls
Definition: funcapi.h:74
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define IndexTupleSize(itup)
Definition: itup.h:70
#define IS_INDEX(r)
Definition: btreefuncs.c:46
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:309
#define P_ISLEAF(opaque)
Definition: nbtree.h:177
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:287