PostgreSQL Source Code  git master
hashovfl.c File Reference
#include "postgres.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "miscadmin.h"
#include "utils/rel.h"

Functions

static uint32 _hash_firstfreebit (uint32 map)
 
static BlockNumber bitno_to_blkno (HashMetaPage metap, uint32 ovflbitnum)
 
uint32 _hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)
 
Buffer _hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
BlockNumber _hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
 
void _hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)
 
void _hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
 

Function Documentation

Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)

Definition at line 111 of file hashovfl.c.

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert, bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid, xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, xl_hash_add_ovfl_page::bmsize, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, HASH_MAX_BITMAPS, HASH_WRITE, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, i, InvalidBlockNumber, InvalidBuffer, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), NULL, PageGetSpecialPointer, PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().
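
A minimal caller sketch may help here; this is not PostgreSQL source, and example_extend_chain is a hypothetical name. It illustrates the contract implied by the listing below: the chain page passed in must be pinned and write-locked, and the returned overflow page comes back pinned, write-locked, and initialized, ready to receive tuples.

static Buffer
example_extend_chain(Relation rel, Buffer metabuf, Buffer pagebuf, Buffer bucket_buf)
{
    /*
     * Any page of the bucket chain may be passed in; _hash_addovflpage()
     * walks to the current tail itself.  Mirroring _hash_doinsert(),
     * retain_pin is true only when the passed page is the primary bucket
     * page, whose pin must survive the call.
     */
    Buffer ovflbuf = _hash_addovflpage(rel, metabuf, pagebuf,
                                       (pagebuf == bucket_buf) ? true : false);

    /* ovflbuf is pinned and write-locked; add tuples, then release it. */
    return ovflbuf;
}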

112 {
113  Buffer ovflbuf;
114  Page page;
115  Page ovflpage;
116  HashPageOpaque pageopaque;
117  HashPageOpaque ovflopaque;
118  HashMetaPage metap;
119  Buffer mapbuf = InvalidBuffer;
120  Buffer newmapbuf = InvalidBuffer;
121  BlockNumber blkno;
122  uint32 orig_firstfree;
123  uint32 splitnum;
124  uint32 *freep = NULL;
125  uint32 max_ovflpg;
126  uint32 bit;
127  uint32 bitmap_page_bit;
128  uint32 first_page;
129  uint32 last_bit;
130  uint32 last_page;
131  uint32 i,
132  j;
133  bool page_found = false;
134 
135  /*
136  * Write-lock the tail page. Here, we need to maintain locking order such
137  * that, first acquire the lock on tail page of bucket, then on meta page
138  * to find and lock the bitmap page and if it is found, then lock on meta
139  * page is released, then finally acquire the lock on new overflow buffer.
140  * We need this locking order to avoid deadlock with backends that are
141  * doing inserts.
142  *
143  * Note: We could have avoided locking many buffers here if we made two
144  * WAL records for acquiring an overflow page (one to allocate an overflow
145  * page and another to add it to overflow bucket chain). However, doing
146  * so can leak an overflow page, if the system crashes after allocation.
147  * Needless to say, it is better to have a single record from a
148  * performance point of view as well.
149  */
150  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
151 
152  /* probably redundant... */
153  _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
154 
155  /* loop to find current tail page, in case someone else inserted too */
156  for (;;)
157  {
158  BlockNumber nextblkno;
159 
160  page = BufferGetPage(buf);
161  pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
162  nextblkno = pageopaque->hasho_nextblkno;
163 
164  if (!BlockNumberIsValid(nextblkno))
165  break;
166 
167  /* we assume we do not need to write the unmodified page */
168  if (retain_pin)
169  {
170  /* pin will be retained only for the primary bucket page */
171  Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
172  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
173  }
174  else
175  _hash_relbuf(rel, buf);
176 
177  retain_pin = false;
178 
179  buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
180  }
181 
182  /* Get exclusive lock on the meta page */
183  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
184 
185  _hash_checkpage(rel, metabuf, LH_META_PAGE);
186  metap = HashPageGetMeta(BufferGetPage(metabuf));
187 
188  /* start search at hashm_firstfree */
189  orig_firstfree = metap->hashm_firstfree;
190  first_page = orig_firstfree >> BMPG_SHIFT(metap);
191  bit = orig_firstfree & BMPG_MASK(metap);
192  i = first_page;
193  j = bit / BITS_PER_MAP;
194  bit &= ~(BITS_PER_MAP - 1);
195 
196  /* outer loop iterates once per bitmap page */
197  for (;;)
198  {
199  BlockNumber mapblkno;
200  Page mappage;
201  uint32 last_inpage;
202 
203  /* want to end search with the last existing overflow page */
204  splitnum = metap->hashm_ovflpoint;
205  max_ovflpg = metap->hashm_spares[splitnum] - 1;
206  last_page = max_ovflpg >> BMPG_SHIFT(metap);
207  last_bit = max_ovflpg & BMPG_MASK(metap);
208 
209  if (i > last_page)
210  break;
211 
212  Assert(i < metap->hashm_nmaps);
213  mapblkno = metap->hashm_mapp[i];
214 
215  if (i == last_page)
216  last_inpage = last_bit;
217  else
218  last_inpage = BMPGSZ_BIT(metap) - 1;
219 
220  /* Release exclusive lock on metapage while reading bitmap page */
221  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
222 
223  mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
224  mappage = BufferGetPage(mapbuf);
225  freep = HashPageGetBitmap(mappage);
226 
227  for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
228  {
229  if (freep[j] != ALL_SET)
230  {
231  page_found = true;
232 
233  /* Reacquire exclusive lock on the meta page */
234  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
235 
236  /* convert bit to bit number within page */
237  bit += _hash_firstfreebit(freep[j]);
238  bitmap_page_bit = bit;
239 
240  /* convert bit to absolute bit number */
241  bit += (i << BMPG_SHIFT(metap));
242  /* Calculate address of the recycled overflow page */
243  blkno = bitno_to_blkno(metap, bit);
244 
245  /* Fetch and init the recycled page */
246  ovflbuf = _hash_getinitbuf(rel, blkno);
247 
248  goto found;
249  }
250  }
251 
252  /* No free space here, try to advance to next map page */
253  _hash_relbuf(rel, mapbuf);
254  mapbuf = InvalidBuffer;
255  i++;
256  j = 0; /* scan from start of next map page */
257  bit = 0;
258 
259  /* Reacquire exclusive lock on the meta page */
260  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
261  }
262 
263  /*
264  * No free pages --- have to extend the relation to add an overflow page.
265  * First, check to see if we have to add a new bitmap page too.
266  */
267  if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
268  {
269  /*
270  * We create the new bitmap page with all pages marked "in use".
271  * Actually two pages in the new bitmap's range will exist
272  * immediately: the bitmap page itself, and the following page which
273  * is the one we return to the caller. Both of these are correctly
274  * marked "in use". Subsequent pages do not exist yet, but it is
275  * convenient to pre-mark them as "in use" too.
276  */
277  bit = metap->hashm_spares[splitnum];
278 
279  /* metapage already has a write lock */
280  if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
281  ereport(ERROR,
282  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
283  errmsg("out of overflow pages in hash index \"%s\"",
284  RelationGetRelationName(rel))));
285 
286  newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
287  }
288  else
289  {
290  /*
291  * Nothing to do here; since the page will be past the last used page,
292  * we know its bitmap bit was preinitialized to "in use".
293  */
294  }
295 
296  /* Calculate address of the new overflow page */
297  bit = BufferIsValid(newmapbuf) ?
298  metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
299  blkno = bitno_to_blkno(metap, bit);
300 
301  /*
302  * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
303  * relation length stays in sync with ours. XXX It's annoying to do this
304  * with metapage write lock held; would be better to use a lock that
305  * doesn't block incoming searches.
306  *
307  * It is okay to hold two buffer locks here (one on tail page of bucket
308  * and other on new overflow page) since there cannot be anyone else
309  * contending for access to ovflbuf.
310  */
311  ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
312 
313 found:
314 
315  /*
316  * Do the update. No ereport(ERROR) until changes are logged. We want to
317  * log the changes for bitmap page and overflow page together to avoid
318  * loss of pages in case the new page is added.
319  */
320  START_CRIT_SECTION();
321 
322  if (page_found)
323  {
324  Assert(BufferIsValid(mapbuf));
325 
326  /* mark page "in use" in the bitmap */
327  SETBIT(freep, bitmap_page_bit);
328  MarkBufferDirty(mapbuf);
329  }
330  else
331  {
332  /* update the count to indicate new overflow page is added */
333  metap->hashm_spares[splitnum]++;
334 
335  if (BufferIsValid(newmapbuf))
336  {
337  _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
338  MarkBufferDirty(newmapbuf);
339 
340  /* add the new bitmap page to the metapage's list of bitmaps */
341  metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
342  metap->hashm_nmaps++;
343  metap->hashm_spares[splitnum]++;
344  MarkBufferDirty(metabuf);
345  }
346 
347  /*
348  * for new overflow page, we don't need to explicitly set the bit in
349  * bitmap page, as by default that will be set to "in use".
350  */
351  }
352 
353  /*
354  * Adjust hashm_firstfree to avoid redundant searches. But don't risk
355  * changing it if someone moved it while we were searching bitmap pages.
356  */
357  if (metap->hashm_firstfree == orig_firstfree)
358  {
359  metap->hashm_firstfree = bit + 1;
360  MarkBufferDirty(metabuf);
361  }
362 
363  /* initialize new overflow page */
364  ovflpage = BufferGetPage(ovflbuf);
365  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
366  ovflopaque->hasho_prevblkno = InvalidBlockNumber;
367  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
368  ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
369  ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
370  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
371 
372  MarkBufferDirty(ovflbuf);
373 
374  /* logically chain overflow page to previous page */
375  pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
376 
377  MarkBufferDirty(buf);
378 
379  /* XLOG stuff */
380  if (RelationNeedsWAL(rel))
381  {
382  XLogRecPtr recptr;
383  xl_hash_add_ovfl_page xlrec;
384 
385  xlrec.bmpage_found = page_found;
386  xlrec.bmsize = metap->hashm_bmsize;
387 
388  XLogBeginInsert();
389  XLogRegisterData((char *) &xlrec, SizeOfHashAddOvflPage);
390 
391  XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
392  XLogRegisterBufData(0, (char *) &pageopaque->hasho_bucket, sizeof(Bucket));
393 
394  XLogRegisterBuffer(1, ovflbuf, REGBUF_WILL_INIT);
395 
396  if (BufferIsValid(mapbuf))
397  {
398  XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
399  XLogRegisterBufData(2, (char *) &bitmap_page_bit, sizeof(uint32));
400  }
401 
402  if (BufferIsValid(newmapbuf))
403  XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
404 
405  XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
406  XLogRegisterBufData(4, (char *) &metap->hashm_firstfree, sizeof(uint32));
407 
408  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
409 
410  PageSetLSN(BufferGetPage(ovflbuf), recptr);
411  PageSetLSN(BufferGetPage(buf), recptr);
412 
413  if (BufferIsValid(mapbuf))
414  PageSetLSN(BufferGetPage(mapbuf), recptr);
415 
416  if (BufferIsValid(newmapbuf))
417  PageSetLSN(BufferGetPage(newmapbuf), recptr);
418 
419  PageSetLSN(BufferGetPage(metabuf), recptr);
420  }
421 
422  END_CRIT_SECTION();
423 
424  if (retain_pin)
425  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
426  else
427  _hash_relbuf(rel, buf);
428 
429  if (BufferIsValid(mapbuf))
430  _hash_relbuf(rel, mapbuf);
431 
432  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
433 
434  if (BufferIsValid(newmapbuf))
435  _hash_relbuf(rel, newmapbuf);
436 
437  return ovflbuf;
438 }
static uint32 _hash_firstfreebit(uint32 map)

Definition at line 446 of file hashovfl.c.

References BITS_PER_MAP, elog, ERROR, and i.

Referenced by _hash_addovflpage().
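
The loop below scans the word from its low-order bit and returns the position of the first clear bit. A standalone illustration of the same logic (not PostgreSQL source; first_free_bit is a hypothetical name):

#include <stdint.h>
#include <stdio.h>

static uint32_t
first_free_bit(uint32_t map)
{
    uint32_t i;

    /* test bits 0..31 in turn; a clear bit marks a free overflow page */
    for (i = 0; i < 32; i++)
        if (!(map & ((uint32_t) 1 << i)))
            return i;
    return 32;                  /* all bits set; the real code raises elog(ERROR) */
}

int
main(void)
{
    printf("%u\n", first_free_bit(0x00000007));    /* prints 3 */
    return 0;
}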

447 {
448  uint32 i,
449  mask;
450 
451  mask = 0x1;
452  for (i = 0; i < BITS_PER_MAP; i++)
453  {
454  if (!(mask & map))
455  return i;
456  mask <<= 1;
457  }
458 
459  elog(ERROR, "firstfreebit found no free bit");
460 
461  return 0; /* keep compiler quiet */
462 }
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)

Definition at line 488 of file hashovfl.c.

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid, BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, BufferIsValid, CLRBIT, elog, END_CRIT_SECTION, ERROR, HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, i, InvalidBlockNumber, InvalidBuffer, xl_hash_squeeze_page::is_prev_bucket_same_wrt, xl_hash_squeeze_page::is_prim_bucket_same_wrt, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), xl_hash_squeeze_page::nextblkno, xl_hash_squeeze_page::ntups, PageGetSpecialPointer, PageSetLSN, PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().
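
The bitmap addressing used below splits the 0-based overflow bit number into a bitmap-page index and a bit within that page. A worked sketch, assuming the default 8 kB block size so that each bitmap page covers 32768 bits (BMPG_SHIFT = 15, BMPG_MASK = 32767; the real values come from the metapage):

    uint32 ovflbitno  = 70000;
    int32  bitmappage = ovflbitno >> 15;        /* = 2, the third bitmap page */
    int32  bitmapbit  = ovflbitno & 32767;      /* = 4464, bit within that page */

The bitmap block to read is then metap->hashm_mapp[bitmappage], and CLRBIT(freep, bitmapbit) marks the overflow page free again.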

492 {
493  HashMetaPage metap;
494  Buffer metabuf;
495  Buffer mapbuf;
496  BlockNumber ovflblkno;
497  BlockNumber prevblkno;
498  BlockNumber blkno;
499  BlockNumber nextblkno;
500  BlockNumber writeblkno;
501  HashPageOpaque ovflopaque;
502  Page ovflpage;
503  Page mappage;
504  uint32 *freep;
505  uint32 ovflbitno;
506  int32 bitmappage,
507  bitmapbit;
508  Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
509  Buffer prevbuf = InvalidBuffer;
510  Buffer nextbuf = InvalidBuffer;
511  bool update_metap = false;
512 
513  /* Get information from the doomed page */
514  _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
515  ovflblkno = BufferGetBlockNumber(ovflbuf);
516  ovflpage = BufferGetPage(ovflbuf);
517  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
518  nextblkno = ovflopaque->hasho_nextblkno;
519  prevblkno = ovflopaque->hasho_prevblkno;
520  writeblkno = BufferGetBlockNumber(wbuf);
521  bucket = ovflopaque->hasho_bucket;
522 
523  /*
524  * Fix up the bucket chain. this is a doubly-linked list, so we must fix
525  * up the bucket chain members behind and ahead of the overflow page being
526  * deleted. Concurrency issues are avoided by using lock chaining as
527  * described atop hashbucketcleanup.
528  */
529  if (BlockNumberIsValid(prevblkno))
530  {
531  if (prevblkno == writeblkno)
532  prevbuf = wbuf;
533  else
534  prevbuf = _hash_getbuf_with_strategy(rel,
535  prevblkno,
536  HASH_WRITE,
537  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
538  bstrategy);
539  }
540  if (BlockNumberIsValid(nextblkno))
541  nextbuf = _hash_getbuf_with_strategy(rel,
542  nextblkno,
543  HASH_WRITE,
544  LH_OVERFLOW_PAGE,
545  bstrategy);
546 
547  /* Note: bstrategy is intentionally not used for metapage and bitmap */
548 
549  /* Read the metapage so we can determine which bitmap page to use */
550  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
551  metap = HashPageGetMeta(BufferGetPage(metabuf));
552 
553  /* Identify which bit to set */
554  ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
555 
556  bitmappage = ovflbitno >> BMPG_SHIFT(metap);
557  bitmapbit = ovflbitno & BMPG_MASK(metap);
558 
559  if (bitmappage >= metap->hashm_nmaps)
560  elog(ERROR, "invalid overflow bit number %u", ovflbitno);
561  blkno = metap->hashm_mapp[bitmappage];
562 
563  /* Release metapage lock while we access the bitmap page */
564  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
565 
566  /* read the bitmap page to clear the bitmap bit */
567  mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
568  mappage = BufferGetPage(mapbuf);
569  freep = HashPageGetBitmap(mappage);
570  Assert(ISSET(freep, bitmapbit));
571 
572  /* Get write-lock on metapage to update firstfree */
573  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
574 
575  /* This operation needs to log multiple tuples, prepare WAL for that */
576  if (RelationNeedsWAL(rel))
577  XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
578 
579  START_CRIT_SECTION();
580 
581  /*
582  * we have to insert tuples on the "write" page, being careful to preserve
583  * hashkey ordering. (If we insert many tuples into the same "write" page
584  * it would be worth qsort'ing them).
585  */
586  if (nitups > 0)
587  {
588  _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
589  MarkBufferDirty(wbuf);
590  }
591 
592  /*
593  * Reinitialize the freed overflow page. Just zeroing the page won't
594  * work, because WAL replay routines expect pages to be initialized. See
595  * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
596  * careful to make the special space valid here so that tools like
597  * pageinspect won't get confused.
598  */
599  _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
600 
601  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
602 
603  ovflopaque->hasho_prevblkno = InvalidBlockNumber;
604  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
605  ovflopaque->hasho_bucket = -1;
606  ovflopaque->hasho_flag = LH_UNUSED_PAGE;
607  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
608 
609  MarkBufferDirty(ovflbuf);
610 
611  if (BufferIsValid(prevbuf))
612  {
613  Page prevpage = BufferGetPage(prevbuf);
614  HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
615 
616  Assert(prevopaque->hasho_bucket == bucket);
617  prevopaque->hasho_nextblkno = nextblkno;
618  MarkBufferDirty(prevbuf);
619  }
620  if (BufferIsValid(nextbuf))
621  {
622  Page nextpage = BufferGetPage(nextbuf);
623  HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
624 
625  Assert(nextopaque->hasho_bucket == bucket);
626  nextopaque->hasho_prevblkno = prevblkno;
627  MarkBufferDirty(nextbuf);
628  }
629 
630  /* Clear the bitmap bit to indicate that this overflow page is free */
631  CLRBIT(freep, bitmapbit);
632  MarkBufferDirty(mapbuf);
633 
634  /* if this is now the first free page, update hashm_firstfree */
635  if (ovflbitno < metap->hashm_firstfree)
636  {
637  metap->hashm_firstfree = ovflbitno;
638  update_metap = true;
639  MarkBufferDirty(metabuf);
640  }
641 
642  /* XLOG stuff */
643  if (RelationNeedsWAL(rel))
644  {
645  xl_hash_squeeze_page xlrec;
646  XLogRecPtr recptr;
647  int i;
648 
649  xlrec.prevblkno = prevblkno;
650  xlrec.nextblkno = nextblkno;
651  xlrec.ntups = nitups;
652  xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
653  xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
654 
655  XLogBeginInsert();
656  XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
657 
658  /*
659  * bucket buffer needs to be registered to ensure that we can acquire
660  * a cleanup lock on it during replay.
661  */
662  if (!xlrec.is_prim_bucket_same_wrt)
663  XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
664 
665  XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
666  if (xlrec.ntups > 0)
667  {
668  XLogRegisterBufData(1, (char *) itup_offsets,
669  nitups * sizeof(OffsetNumber));
670  for (i = 0; i < nitups; i++)
671  XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
672  }
673 
674  XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
675 
676  /*
677  * If prevpage and the writepage (block in which we are moving tuples
678  * from overflow) are same, then no need to separately register
679  * prevpage. During replay, we can directly update the nextblock in
680  * writepage.
681  */
682  if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
683  XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
684 
685  if (BufferIsValid(nextbuf))
686  XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
687 
688  XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
689  XLogRegisterBufData(5, (char *) &bitmapbit, sizeof(uint32));
690 
691  if (update_metap)
692  {
693  XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
694  XLogRegisterBufData(6, (char *) &metap->hashm_firstfree, sizeof(uint32));
695  }
696 
697  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
698 
699  PageSetLSN(BufferGetPage(wbuf), recptr);
700  PageSetLSN(BufferGetPage(ovflbuf), recptr);
701 
702  if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
703  PageSetLSN(BufferGetPage(prevbuf), recptr);
704  if (BufferIsValid(nextbuf))
705  PageSetLSN(BufferGetPage(nextbuf), recptr);
706 
707  PageSetLSN(BufferGetPage(mapbuf), recptr);
708 
709  if (update_metap)
710  PageSetLSN(BufferGetPage(metabuf), recptr);
711  }
712 
713  END_CRIT_SECTION();
714 
715  /* release previous bucket if it is not same as write bucket */
716  if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
717  _hash_relbuf(rel, prevbuf);
718 
719  if (BufferIsValid(ovflbuf))
720  _hash_relbuf(rel, ovflbuf);
721 
722  if (BufferIsValid(nextbuf))
723  _hash_relbuf(rel, nextbuf);
724 
725  _hash_relbuf(rel, mapbuf);
726  _hash_relbuf(rel, metabuf);
727 
728  return nextblkno;
729 }
void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)

Definition at line 739 of file hashovfl.c.

References _hash_pageinit(), BufferGetPage, BufferGetPageSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, InvalidBlockNumber, LH_BITMAP_PAGE, MemSet, and PageGetSpecialPointer.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().
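
All bits start out set ("in use"). A toy, self-contained illustration (not PostgreSQL source) of the set-all-then-clear pattern on a word-addressed bitmap like the one built here:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BITS_PER_WORD 32

int
main(void)
{
    uint32_t freep[4];                      /* 128-bit toy bitmap */

    memset(freep, 0xFF, sizeof(freep));     /* cf. MemSet(freep, 0xFF, bmsize) */

    /* clearing bit 37 marks overflow page 37 as free (cf. CLRBIT) */
    freep[37 / BITS_PER_WORD] &= ~((uint32_t) 1 << (37 % BITS_PER_WORD));

    printf("word 1 = 0x%08X\n", (unsigned) freep[1]);   /* prints word 1 = 0xFFFFFFDF */
    return 0;
}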

740 {
741  Page pg;
742  HashPageOpaque op;
743  uint32 *freep;
744 
745  pg = BufferGetPage(buf);
746 
747  /* initialize the page */
748  if (initpage)
749  _hash_pageinit(pg, BufferGetPageSize(buf));
750 
751  /* initialize the page's special space */
752  op = (HashPageOpaque) PageGetSpecialPointer(pg);
753  op->hasho_prevblkno = InvalidBlockNumber;
754  op->hasho_nextblkno = InvalidBlockNumber;
755  op->hasho_bucket = -1;
756  op->hasho_flag = LH_BITMAP_PAGE;
757  op->hasho_page_id = HASHO_PAGE_ID;
758 
759  /* set all of the bits to 1 */
760  freep = HashPageGetBitmap(pg);
761  MemSet(freep, 0xFF, bmsize);
762 
763  /*
764  * Set pd_lower just past the end of the bitmap page data. We could even
765  * set pd_lower equal to pd_upper, but this is more precise and makes the
766  * page look compressible to xlog.c.
767  */
768  ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
769 }
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)

Definition at line 61 of file hashovfl.c.

References _hash_get_totalbuckets(), ereport, errcode(), errmsg(), ERROR, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().
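
A worked example with illustrative values (not taken from a real index): suppose hashm_ovflpoint = 2, _hash_get_totalbuckets(1) = 2, _hash_get_totalbuckets(2) = 4, and hashm_spares[] = {0, 0, 3}. For ovflblkno = 6, the i = 1 pass computes bitnum = 6 - 2 = 4, which fails the check bitnum <= hashm_spares[1] = 0; the i = 2 pass computes bitnum = 6 - 4 = 2, and since hashm_spares[1] = 0 < 2 <= hashm_spares[2] = 3, the function returns 2 - 1 = 1: block 6 carries overflow bit number 1.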

62 {
63  uint32 splitnum = metap->hashm_ovflpoint;
64  uint32 i;
65  uint32 bitnum;
66 
67  /* Determine the split number containing this page */
68  for (i = 1; i <= splitnum; i++)
69  {
70  if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
71  break; /* oops */
72  bitnum = ovflblkno - _hash_get_totalbuckets(i);
73 
74  /*
75  * bitnum has to be greater than number of overflow page added in
76  * previous split point. The overflow page at this splitnum (i) if any
77  * should start from (_hash_get_totalbuckets(i) +
78  * metap->hashm_spares[i - 1] + 1).
79  */
80  if (bitnum > metap->hashm_spares[i - 1] &&
81  bitnum <= metap->hashm_spares[i])
82  return bitnum - 1; /* -1 to convert 1-based to 0-based */
83  }
84 
85  ereport(ERROR,
86  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
87  errmsg("invalid overflow block number %u", ovflblkno)));
88  return 0; /* keep compiler quiet */
89 }
void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)

Definition at line 804 of file hashovfl.c.

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid, BUFFER_LOCK_UNLOCK, BufferGetPage, CopyIndexTuple(), END_CRIT_SECTION, FirstOffsetNumber, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_prevblkno, i, IndexTupleDSize, InvalidBuffer, xl_hash_move_page_contents::is_prim_bucket_same_wrt, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexMultiDelete(), PageIsEmpty, PageSetLSN, pfree(), REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().
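
The loop below is a two-cursor compaction: a "write" cursor starts at the front of the bucket chain, a "read" cursor at the tail, and tuples migrate forward until the cursors meet, after which drained tail pages are freed. A toy, self-contained model of that movement (not PostgreSQL source; pages reduced to tuple counts):

#include <stdio.h>

#define NPAGES 4
#define CAP    3

int
main(void)
{
    int used[NPAGES] = {1, 3, 0, 2};    /* live tuples per chain page */
    int w = 0, r = NPAGES - 1;
    int i;

    while (w < r)
    {
        if (used[w] == CAP) { w++; continue; }  /* write page is full */
        if (used[r] == 0)   { r--; continue; }  /* read page is drained */
        used[w]++;                              /* move one tuple forward */
        used[r]--;
    }
    for (i = 0; i < NPAGES; i++)
        printf("page %d: %d tuples\n", i, used[i]);
    return 0;                   /* drained tail pages would now be freed */
}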

809 {
810  BlockNumber wblkno;
811  BlockNumber rblkno;
812  Buffer wbuf;
813  Buffer rbuf;
814  Page wpage;
815  Page rpage;
816  HashPageOpaque wopaque;
817  HashPageOpaque ropaque;
818 
819  /*
820  * start squeezing into the primary bucket page.
821  */
822  wblkno = bucket_blkno;
823  wbuf = bucket_buf;
824  wpage = BufferGetPage(wbuf);
825  wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
826 
827  /*
828  * if there aren't any overflow pages, there's nothing to squeeze. caller
829  * is responsible for releasing the pin on primary bucket page.
830  */
831  if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
832  {
833  LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
834  return;
835  }
836 
837  /*
838  * Find the last page in the bucket chain by starting at the base bucket
839  * page and working forward. Note: we assume that a hash bucket chain is
840  * usually smaller than the buffer ring being used by VACUUM, else using
841  * the access strategy here would be counterproductive.
842  */
843  rbuf = InvalidBuffer;
844  ropaque = wopaque;
845  do
846  {
847  rblkno = ropaque->hasho_nextblkno;
848  if (rbuf != InvalidBuffer)
849  _hash_relbuf(rel, rbuf);
850  rbuf = _hash_getbuf_with_strategy(rel,
851  rblkno,
852  HASH_WRITE,
853  LH_OVERFLOW_PAGE,
854  bstrategy);
855  rpage = BufferGetPage(rbuf);
856  ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
857  Assert(ropaque->hasho_bucket == bucket);
858  } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
859 
860  /*
861  * squeeze the tuples.
862  */
863  for (;;)
864  {
865  OffsetNumber roffnum;
866  OffsetNumber maxroffnum;
867  OffsetNumber deletable[MaxOffsetNumber];
868  IndexTuple itups[MaxIndexTuplesPerPage];
869  Size tups_size[MaxIndexTuplesPerPage];
870  OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
871  uint16 ndeletable = 0;
872  uint16 nitups = 0;
873  Size all_tups_size = 0;
874  int i;
875  bool retain_pin = false;
876 
877 readpage:
878  /* Scan each tuple in "read" page */
879  maxroffnum = PageGetMaxOffsetNumber(rpage);
880  for (roffnum = FirstOffsetNumber;
881  roffnum <= maxroffnum;
882  roffnum = OffsetNumberNext(roffnum))
883  {
884  IndexTuple itup;
885  Size itemsz;
886 
887  /* skip dead tuples */
888  if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
889  continue;
890 
891  itup = (IndexTuple) PageGetItem(rpage,
892  PageGetItemId(rpage, roffnum));
893  itemsz = IndexTupleDSize(*itup);
894  itemsz = MAXALIGN(itemsz);
895 
896  /*
897  * Walk up the bucket chain, looking for a page big enough for
898  * this item and all other accumulated items. Exit if we reach
899  * the read page.
900  */
901  while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
902  {
903  Buffer next_wbuf = InvalidBuffer;
904  bool tups_moved = false;
905 
906  Assert(!PageIsEmpty(wpage));
907 
908  if (wblkno == bucket_blkno)
909  retain_pin = true;
910 
911  wblkno = wopaque->hasho_nextblkno;
912  Assert(BlockNumberIsValid(wblkno));
913 
914  /* don't need to move to next page if we reached the read page */
915  if (wblkno != rblkno)
916  next_wbuf = _hash_getbuf_with_strategy(rel,
917  wblkno,
918  HASH_WRITE,
919  LH_OVERFLOW_PAGE,
920  bstrategy);
921 
922  if (nitups > 0)
923  {
924  Assert(nitups == ndeletable);
925 
926  /*
927  * This operation needs to log multiple tuples, prepare
928  * WAL for that.
929  */
930  if (RelationNeedsWAL(rel))
931  XLogEnsureRecordSpace(0, 3 + nitups);
932 
933  START_CRIT_SECTION();
934 
935  /*
936  * we have to insert tuples on the "write" page, being
937  * careful to preserve hashkey ordering. (If we insert
938  * many tuples into the same "write" page it would be
939  * worth qsort'ing them).
940  */
941  _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
942  MarkBufferDirty(wbuf);
943 
944  /* Delete tuples we already moved off read page */
945  PageIndexMultiDelete(rpage, deletable, ndeletable);
946  MarkBufferDirty(rbuf);
947 
948  /* XLOG stuff */
949  if (RelationNeedsWAL(rel))
950  {
951  XLogRecPtr recptr;
952  xl_hash_move_page_contents xlrec;
953 
954  xlrec.ntups = nitups;
955  xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf) ? true : false;
956 
957  XLogBeginInsert();
958  XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
959 
960  /*
961  * bucket buffer needs to be registered to ensure that
962  * we can acquire a cleanup lock on it during replay.
963  */
964  if (!xlrec.is_prim_bucket_same_wrt)
965  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
966 
967  XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
968  XLogRegisterBufData(1, (char *) itup_offsets,
969  nitups * sizeof(OffsetNumber));
970  for (i = 0; i < nitups; i++)
971  XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
972 
973  XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
974  XLogRegisterBufData(2, (char *) deletable,
975  ndeletable * sizeof(OffsetNumber));
976 
977  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
978 
979  PageSetLSN(BufferGetPage(wbuf), recptr);
980  PageSetLSN(BufferGetPage(rbuf), recptr);
981  }
982 
983  END_CRIT_SECTION();
984 
985  tups_moved = true;
986  }
987 
988  /*
989  * release the lock on previous page after acquiring the lock
990  * on next page
991  */
992  if (retain_pin)
993  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
994  else
995  _hash_relbuf(rel, wbuf);
996 
997  /* nothing more to do if we reached the read page */
998  if (rblkno == wblkno)
999  {
1000  _hash_relbuf(rel, rbuf);
1001  return;
1002  }
1003 
1004  wbuf = next_wbuf;
1005  wpage = BufferGetPage(wbuf);
1006  wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
1007  Assert(wopaque->hasho_bucket == bucket);
1008  retain_pin = false;
1009 
1010  /* be tidy */
1011  for (i = 0; i < nitups; i++)
1012  pfree(itups[i]);
1013  nitups = 0;
1014  all_tups_size = 0;
1015  ndeletable = 0;
1016 
1017  /*
1018  * after moving the tuples, rpage would have been compacted,
1019  * so we need to rescan it.
1020  */
1021  if (tups_moved)
1022  goto readpage;
1023  }
1024 
1025  /* remember tuple for deletion from "read" page */
1026  deletable[ndeletable++] = roffnum;
1027 
1028  /*
1029  * we need a copy of index tuples as they can be freed as part of
1030  * overflow page, however we need them to write a WAL record in
1031  * _hash_freeovflpage.
1032  */
1033  itups[nitups] = CopyIndexTuple(itup);
1034  tups_size[nitups++] = itemsz;
1035  all_tups_size += itemsz;
1036  }
1037 
1038  /*
1039  * If we reach here, there are no live tuples on the "read" page ---
1040  * it was empty when we got to it, or we moved them all. So we can
1041  * just free the page without bothering with deleting tuples
1042  * individually. Then advance to the previous "read" page.
1043  *
1044  * Tricky point here: if our read and write pages are adjacent in the
1045  * bucket chain, our write lock on wbuf will conflict with
1046  * _hash_freeovflpage's attempt to update the sibling links of the
1047  * removed page. In that case, we don't need to lock it again.
1048  */
1049  rblkno = ropaque->hasho_prevblkno;
1050  Assert(BlockNumberIsValid(rblkno));
1051 
1052  /* free this overflow page (releases rbuf) */
1053  _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
1054  tups_size, nitups, bstrategy);
1055 
1056  /* be tidy */
1057  for (i = 0; i < nitups; i++)
1058  pfree(itups[i]);
1059 
1060  /* are we freeing the page adjacent to wbuf? */
1061  if (rblkno == wblkno)
1062  {
1063  /* retain the pin on primary bucket page till end of bucket scan */
1064  if (wblkno == bucket_blkno)
1065  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
1066  else
1067  _hash_relbuf(rel, wbuf);
1068  return;
1069  }
1070 
1071  rbuf = _hash_getbuf_with_strategy(rel,
1072  rblkno,
1073  HASH_WRITE,
1074  LH_OVERFLOW_PAGE,
1075  bstrategy);
1076  rpage = BufferGetPage(rbuf);
1077  ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
1078  Assert(ropaque->hasho_bucket == bucket);
1079  }
1080 
1081  /* NOTREACHED */
1082 }
static BlockNumber bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)

Definition at line 34 of file hashovfl.c.

References _hash_get_totalbuckets(), HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_addovflpage().
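
Continuing the illustrative values used for _hash_ovflblkno_to_bitno() above (hashm_ovflpoint = 2, _hash_get_totalbuckets(2) = 4, hashm_spares[] = {0, 0, 3}): for bit number 1, the 1-based conversion gives ovflbitnum = 2; the loop advances past splitpoint 1 because 2 > hashm_spares[1] = 0, then stops, and the function returns 4 + 2 = block 6, exactly inverting the example shown there.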

35 {
36  uint32 splitnum = metap->hashm_ovflpoint;
37  uint32 i;
38 
39  /* Convert zero-based bitnumber to 1-based page number */
40  ovflbitnum += 1;
41 
42  /* Determine the split number for this page (must be >= 1) */
43  for (i = 1;
44  i < splitnum && ovflbitnum > metap->hashm_spares[i];
45  i++)
46  /* loop */ ;
47 
48  /*
49  * Convert to absolute page number by adding the number of bucket pages
50  * that exist before this split point.
51  */
52  return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
53 }