hashovfl.c
1 /*-------------------------------------------------------------------------
2  *
3  * hashovfl.c
4  * Overflow page management code for the Postgres hash access method
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashovfl.c
12  *
13  * NOTES
14  * Overflow pages look like ordinary relation pages.
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/hash.h"
21 #include "utils/rel.h"
22 
23 
24 static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
25 static uint32 _hash_firstfreebit(uint32 map);
26 
27 
28 /*
29  * Convert overflow page bit number (its index in the free-page bitmaps)
30  * to block number within the index.
31  */
32 static BlockNumber
33 bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
34 {
35  uint32 splitnum = metap->hashm_ovflpoint;
36  uint32 i;
37 
38  /* Convert zero-based bitnumber to 1-based page number */
39  ovflbitnum += 1;
40 
41  /* Determine the split number for this page (must be >= 1) */
42  for (i = 1;
43  i < splitnum && ovflbitnum > metap->hashm_spares[i];
44  i++)
45  /* loop */ ;
46 
47  /*
48  * Convert to absolute page number by adding the number of bucket pages
49  * that exist before this split point.
50  */
51  return (BlockNumber) ((1 << i) + ovflbitnum);
52 }
53 
54 /*
55  * _hash_ovflblkno_to_bitno
56  *
57  * Convert overflow page block number to bit number for free-page bitmap.
58  */
59 uint32
60 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
61 {
62  uint32 splitnum = metap->hashm_ovflpoint;
63  uint32 i;
64  uint32 bitnum;
65 
66  /* Determine the split number containing this page */
67  for (i = 1; i <= splitnum; i++)
68  {
69  if (ovflblkno <= (BlockNumber) (1 << i))
70  break; /* oops */
71  bitnum = ovflblkno - (1 << i);
72 
73  /*
74  * bitnum has to be greater than the number of overflow pages added in
75  * previous split points. The overflow pages at this split point (i), if any,
76  * start from ((2 ^ i) + metap->hashm_spares[i - 1] + 1).
77  */
78  if (bitnum > metap->hashm_spares[i - 1] &&
79  bitnum <= metap->hashm_spares[i])
80  return bitnum - 1; /* -1 to convert 1-based to 0-based */
81  }
82 
83  ereport(ERROR,
84  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
85  errmsg("invalid overflow block number %u", ovflblkno)));
86  return 0; /* keep compiler quiet */
87 }
88 
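
The two conversions above are pure arithmetic over hashm_spares[]: split point i contributes 2^i bucket pages, so an overflow page's block number is its 1-based bit number plus the bucket pages allocated before its split point. Below is a standalone sketch that round-trips a few bit numbers; the spares[] values and split count are invented for illustration, not taken from a real index.

/*
 * Round-trip a few overflow-page bit numbers through the two mappings.
 * spares[] and splitnum are made-up stand-ins for hashm_spares[] and
 * hashm_ovflpoint; blkno = (1 << i) + (bitnum + 1) for the split point
 * that owns the bit.
 */
#include <stdio.h>

static const unsigned spares[] = {0, 1, 3, 6};	/* hypothetical hashm_spares */
static const unsigned splitnum = 3;				/* hypothetical hashm_ovflpoint */

static unsigned
bitno_to_blkno_sketch(unsigned bitnum)
{
	unsigned	i;

	bitnum += 1;				/* zero-based bit -> one-based page */
	for (i = 1; i < splitnum && bitnum > spares[i]; i++)
		/* find the split point that owns this overflow page */ ;
	return (1u << i) + bitnum;
}

static unsigned
blkno_to_bitno_sketch(unsigned blkno)
{
	unsigned	i;

	for (i = 1; i <= splitnum; i++)
	{
		unsigned	bitnum = blkno - (1u << i);

		if (blkno > (1u << i) &&
			bitnum > spares[i - 1] && bitnum <= spares[i])
			return bitnum - 1;	/* back to zero-based */
	}
	return ~0u;					/* not an overflow page */
}

int
main(void)
{
	unsigned	bit;

	for (bit = 0; bit < spares[splitnum]; bit++)
	{
		unsigned	blkno = bitno_to_blkno_sketch(bit);

		printf("bit %u -> block %u -> bit %u\n",
			   bit, blkno, blkno_to_bitno_sketch(blkno));
	}
	return 0;
}
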
89 /*
90  * _hash_addovflpage
91  *
92  * Add an overflow page to the bucket whose last page is pointed to by 'buf'.
93  *
94  * On entry, the caller must hold a pin but no lock on 'buf'. Unless asked
95  * to retain it, the pin is dropped before exiting (we assume the caller is
96  * no longer interested in 'buf'); the pin is retained only for the primary
97  * bucket page. The returned overflow page will be pinned and
98  * write-locked; it is guaranteed to be empty.
99  *
100  * The caller must hold a pin, but no lock, on the metapage buffer.
101  * That buffer is returned in the same state.
102  *
103  * NB: since this could be executed concurrently by multiple processes,
104  * one should not assume that the returned overflow page will be the
105  * immediate successor of the originally passed 'buf'. Additional overflow
106  * pages might have been added to the bucket chain in between.
107  */
108 Buffer
109 _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
110 {
111  Buffer ovflbuf;
112  Page page;
113  Page ovflpage;
114  HashPageOpaque pageopaque;
115  HashPageOpaque ovflopaque;
116 
117  /* allocate and lock an empty overflow page */
118  ovflbuf = _hash_getovflpage(rel, metabuf);
119 
120  /*
121  * Write-lock the tail page. It is okay to hold two buffer locks here
122  * since there cannot be anyone else contending for access to ovflbuf.
123  */
124  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
125 
126  /* probably redundant... */
127  _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
128 
129  /* loop to find current tail page, in case someone else inserted too */
130  for (;;)
131  {
132  BlockNumber nextblkno;
133 
134  page = BufferGetPage(buf);
135  pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
136  nextblkno = pageopaque->hasho_nextblkno;
137 
138  if (!BlockNumberIsValid(nextblkno))
139  break;
140 
141  /* we assume we do not need to write the unmodified page */
142  if (retain_pin)
143  {
144  /* pin will be retained only for the primary bucket page */
145  Assert(pageopaque->hasho_flag & LH_BUCKET_PAGE);
146  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
147  }
148  else
149  _hash_relbuf(rel, buf);
150 
151  retain_pin = false;
152 
153  buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
154  }
155 
156  /* now that we have correct backlink, initialize new overflow page */
157  ovflpage = BufferGetPage(ovflbuf);
158  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
159  ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
160  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
161  ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
162  ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
163  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
164 
165  MarkBufferDirty(ovflbuf);
166 
167  /* logically chain overflow page to previous page */
168  pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
169  MarkBufferDirty(buf);
170  if (retain_pin)
171  {
172  /* pin will be retained only for the primary bucket page */
173  Assert(pageopaque->hasho_flag & LH_BUCKET_PAGE);
174  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
175  }
176  else
177  _hash_relbuf(rel, buf);
178 
179  return ovflbuf;
180 }
181 
182 /*
183  * _hash_getovflpage()
184  *
185  * Find an available overflow page and return it. The returned buffer
186  * is pinned and write-locked, and has had _hash_pageinit() applied,
187  * but it is the caller's responsibility to fill the special space.
188  *
189  * The caller must hold a pin, but no lock, on the metapage buffer.
190  * That buffer is left in the same state at exit.
191  */
192 static Buffer
193 _hash_getovflpage(Relation rel, Buffer metabuf)
194 {
195  HashMetaPage metap;
196  Buffer mapbuf = 0;
197  Buffer newbuf;
198  BlockNumber blkno;
199  uint32 orig_firstfree;
200  uint32 splitnum;
201  uint32 *freep = NULL;
202  uint32 max_ovflpg;
203  uint32 bit;
204  uint32 first_page;
205  uint32 last_bit;
206  uint32 last_page;
207  uint32 i,
208  j;
209 
210  /* Get exclusive lock on the meta page */
211  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
212 
213  _hash_checkpage(rel, metabuf, LH_META_PAGE);
214  metap = HashPageGetMeta(BufferGetPage(metabuf));
215 
216  /* start search at hashm_firstfree */
217  orig_firstfree = metap->hashm_firstfree;
218  first_page = orig_firstfree >> BMPG_SHIFT(metap);
219  bit = orig_firstfree & BMPG_MASK(metap);
220  i = first_page;
221  j = bit / BITS_PER_MAP;
222  bit &= ~(BITS_PER_MAP - 1);
223 
224  /* outer loop iterates once per bitmap page */
225  for (;;)
226  {
227  BlockNumber mapblkno;
228  Page mappage;
229  uint32 last_inpage;
230 
231  /* want to end search with the last existing overflow page */
232  splitnum = metap->hashm_ovflpoint;
233  max_ovflpg = metap->hashm_spares[splitnum] - 1;
234  last_page = max_ovflpg >> BMPG_SHIFT(metap);
235  last_bit = max_ovflpg & BMPG_MASK(metap);
236 
237  if (i > last_page)
238  break;
239 
240  Assert(i < metap->hashm_nmaps);
241  mapblkno = metap->hashm_mapp[i];
242 
243  if (i == last_page)
244  last_inpage = last_bit;
245  else
246  last_inpage = BMPGSZ_BIT(metap) - 1;
247 
248  /* Release exclusive lock on metapage while reading bitmap page */
249  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
250 
251  mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
252  mappage = BufferGetPage(mapbuf);
253  freep = HashPageGetBitmap(mappage);
254 
255  for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
256  {
257  if (freep[j] != ALL_SET)
258  goto found;
259  }
260 
261  /* No free space here, try to advance to next map page */
262  _hash_relbuf(rel, mapbuf);
263  i++;
264  j = 0; /* scan from start of next map page */
265  bit = 0;
266 
267  /* Reacquire exclusive lock on the meta page */
268  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
269  }
270 
271  /*
272  * No free pages --- have to extend the relation to add an overflow page.
273  * First, check to see if we have to add a new bitmap page too.
274  */
275  if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
276  {
277  /*
278  * We create the new bitmap page with all pages marked "in use".
279  * Actually two pages in the new bitmap's range will exist
280  * immediately: the bitmap page itself, and the following page which
281  * is the one we return to the caller. Both of these are correctly
282  * marked "in use". Subsequent pages do not exist yet, but it is
283  * convenient to pre-mark them as "in use" too.
284  */
285  bit = metap->hashm_spares[splitnum];
286  _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
287  metap->hashm_spares[splitnum]++;
288  }
289  else
290  {
291  /*
292  * Nothing to do here; since the page will be past the last used page,
293  * we know its bitmap bit was preinitialized to "in use".
294  */
295  }
296 
297  /* Calculate address of the new overflow page */
298  bit = metap->hashm_spares[splitnum];
299  blkno = bitno_to_blkno(metap, bit);
300 
301  /*
302  * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
303  * relation length stays in sync with ours. XXX It's annoying to do this
304  * with metapage write lock held; would be better to use a lock that
305  * doesn't block incoming searches.
306  */
307  newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
308 
309  metap->hashm_spares[splitnum]++;
310 
311  /*
312  * Adjust hashm_firstfree to avoid redundant searches. But don't risk
313  * changing it if someone moved it while we were searching bitmap pages.
314  */
315  if (metap->hashm_firstfree == orig_firstfree)
316  metap->hashm_firstfree = bit + 1;
317 
318  /* Write updated metapage and release lock, but not pin */
319  MarkBufferDirty(metabuf);
320  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
321 
322  return newbuf;
323 
324 found:
325  /* convert bit to bit number within page */
326  bit += _hash_firstfreebit(freep[j]);
327 
328  /* mark page "in use" in the bitmap */
329  SETBIT(freep, bit);
330  MarkBufferDirty(mapbuf);
331  _hash_relbuf(rel, mapbuf);
332 
333  /* Reacquire exclusive lock on the meta page */
334  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
335 
336  /* convert bit to absolute bit number */
337  bit += (i << BMPG_SHIFT(metap));
338 
339  /* Calculate address of the recycled overflow page */
340  blkno = bitno_to_blkno(metap, bit);
341 
342  /*
343  * Adjust hashm_firstfree to avoid redundant searches. But don't risk
344  * changing it if someone moved it while we were searching bitmap pages.
345  */
346  if (metap->hashm_firstfree == orig_firstfree)
347  {
348  metap->hashm_firstfree = bit + 1;
349 
350  /* Write updated metapage and release lock, but not pin */
351  MarkBufferDirty(metabuf);
352  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
353  }
354  else
355  {
356  /* We didn't change the metapage, so no need to write */
357  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
358  }
359 
360  /* Fetch, init, and return the recycled page */
361  return _hash_getinitbuf(rel, blkno);
362 }
363 
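
The search in _hash_getovflpage() addresses free-page bits at three levels: which bitmap page (hashm_mapp[i]), which 32-bit word within that page's bitmap (j), and which bit within the word. A minimal sketch of that decomposition follows; the shift and mask values are hypothetical stand-ins for BMPG_SHIFT()/BMPG_MASK(), which the real code takes from the metapage, while BITS_PER_MAP is the number of bits in a uint32 bitmap word.

/*
 * Decompose an overflow-page bit number into (bitmap page, word, bit),
 * the same arithmetic _hash_getovflpage() applies to hashm_firstfree.
 * The constants below are assumptions for this demo only.
 */
#include <stdio.h>

#define DEMO_BMPG_SHIFT   15	/* hypothetical log2(bits per bitmap page) */
#define DEMO_BMPG_MASK    ((1u << DEMO_BMPG_SHIFT) - 1)
#define DEMO_BITS_PER_MAP 32	/* bits per uint32 bitmap word */

int
main(void)
{
	unsigned	firstfree = 70000;	/* hypothetical hashm_firstfree */
	unsigned	page = firstfree >> DEMO_BMPG_SHIFT;		/* bitmap page index */
	unsigned	bit_in_page = firstfree & DEMO_BMPG_MASK;	/* bit within that page */
	unsigned	word = bit_in_page / DEMO_BITS_PER_MAP;		/* uint32 word index */
	unsigned	bit_in_word = bit_in_page % DEMO_BITS_PER_MAP;

	printf("overflow bit %u -> bitmap page %u, word %u, bit %u\n",
		   firstfree, page, word, bit_in_word);
	return 0;
}
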
364 /*
365  * _hash_firstfreebit()
366  *
367  * Return the number of the first bit that is not set in the word 'map'.
368  */
369 static uint32
370 _hash_firstfreebit(uint32 map)
371 {
372  uint32 i,
373  mask;
374 
375  mask = 0x1;
376  for (i = 0; i < BITS_PER_MAP; i++)
377  {
378  if (!(mask & map))
379  return i;
380  mask <<= 1;
381  }
382 
383  elog(ERROR, "firstfreebit found no free bit");
384 
385  return 0; /* keep compiler quiet */
386 }
387 
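
_hash_firstfreebit() scans the word one bit at a time. As a sanity check of its contract, the sketch below compares that loop against the count of trailing one bits; __builtin_ctz(~map) is a GCC/Clang-specific shortcut shown only for illustration, not something the backend code uses.

/* Compare a bit-by-bit scan with a trailing-ones count on one example word. */
#include <stdio.h>

static unsigned
firstfreebit_sketch(unsigned map)
{
	unsigned	mask = 1;
	unsigned	i;

	for (i = 0; i < 32; i++, mask <<= 1)
		if (!(mask & map))
			return i;			/* first clear bit */
	return 32;					/* word is completely full */
}

int
main(void)
{
	unsigned	map = 0x0000001F;	/* bits 0..4 in use, bit 5 free */

	printf("loop: %u, ctz: %u\n",
		   firstfreebit_sketch(map),
		   (unsigned) __builtin_ctz(~map));
	return 0;
}
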
388 /*
389  * _hash_freeovflpage() -
390  *
391  * Remove this overflow page from its bucket's chain, and mark the page as
392  * free. On entry, ovflbuf is write-locked; it is released before exiting.
393  *
394  * Since this function is invoked in VACUUM, we provide an access strategy
395  * parameter that controls fetches of the bucket pages.
396  *
397  * Returns the block number of the page that followed the given page
398  * in the bucket, or InvalidBlockNumber if no following page.
399  *
400  * NB: caller must not hold a lock on the metapage, nor on the page that is
401  * next to ovflbuf in the bucket chain. We don't acquire a lock on the page
402  * prior to ovflbuf in the chain if it is the same as wbuf, because the
403  * caller already holds a lock on it.
404  */
405 BlockNumber
406 _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
407  BufferAccessStrategy bstrategy)
408 {
409  HashMetaPage metap;
410  Buffer metabuf;
411  Buffer mapbuf;
412  Buffer prevbuf = InvalidBuffer;
413  BlockNumber ovflblkno;
414  BlockNumber prevblkno;
415  BlockNumber blkno;
416  BlockNumber nextblkno;
417  BlockNumber writeblkno;
418  HashPageOpaque ovflopaque;
419  Page ovflpage;
420  Page mappage;
421  uint32 *freep;
422  uint32 ovflbitno;
423  int32 bitmappage,
424  bitmapbit;
425  Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
426 
427  /* Get information from the doomed page */
428  _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
429  ovflblkno = BufferGetBlockNumber(ovflbuf);
430  ovflpage = BufferGetPage(ovflbuf);
431  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
432  nextblkno = ovflopaque->hasho_nextblkno;
433  prevblkno = ovflopaque->hasho_prevblkno;
434  writeblkno = BufferGetBlockNumber(wbuf);
435  bucket = ovflopaque->hasho_bucket;
436 
437  /*
438  * Zero the page for debugging's sake; then write and release it. (Note:
439  * if we failed to zero the page here, we'd have problems with the Assert
440  * in _hash_pageinit() when the page is reused.)
441  */
442  MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
443  MarkBufferDirty(ovflbuf);
444  _hash_relbuf(rel, ovflbuf);
445 
446  /*
447  * Fix up the bucket chain. this is a doubly-linked list, so we must fix
448  * up the bucket chain members behind and ahead of the overflow page being
449  * deleted. Concurrency issues are avoided by using lock chaining as
450  * described atop hashbucketcleanup.
451  */
452  if (BlockNumberIsValid(prevblkno))
453  {
454  Page prevpage;
455  HashPageOpaque prevopaque;
456 
457  if (prevblkno == writeblkno)
458  prevbuf = wbuf;
459  else
460  prevbuf = _hash_getbuf_with_strategy(rel,
461  prevblkno,
462  HASH_WRITE,
463  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
464  bstrategy);
465 
466  prevpage = BufferGetPage(prevbuf);
467  prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
468 
469  Assert(prevopaque->hasho_bucket == bucket);
470  prevopaque->hasho_nextblkno = nextblkno;
471 
472  MarkBufferDirty(prevbuf);
473  if (prevblkno != writeblkno)
474  _hash_relbuf(rel, prevbuf);
475  }
476  if (BlockNumberIsValid(nextblkno))
477  {
478  Buffer nextbuf = _hash_getbuf_with_strategy(rel,
479  nextblkno,
480  HASH_WRITE,
481  LH_OVERFLOW_PAGE,
482  bstrategy);
483  Page nextpage = BufferGetPage(nextbuf);
484  HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
485 
486  Assert(nextopaque->hasho_bucket == bucket);
487  nextopaque->hasho_prevblkno = prevblkno;
488  MarkBufferDirty(nextbuf);
489  _hash_relbuf(rel, nextbuf);
490  }
491 
492  /* Note: bstrategy is intentionally not used for metapage and bitmap */
493 
494  /* Read the metapage so we can determine which bitmap page to use */
495  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
496  metap = HashPageGetMeta(BufferGetPage(metabuf));
497 
498  /* Identify which bit to set */
499  ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
500 
501  bitmappage = ovflbitno >> BMPG_SHIFT(metap);
502  bitmapbit = ovflbitno & BMPG_MASK(metap);
503 
504  if (bitmappage >= metap->hashm_nmaps)
505  elog(ERROR, "invalid overflow bit number %u", ovflbitno);
506  blkno = metap->hashm_mapp[bitmappage];
507 
508  /* Release metapage lock while we access the bitmap page */
509  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
510 
511  /* Clear the bitmap bit to indicate that this overflow page is free */
512  mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
513  mappage = BufferGetPage(mapbuf);
514  freep = HashPageGetBitmap(mappage);
515  Assert(ISSET(freep, bitmapbit));
516  CLRBIT(freep, bitmapbit);
517  MarkBufferDirty(mapbuf);
518  _hash_relbuf(rel, mapbuf);
519 
520  /* Get write-lock on metapage to update firstfree */
521  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
522 
523  /* if this is now the first free page, update hashm_firstfree */
524  if (ovflbitno < metap->hashm_firstfree)
525  {
526  metap->hashm_firstfree = ovflbitno;
527  MarkBufferDirty(metabuf);
528  }
529  _hash_relbuf(rel, metabuf);
530 
531  return nextblkno;
532 }
533 
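The chain fix-up in _hash_freeovflpage() is an ordinary doubly-linked-list unlink, expressed with block numbers and buffer locks instead of pointers. A minimal in-memory analogue is sketched below; the DemoPage type is hypothetical and all locking and buffer management is omitted.

/* Unlink the middle node of a tiny doubly-linked chain. */
#include <stdio.h>
#include <stddef.h>

typedef struct DemoPage
{
	int			blkno;
	struct DemoPage *prev;
	struct DemoPage *next;
} DemoPage;

int
main(void)
{
	DemoPage	a = {1, NULL, NULL};
	DemoPage	b = {5, NULL, NULL};	/* the "overflow page" to remove */
	DemoPage	c = {9, NULL, NULL};

	a.next = &b;
	b.prev = &a;
	b.next = &c;
	c.prev = &b;

	/* unlink b: point the previous page past it, and the next page back */
	if (b.prev)
		b.prev->next = b.next;		/* prevopaque->hasho_nextblkno = nextblkno */
	if (b.next)
		b.next->prev = b.prev;		/* nextopaque->hasho_prevblkno = prevblkno */

	printf("chain: %d -> %d\n", a.blkno, a.next->blkno);
	return 0;
}
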
534 
535 /*
536  * _hash_initbitmap()
537  *
538  * Initialize a new bitmap page. The metapage has a write-lock upon
539  * entering the function, and must be written by caller after return.
540  *
541  * 'blkno' is the block number of the new bitmap page.
542  *
543  * All bits in the new bitmap page are set to "1", indicating "in use".
544  */
545 void
546 _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
547  ForkNumber forkNum)
548 {
549  Buffer buf;
550  Page pg;
551  HashPageOpaque op;
552  uint32 *freep;
553 
554  /*
555  * It is okay to write-lock the new bitmap page while holding metapage
556  * write lock, because no one else could be contending for the new page.
557  * Also, the metapage lock makes it safe to extend the index using
558  * _hash_getnewbuf.
559  *
560  * There is some loss of concurrency in possibly doing I/O for the new
561  * page while holding the metapage lock, but this path is taken so seldom
562  * that it's not worth worrying about.
563  */
564  buf = _hash_getnewbuf(rel, blkno, forkNum);
565  pg = BufferGetPage(buf);
566 
567  /* initialize the page's special space */
568  op = (HashPageOpaque) PageGetSpecialPointer(pg);
569  op->hasho_prevblkno = InvalidBlockNumber;
570  op->hasho_nextblkno = InvalidBlockNumber;
571  op->hasho_bucket = -1;
572  op->hasho_flag = LH_BITMAP_PAGE;
573  op->hasho_page_id = HASHO_PAGE_ID;
574 
575  /* set all of the bits to 1 */
576  freep = HashPageGetBitmap(pg);
577  MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
578 
579  /* dirty the new bitmap page, and release write lock and pin */
580  MarkBufferDirty(buf);
581  _hash_relbuf(rel, buf);
582 
583  /* add the new bitmap page to the metapage's list of bitmaps */
584  /* metapage already has a write lock */
585  if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
586  ereport(ERROR,
587  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
588  errmsg("out of overflow pages in hash index \"%s\"",
589  RelationGetRelationName(rel))));
590 
591  metap->hashm_mapp[metap->hashm_nmaps] = blkno;
592 
593  metap->hashm_nmaps++;
594 }
595 
596 
597 /*
598  * _hash_squeezebucket(rel, bucket)
599  *
600  * Try to squeeze the tuples onto pages occurring earlier in the
601  * bucket chain in an attempt to free overflow pages. When we start
602  * the "squeezing", the page from which we start taking tuples (the
603  * "read" page) is the last bucket in the bucket chain and the page
604  * onto which we start squeezing tuples (the "write" page) is the
605  * first page in the bucket chain. The read page works backward and
606  * the write page works forward; the procedure terminates when the
607  * read page and write page are the same page.
608  *
609  * At completion of this procedure, it is guaranteed that all pages in
610  * the bucket are nonempty, unless the bucket is totally empty (in
611  * which case all overflow pages will be freed). The original implementation
612  * required that to be true on entry as well, but it's a lot easier for
613  * callers to leave empty overflow pages and let this guy clean it up.
614  *
615  * Caller must acquire cleanup lock on the primary page of the target
616  * bucket to exclude any scans that are in progress, which could easily
617  * be confused into returning the same tuple more than once or some tuples
618  * not at all by the rearrangement we are performing here. To prevent
619  * any concurrent scan from crossing the squeeze scan we use lock chaining
620  * similar to hashbucketcleanup; refer to the comments atop hashbucketcleanup.
621  *
622  * We need to retain a pin on the primary bucket to ensure that no concurrent
623  * split can start.
624  *
625  * Since this function is invoked in VACUUM, we provide an access strategy
626  * parameter that controls fetches of the bucket pages.
627  */
628 void
629 _hash_squeezebucket(Relation rel,
630  Bucket bucket,
631  BlockNumber bucket_blkno,
632  Buffer bucket_buf,
633  BufferAccessStrategy bstrategy)
634 {
635  BlockNumber wblkno;
636  BlockNumber rblkno;
637  Buffer wbuf;
638  Buffer rbuf;
639  Page wpage;
640  Page rpage;
641  HashPageOpaque wopaque;
642  HashPageOpaque ropaque;
643  bool wbuf_dirty;
644 
645  /*
646  * start squeezing into the primary bucket page.
647  */
648  wblkno = bucket_blkno;
649  wbuf = bucket_buf;
650  wpage = BufferGetPage(wbuf);
651  wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
652 
653  /*
654  * if there aren't any overflow pages, there's nothing to squeeze. caller
655  * is responsible for releasing the pin on primary bucket page.
656  */
657  if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
658  {
659  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
660  return;
661  }
662 
663  /*
664  * Find the last page in the bucket chain by starting at the base bucket
665  * page and working forward. Note: we assume that a hash bucket chain is
666  * usually smaller than the buffer ring being used by VACUUM, else using
667  * the access strategy here would be counterproductive.
668  */
669  rbuf = InvalidBuffer;
670  ropaque = wopaque;
671  do
672  {
673  rblkno = ropaque->hasho_nextblkno;
674  if (rbuf != InvalidBuffer)
675  _hash_relbuf(rel, rbuf);
676  rbuf = _hash_getbuf_with_strategy(rel,
677  rblkno,
678  HASH_WRITE,
679  LH_OVERFLOW_PAGE,
680  bstrategy);
681  rpage = BufferGetPage(rbuf);
682  ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
683  Assert(ropaque->hasho_bucket == bucket);
684  } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
685 
686  /*
687  * squeeze the tuples.
688  */
689  wbuf_dirty = false;
690  for (;;)
691  {
692  OffsetNumber roffnum;
693  OffsetNumber maxroffnum;
694  OffsetNumber deletable[MaxOffsetNumber];
695  int ndeletable = 0;
696  bool retain_pin = false;
697 
698  /* Scan each tuple in "read" page */
699  maxroffnum = PageGetMaxOffsetNumber(rpage);
700  for (roffnum = FirstOffsetNumber;
701  roffnum <= maxroffnum;
702  roffnum = OffsetNumberNext(roffnum))
703  {
704  IndexTuple itup;
705  Size itemsz;
706 
707  /* skip dead tuples */
708  if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
709  continue;
710 
711  itup = (IndexTuple) PageGetItem(rpage,
712  PageGetItemId(rpage, roffnum));
713  itemsz = IndexTupleDSize(*itup);
714  itemsz = MAXALIGN(itemsz);
715 
716  /*
717  * Walk up the bucket chain, looking for a page big enough for
718  * this item. Exit if we reach the read page.
719  */
720  while (PageGetFreeSpace(wpage) < itemsz)
721  {
722  Buffer next_wbuf = InvalidBuffer;
723 
724  Assert(!PageIsEmpty(wpage));
725 
726  if (wblkno == bucket_blkno)
727  retain_pin = true;
728 
729  wblkno = wopaque->hasho_nextblkno;
730  Assert(BlockNumberIsValid(wblkno));
731 
732  /* don't need to move to next page if we reached the read page */
733  if (wblkno != rblkno)
734  next_wbuf = _hash_getbuf_with_strategy(rel,
735  wblkno,
736  HASH_WRITE,
737  LH_OVERFLOW_PAGE,
738  bstrategy);
739 
740  /*
741  * release the lock on previous page after acquiring the lock
742  * on next page
743  */
744  if (wbuf_dirty)
745  MarkBufferDirty(wbuf);
746  if (retain_pin)
747  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
748  else
749  _hash_relbuf(rel, wbuf);
750 
751  /* nothing more to do if we reached the read page */
752  if (rblkno == wblkno)
753  {
754  if (ndeletable > 0)
755  {
756  /* Delete tuples we already moved off read page */
757  PageIndexMultiDelete(rpage, deletable, ndeletable);
758  MarkBufferDirty(rbuf);
759  }
760  _hash_relbuf(rel, rbuf);
761  return;
762  }
763 
764  wbuf = next_wbuf;
765  wpage = BufferGetPage(wbuf);
766  wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
767  Assert(wopaque->hasho_bucket == bucket);
768  wbuf_dirty = false;
769  retain_pin = false;
770  }
771 
772  /*
773  * we have found room so insert on the "write" page, being careful
774  * to preserve hashkey ordering. (If we insert many tuples into
775  * the same "write" page it would be worth qsort'ing instead of
776  * doing repeated _hash_pgaddtup.)
777  */
778  (void) _hash_pgaddtup(rel, wbuf, itemsz, itup);
779  wbuf_dirty = true;
780 
781  /* remember tuple for deletion from "read" page */
782  deletable[ndeletable++] = roffnum;
783  }
784 
785  /*
786  * If we reach here, there are no live tuples on the "read" page ---
787  * it was empty when we got to it, or we moved them all. So we can
788  * just free the page without bothering with deleting tuples
789  * individually. Then advance to the previous "read" page.
790  *
791  * Tricky point here: if our read and write pages are adjacent in the
792  * bucket chain, our write lock on wbuf will conflict with
793  * _hash_freeovflpage's attempt to update the sibling links of the
794  * removed page. In that case, we don't need to lock it again.
795  */
796  rblkno = ropaque->hasho_prevblkno;
797  Assert(BlockNumberIsValid(rblkno));
798 
799  /* free this overflow page (releases rbuf) */
800  _hash_freeovflpage(rel, rbuf, wbuf, bstrategy);
801 
802  if (wbuf_dirty)
803  MarkBufferDirty(wbuf);
804 
805  /* are we freeing the page adjacent to wbuf? */
806  if (rblkno == wblkno)
807  {
808  /* retain the pin on primary bucket page till end of bucket scan */
809  if (wblkno == bucket_blkno)
810  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
811  else
812  _hash_relbuf(rel, wbuf);
813  return;
814  }
815 
816  rbuf = _hash_getbuf_with_strategy(rel,
817  rblkno,
818  HASH_WRITE,
819  LH_OVERFLOW_PAGE,
820  bstrategy);
821  rpage = BufferGetPage(rbuf);
822  ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
823  Assert(ropaque->hasho_bucket == bucket);
824  }
825 
826  /* NOTREACHED */
827 }
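
For orientation, here is a toy model of the squeeze pass, assuming pages are fixed-capacity integer arrays and ignoring locking, pinning, dead tuples, and hashkey ordering. It only illustrates the two-cursor control flow: the write cursor starts at the front of the chain, the read cursor at the tail, and the pass ends when the cursors meet.

/*
 * Toy model of _hash_squeezebucket()'s control flow.  The fill levels
 * in counts[] are hypothetical; "freeing" a page is just decrementing
 * the read cursor.
 */
#include <stdio.h>

#define NPAGES	 4
#define CAPACITY 4

static int	pages[NPAGES][CAPACITY];
static int	counts[NPAGES] = {1, 0, 2, 3};	/* hypothetical fill levels */

int
main(void)
{
	int			wpage = 0;			/* "write" page: front of chain */
	int			rpage = NPAGES - 1; /* "read" page: tail of chain */
	int			p,
				t;

	/* seed some recognizable tuple values */
	for (p = 0; p < NPAGES; p++)
		for (t = 0; t < counts[p]; t++)
			pages[p][t] = p * 10 + t;

	while (rpage > wpage)
	{
		/* move every tuple off the read page, advancing wpage as it fills */
		while (counts[rpage] > 0)
		{
			if (counts[wpage] == CAPACITY)
			{
				wpage++;			/* write page full: move forward */
				if (wpage == rpage)
					goto done;		/* cursors met: nothing more to do */
				continue;
			}
			pages[wpage][counts[wpage]++] = pages[rpage][--counts[rpage]];
		}
		rpage--;					/* read page now empty: "free" it */
	}
done:
	for (p = 0; p < NPAGES; p++)
		printf("page %d holds %d tuples\n", p, counts[p]);
	return 0;
}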