PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashovfl.c File Reference
#include "postgres.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "utils/rel.h"
Include dependency graph for hashovfl.c:

Go to the source code of this file.

Functions

static uint32 _hash_firstfreebit (uint32 map)
 
static BlockNumber bitno_to_blkno (HashMetaPage metap, uint32 ovflbitnum)
 
uint32 _hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)
 
Buffer _hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
BlockNumber _hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
 
void _hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)
 
void _hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
 

Function Documentation

◆ _hash_addovflpage()

Buffer _hash_addovflpage ( Relation  rel,
Buffer  metabuf,
Buffer  buf,
bool  retain_pin 
)

Definition at line 112 of file hashovfl.c.

113{
115 Page page;
122 BlockNumber blkno;
125 uint32 *freep = NULL;
127 uint32 bit;
129 uint32 first_page;
132 uint32 i,
133 j;
134 bool page_found = false;
135
136 /*
137 * Write-lock the tail page. We must maintain a fixed locking order here:
138 * first acquire the lock on the tail page of the bucket, then the lock on
139 * the meta page in order to find and lock the bitmap page; once the bitmap
140 * page is found, release the meta page lock, and finally acquire the lock
141 * on the new overflow buffer. We need this locking order to avoid deadlock
142 * with backends that are doing inserts.
143 *
144 * Note: We could have avoided locking many buffers here if we made two
145 * WAL records for acquiring an overflow page (one to allocate an overflow
146 * page and another to add it to overflow bucket chain). However, doing
147 * so can leak an overflow page, if the system crashes after allocation.
148 * Needless to say, it is better to have a single record from a
149 * performance point of view as well.
150 */
152
153 /* probably redundant... */
155
156 /* loop to find current tail page, in case someone else inserted too */
157 for (;;)
158 {
159 BlockNumber nextblkno;
160
161 page = BufferGetPage(buf);
163 nextblkno = pageopaque->hasho_nextblkno;
164
165 if (!BlockNumberIsValid(nextblkno))
166 break;
167
168 /* we assume we do not need to write the unmodified page */
169 if (retain_pin)
170 {
171 /* pin will be retained only for the primary bucket page */
172 Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
174 }
175 else
176 _hash_relbuf(rel, buf);
177
178 retain_pin = false;
179
180 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
181 }
182
183 /* Get exclusive lock on the meta page */
185
188
189 /* start search at hashm_firstfree */
190 orig_firstfree = metap->hashm_firstfree;
191 first_page = orig_firstfree >> BMPG_SHIFT(metap);
193 i = first_page;
194 j = bit / BITS_PER_MAP;
195 bit &= ~(BITS_PER_MAP - 1);
196
197 /* outer loop iterates once per bitmap page */
198 for (;;)
199 {
203
204 /* want to end search with the last existing overflow page */
205 splitnum = metap->hashm_ovflpoint;
206 max_ovflpg = metap->hashm_spares[splitnum] - 1;
209
210 if (i > last_page)
211 break;
212
213 Assert(i < metap->hashm_nmaps);
214 mapblkno = metap->hashm_mapp[i];
215
216 if (i == last_page)
218 else
220
221 /* Release exclusive lock on metapage while reading bitmap page */
223
227
228 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
229 {
230 if (freep[j] != ALL_SET)
231 {
232 page_found = true;
233
234 /* Reacquire exclusive lock on the meta page */
236
237 /* convert bit to bit number within page */
240
241 /* convert bit to absolute bit number */
242 bit += (i << BMPG_SHIFT(metap));
243 /* Calculate address of the recycled overflow page */
244 blkno = bitno_to_blkno(metap, bit);
245
246 /* Fetch and init the recycled page */
247 ovflbuf = _hash_getinitbuf(rel, blkno);
248
249 goto found;
250 }
251 }
252
253 /* No free space here, try to advance to next map page */
254 _hash_relbuf(rel, mapbuf);
256 i++;
257 j = 0; /* scan from start of next map page */
258 bit = 0;
259
260 /* Reacquire exclusive lock on the meta page */
262 }
263
264 /*
265 * No free pages --- have to extend the relation to add an overflow page.
266 * First, check to see if we have to add a new bitmap page too.
267 */
268 if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
269 {
270 /*
271 * We create the new bitmap page with all pages marked "in use".
272 * Actually two pages in the new bitmap's range will exist
273 * immediately: the bitmap page itself, and the following page which
274 * is the one we return to the caller. Both of these are correctly
275 * marked "in use". Subsequent pages do not exist yet, but it is
276 * convenient to pre-mark them as "in use" too.
277 */
278 bit = metap->hashm_spares[splitnum];
279
280 /* metapage already has a write lock */
281 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
284 errmsg("out of overflow pages in hash index \"%s\"",
286
288 }
289 else
290 {
291 /*
292 * Nothing to do here; since the page will be past the last used page,
293 * we know its bitmap bit was preinitialized to "in use".
294 */
295 }
296
297 /* Calculate address of the new overflow page */
299 metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
300 blkno = bitno_to_blkno(metap, bit);
301
302 /*
303 * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
304 * relation length stays in sync with ours. XXX It's annoying to do this
305 * with metapage write lock held; would be better to use a lock that
306 * doesn't block incoming searches.
307 *
308 * It is okay to hold two buffer locks here (one on tail page of bucket
309 * and other on new overflow page) since there cannot be anyone else
310 * contending for access to ovflbuf.
311 */
312 ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
313
314found:
315
316 /*
317 * Do the update. No ereport(ERROR) until changes are logged. We want to
318 * log the changes for bitmap page and overflow page together to avoid
319 * loss of pages in case the new page is added.
320 */
322
323 if (page_found)
324 {
326
327 /* mark page "in use" in the bitmap */
330 }
331 else
332 {
333 /* update the count to indicate new overflow page is added */
334 metap->hashm_spares[splitnum]++;
335
337 {
338 _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
340
341 /* add the new bitmap page to the metapage's list of bitmaps */
342 metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
343 metap->hashm_nmaps++;
344 metap->hashm_spares[splitnum]++;
345 }
346
348
349 /*
350 * for new overflow page, we don't need to explicitly set the bit in
351 * bitmap page, as by default that will be set to "in use".
352 */
353 }
354
355 /*
356 * Adjust hashm_firstfree to avoid redundant searches. But don't risk
357 * changing it if someone moved it while we were searching bitmap pages.
358 */
359 if (metap->hashm_firstfree == orig_firstfree)
360 {
361 metap->hashm_firstfree = bit + 1;
363 }
364
365 /* initialize new overflow page */
368 ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
369 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
370 ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
371 ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
372 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
373
375
376 /* logically chain overflow page to previous page */
377 pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
378
380
381 /* XLOG stuff */
382 if (RelationNeedsWAL(rel))
383 {
386
388 xlrec.bmsize = metap->hashm_bmsize;
389
392
394 XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
395
397
399 {
402 }
403
406
408 XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
409
411
414
417
420
422 }
423
425
426 if (retain_pin)
428 else
429 _hash_relbuf(rel, buf);
430
432 _hash_relbuf(rel, mapbuf);
433
435
438
439 return ovflbuf;
440}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define SETBIT(x, i)
Definition blutils.c:29
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4356
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3056
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
PageData * Page
Definition bufpage.h:81
#define Assert(condition)
Definition c.h:873
uint32_t uint32
Definition c.h:546
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define ereport(elevel,...)
Definition elog.h:150
#define HashPageGetOpaque(page)
Definition hash.h:88
#define LH_BUCKET_PAGE
Definition hash.h:55
#define HASH_MAX_BITMAPS
Definition hash.h:230
#define BMPG_MASK(metap)
Definition hash.h:314
#define HASH_WRITE
Definition hash.h:340
#define BITS_PER_MAP
Definition hash.h:329
#define HashPageGetBitmap(page)
Definition hash.h:316
#define LH_META_PAGE
Definition hash.h:57
#define HASHO_PAGE_ID
Definition hash.h:101
#define HashPageGetMeta(page)
Definition hash.h:323
#define BMPGSZ_BIT(metap)
Definition hash.h:312
#define LH_PAGE_TYPE
Definition hash.h:63
uint32 Bucket
Definition hash.h:35
#define ALL_SET
Definition hash.h:302
#define LH_BITMAP_PAGE
Definition hash.h:56
#define BMPG_SHIFT(metap)
Definition hash.h:313
#define LH_OVERFLOW_PAGE
Definition hash.h:54
#define SizeOfHashAddOvflPage
Definition hash_xlog.h:80
#define XLOG_HASH_ADD_OVFL_PAGE
Definition hash_xlog.h:30
static uint32 _hash_firstfreebit(uint32 map)
Definition hashovfl.c:448
void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
Definition hashovfl.c:777
static BlockNumber bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
Definition hashovfl.c:35
Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno)
Definition hashpage.c:135
void _hash_relbuf(Relation rel, Buffer buf)
Definition hashpage.c:266
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition hashpage.c:70
Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
Definition hashpage.c:198
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition hashutil.c:210
int j
Definition isn.c:78
int i
Definition isn.c:77
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
@ MAIN_FORKNUM
Definition relpath.h:58
Datum bit(PG_FUNCTION_ARGS)
Definition varbit.c:391
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:409
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:245
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert, bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid(), xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, fb(), HASH_MAX_BITMAPS, HASH_WRITE, HASHO_PAGE_ID, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBuffer, j, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().

◆ _hash_firstfreebit()

static uint32 _hash_firstfreebit ( uint32  map)
static

Definition at line 448 of file hashovfl.c.

449{ /* Return the zero-based index of the lowest clear (0) bit in map. */
450 uint32 i, /* index of the bit currently being tested */
451 mask; /* single-bit mask selecting bit i of map */
452
453 mask = 0x1; /* begin with the least-significant bit */
454 for (i = 0; i < BITS_PER_MAP; i++)
455 {
456 if (!(mask & map)) /* bit i is clear in map */
457 return i;
458 mask <<= 1; /* advance to the next higher bit */
459 }
460
461 elog(ERROR, "firstfreebit found no free bit"); /* all BITS_PER_MAP bits tested were set */
462
463 return 0; /* keep compiler quiet */
464}
#define elog(elevel,...)
Definition elog.h:226

References BITS_PER_MAP, elog, ERROR, and i.

Referenced by _hash_addovflpage().

◆ _hash_freeovflpage()

BlockNumber _hash_freeovflpage ( Relation  rel,
Buffer  bucketbuf,
Buffer  ovflbuf,
Buffer  wbuf,
IndexTuple * itups,
OffsetNumber * itup_offsets,
Size * tups_size,
uint16  nitups,
BufferAccessStrategy  bstrategy 
)

Definition at line 490 of file hashovfl.c.

494{
499 BlockNumber prevblkno;
500 BlockNumber blkno;
501 BlockNumber nextblkno;
506 uint32 *freep;
509 bitmapbit;
513 bool update_metap = false;
514
515 /* Get information from the doomed page */
520 nextblkno = ovflopaque->hasho_nextblkno;
521 prevblkno = ovflopaque->hasho_prevblkno;
523 bucket = ovflopaque->hasho_bucket;
524
525 /*
526 * Fix up the bucket chain. This is a doubly-linked list, so we must fix
527 * up the bucket chain members behind and ahead of the overflow page being
528 * deleted. Concurrency issues are avoided by using lock chaining as
529 * described atop hashbucketcleanup.
530 */
531 if (BlockNumberIsValid(prevblkno))
532 {
533 if (prevblkno == writeblkno)
534 prevbuf = wbuf;
535 else
537 prevblkno,
540 bstrategy);
541 }
542 if (BlockNumberIsValid(nextblkno))
544 nextblkno,
547 bstrategy);
548
549 /* Note: bstrategy is intentionally not used for metapage and bitmap */
550
551 /* Read the metapage so we can determine which bitmap page to use */
554
555 /* Identify which bit to set */
557
560
561 if (bitmappage >= metap->hashm_nmaps)
562 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
563 blkno = metap->hashm_mapp[bitmappage];
564
565 /* Release metapage lock while we access the bitmap page */
567
568 /* read the bitmap page to clear the bitmap bit */
573
574 /* Get write-lock on metapage to update firstfree */
576
577 /* This operation needs to log multiple tuples, prepare WAL for that */
578 if (RelationNeedsWAL(rel))
580
582
583 /*
584 * we have to insert tuples on the "write" page, being careful to preserve
585 * hashkey ordering. (If we insert many tuples into the same "write" page
586 * it would be worth qsort'ing them).
587 */
588 if (nitups > 0)
589 {
592 }
593
594 /*
595 * Reinitialize the freed overflow page. Just zeroing the page won't
596 * work, because WAL replay routines expect pages to be initialized. See
597 * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
598 * careful to make the special space valid here so that tools like
599 * pageinspect won't get confused.
600 */
602
604
605 ovflopaque->hasho_prevblkno = InvalidBlockNumber;
606 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
607 ovflopaque->hasho_bucket = InvalidBucket;
608 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
609 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
610
612
614 {
617
618 Assert(prevopaque->hasho_bucket == bucket);
619 prevopaque->hasho_nextblkno = nextblkno;
621 }
623 {
626
627 Assert(nextopaque->hasho_bucket == bucket);
628 nextopaque->hasho_prevblkno = prevblkno;
630 }
631
632 /* Clear the bitmap bit to indicate that this overflow page is free */
635
636 /* if this is now the first free page, update hashm_firstfree */
637 if (ovflbitno < metap->hashm_firstfree)
638 {
639 metap->hashm_firstfree = ovflbitno;
640 update_metap = true;
642 }
643
644 /* XLOG stuff */
645 if (RelationNeedsWAL(rel))
646 {
649 int i;
650 bool mod_wbuf = false;
651
652 xlrec.prevblkno = prevblkno;
653 xlrec.nextblkno = nextblkno;
654 xlrec.ntups = nitups;
655 xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
656 xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
657
660
661 /*
662 * bucket buffer was not changed, but still needs to be registered to
663 * ensure that we can acquire a cleanup lock on it during replay.
664 */
665 if (!xlrec.is_prim_bucket_same_wrt)
666 {
668
669 XLogRegisterBuffer(0, bucketbuf, flags);
670 }
671
672 if (xlrec.ntups > 0)
673 {
675
676 /* Remember that wbuf is modified. */
677 mod_wbuf = true;
678
680 nitups * sizeof(OffsetNumber));
681 for (i = 0; i < nitups; i++)
682 XLogRegisterBufData(1, itups[i], tups_size[i]);
683 }
684 else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
685 {
687
688 /*
689 * A write buffer needs to be registered even if no tuples are
690 * added to it to ensure that we can acquire a cleanup lock on it
691 * if it is the same as primary bucket buffer or update the
692 * nextblkno if it is same as the previous bucket buffer.
693 */
694 Assert(xlrec.ntups == 0);
695
697 if (!xlrec.is_prev_bucket_same_wrt)
698 {
700 }
701 else
702 {
703 /* Remember that wbuf is modified. */
704 mod_wbuf = true;
705 }
707 }
708
710
711 /*
712 * If prevpage and the writepage (block in which we are moving tuples
713 * from overflow) are same, then no need to separately register
714 * prevpage. During replay, we can directly update the nextblock in
715 * writepage.
716 */
717 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
719
722
725
726 if (update_metap)
727 {
729 XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
730 }
731
733
734 /* Set LSN iff wbuf is modified. */
735 if (mod_wbuf)
737
739
740 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
744
746
747 if (update_metap)
749 }
750
752
753 /* release previous bucket if it is not same as write bucket */
754 if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
755 _hash_relbuf(rel, prevbuf);
756
758 _hash_relbuf(rel, ovflbuf);
759
761 _hash_relbuf(rel, nextbuf);
762
763 _hash_relbuf(rel, mapbuf);
764 _hash_relbuf(rel, metabuf);
765
766 return nextblkno;
767}
#define CLRBIT(x, i)
Definition blutils.c:28
static Size BufferGetPageSize(Buffer buffer)
Definition bufmgr.h:455
uint8_t uint8
Definition c.h:544
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
int32_t int32
Definition c.h:542
#define LH_UNUSED_PAGE
Definition hash.h:53
#define ISSET(A, N)
Definition hash.h:334
#define HASH_READ
Definition hash.h:339
#define HASH_METAPAGE
Definition hash.h:198
#define InvalidBucket
Definition hash.h:37
#define HASH_XLOG_FREE_OVFL_BUFS
Definition hash_xlog.h:22
#define XLOG_HASH_SQUEEZE_PAGE
Definition hash_xlog.h:35
#define SizeOfHashSqueezePage
Definition hash_xlog.h:168
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition hashinsert.c:329
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Definition hashovfl.c:62
void _hash_pageinit(Page page, Size size)
Definition hashpage.c:596
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition hashpage.c:239
uint16 OffsetNumber
Definition off.h:24
BlockNumber prevblkno
Definition hash_xlog.h:156
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition xloginsert.c:178
#define REGBUF_NO_CHANGE
Definition xloginsert.h:37
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid(), BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CLRBIT, elog, END_CRIT_SECTION, ERROR, fb(), HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HASHO_PAGE_ID, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBucket, InvalidBuffer, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().

◆ _hash_initbitmapbuffer()

void _hash_initbitmapbuffer ( Buffer  buf,
uint16  bmsize,
bool  initpage 
)

Definition at line 777 of file hashovfl.c.

778{
779 Page pg;
781 uint32 *freep;
782
784
785 /* initialize the page */
786 if (initpage)
788
789 /* initialize the page's special space */
790 op = HashPageGetOpaque(pg);
796
797 /* set all of the bits to 1 */
799 memset(freep, 0xFF, bmsize);
800
801 /*
802 * Set pd_lower just past the end of the bitmap page data. We could even
803 * set pd_lower equal to pd_upper, but this is more precise and makes the
804 * page look compressible to xlog.c.
805 */
806 ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
807}
PageHeaderData * PageHeader
Definition bufpage.h:173
BlockNumber hasho_nextblkno
Definition hash.h:80
uint16 hasho_flag
Definition hash.h:82
BlockNumber hasho_prevblkno
Definition hash.h:79
uint16 hasho_page_id
Definition hash.h:83
Bucket hasho_bucket
Definition hash.h:81

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), fb(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, and LH_BITMAP_PAGE.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().

◆ _hash_ovflblkno_to_bitno()

uint32 _hash_ovflblkno_to_bitno ( HashMetaPage  metap,
BlockNumber  ovflblkno 
)

Definition at line 62 of file hashovfl.c.

63{
64 uint32 splitnum = metap->hashm_ovflpoint;
65 uint32 i;
67
68 /* Determine the split number containing this page */
69 for (i = 1; i <= splitnum; i++)
70 {
72 break; /* oops */
74
75 /*
76 * bitnum has to be greater than the number of overflow pages added in
77 * the previous split point. The overflow pages at this splitnum (i), if
78 * any, should start from (_hash_get_totalbuckets(i) +
79 * metap->hashm_spares[i - 1] + 1).
80 */
81 if (bitnum > metap->hashm_spares[i - 1] &&
82 bitnum <= metap->hashm_spares[i])
83 return bitnum - 1; /* -1 to convert 1-based to 0-based */
84 }
85
88 errmsg("invalid overflow block number %u", ovflblkno)));
89 return 0; /* keep compiler quiet */
90}
uint32 _hash_get_totalbuckets(uint32 splitpoint_phase)
Definition hashutil.c:174

References _hash_get_totalbuckets(), ereport, errcode(), errmsg(), ERROR, fb(), and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().

◆ _hash_squeezebucket()

void _hash_squeezebucket ( Relation  rel,
Bucket  bucket,
BlockNumber  bucket_blkno,
Buffer  bucket_buf,
BufferAccessStrategy  bstrategy 
)

Definition at line 842 of file hashovfl.c.

847{
850 Buffer wbuf;
851 Buffer rbuf;
852 Page wpage;
853 Page rpage;
856
857 /*
858 * start squeezing into the primary bucket page.
859 */
864
865 /*
866 * If there aren't any overflow pages, there's nothing to squeeze. The
867 * caller is responsible for releasing the pin on the primary bucket page.
868 */
869 if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
870 {
872 return;
873 }
874
875 /*
876 * Find the last page in the bucket chain by starting at the base bucket
877 * page and working forward. Note: we assume that a hash bucket chain is
878 * usually smaller than the buffer ring being used by VACUUM, else using
879 * the access strategy here would be counterproductive.
880 */
883 do
884 {
885 rblkno = ropaque->hasho_nextblkno;
886 if (rbuf != InvalidBuffer)
887 _hash_relbuf(rel, rbuf);
889 rblkno,
892 bstrategy);
895 Assert(ropaque->hasho_bucket == bucket);
896 } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
897
898 /*
899 * squeeze the tuples.
900 */
901 for (;;)
902 {
909 uint16 ndeletable = 0;
910 uint16 nitups = 0;
912 int i;
913 bool retain_pin = false;
914
916 /* Scan each tuple in "read" page */
921 {
922 IndexTuple itup;
923 Size itemsz;
924
925 /* skip dead tuples */
927 continue;
928
931 itemsz = IndexTupleSize(itup);
932 itemsz = MAXALIGN(itemsz);
933
934 /*
935 * Walk up the bucket chain, looking for a page big enough for
936 * this item and all other accumulated items. Exit if we reach
937 * the read page.
938 */
940 {
942 bool tups_moved = false;
943
945
946 if (wblkno == bucket_blkno)
947 retain_pin = true;
948
949 wblkno = wopaque->hasho_nextblkno;
951
952 /* don't need to move to next page if we reached the read page */
953 if (wblkno != rblkno)
955 wblkno,
958 bstrategy);
959
960 if (nitups > 0)
961 {
963
964 /*
965 * This operation needs to log multiple tuples, prepare
966 * WAL for that.
967 */
968 if (RelationNeedsWAL(rel))
970
972
973 /*
974 * we have to insert tuples on the "write" page, being
975 * careful to preserve hashkey ordering. (If we insert
976 * many tuples into the same "write" page it would be
977 * worth qsort'ing them).
978 */
981
982 /* Delete tuples we already moved off read page */
985
986 /* XLOG stuff */
987 if (RelationNeedsWAL(rel))
988 {
991
993 xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
994
997
998 /*
999 * bucket buffer was not changed, but still needs to
1000 * be registered to ensure that we can acquire a
1001 * cleanup lock on it during replay.
1002 */
1003 if (!xlrec.is_prim_bucket_same_wrt)
1004 {
1006
1007 XLogRegisterBuffer(0, bucket_buf, flags);
1008 }
1009
1012 nitups * sizeof(OffsetNumber));
1013 for (i = 0; i < nitups; i++)
1014 XLogRegisterBufData(1, itups[i], tups_size[i]);
1015
1018 ndeletable * sizeof(OffsetNumber));
1019
1021
1024 }
1025
1027
1028 tups_moved = true;
1029 }
1030
1031 /*
1032 * release the lock on previous page after acquiring the lock
1033 * on next page
1034 */
1035 if (retain_pin)
1037 else
1038 _hash_relbuf(rel, wbuf);
1039
1040 /* nothing more to do if we reached the read page */
1041 if (rblkno == wblkno)
1042 {
1043 _hash_relbuf(rel, rbuf);
1044 return;
1045 }
1046
1047 wbuf = next_wbuf;
1050 Assert(wopaque->hasho_bucket == bucket);
1051 retain_pin = false;
1052
1053 /* be tidy */
1054 for (i = 0; i < nitups; i++)
1055 pfree(itups[i]);
1056 nitups = 0;
1057 all_tups_size = 0;
1058 ndeletable = 0;
1059
1060 /*
1061 * after moving the tuples, rpage would have been compacted,
1062 * so we need to rescan it.
1063 */
1064 if (tups_moved)
1065 goto readpage;
1066 }
1067
1068 /* remember tuple for deletion from "read" page */
1070
1071 /*
1072 * We need a copy of the index tuples because they can be freed as part
1073 * of the overflow page, yet we still need them to write a WAL record in
1074 * _hash_freeovflpage.
1075 */
1076 itups[nitups] = CopyIndexTuple(itup);
1077 tups_size[nitups++] = itemsz;
1078 all_tups_size += itemsz;
1079 }
1080
1081 /*
1082 * If we reach here, there are no live tuples on the "read" page ---
1083 * it was empty when we got to it, or we moved them all. So we can
1084 * just free the page without bothering with deleting tuples
1085 * individually. Then advance to the previous "read" page.
1086 *
1087 * Tricky point here: if our read and write pages are adjacent in the
1088 * bucket chain, our write lock on wbuf will conflict with
1089 * _hash_freeovflpage's attempt to update the sibling links of the
1090 * removed page. In that case, we don't need to lock it again.
1091 */
1092 rblkno = ropaque->hasho_prevblkno;
1094
1095 /* free this overflow page (releases rbuf) */
1097 tups_size, nitups, bstrategy);
1098
1099 /* be tidy */
1100 for (i = 0; i < nitups; i++)
1101 pfree(itups[i]);
1102
1103 /* are we freeing the page adjacent to wbuf? */
1104 if (rblkno == wblkno)
1105 {
1106 /* retain the pin on primary bucket page till end of bucket scan */
1107 if (wblkno == bucket_blkno)
1109 else
1110 _hash_relbuf(rel, wbuf);
1111 return;
1112 }
1113
1115 rblkno,
1116 HASH_WRITE,
1118 bstrategy);
1121 Assert(ropaque->hasho_bucket == bucket);
1122 }
1123
1124 /* NOTREACHED */
1125}
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition bufpage.c:933
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition bufpage.c:1160
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:223
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
#define MAXALIGN(LEN)
Definition c.h:826
uint16_t uint16
Definition c.h:545
size_t Size
Definition c.h:619
#define SizeOfHashMovePageContents
Definition hash_xlog.h:138
#define XLOG_HASH_MOVE_PAGE_CONTENTS
Definition hash_xlog.h:34
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
Definition hashovfl.c:490
IndexTuple CopyIndexTuple(IndexTuple source)
Definition indextuple.c:547
#define ItemIdIsDead(itemId)
Definition itemid.h:113
IndexTupleData * IndexTuple
Definition itup.h:53
static Size IndexTupleSize(const IndexTupleData *itup)
Definition itup.h:71
#define MaxIndexTuplesPerPage
Definition itup.h:181
void pfree(void *pointer)
Definition mcxt.c:1616
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), CopyIndexTuple(), END_CRIT_SECTION, fb(), FirstOffsetNumber, HASH_WRITE, HashPageGetOpaque, i, IndexTupleSize(), InvalidBuffer, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageIsEmpty(), PageSetLSN(), pfree(), REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().

◆ bitno_to_blkno()

static BlockNumber bitno_to_blkno ( HashMetaPage  metap,
uint32  ovflbitnum 
)
static

Definition at line 35 of file hashovfl.c.

36{
37 uint32 splitnum = metap->hashm_ovflpoint;
38 uint32 i;
39
40 /* Convert zero-based bitnumber to 1-based page number */
41 ovflbitnum += 1;
42
43 /* Determine the split number for this page (must be >= 1) */
44 for (i = 1;
46 i++)
47 /* loop */ ;
48
49 /*
50 * Convert to absolute page number by adding the number of bucket pages
51 * that exist before this split point.
52 */
54}

References _hash_get_totalbuckets(), fb(), and i.

Referenced by _hash_addovflpage().