PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashovfl.c File Reference
#include "postgres.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "utils/rel.h"
Include dependency graph for hashovfl.c:

Go to the source code of this file.

Functions

static uint32 _hash_firstfreebit (uint32 map)
 
static BlockNumber bitno_to_blkno (HashMetaPage metap, uint32 ovflbitnum)
 
uint32 _hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)
 
Buffer _hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
BlockNumber _hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
 
void _hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)
 
void _hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
 

Function Documentation

◆ _hash_addovflpage()

Buffer _hash_addovflpage ( Relation  rel,
Buffer  metabuf,
Buffer  buf,
bool  retain_pin 
)

Definition at line 112 of file hashovfl.c.

113{
115 Page page;
119 HashMetaPage metap;
122 BlockNumber blkno;
125 uint32 *freep = NULL;
127 uint32 bit;
129 uint32 first_page;
132 uint32 i,
133 j;
134 bool page_found = false;
136
137 /*
138 * Write-lock the tail page. Here, we need to maintain locking order such
139 * that, first acquire the lock on tail page of bucket, then on meta page
140 * to find and lock the bitmap page and if it is found, then lock on meta
141 * page is released, then finally acquire the lock on new overflow buffer.
142 * We need this locking order to avoid deadlock with backends that are
143 * doing inserts.
144 *
145 * Note: We could have avoided locking many buffers here if we made two
146 * WAL records for acquiring an overflow page (one to allocate an overflow
147 * page and another to add it to overflow bucket chain). However, doing
148 * so can leak an overflow page, if the system crashes after allocation.
149 * Needless to say, it is better to have a single record from a
150 * performance point of view as well.
151 */
153
154 /* probably redundant... */
156
157 /* loop to find current tail page, in case someone else inserted too */
158 for (;;)
159 {
160 BlockNumber nextblkno;
161
162 page = BufferGetPage(buf);
164 nextblkno = pageopaque->hasho_nextblkno;
165
166 if (!BlockNumberIsValid(nextblkno))
167 break;
168
169 /* we assume we do not need to write the unmodified page */
170 if (retain_pin)
171 {
172 /* pin will be retained only for the primary bucket page */
173 Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
175 }
176 else
177 _hash_relbuf(rel, buf);
178
179 retain_pin = false;
180
181 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
182 }
183
184 /* Get exclusive lock on the meta page */
186
189
190 /* start search at hashm_firstfree */
192 first_page = orig_firstfree >> BMPG_SHIFT(metap);
193 bit = orig_firstfree & BMPG_MASK(metap);
194 i = first_page;
195 j = bit / BITS_PER_MAP;
196 bit &= ~(BITS_PER_MAP - 1);
197
198 /* outer loop iterates once per bitmap page */
199 for (;;)
200 {
204
205 /* want to end search with the last existing overflow page */
206 splitnum = metap->hashm_ovflpoint;
207 max_ovflpg = metap->hashm_spares[splitnum] - 1;
208 last_page = max_ovflpg >> BMPG_SHIFT(metap);
209 last_bit = max_ovflpg & BMPG_MASK(metap);
210
211 if (i > last_page)
212 break;
213
214 Assert(i < metap->hashm_nmaps);
215 mapblkno = metap->hashm_mapp[i];
216
217 if (i == last_page)
219 else
220 last_inpage = BMPGSZ_BIT(metap) - 1;
221
222 /* Release exclusive lock on metapage while reading bitmap page */
224
228
229 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
230 {
231 if (freep[j] != ALL_SET)
232 {
233 page_found = true;
234
235 /* Reacquire exclusive lock on the meta page */
237
238 /* convert bit to bit number within page */
241
242 /* convert bit to absolute bit number */
243 bit += (i << BMPG_SHIFT(metap));
244 /* Calculate address of the recycled overflow page */
245 blkno = bitno_to_blkno(metap, bit);
246
247 /* Fetch and init the recycled page */
248 ovflbuf = _hash_getinitbuf(rel, blkno);
249
250 goto found;
251 }
252 }
253
254 /* No free space here, try to advance to next map page */
255 _hash_relbuf(rel, mapbuf);
257 i++;
258 j = 0; /* scan from start of next map page */
259 bit = 0;
260
261 /* Reacquire exclusive lock on the meta page */
263 }
264
265 /*
266 * No free pages --- have to extend the relation to add an overflow page.
267 * First, check to see if we have to add a new bitmap page too.
268 */
269 if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
270 {
271 /*
272 * We create the new bitmap page with all pages marked "in use".
273 * Actually two pages in the new bitmap's range will exist
274 * immediately: the bitmap page itself, and the following page which
275 * is the one we return to the caller. Both of these are correctly
276 * marked "in use". Subsequent pages do not exist yet, but it is
277 * convenient to pre-mark them as "in use" too.
278 */
279 bit = metap->hashm_spares[splitnum];
280
281 /* metapage already has a write lock */
282 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
285 errmsg("out of overflow pages in hash index \"%s\"",
287
289 }
290 else
291 {
292 /*
293 * Nothing to do here; since the page will be past the last used page,
294 * we know its bitmap bit was preinitialized to "in use".
295 */
296 }
297
298 /* Calculate address of the new overflow page */
300 metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
301 blkno = bitno_to_blkno(metap, bit);
302
303 /*
304 * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
305 * relation length stays in sync with ours. XXX It's annoying to do this
306 * with metapage write lock held; would be better to use a lock that
307 * doesn't block incoming searches.
308 *
309 * It is okay to hold two buffer locks here (one on tail page of bucket
310 * and other on new overflow page) since there cannot be anyone else
311 * contending for access to ovflbuf.
312 */
313 ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
314
315found:
316
317 /*
318 * Do the update. No ereport(ERROR) until changes are logged. We want to
319 * log the changes for bitmap page and overflow page together to avoid
320 * loss of pages in case the new page is added.
321 */
323
324 if (page_found)
325 {
327
328 /* mark page "in use" in the bitmap */
331 }
332 else
333 {
334 /* update the count to indicate new overflow page is added */
335 metap->hashm_spares[splitnum]++;
336
338 {
341
342 /* add the new bitmap page to the metapage's list of bitmaps */
344 metap->hashm_nmaps++;
345 metap->hashm_spares[splitnum]++;
346 }
347
349
350 /*
351 * for new overflow page, we don't need to explicitly set the bit in
352 * bitmap page, as by default that will be set to "in use".
353 */
354 }
355
356 /*
357 * Adjust hashm_firstfree to avoid redundant searches. But don't risk
358 * changing it if someone moved it while we were searching bitmap pages.
359 */
360 if (metap->hashm_firstfree == orig_firstfree)
361 {
362 metap->hashm_firstfree = bit + 1;
364 }
365
366 /* initialize new overflow page */
369 ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
370 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
371 ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
372 ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
373 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
374
376
377 /* logically chain overflow page to previous page */
378 pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
379
381
382 /* XLOG stuff */
383 if (RelationNeedsWAL(rel))
384 {
386
388 xlrec.bmsize = metap->hashm_bmsize;
389
392
394 XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
395
397
399 {
402 }
403
406
408 XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
409
411 }
412 else
413 recptr = XLogGetFakeLSN(rel);
414
417
420
423
425
427
428 if (retain_pin)
430 else
431 _hash_relbuf(rel, buf);
432
434 _hash_relbuf(rel, mapbuf);
435
437
440
441 return ovflbuf;
442}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define SETBIT(x, i)
Definition blutils.c:29
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4446
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3147
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:468
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:222
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:207
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:334
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:419
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:416
PageData * Page
Definition bufpage.h:81
#define Assert(condition)
Definition c.h:943
uint32_t uint32
Definition c.h:624
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:40
#define ereport(elevel,...)
Definition elog.h:152
#define HashPageGetOpaque(page)
Definition hash.h:88
#define LH_BUCKET_PAGE
Definition hash.h:55
#define HASH_MAX_BITMAPS
Definition hash.h:230
#define BMPG_MASK(metap)
Definition hash.h:314
#define HASH_WRITE
Definition hash.h:340
#define BITS_PER_MAP
Definition hash.h:329
#define HashPageGetBitmap(page)
Definition hash.h:316
#define LH_META_PAGE
Definition hash.h:57
#define HASHO_PAGE_ID
Definition hash.h:101
#define HashPageGetMeta(page)
Definition hash.h:323
#define BMPGSZ_BIT(metap)
Definition hash.h:312
#define LH_PAGE_TYPE
Definition hash.h:63
uint32 Bucket
Definition hash.h:35
#define ALL_SET
Definition hash.h:302
#define LH_BITMAP_PAGE
Definition hash.h:56
#define BMPG_SHIFT(metap)
Definition hash.h:313
#define LH_OVERFLOW_PAGE
Definition hash.h:54
#define SizeOfHashAddOvflPage
Definition hash_xlog.h:80
#define XLOG_HASH_ADD_OVFL_PAGE
Definition hash_xlog.h:30
static uint32 _hash_firstfreebit(uint32 map)
Definition hashovfl.c:450
void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
Definition hashovfl.c:778
static BlockNumber bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
Definition hashovfl.c:35
Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno)
Definition hashpage.c:135
void _hash_relbuf(Relation rel, Buffer buf)
Definition hashpage.c:266
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition hashpage.c:70
Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
Definition hashpage.c:198
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition hashutil.c:210
int j
Definition isn.c:78
int i
Definition isn.c:77
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
static char * errmsg
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelationNeedsWAL(relation)
Definition rel.h:639
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber hashm_mapp[HASH_MAX_BITMAPS]
Definition hash.h:264
uint32 hashm_spares[HASH_MAX_SPLITPOINTS]
Definition hash.h:262
uint32 hashm_firstfree
Definition hash.h:259
uint16 hashm_bmsize
Definition hash.h:251
uint32 hashm_ovflpoint
Definition hash.h:257
uint32 hashm_nmaps
Definition hash.h:260
Datum bit(PG_FUNCTION_ARGS)
Definition varbit.c:391
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:413
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
XLogRecPtr XLogGetFakeLSN(Relation rel)
Definition xloginsert.c:562
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert, bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid(), xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), END_CRIT_SECTION, ereport, errcode(), errmsg, ERROR, fb(), HASH_MAX_BITMAPS, HASH_WRITE, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HASHO_PAGE_ID, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBuffer, j, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogGetFakeLSN(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().

◆ _hash_firstfreebit()

static uint32 _hash_firstfreebit ( uint32  map)
static

Definition at line 450 of file hashovfl.c.

451{
452 uint32 i,
453 mask;
454
455 mask = 0x1;
456 for (i = 0; i < BITS_PER_MAP; i++)
457 {
458 if (!(mask & map))
459 return i;
460 mask <<= 1;
461 }
462
463 elog(ERROR, "firstfreebit found no free bit");
464
465 return 0; /* keep compiler quiet */
466}
#define elog(elevel,...)
Definition elog.h:228

References BITS_PER_MAP, elog, ERROR, and i.

Referenced by _hash_addovflpage().

◆ _hash_freeovflpage()

BlockNumber _hash_freeovflpage ( Relation  rel,
Buffer  bucketbuf,
Buffer  ovflbuf,
Buffer  wbuf,
IndexTuple itups,
OffsetNumber itup_offsets,
Size tups_size,
uint16  nitups,
BufferAccessStrategy  bstrategy 
)

Definition at line 492 of file hashovfl.c.

496{
497 HashMetaPage metap;
501 BlockNumber prevblkno;
502 BlockNumber blkno;
503 BlockNumber nextblkno;
508 uint32 *freep;
511 bitmapbit;
515 bool update_metap = false,
516 mod_wbuf,
517 is_prim_bucket_same_wrt,
518 is_prev_bucket_same_wrt;
520
521 /* Get information from the doomed page */
526 nextblkno = ovflopaque->hasho_nextblkno;
527 prevblkno = ovflopaque->hasho_prevblkno;
529 bucket = ovflopaque->hasho_bucket;
530
531 /*
532 * Fix up the bucket chain. this is a doubly-linked list, so we must fix
533 * up the bucket chain members behind and ahead of the overflow page being
534 * deleted. Concurrency issues are avoided by using lock chaining as
535 * described atop hashbucketcleanup.
536 */
537 if (BlockNumberIsValid(prevblkno))
538 {
539 if (prevblkno == writeblkno)
540 prevbuf = wbuf;
541 else
543 prevblkno,
546 bstrategy);
547 }
548 if (BlockNumberIsValid(nextblkno))
550 nextblkno,
553 bstrategy);
554
555 /* Note: bstrategy is intentionally not used for metapage and bitmap */
556
557 /* Read the metapage so we can determine which bitmap page to use */
560
561 /* Identify which bit to set */
563
564 bitmappage = ovflbitno >> BMPG_SHIFT(metap);
565 bitmapbit = ovflbitno & BMPG_MASK(metap);
566
567 if (bitmappage >= metap->hashm_nmaps)
568 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
569 blkno = metap->hashm_mapp[bitmappage];
570
571 /* Release metapage lock while we access the bitmap page */
573
574 /* read the bitmap page to clear the bitmap bit */
579
580 /* Get write-lock on metapage to update firstfree */
582
583 /* This operation needs to log multiple tuples, prepare WAL for that */
584 if (RelationNeedsWAL(rel))
586
588
589 /*
590 * we have to insert tuples on the "write" page, being careful to preserve
591 * hashkey ordering. (If we insert many tuples into the same "write" page
592 * it would be worth qsort'ing them).
593 */
594 if (nitups > 0)
595 {
598 }
599
600 /*
601 * Reinitialize the freed overflow page. Just zeroing the page won't
602 * work, because WAL replay routines expect pages to be initialized. See
603 * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
604 * careful to make the special space valid here so that tools like
605 * pageinspect won't get confused.
606 */
608
610
611 ovflopaque->hasho_prevblkno = InvalidBlockNumber;
612 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
613 ovflopaque->hasho_bucket = InvalidBucket;
614 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
615 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
616
618
620 {
623
624 Assert(prevopaque->hasho_bucket == bucket);
625 prevopaque->hasho_nextblkno = nextblkno;
627 }
629 {
632
633 Assert(nextopaque->hasho_bucket == bucket);
634 nextopaque->hasho_prevblkno = prevblkno;
636 }
637
638 /* Clear the bitmap bit to indicate that this overflow page is free */
641
642 /* if this is now the first free page, update hashm_firstfree */
643 if (ovflbitno < metap->hashm_firstfree)
644 {
645 metap->hashm_firstfree = ovflbitno;
646 update_metap = true;
648 }
649
650 /* Determine which pages are modified */
651 is_prim_bucket_same_wrt = (wbuf == bucketbuf);
652 is_prev_bucket_same_wrt = (wbuf == prevbuf);
653 mod_wbuf = (nitups > 0 || is_prev_bucket_same_wrt);
654
655 /* XLOG stuff */
656 if (RelationNeedsWAL(rel))
657 {
659
660 xlrec.prevblkno = prevblkno;
661 xlrec.nextblkno = nextblkno;
662 xlrec.ntups = nitups;
663 xlrec.is_prim_bucket_same_wrt = is_prim_bucket_same_wrt;
664 xlrec.is_prev_bucket_same_wrt = is_prev_bucket_same_wrt;
665
668
669 /*
670 * bucket buffer was not changed, but still needs to be registered to
671 * ensure that we can acquire a cleanup lock on it during replay.
672 */
673 if (!is_prim_bucket_same_wrt)
674 {
676
677 XLogRegisterBuffer(0, bucketbuf, flags);
678 }
679
680 if (nitups > 0)
681 {
684 nitups * sizeof(OffsetNumber));
685 for (int i = 0; i < nitups; i++)
686 XLogRegisterBufData(1, itups[i], tups_size[i]);
687 }
688 else if (is_prim_bucket_same_wrt || is_prev_bucket_same_wrt)
689 {
691
692 /*
693 * A write buffer needs to be registered even if no tuples are
694 * added to it to ensure that we can acquire a cleanup lock on it
695 * if it is the same as primary bucket buffer or update the
696 * nextblkno if it is same as the previous bucket buffer.
697 */
698 Assert(nitups == 0);
699
701 if (!is_prev_bucket_same_wrt)
703 else
706 }
707
709
710 /*
711 * If prevpage and the writepage (block in which we are moving tuples
712 * from overflow) are same, then no need to separately register
713 * prevpage. During replay, we can directly update the nextblock in
714 * writepage.
715 */
716 if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
718
721
724
725 if (update_metap)
726 {
728 XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
729 }
730
732 }
733 else /* !RelationNeedsWAL(rel) */
734 recptr = XLogGetFakeLSN(rel);
735
736 /* Set LSN iff wbuf is modified. */
737 if (mod_wbuf)
739
741
742 if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
746
748
749 if (update_metap)
751
753
754 /* release previous bucket if it is not same as write bucket */
755 if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
756 _hash_relbuf(rel, prevbuf);
757
759 _hash_relbuf(rel, ovflbuf);
760
762 _hash_relbuf(rel, nextbuf);
763
764 _hash_relbuf(rel, mapbuf);
765 _hash_relbuf(rel, metabuf);
766
767 return nextblkno;
768}
#define CLRBIT(x, i)
Definition blutils.c:28
static Size BufferGetPageSize(Buffer buffer)
Definition bufmgr.h:457
uint8_t uint8
Definition c.h:622
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:249
int32_t int32
Definition c.h:620
#define LH_UNUSED_PAGE
Definition hash.h:53
#define ISSET(A, N)
Definition hash.h:334
#define HASH_READ
Definition hash.h:339
#define HASH_METAPAGE
Definition hash.h:198
#define InvalidBucket
Definition hash.h:37
#define HASH_XLOG_FREE_OVFL_BUFS
Definition hash_xlog.h:22
#define XLOG_HASH_SQUEEZE_PAGE
Definition hash_xlog.h:35
#define SizeOfHashSqueezePage
Definition hash_xlog.h:168
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition hashinsert.c:331
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Definition hashovfl.c:62
void _hash_pageinit(Page page, Size size)
Definition hashpage.c:596
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition hashpage.c:239
uint16 OffsetNumber
Definition off.h:24
BlockNumber prevblkno
Definition hash_xlog.h:156
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition xloginsert.c:179
#define REGBUF_NO_CHANGE
Definition xloginsert.h:37
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid(), BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CLRBIT, elog, END_CRIT_SECTION, ERROR, fb(), HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HASHO_PAGE_ID, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBucket, InvalidBuffer, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogGetFakeLSN(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().

◆ _hash_initbitmapbuffer()

void _hash_initbitmapbuffer ( Buffer  buf,
uint16  bmsize,
bool  initpage 
)

Definition at line 778 of file hashovfl.c.

779{
780 Page pg;
782 uint32 *freep;
783
785
786 /* initialize the page */
787 if (initpage)
789
790 /* initialize the page's special space */
791 op = HashPageGetOpaque(pg);
797
798 /* set all of the bits to 1 */
800 memset(freep, 0xFF, bmsize);
801
802 /*
803 * Set pd_lower just past the end of the bitmap page data. We could even
804 * set pd_lower equal to pd_upper, but this is more precise and makes the
805 * page look compressible to xlog.c.
806 */
807 ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
808}
PageHeaderData * PageHeader
Definition bufpage.h:199
BlockNumber hasho_nextblkno
Definition hash.h:80
uint16 hasho_flag
Definition hash.h:82
BlockNumber hasho_prevblkno
Definition hash.h:79
uint16 hasho_page_id
Definition hash.h:83
Bucket hasho_bucket
Definition hash.h:81

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), fb(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, and LH_BITMAP_PAGE.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().

◆ _hash_ovflblkno_to_bitno()

uint32 _hash_ovflblkno_to_bitno ( HashMetaPage  metap,
BlockNumber  ovflblkno 
)

Definition at line 62 of file hashovfl.c.

63{
65 uint32 i;
67
68 /* Determine the split number containing this page */
69 for (i = 1; i <= splitnum; i++)
70 {
72 break; /* oops */
74
75 /*
76 * bitnum has to be greater than number of overflow page added in
77 * previous split point. The overflow page at this splitnum (i) if any
78 * should start from (_hash_get_totalbuckets(i) +
79 * metap->hashm_spares[i - 1] + 1).
80 */
81 if (bitnum > metap->hashm_spares[i - 1] &&
82 bitnum <= metap->hashm_spares[i])
83 return bitnum - 1; /* -1 to convert 1-based to 0-based */
84 }
85
88 errmsg("invalid overflow block number %u", ovflblkno)));
89 return 0; /* keep compiler quiet */
90}
uint32 _hash_get_totalbuckets(uint32 splitpoint_phase)
Definition hashutil.c:174

References _hash_get_totalbuckets(), ereport, errcode(), errmsg, ERROR, fb(), HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().

◆ _hash_squeezebucket()

void _hash_squeezebucket ( Relation  rel,
Bucket  bucket,
BlockNumber  bucket_blkno,
Buffer  bucket_buf,
BufferAccessStrategy  bstrategy 
)

Definition at line 843 of file hashovfl.c.

848{
851 Buffer wbuf;
852 Buffer rbuf;
853 Page wpage;
854 Page rpage;
857
858 /*
859 * start squeezing into the primary bucket page.
860 */
865
866 /*
867 * if there aren't any overflow pages, there's nothing to squeeze. caller
868 * is responsible for releasing the pin on primary bucket page.
869 */
870 if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
871 {
873 return;
874 }
875
876 /*
877 * Find the last page in the bucket chain by starting at the base bucket
878 * page and working forward. Note: we assume that a hash bucket chain is
879 * usually smaller than the buffer ring being used by VACUUM, else using
880 * the access strategy here would be counterproductive.
881 */
884 do
885 {
886 rblkno = ropaque->hasho_nextblkno;
887 if (rbuf != InvalidBuffer)
888 _hash_relbuf(rel, rbuf);
890 rblkno,
893 bstrategy);
896 Assert(ropaque->hasho_bucket == bucket);
897 } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
898
899 /*
900 * squeeze the tuples.
901 */
902 for (;;)
903 {
910 uint16 ndeletable = 0;
911 uint16 nitups = 0;
913 int i;
914 bool retain_pin = false;
915
917 /* Scan each tuple in "read" page */
922 {
923 IndexTuple itup;
924 Size itemsz;
925
926 /* skip dead tuples */
928 continue;
929
932 itemsz = IndexTupleSize(itup);
933 itemsz = MAXALIGN(itemsz);
934
935 /*
936 * Walk up the bucket chain, looking for a page big enough for
937 * this item and all other accumulated items. Exit if we reach
938 * the read page.
939 */
941 {
943 bool tups_moved = false;
944
946
947 if (wblkno == bucket_blkno)
948 retain_pin = true;
949
950 wblkno = wopaque->hasho_nextblkno;
952
953 /* don't need to move to next page if we reached the read page */
954 if (wblkno != rblkno)
956 wblkno,
959 bstrategy);
960
961 if (nitups > 0)
962 {
964
966
967 /*
968 * This operation needs to log multiple tuples, prepare
969 * WAL for that.
970 */
971 if (RelationNeedsWAL(rel))
973
975
976 /*
977 * we have to insert tuples on the "write" page, being
978 * careful to preserve hashkey ordering. (If we insert
979 * many tuples into the same "write" page it would be
980 * worth qsort'ing them).
981 */
984
985 /* Delete tuples we already moved off read page */
988
989 /* XLOG stuff */
990 if (RelationNeedsWAL(rel))
991 {
993
995 xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
996
999
1000 /*
1001 * bucket buffer was not changed, but still needs to
1002 * be registered to ensure that we can acquire a
1003 * cleanup lock on it during replay.
1004 */
1005 if (!xlrec.is_prim_bucket_same_wrt)
1006 {
1008
1009 XLogRegisterBuffer(0, bucket_buf, flags);
1010 }
1011
1014 nitups * sizeof(OffsetNumber));
1015 for (i = 0; i < nitups; i++)
1016 XLogRegisterBufData(1, itups[i], tups_size[i]);
1017
1020 ndeletable * sizeof(OffsetNumber));
1021
1023 }
1024 else
1025 recptr = XLogGetFakeLSN(rel);
1026
1029
1031
1032 tups_moved = true;
1033 }
1034
1035 /*
1036 * release the lock on previous page after acquiring the lock
1037 * on next page
1038 */
1039 if (retain_pin)
1041 else
1042 _hash_relbuf(rel, wbuf);
1043
1044 /* nothing more to do if we reached the read page */
1045 if (rblkno == wblkno)
1046 {
1047 _hash_relbuf(rel, rbuf);
1048 return;
1049 }
1050
1051 wbuf = next_wbuf;
1054 Assert(wopaque->hasho_bucket == bucket);
1055 retain_pin = false;
1056
1057 /* be tidy */
1058 for (i = 0; i < nitups; i++)
1059 pfree(itups[i]);
1060 nitups = 0;
1061 all_tups_size = 0;
1062 ndeletable = 0;
1063
1064 /*
1065 * after moving the tuples, rpage would have been compacted,
1066 * so we need to rescan it.
1067 */
1068 if (tups_moved)
1069 goto readpage;
1070 }
1071
1072 /* remember tuple for deletion from "read" page */
1074
1075 /*
1076 * we need a copy of index tuples as they can be freed as part of
1077 * overflow page, however we need them to write a WAL record in
1078 * _hash_freeovflpage.
1079 */
1080 itups[nitups] = CopyIndexTuple(itup);
1081 tups_size[nitups++] = itemsz;
1082 all_tups_size += itemsz;
1083 }
1084
1085 /*
1086 * If we reach here, there are no live tuples on the "read" page ---
1087 * it was empty when we got to it, or we moved them all. So we can
1088 * just free the page without bothering with deleting tuples
1089 * individually. Then advance to the previous "read" page.
1090 *
1091 * Tricky point here: if our read and write pages are adjacent in the
1092 * bucket chain, our write lock on wbuf will conflict with
1093 * _hash_freeovflpage's attempt to update the sibling links of the
1094 * removed page. In that case, we don't need to lock it again.
1095 */
1096 rblkno = ropaque->hasho_prevblkno;
1098
1099 /* free this overflow page (releases rbuf) */
1101 tups_size, nitups, bstrategy);
1102
1103 /* be tidy */
1104 for (i = 0; i < nitups; i++)
1105 pfree(itups[i]);
1106
1107 /* are we freeing the page adjacent to wbuf? */
1108 if (rblkno == wblkno)
1109 {
1110 /* retain the pin on primary bucket page till end of bucket scan */
1111 if (wblkno == bucket_blkno)
1113 else
1114 _hash_relbuf(rel, wbuf);
1115 return;
1116 }
1117
1119 rblkno,
1120 HASH_WRITE,
1122 bstrategy);
1125 Assert(ropaque->hasho_bucket == bucket);
1126 }
1127
1128 /* NOTREACHED */
1129}
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition bufpage.c:943
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition bufpage.c:1170
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:248
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:268
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:378
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:396
#define MAXALIGN(LEN)
Definition c.h:896
uint16_t uint16
Definition c.h:623
size_t Size
Definition c.h:689
#define SizeOfHashMovePageContents
Definition hash_xlog.h:138
#define XLOG_HASH_MOVE_PAGE_CONTENTS
Definition hash_xlog.h:34
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
Definition hashovfl.c:492
IndexTuple CopyIndexTuple(IndexTuple source)
Definition indextuple.c:479
#define ItemIdIsDead(itemId)
Definition itemid.h:113
IndexTupleData * IndexTuple
Definition itup.h:53
static Size IndexTupleSize(const IndexTupleData *itup)
Definition itup.h:71
#define MaxIndexTuplesPerPage
Definition itup.h:181
void pfree(void *pointer)
Definition mcxt.c:1616
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert, BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), CopyIndexTuple(), END_CRIT_SECTION, fb(), FirstOffsetNumber, HASH_WRITE, HashPageGetOpaque, i, IndexTupleSize(), InvalidBuffer, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageIsEmpty(), PageSetLSN(), pfree(), REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogGetFakeLSN(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().

◆ bitno_to_blkno()

static BlockNumber bitno_to_blkno ( HashMetaPage  metap,
uint32  ovflbitnum 
)
static

Definition at line 35 of file hashovfl.c.

36{
38 uint32 i;
39
40 /* Convert zero-based bitnumber to 1-based page number */
41 ovflbitnum += 1;
42
43 /* Determine the split number for this page (must be >= 1) */
44 for (i = 1;
46 i++)
47 /* loop */ ;
48
49 /*
50 * Convert to absolute page number by adding the number of bucket pages
51 * that exist before this split point.
52 */
54}

References _hash_get_totalbuckets(), fb(), HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_addovflpage().