PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashovfl.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * hashovfl.c
 *	  Overflow page management code for the Postgres hash access method
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashovfl.c
 *
 * NOTES
 *	  Overflow pages look like ordinary relation pages.
 *
 *-------------------------------------------------------------------------
 */
18#include "postgres.h"
19
20#include "access/hash.h"
21#include "access/hash_xlog.h"
22#include "access/xloginsert.h"
23#include "miscadmin.h"
24#include "utils/rel.h"
25
26
28
29
30/*
31 * Convert overflow page bit number (its index in the free-page bitmaps)
32 * to block number within the index.
33 */
34static BlockNumber
36{
38 uint32 i;
39
40 /* Convert zero-based bitnumber to 1-based page number */
41 ovflbitnum += 1;
42
43 /* Determine the split number for this page (must be >= 1) */
44 for (i = 1;
46 i++)
47 /* loop */ ;
48
49 /*
50 * Convert to absolute page number by adding the number of bucket pages
51 * that exist before this split point.
52 */
54}
55
56/*
57 * _hash_ovflblkno_to_bitno
58 *
59 * Convert overflow page block number to bit number for free-page bitmap.
60 */
63{
65 uint32 i;
67
68 /* Determine the split number containing this page */
69 for (i = 1; i <= splitnum; i++)
70 {
72 break; /* oops */
74
75 /*
76 * bitnum has to be greater than number of overflow page added in
77 * previous split point. The overflow page at this splitnum (i) if any
78 * should start from (_hash_get_totalbuckets(i) +
79 * metap->hashm_spares[i - 1] + 1).
80 */
81 if (bitnum > metap->hashm_spares[i - 1] &&
82 bitnum <= metap->hashm_spares[i])
83 return bitnum - 1; /* -1 to convert 1-based to 0-based */
84 }
85
88 errmsg("invalid overflow block number %u", ovflblkno)));
89 return 0; /* keep compiler quiet */
90}
91
92/*
93 * _hash_addovflpage
94 *
95 * Add an overflow page to the bucket whose last page is pointed to by 'buf'.
96 *
97 * On entry, the caller must hold a pin but no lock on 'buf'. The pin is
98 * dropped before exiting (we assume the caller is not interested in 'buf'
99 * anymore) if not asked to retain. The pin will be retained only for the
100 * primary bucket. The returned overflow page will be pinned and
101 * write-locked; it is guaranteed to be empty.
102 *
103 * The caller must hold a pin, but no lock, on the metapage buffer.
104 * That buffer is returned in the same state.
105 *
106 * NB: since this could be executed concurrently by multiple processes,
107 * one should not assume that the returned overflow page will be the
108 * immediate successor of the originally passed 'buf'. Additional overflow
109 * pages might have been added to the bucket chain in between.
110 */
111Buffer
113{
115 Page page;
119 HashMetaPage metap;
122 BlockNumber blkno;
125 uint32 *freep = NULL;
127 uint32 bit;
129 uint32 first_page;
132 uint32 i,
133 j;
134 bool page_found = false;
136
137 /*
138 * Write-lock the tail page. Here, we need to maintain locking order such
139 * that, first acquire the lock on tail page of bucket, then on meta page
140 * to find and lock the bitmap page and if it is found, then lock on meta
141 * page is released, then finally acquire the lock on new overflow buffer.
142 * We need this locking order to avoid deadlock with backends that are
143 * doing inserts.
144 *
145 * Note: We could have avoided locking many buffers here if we made two
146 * WAL records for acquiring an overflow page (one to allocate an overflow
147 * page and another to add it to overflow bucket chain). However, doing
148 * so can leak an overflow page, if the system crashes after allocation.
149 * Needless to say, it is better to have a single record from a
150 * performance point of view as well.
151 */
153
154 /* probably redundant... */
156
157 /* loop to find current tail page, in case someone else inserted too */
158 for (;;)
159 {
160 BlockNumber nextblkno;
161
162 page = BufferGetPage(buf);
164 nextblkno = pageopaque->hasho_nextblkno;
165
166 if (!BlockNumberIsValid(nextblkno))
167 break;
168
169 /* we assume we do not need to write the unmodified page */
170 if (retain_pin)
171 {
172 /* pin will be retained only for the primary bucket page */
173 Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
175 }
176 else
177 _hash_relbuf(rel, buf);
178
179 retain_pin = false;
180
181 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
182 }
183
184 /* Get exclusive lock on the meta page */
186
189
190 /* start search at hashm_firstfree */
192 first_page = orig_firstfree >> BMPG_SHIFT(metap);
193 bit = orig_firstfree & BMPG_MASK(metap);
194 i = first_page;
195 j = bit / BITS_PER_MAP;
196 bit &= ~(BITS_PER_MAP - 1);
197
198 /* outer loop iterates once per bitmap page */
199 for (;;)
200 {
204
205 /* want to end search with the last existing overflow page */
206 splitnum = metap->hashm_ovflpoint;
207 max_ovflpg = metap->hashm_spares[splitnum] - 1;
208 last_page = max_ovflpg >> BMPG_SHIFT(metap);
209 last_bit = max_ovflpg & BMPG_MASK(metap);
210
211 if (i > last_page)
212 break;
213
214 Assert(i < metap->hashm_nmaps);
215 mapblkno = metap->hashm_mapp[i];
216
217 if (i == last_page)
219 else
220 last_inpage = BMPGSZ_BIT(metap) - 1;
221
222 /* Release exclusive lock on metapage while reading bitmap page */
224
228
229 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
230 {
231 if (freep[j] != ALL_SET)
232 {
233 page_found = true;
234
235 /* Reacquire exclusive lock on the meta page */
237
238 /* convert bit to bit number within page */
241
242 /* convert bit to absolute bit number */
243 bit += (i << BMPG_SHIFT(metap));
244 /* Calculate address of the recycled overflow page */
245 blkno = bitno_to_blkno(metap, bit);
246
247 /* Fetch and init the recycled page */
248 ovflbuf = _hash_getinitbuf(rel, blkno);
249
250 goto found;
251 }
252 }
253
254 /* No free space here, try to advance to next map page */
255 _hash_relbuf(rel, mapbuf);
257 i++;
258 j = 0; /* scan from start of next map page */
259 bit = 0;
260
261 /* Reacquire exclusive lock on the meta page */
263 }
264
265 /*
266 * No free pages --- have to extend the relation to add an overflow page.
267 * First, check to see if we have to add a new bitmap page too.
268 */
269 if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
270 {
271 /*
272 * We create the new bitmap page with all pages marked "in use".
273 * Actually two pages in the new bitmap's range will exist
274 * immediately: the bitmap page itself, and the following page which
275 * is the one we return to the caller. Both of these are correctly
276 * marked "in use". Subsequent pages do not exist yet, but it is
277 * convenient to pre-mark them as "in use" too.
278 */
279 bit = metap->hashm_spares[splitnum];
280
281 /* metapage already has a write lock */
282 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
285 errmsg("out of overflow pages in hash index \"%s\"",
287
289 }
290 else
291 {
292 /*
293 * Nothing to do here; since the page will be past the last used page,
294 * we know its bitmap bit was preinitialized to "in use".
295 */
296 }
297
298 /* Calculate address of the new overflow page */
300 metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
301 blkno = bitno_to_blkno(metap, bit);
302
303 /*
304 * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
305 * relation length stays in sync with ours. XXX It's annoying to do this
306 * with metapage write lock held; would be better to use a lock that
307 * doesn't block incoming searches.
308 *
309 * It is okay to hold two buffer locks here (one on tail page of bucket
310 * and other on new overflow page) since there cannot be anyone else
311 * contending for access to ovflbuf.
312 */
313 ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
314
315found:
316
317 /*
318 * Do the update. No ereport(ERROR) until changes are logged. We want to
319 * log the changes for bitmap page and overflow page together to avoid
320 * loss of pages in case the new page is added.
321 */
323
324 if (page_found)
325 {
327
328 /* mark page "in use" in the bitmap */
331 }
332 else
333 {
334 /* update the count to indicate new overflow page is added */
335 metap->hashm_spares[splitnum]++;
336
338 {
341
342 /* add the new bitmap page to the metapage's list of bitmaps */
344 metap->hashm_nmaps++;
345 metap->hashm_spares[splitnum]++;
346 }
347
349
350 /*
351 * for new overflow page, we don't need to explicitly set the bit in
352 * bitmap page, as by default that will be set to "in use".
353 */
354 }
355
356 /*
357 * Adjust hashm_firstfree to avoid redundant searches. But don't risk
358 * changing it if someone moved it while we were searching bitmap pages.
359 */
360 if (metap->hashm_firstfree == orig_firstfree)
361 {
362 metap->hashm_firstfree = bit + 1;
364 }
365
366 /* initialize new overflow page */
369 ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
370 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
371 ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
372 ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
373 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
374
376
377 /* logically chain overflow page to previous page */
378 pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
379
381
382 /* XLOG stuff */
383 if (RelationNeedsWAL(rel))
384 {
386
388 xlrec.bmsize = metap->hashm_bmsize;
389
392
394 XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
395
397
399 {
402 }
403
406
408 XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
409
411 }
412 else
413 recptr = XLogGetFakeLSN(rel);
414
417
420
423
425
427
428 if (retain_pin)
430 else
431 _hash_relbuf(rel, buf);
432
434 _hash_relbuf(rel, mapbuf);
435
437
440
441 return ovflbuf;
442}
443
444/*
445 * _hash_firstfreebit()
446 *
447 * Return the number of the first bit that is not set in the word 'map'.
448 */
449static uint32
451{
452 uint32 i,
453 mask;
454
455 mask = 0x1;
456 for (i = 0; i < BITS_PER_MAP; i++)
457 {
458 if (!(mask & map))
459 return i;
460 mask <<= 1;
461 }
462
463 elog(ERROR, "firstfreebit found no free bit");
464
465 return 0; /* keep compiler quiet */
466}
467
468/*
469 * _hash_freeovflpage() -
470 *
471 * Remove this overflow page from its bucket's chain, and mark the page as
472 * free. On entry, ovflbuf is write-locked; it is released before exiting.
473 *
474 * Add the tuples (itups) to wbuf in this function. We could do that in the
475 * caller as well, but the advantage of doing it here is we can easily write
476 * the WAL for XLOG_HASH_SQUEEZE_PAGE operation. Addition of tuples and
477 * removal of overflow page has to done as an atomic operation, otherwise
478 * during replay on standby users might find duplicate records.
479 *
480 * Since this function is invoked in VACUUM, we provide an access strategy
481 * parameter that controls fetches of the bucket pages.
482 *
483 * Returns the block number of the page that followed the given page
484 * in the bucket, or InvalidBlockNumber if no following page.
485 *
486 * NB: caller must not hold lock on metapage, nor on page, that's next to
487 * ovflbuf in the bucket chain. We don't acquire the lock on page that's
488 * prior to ovflbuf in chain if it is same as wbuf because the caller already
489 * has a lock on same.
490 */
495 BufferAccessStrategy bstrategy)
496{
497 HashMetaPage metap;
501 BlockNumber prevblkno;
502 BlockNumber blkno;
503 BlockNumber nextblkno;
508 uint32 *freep;
511 bitmapbit;
515 bool update_metap = false,
516 mod_wbuf,
517 is_prim_bucket_same_wrt,
518 is_prev_bucket_same_wrt;
520
521 /* Get information from the doomed page */
526 nextblkno = ovflopaque->hasho_nextblkno;
527 prevblkno = ovflopaque->hasho_prevblkno;
529 bucket = ovflopaque->hasho_bucket;
530
531 /*
532 * Fix up the bucket chain. this is a doubly-linked list, so we must fix
533 * up the bucket chain members behind and ahead of the overflow page being
534 * deleted. Concurrency issues are avoided by using lock chaining as
535 * described atop hashbucketcleanup.
536 */
537 if (BlockNumberIsValid(prevblkno))
538 {
539 if (prevblkno == writeblkno)
540 prevbuf = wbuf;
541 else
543 prevblkno,
546 bstrategy);
547 }
548 if (BlockNumberIsValid(nextblkno))
550 nextblkno,
553 bstrategy);
554
555 /* Note: bstrategy is intentionally not used for metapage and bitmap */
556
557 /* Read the metapage so we can determine which bitmap page to use */
560
561 /* Identify which bit to set */
563
564 bitmappage = ovflbitno >> BMPG_SHIFT(metap);
565 bitmapbit = ovflbitno & BMPG_MASK(metap);
566
567 if (bitmappage >= metap->hashm_nmaps)
568 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
569 blkno = metap->hashm_mapp[bitmappage];
570
571 /* Release metapage lock while we access the bitmap page */
573
574 /* read the bitmap page to clear the bitmap bit */
579
580 /* Get write-lock on metapage to update firstfree */
582
583 /* This operation needs to log multiple tuples, prepare WAL for that */
584 if (RelationNeedsWAL(rel))
586
588
589 /*
590 * we have to insert tuples on the "write" page, being careful to preserve
591 * hashkey ordering. (If we insert many tuples into the same "write" page
592 * it would be worth qsort'ing them).
593 */
594 if (nitups > 0)
595 {
598 }
599
600 /*
601 * Reinitialize the freed overflow page. Just zeroing the page won't
602 * work, because WAL replay routines expect pages to be initialized. See
603 * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
604 * careful to make the special space valid here so that tools like
605 * pageinspect won't get confused.
606 */
608
610
611 ovflopaque->hasho_prevblkno = InvalidBlockNumber;
612 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
613 ovflopaque->hasho_bucket = InvalidBucket;
614 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
615 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
616
618
620 {
623
624 Assert(prevopaque->hasho_bucket == bucket);
625 prevopaque->hasho_nextblkno = nextblkno;
627 }
629 {
632
633 Assert(nextopaque->hasho_bucket == bucket);
634 nextopaque->hasho_prevblkno = prevblkno;
636 }
637
638 /* Clear the bitmap bit to indicate that this overflow page is free */
641
642 /* if this is now the first free page, update hashm_firstfree */
643 if (ovflbitno < metap->hashm_firstfree)
644 {
645 metap->hashm_firstfree = ovflbitno;
646 update_metap = true;
648 }
649
650 /* Determine which pages are modified */
651 is_prim_bucket_same_wrt = (wbuf == bucketbuf);
652 is_prev_bucket_same_wrt = (wbuf == prevbuf);
653 mod_wbuf = (nitups > 0 || is_prev_bucket_same_wrt);
654
655 /* XLOG stuff */
656 if (RelationNeedsWAL(rel))
657 {
659
660 xlrec.prevblkno = prevblkno;
661 xlrec.nextblkno = nextblkno;
662 xlrec.ntups = nitups;
663 xlrec.is_prim_bucket_same_wrt = is_prim_bucket_same_wrt;
664 xlrec.is_prev_bucket_same_wrt = is_prev_bucket_same_wrt;
665
668
669 /*
670 * bucket buffer was not changed, but still needs to be registered to
671 * ensure that we can acquire a cleanup lock on it during replay.
672 */
673 if (!is_prim_bucket_same_wrt)
674 {
676
677 XLogRegisterBuffer(0, bucketbuf, flags);
678 }
679
680 if (nitups > 0)
681 {
684 nitups * sizeof(OffsetNumber));
685 for (int i = 0; i < nitups; i++)
686 XLogRegisterBufData(1, itups[i], tups_size[i]);
687 }
688 else if (is_prim_bucket_same_wrt || is_prev_bucket_same_wrt)
689 {
691
692 /*
693 * A write buffer needs to be registered even if no tuples are
694 * added to it to ensure that we can acquire a cleanup lock on it
695 * if it is the same as primary bucket buffer or update the
696 * nextblkno if it is same as the previous bucket buffer.
697 */
698 Assert(nitups == 0);
699
701 if (!is_prev_bucket_same_wrt)
703 else
706 }
707
709
710 /*
711 * If prevpage and the writepage (block in which we are moving tuples
712 * from overflow) are same, then no need to separately register
713 * prevpage. During replay, we can directly update the nextblock in
714 * writepage.
715 */
716 if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
718
721
724
725 if (update_metap)
726 {
728 XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
729 }
730
732 }
733 else /* !RelationNeedsWAL(rel) */
734 recptr = XLogGetFakeLSN(rel);
735
736 /* Set LSN iff wbuf is modified. */
737 if (mod_wbuf)
739
741
742 if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
746
748
749 if (update_metap)
751
753
754 /* release previous bucket if it is not same as write bucket */
755 if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
756 _hash_relbuf(rel, prevbuf);
757
759 _hash_relbuf(rel, ovflbuf);
760
762 _hash_relbuf(rel, nextbuf);
763
764 _hash_relbuf(rel, mapbuf);
765 _hash_relbuf(rel, metabuf);
766
767 return nextblkno;
768}
769
770
771/*
772 * _hash_initbitmapbuffer()
773 *
774 * Initialize a new bitmap page. All bits in the new bitmap page are set to
775 * "1", indicating "in use".
776 */
777void
779{
780 Page pg;
782 uint32 *freep;
783
785
786 /* initialize the page */
787 if (initpage)
789
790 /* initialize the page's special space */
791 op = HashPageGetOpaque(pg);
797
798 /* set all of the bits to 1 */
800 memset(freep, 0xFF, bmsize);
801
802 /*
803 * Set pd_lower just past the end of the bitmap page data. We could even
804 * set pd_lower equal to pd_upper, but this is more precise and makes the
805 * page look compressible to xlog.c.
806 */
807 ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
808}
809
810
811/*
812 * _hash_squeezebucket(rel, bucket)
813 *
814 * Try to squeeze the tuples onto pages occurring earlier in the
815 * bucket chain in an attempt to free overflow pages. When we start
816 * the "squeezing", the page from which we start taking tuples (the
817 * "read" page) is the last bucket in the bucket chain and the page
818 * onto which we start squeezing tuples (the "write" page) is the
819 * first page in the bucket chain. The read page works backward and
820 * the write page works forward; the procedure terminates when the
821 * read page and write page are the same page.
822 *
823 * At completion of this procedure, it is guaranteed that all pages in
824 * the bucket are nonempty, unless the bucket is totally empty (in
825 * which case all overflow pages will be freed). The original implementation
826 * required that to be true on entry as well, but it's a lot easier for
827 * callers to leave empty overflow pages and let this guy clean it up.
828 *
829 * Caller must acquire cleanup lock on the primary page of the target
830 * bucket to exclude any scans that are in progress, which could easily
831 * be confused into returning the same tuple more than once or some tuples
832 * not at all by the rearrangement we are performing here. To prevent
833 * any concurrent scan to cross the squeeze scan we use lock chaining
834 * similar to hashbucketcleanup. Refer comments atop hashbucketcleanup.
835 *
836 * We need to retain a pin on the primary bucket to ensure that no concurrent
837 * split can start.
838 *
839 * Since this function is invoked in VACUUM, we provide an access strategy
840 * parameter that controls fetches of the bucket pages.
841 */
842void
847 BufferAccessStrategy bstrategy)
848{
851 Buffer wbuf;
852 Buffer rbuf;
853 Page wpage;
854 Page rpage;
857
858 /*
859 * start squeezing into the primary bucket page.
860 */
865
866 /*
867 * if there aren't any overflow pages, there's nothing to squeeze. caller
868 * is responsible for releasing the pin on primary bucket page.
869 */
870 if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
871 {
873 return;
874 }
875
876 /*
877 * Find the last page in the bucket chain by starting at the base bucket
878 * page and working forward. Note: we assume that a hash bucket chain is
879 * usually smaller than the buffer ring being used by VACUUM, else using
880 * the access strategy here would be counterproductive.
881 */
884 do
885 {
886 rblkno = ropaque->hasho_nextblkno;
887 if (rbuf != InvalidBuffer)
888 _hash_relbuf(rel, rbuf);
890 rblkno,
893 bstrategy);
896 Assert(ropaque->hasho_bucket == bucket);
897 } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
898
899 /*
900 * squeeze the tuples.
901 */
902 for (;;)
903 {
910 uint16 ndeletable = 0;
911 uint16 nitups = 0;
913 int i;
914 bool retain_pin = false;
915
917 /* Scan each tuple in "read" page */
922 {
923 IndexTuple itup;
924 Size itemsz;
925
926 /* skip dead tuples */
928 continue;
929
932 itemsz = IndexTupleSize(itup);
933 itemsz = MAXALIGN(itemsz);
934
935 /*
936 * Walk up the bucket chain, looking for a page big enough for
937 * this item and all other accumulated items. Exit if we reach
938 * the read page.
939 */
941 {
943 bool tups_moved = false;
944
946
947 if (wblkno == bucket_blkno)
948 retain_pin = true;
949
950 wblkno = wopaque->hasho_nextblkno;
952
953 /* don't need to move to next page if we reached the read page */
954 if (wblkno != rblkno)
956 wblkno,
959 bstrategy);
960
961 if (nitups > 0)
962 {
964
966
967 /*
968 * This operation needs to log multiple tuples, prepare
969 * WAL for that.
970 */
971 if (RelationNeedsWAL(rel))
973
975
976 /*
977 * we have to insert tuples on the "write" page, being
978 * careful to preserve hashkey ordering. (If we insert
979 * many tuples into the same "write" page it would be
980 * worth qsort'ing them).
981 */
984
985 /* Delete tuples we already moved off read page */
988
989 /* XLOG stuff */
990 if (RelationNeedsWAL(rel))
991 {
993
995 xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
996
999
1000 /*
1001 * bucket buffer was not changed, but still needs to
1002 * be registered to ensure that we can acquire a
1003 * cleanup lock on it during replay.
1004 */
1005 if (!xlrec.is_prim_bucket_same_wrt)
1006 {
1008
1009 XLogRegisterBuffer(0, bucket_buf, flags);
1010 }
1011
1014 nitups * sizeof(OffsetNumber));
1015 for (i = 0; i < nitups; i++)
1016 XLogRegisterBufData(1, itups[i], tups_size[i]);
1017
1020 ndeletable * sizeof(OffsetNumber));
1021
1023 }
1024 else
1025 recptr = XLogGetFakeLSN(rel);
1026
1029
1031
1032 tups_moved = true;
1033 }
1034
1035 /*
1036 * release the lock on previous page after acquiring the lock
1037 * on next page
1038 */
1039 if (retain_pin)
1041 else
1042 _hash_relbuf(rel, wbuf);
1043
1044 /* nothing more to do if we reached the read page */
1045 if (rblkno == wblkno)
1046 {
1047 _hash_relbuf(rel, rbuf);
1048 return;
1049 }
1050
1051 wbuf = next_wbuf;
1054 Assert(wopaque->hasho_bucket == bucket);
1055 retain_pin = false;
1056
1057 /* be tidy */
1058 for (i = 0; i < nitups; i++)
1059 pfree(itups[i]);
1060 nitups = 0;
1061 all_tups_size = 0;
1062 ndeletable = 0;
1063
1064 /*
1065 * after moving the tuples, rpage would have been compacted,
1066 * so we need to rescan it.
1067 */
1068 if (tups_moved)
1069 goto readpage;
1070 }
1071
1072 /* remember tuple for deletion from "read" page */
1074
1075 /*
1076 * we need a copy of index tuples as they can be freed as part of
1077 * overflow page, however we need them to write a WAL record in
1078 * _hash_freeovflpage.
1079 */
1080 itups[nitups] = CopyIndexTuple(itup);
1081 tups_size[nitups++] = itemsz;
1082 all_tups_size += itemsz;
1083 }
1084
1085 /*
1086 * If we reach here, there are no live tuples on the "read" page ---
1087 * it was empty when we got to it, or we moved them all. So we can
1088 * just free the page without bothering with deleting tuples
1089 * individually. Then advance to the previous "read" page.
1090 *
1091 * Tricky point here: if our read and write pages are adjacent in the
1092 * bucket chain, our write lock on wbuf will conflict with
1093 * _hash_freeovflpage's attempt to update the sibling links of the
1094 * removed page. In that case, we don't need to lock it again.
1095 */
1096 rblkno = ropaque->hasho_prevblkno;
1098
1099 /* free this overflow page (releases rbuf) */
1101 tups_size, nitups, bstrategy);
1102
1103 /* be tidy */
1104 for (i = 0; i < nitups; i++)
1105 pfree(itups[i]);
1106
1107 /* are we freeing the page adjacent to wbuf? */
1108 if (rblkno == wblkno)
1109 {
1110 /* retain the pin on primary bucket page till end of bucket scan */
1111 if (wblkno == bucket_blkno)
1113 else
1114 _hash_relbuf(rel, wbuf);
1115 return;
1116 }
1117
1119 rblkno,
1120 HASH_WRITE,
1122 bstrategy);
1125 Assert(ropaque->hasho_bucket == bucket);
1126 }
1127
1128 /* NOTREACHED */
1129}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define CLRBIT(x, i)
Definition blutils.c:28
#define SETBIT(x, i)
Definition blutils.c:29
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3063
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static Size BufferGetPageSize(Buffer buffer)
Definition bufmgr.h:459
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition bufpage.c:933
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition bufpage.c:1160
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:249
PageHeaderData * PageHeader
Definition bufpage.h:199
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:417
PageData * Page
Definition bufpage.h:81
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
#define MAXALIGN(LEN)
Definition c.h:898
uint8_t uint8
Definition c.h:616
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define Assert(condition)
Definition c.h:945
int32_t int32
Definition c.h:614
uint16_t uint16
Definition c.h:617
uint32_t uint32
Definition c.h:618
size_t Size
Definition c.h:691
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define HashPageGetOpaque(page)
Definition hash.h:88
#define LH_BUCKET_PAGE
Definition hash.h:55
#define HASH_MAX_BITMAPS
Definition hash.h:230
#define BMPG_MASK(metap)
Definition hash.h:314
#define HASH_WRITE
Definition hash.h:340
#define LH_UNUSED_PAGE
Definition hash.h:53
#define BITS_PER_MAP
Definition hash.h:329
#define HashPageGetBitmap(page)
Definition hash.h:316
#define LH_META_PAGE
Definition hash.h:57
#define HASHO_PAGE_ID
Definition hash.h:101
#define ISSET(A, N)
Definition hash.h:334
#define HashPageGetMeta(page)
Definition hash.h:323
#define HASH_READ
Definition hash.h:339
#define BMPGSZ_BIT(metap)
Definition hash.h:312
#define HASH_METAPAGE
Definition hash.h:198
#define LH_PAGE_TYPE
Definition hash.h:63
uint32 Bucket
Definition hash.h:35
#define ALL_SET
Definition hash.h:302
#define LH_BITMAP_PAGE
Definition hash.h:56
#define BMPG_SHIFT(metap)
Definition hash.h:313
#define LH_OVERFLOW_PAGE
Definition hash.h:54
#define InvalidBucket
Definition hash.h:37
#define HASH_XLOG_FREE_OVFL_BUFS
Definition hash_xlog.h:22
#define XLOG_HASH_SQUEEZE_PAGE
Definition hash_xlog.h:35
#define SizeOfHashAddOvflPage
Definition hash_xlog.h:80
#define XLOG_HASH_ADD_OVFL_PAGE
Definition hash_xlog.h:30
#define SizeOfHashSqueezePage
Definition hash_xlog.h:168
#define SizeOfHashMovePageContents
Definition hash_xlog.h:138
#define XLOG_HASH_MOVE_PAGE_CONTENTS
Definition hash_xlog.h:34
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition hashinsert.c:331
void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
Definition hashovfl.c:843
static uint32 _hash_firstfreebit(uint32 map)
Definition hashovfl.c:450
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Definition hashovfl.c:62
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
Definition hashovfl.c:492
void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
Definition hashovfl.c:778
Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
Definition hashovfl.c:112
static BlockNumber bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
Definition hashovfl.c:35
Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno)
Definition hashpage.c:135
void _hash_relbuf(Relation rel, Buffer buf)
Definition hashpage.c:266
void _hash_pageinit(Page page, Size size)
Definition hashpage.c:596
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition hashpage.c:70
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition hashpage.c:239
Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
Definition hashpage.c:198
uint32 _hash_get_totalbuckets(uint32 splitpoint_phase)
Definition hashutil.c:174
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition hashutil.c:210
IndexTuple CopyIndexTuple(IndexTuple source)
Definition indextuple.c:479
int j
Definition isn.c:78
int i
Definition isn.c:77
#define ItemIdIsDead(itemId)
Definition itemid.h:113
IndexTupleData * IndexTuple
Definition itup.h:53
static Size IndexTupleSize(const IndexTupleData *itup)
Definition itup.h:71
#define MaxIndexTuplesPerPage
Definition itup.h:181
void pfree(void *pointer)
Definition mcxt.c:1616
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
static char * errmsg
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber hashm_mapp[HASH_MAX_BITMAPS]
Definition hash.h:264
uint32 hashm_spares[HASH_MAX_SPLITPOINTS]
Definition hash.h:262
uint32 hashm_firstfree
Definition hash.h:259
uint16 hashm_bmsize
Definition hash.h:251
uint32 hashm_ovflpoint
Definition hash.h:257
uint32 hashm_nmaps
Definition hash.h:260
BlockNumber hasho_nextblkno
Definition hash.h:80
uint16 hasho_flag
Definition hash.h:82
BlockNumber hasho_prevblkno
Definition hash.h:79
uint16 hasho_page_id
Definition hash.h:83
Bucket hasho_bucket
Definition hash.h:81
BlockNumber prevblkno
Definition hash_xlog.h:156
Datum bit(PG_FUNCTION_ARGS)
Definition varbit.c:391
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:410
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
XLogRecPtr XLogGetFakeLSN(Relation rel)
Definition xloginsert.c:559
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition xloginsert.c:179
#define REGBUF_NO_CHANGE
Definition xloginsert.h:37
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33
#define REGBUF_WILL_INIT
Definition xloginsert.h:34