PostgreSQL Source Code git master
hash_xlog.c
1/*-------------------------------------------------------------------------
2 *
3 * hash_xlog.c
4 * WAL replay logic for hash index.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hash_xlog.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/bufmask.h"
18#include "access/hash.h"
19#include "access/hash_xlog.h"
20#include "access/xlogutils.h"
21#include "storage/standby.h"
22
23/*
24 * replay a hash index meta page
25 */
26static void
27hash_xlog_init_meta_page(XLogReaderState *record)
28{
29 XLogRecPtr lsn = record->EndRecPtr;
30 Page page;
31 Buffer metabuf;
32 ForkNumber forknum;
33
34 xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35
36 /* create the index's metapage */
37 metabuf = XLogInitBufferForRedo(record, 0);
38 Assert(BufferIsValid(metabuf));
39 _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 xlrec->ffactor, true);
41 page = BufferGetPage(metabuf);
42 PageSetLSN(page, lsn);
43 MarkBufferDirty(metabuf);
44
45 /*
46 * Force the on-disk state of init forks to always be in sync with the
47 * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 * special handling for init forks as create index operations don't log a
49 * full page image of the metapage.
50 */
51 XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 if (forknum == INIT_FORKNUM)
53 FlushOneBuffer(metabuf);
54
55 /* all done */
56 UnlockReleaseBuffer(metabuf);
57}
58
59/*
60 * replay a hash index bitmap page
61 */
62static void
63hash_xlog_init_bitmap_page(XLogReaderState *record)
64{
65 XLogRecPtr lsn = record->EndRecPtr;
66 Buffer bitmapbuf;
67 Buffer metabuf;
68 Page page;
69 HashMetaPage metap;
70 uint32 num_buckets;
71 ForkNumber forknum;
72
73 xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74
75 /*
76 * Initialize bitmap page
77 */
78 bitmapbuf = XLogInitBufferForRedo(record, 0);
79 _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 MarkBufferDirty(bitmapbuf);
82
83 /*
84 * Force the on-disk state of init forks to always be in sync with the
85 * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 * special handling for init forks as create index operations don't log a
87 * full page image of the metapage.
88 */
89 XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 if (forknum == INIT_FORKNUM)
91 FlushOneBuffer(bitmapbuf);
92 UnlockReleaseBuffer(bitmapbuf);
93
94 /* add the new bitmap page to the metapage's list of bitmaps */
95 if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 {
97 /*
98 * Note: in normal operation, we'd update the metapage while still
99 * holding lock on the bitmap page. But during replay it's not
100 * necessary to hold that lock, since nobody can see it yet; the
101 * creating transaction hasn't yet committed.
102 */
103 page = BufferGetPage(metabuf);
104 metap = HashPageGetMeta(page);
105
106 num_buckets = metap->hashm_maxbucket + 1;
107 metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 metap->hashm_nmaps++;
109
110 PageSetLSN(page, lsn);
111 MarkBufferDirty(metabuf);
112
113 XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 if (forknum == INIT_FORKNUM)
115 FlushOneBuffer(metabuf);
116 }
117 if (BufferIsValid(metabuf))
118 UnlockReleaseBuffer(metabuf);
119}
120
121/*
122 * replay a hash index insert without split
123 */
124static void
125hash_xlog_insert(XLogReaderState *record)
126{
127 HashMetaPage metap;
128 XLogRecPtr lsn = record->EndRecPtr;
129 xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 Buffer buffer;
131 Page page;
132
133 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 {
135 Size datalen;
136 char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137
138 page = BufferGetPage(buffer);
139
140 if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
141 elog(PANIC, "hash_xlog_insert: failed to add item");
142
143 PageSetLSN(page, lsn);
144 MarkBufferDirty(buffer);
145 }
146 if (BufferIsValid(buffer))
147 UnlockReleaseBuffer(buffer);
148
149 if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
150 {
151 /*
152 * Note: in normal operation, we'd update the metapage while still
153 * holding lock on the page we inserted into. But during replay it's
154 * not necessary to hold that lock, since no other index updates can
155 * be happening concurrently.
156 */
157 page = BufferGetPage(buffer);
158 metap = HashPageGetMeta(page);
159 metap->hashm_ntuples += 1;
160
161 PageSetLSN(page, lsn);
162 MarkBufferDirty(buffer);
163 }
164 if (BufferIsValid(buffer))
165 UnlockReleaseBuffer(buffer);
166}
167
168/*
169 * replay addition of overflow page for hash index
170 */
171static void
172hash_xlog_add_ovfl_page(XLogReaderState *record)
173{
174 XLogRecPtr lsn = record->EndRecPtr;
175 xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
176 Buffer leftbuf;
177 Buffer ovflbuf;
178 Buffer metabuf;
179 BlockNumber leftblk;
180 BlockNumber rightblk;
181 BlockNumber newmapblk = InvalidBlockNumber;
182 Page ovflpage;
183 HashPageOpaque ovflopaque;
184 uint32 *num_bucket;
185 char *data;
186 Size datalen PG_USED_FOR_ASSERTS_ONLY;
187 bool new_bmpage = false;
188
189 XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
190 XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
191
192 ovflbuf = XLogInitBufferForRedo(record, 0);
193 Assert(BufferIsValid(ovflbuf));
194
195 data = XLogRecGetBlockData(record, 0, &datalen);
196 num_bucket = (uint32 *) data;
197 Assert(datalen == sizeof(uint32));
198 _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
199 true);
200 /* update backlink */
201 ovflpage = BufferGetPage(ovflbuf);
202 ovflopaque = HashPageGetOpaque(ovflpage);
203 ovflopaque->hasho_prevblkno = leftblk;
204
205 PageSetLSN(ovflpage, lsn);
206 MarkBufferDirty(ovflbuf);
207
208 if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
209 {
210 Page leftpage;
211 HashPageOpaque leftopaque;
212
213 leftpage = BufferGetPage(leftbuf);
214 leftopaque = HashPageGetOpaque(leftpage);
215 leftopaque->hasho_nextblkno = rightblk;
216
217 PageSetLSN(leftpage, lsn);
218 MarkBufferDirty(leftbuf);
219 }
220
221 if (BufferIsValid(leftbuf))
222 UnlockReleaseBuffer(leftbuf);
223 UnlockReleaseBuffer(ovflbuf);
224
225 /*
226 * Note: in normal operation, we'd update the bitmap and meta page while
227 * still holding lock on the overflow pages. But during replay it's not
228 * necessary to hold those locks, since no other index updates can be
229 * happening concurrently.
230 */
231 if (XLogRecHasBlockRef(record, 2))
232 {
233 Buffer mapbuffer;
234
235 if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
236 {
237 Page mappage = BufferGetPage(mapbuffer);
238 uint32 *freep = NULL;
239 uint32 *bitmap_page_bit;
240
241 freep = HashPageGetBitmap(mappage);
242
243 data = XLogRecGetBlockData(record, 2, &datalen);
244 bitmap_page_bit = (uint32 *) data;
245
246 SETBIT(freep, *bitmap_page_bit);
247
248 PageSetLSN(mappage, lsn);
249 MarkBufferDirty(mapbuffer);
250 }
251 if (BufferIsValid(mapbuffer))
252 UnlockReleaseBuffer(mapbuffer);
253 }
254
255 if (XLogRecHasBlockRef(record, 3))
256 {
257 Buffer newmapbuf;
258
259 newmapbuf = XLogInitBufferForRedo(record, 3);
260
261 _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
262
263 new_bmpage = true;
264 newmapblk = BufferGetBlockNumber(newmapbuf);
265
266 MarkBufferDirty(newmapbuf);
267 PageSetLSN(BufferGetPage(newmapbuf), lsn);
268
269 UnlockReleaseBuffer(newmapbuf);
270 }
271
272 if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
273 {
274 HashMetaPage metap;
275 Page page;
276 uint32 *firstfree_ovflpage;
277
278 data = XLogRecGetBlockData(record, 4, &datalen);
279 firstfree_ovflpage = (uint32 *) data;
280
281 page = BufferGetPage(metabuf);
282 metap = HashPageGetMeta(page);
283 metap->hashm_firstfree = *firstfree_ovflpage;
284
285 if (!xlrec->bmpage_found)
286 {
287 metap->hashm_spares[metap->hashm_ovflpoint]++;
288
289 if (new_bmpage)
290 {
291 Assert(BlockNumberIsValid(newmapblk));
292
293 metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
294 metap->hashm_nmaps++;
295 metap->hashm_spares[metap->hashm_ovflpoint]++;
296 }
297 }
298
299 PageSetLSN(page, lsn);
300 MarkBufferDirty(metabuf);
301 }
302 if (BufferIsValid(metabuf))
303 UnlockReleaseBuffer(metabuf);
304}
305
306/*
307 * replay allocation of page for split operation
308 */
309static void
310hash_xlog_split_allocate_page(XLogReaderState *record)
311{
312 XLogRecPtr lsn = record->EndRecPtr;
313 xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
314 Buffer oldbuf;
315 Buffer newbuf;
316 Buffer metabuf;
317 XLogRedoAction action;
318
319 /*
320 * To be consistent with normal operation, here we take cleanup locks on
321 * both the old and new buckets even though there can't be any concurrent
322 * inserts.
323 */
324
325 /* replay the record for old bucket */
326 action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
327
328 /*
329 * Note that we still update the page even if it was restored from a full
330 * page image, because the special space is not included in the image.
331 */
332 if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
333 {
334 Page oldpage;
335 HashPageOpaque oldopaque;
336
337 oldpage = BufferGetPage(oldbuf);
338 oldopaque = HashPageGetOpaque(oldpage);
339
340 oldopaque->hasho_flag = xlrec->old_bucket_flag;
341 oldopaque->hasho_prevblkno = xlrec->new_bucket;
342
343 PageSetLSN(oldpage, lsn);
344 MarkBufferDirty(oldbuf);
345 }
346
347 /* replay the record for new bucket */
348 XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
349 &newbuf);
350 _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
351 xlrec->new_bucket_flag, true);
352 MarkBufferDirty(newbuf);
353 PageSetLSN(BufferGetPage(newbuf), lsn);
354
355 /*
356 * We can release the lock on the old bucket early as well, but we do it
357 * here to be consistent with normal operation.
358 */
359 if (BufferIsValid(oldbuf))
360 UnlockReleaseBuffer(oldbuf);
361 if (BufferIsValid(newbuf))
362 UnlockReleaseBuffer(newbuf);
363
364 /*
365 * Note: in normal operation, we'd update the meta page while still
366 * holding lock on the old and new bucket pages. But during replay it's
367 * not necessary to hold those locks, since no other bucket splits can be
368 * happening concurrently.
369 */
370
371 /* replay the record for metapage changes */
372 if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
373 {
374 Page page;
375 HashMetaPage metap;
376 Size datalen;
377 char *data;
378 uint32 *uidata;
379 int uidatacount;
380
381 page = BufferGetPage(metabuf);
382 metap = HashPageGetMeta(page);
383 metap->hashm_maxbucket = xlrec->new_bucket;
384
385 data = XLogRecGetBlockData(record, 2, &datalen);
386
387 /*
388 * This cast is ok because XLogRecGetBlockData() returns a MAXALIGNed
389 * buffer.
390 */
391 uidata = (uint32 *) data;
392 uidatacount = 0;
393
394 if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
395 {
396 uint32 lowmask = uidata[uidatacount++];
397 uint32 highmask = uidata[uidatacount++];
398
399 /* update metapage */
400 metap->hashm_lowmask = lowmask;
401 metap->hashm_highmask = highmask;
402 }
403
404 if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
405 {
406 uint32 ovflpoint = uidata[uidatacount++];
407 uint32 ovflpages = uidata[uidatacount++];
408
409 /* update metapage */
410 metap->hashm_ovflpoint = ovflpoint;
411 metap->hashm_spares[ovflpoint] = ovflpages;
412 }
413
414 MarkBufferDirty(metabuf);
415 PageSetLSN(BufferGetPage(metabuf), lsn);
416 }
417
418 if (BufferIsValid(metabuf))
419 UnlockReleaseBuffer(metabuf);
420}
421
422/*
423 * replay of split operation
424 */
425static void
426hash_xlog_split_page(XLogReaderState *record)
427{
428 Buffer buf;
429
430 if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
431 elog(ERROR, "Hash split record did not contain a full-page image");
432
433 UnlockReleaseBuffer(buf);
434}
435
436/*
437 * replay completion of split operation
438 */
439static void
440hash_xlog_split_complete(XLogReaderState *record)
441{
442 XLogRecPtr lsn = record->EndRecPtr;
443 xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
444 Buffer oldbuf;
445 Buffer newbuf;
446 XLogRedoAction action;
447
448 /* replay the record for old bucket */
449 action = XLogReadBufferForRedo(record, 0, &oldbuf);
450
451 /*
452 * Note that we still update the page even if it was restored from a full
453 * page image, because the bucket flag is not included in the image.
454 */
455 if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
456 {
457 Page oldpage;
458 HashPageOpaque oldopaque;
459
460 oldpage = BufferGetPage(oldbuf);
461 oldopaque = HashPageGetOpaque(oldpage);
462
463 oldopaque->hasho_flag = xlrec->old_bucket_flag;
464
465 PageSetLSN(oldpage, lsn);
466 MarkBufferDirty(oldbuf);
467 }
468 if (BufferIsValid(oldbuf))
469 UnlockReleaseBuffer(oldbuf);
470
471 /* replay the record for new bucket */
472 action = XLogReadBufferForRedo(record, 1, &newbuf);
473
474 /*
475 * Note that we still update the page even if it was restored from a full
476 * page image, because the bucket flag is not included in the image.
477 */
478 if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
479 {
480 Page newpage;
481 HashPageOpaque nopaque;
482
483 newpage = BufferGetPage(newbuf);
484 nopaque = HashPageGetOpaque(newpage);
485
486 nopaque->hasho_flag = xlrec->new_bucket_flag;
487
488 PageSetLSN(newpage, lsn);
489 MarkBufferDirty(newbuf);
490 }
491 if (BufferIsValid(newbuf))
492 UnlockReleaseBuffer(newbuf);
493}
494
495/*
496 * replay move of page contents for squeeze operation of hash index
497 */
498static void
499hash_xlog_move_page_contents(XLogReaderState *record)
500{
501 XLogRecPtr lsn = record->EndRecPtr;
502 xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
503 Buffer bucketbuf = InvalidBuffer;
504 Buffer writebuf = InvalidBuffer;
505 Buffer deletebuf = InvalidBuffer;
506 XLogRedoAction action;
507
508 /*
509 * Ensure we have a cleanup lock on primary bucket page before we start
510 * with the actual replay operation. This is to ensure that no scan can
511 * start, and no scan can already be in progress, during the replay of
512 * this operation. If we allow scans during this operation, then they
513 * can miss some records or show the same record multiple times.
514 */
515 if (xldata->is_prim_bucket_same_wrt)
516 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
517 else
518 {
519 /*
520 * We don't care about the return value, as the purpose of reading
521 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
522 */
523 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
524
525 action = XLogReadBufferForRedo(record, 1, &writebuf);
526 }
527
528 /* replay the record for adding entries in overflow buffer */
529 if (action == BLK_NEEDS_REDO)
530 {
531 Page writepage;
532 char *begin;
533 char *data;
534 Size datalen;
535 uint16 ninserted = 0;
536
537 data = begin = XLogRecGetBlockData(record, 1, &datalen);
538
539 writepage = BufferGetPage(writebuf);
540
541 if (xldata->ntups > 0)
542 {
543 OffsetNumber *towrite = (OffsetNumber *) data;
544
545 data += sizeof(OffsetNumber) * xldata->ntups;
546
547 while (data - begin < datalen)
548 {
549 IndexTuple itup = (IndexTuple) data;
550 Size itemsz;
551 OffsetNumber l;
552
553 itemsz = IndexTupleSize(itup);
554 itemsz = MAXALIGN(itemsz);
555
556 data += itemsz;
557
558 l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
559 if (l == InvalidOffsetNumber)
560 elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %zu bytes", itemsz);
561
562 ninserted++;
563 }
564 }
565
566 /*
567 * The number of tuples inserted must be the same as requested in the REDO record.
568 */
569 Assert(ninserted == xldata->ntups);
570
571 PageSetLSN(writepage, lsn);
572 MarkBufferDirty(writebuf);
573 }
574
575 /* replay the record for deleting entries from overflow buffer */
576 if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
577 {
578 Page page;
579 char *ptr;
580 Size len;
581
582 ptr = XLogRecGetBlockData(record, 2, &len);
583
584 page = BufferGetPage(deletebuf);
585
586 if (len > 0)
587 {
588 OffsetNumber *unused;
589 OffsetNumber *unend;
590
591 unused = (OffsetNumber *) ptr;
592 unend = (OffsetNumber *) (ptr + len);
593
594 if ((unend - unused) > 0)
595 PageIndexMultiDelete(page, unused, unend - unused);
596 }
597
598 PageSetLSN(page, lsn);
599 MarkBufferDirty(deletebuf);
600 }
601
602 /*
603 * Replay is complete, so now we can release the buffers. We release the
604 * locks at the end of the replay operation to ensure that we hold the
605 * lock on the primary bucket page until the end of the operation. We
606 * could release the lock on the write buffer as soon as its changes are
607 * replayed, when it is not the same as the primary bucket page, but that
608 * doesn't seem to be worth complicating the code.
609 */
610 if (BufferIsValid(deletebuf))
611 UnlockReleaseBuffer(deletebuf);
612
613 if (BufferIsValid(writebuf))
614 UnlockReleaseBuffer(writebuf);
615
616 if (BufferIsValid(bucketbuf))
617 UnlockReleaseBuffer(bucketbuf);
618}
619
620/*
621 * replay squeeze page operation of hash index
622 */
623static void
624hash_xlog_squeeze_page(XLogReaderState *record)
625{
626 XLogRecPtr lsn = record->EndRecPtr;
627 xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
628 Buffer bucketbuf = InvalidBuffer;
629 Buffer writebuf = InvalidBuffer;
630 Buffer ovflbuf;
631 Buffer prevbuf = InvalidBuffer;
632 Buffer mapbuf;
633 XLogRedoAction action;
634
635 /*
636 * Ensure we have a cleanup lock on primary bucket page before we start
637 * with the actual replay operation. This is to ensure that no scan can
638 * start, and no scan can already be in progress, during the replay of
639 * this operation. If we allow scans during this operation, then they
640 * can miss some records or show the same record multiple times.
641 */
642 if (xldata->is_prim_bucket_same_wrt)
643 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
644 else
645 {
646 /*
647 * We don't care about the return value, as the purpose of reading
648 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
649 */
650 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
651
652 if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
653 action = XLogReadBufferForRedo(record, 1, &writebuf);
654 else
655 action = BLK_NOTFOUND;
656 }
657
658 /* replay the record for adding entries in overflow buffer */
659 if (action == BLK_NEEDS_REDO)
660 {
661 Page writepage;
662 char *begin;
663 char *data;
664 Size datalen;
665 uint16 ninserted = 0;
666 bool mod_wbuf = false;
667
668 data = begin = XLogRecGetBlockData(record, 1, &datalen);
669
670 writepage = BufferGetPage(writebuf);
671
672 if (xldata->ntups > 0)
673 {
674 OffsetNumber *towrite = (OffsetNumber *) data;
675
676 data += sizeof(OffsetNumber) * xldata->ntups;
677
678 while (data - begin < datalen)
679 {
680 IndexTuple itup = (IndexTuple) data;
681 Size itemsz;
682 OffsetNumber l;
683
684 itemsz = IndexTupleSize(itup);
685 itemsz = MAXALIGN(itemsz);
686
687 data += itemsz;
688
689 l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
690 if (l == InvalidOffsetNumber)
691 elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %zu bytes", itemsz);
692
693 ninserted++;
694 }
695
696 mod_wbuf = true;
697 }
698 else
699 {
700 /*
701 * Ensure that the required flags are set when there are no
702 * tuples. See _hash_freeovflpage().
703 */
704 Assert(xldata->is_prim_bucket_same_wrt ||
705 xldata->is_prev_bucket_same_wrt);
706 }
707
708 /*
709 * The number of tuples inserted must be the same as requested in the REDO record.
710 */
711 Assert(ninserted == xldata->ntups);
712
713 /*
714 * If the page to which we are adding tuples is the page previous to the
715 * freed overflow page, then update its nextblkno.
716 */
717 if (xldata->is_prev_bucket_same_wrt)
718 {
719 HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
720
721 writeopaque->hasho_nextblkno = xldata->nextblkno;
722 mod_wbuf = true;
723 }
724
725 /* Set LSN and mark writebuf dirty iff it is modified */
726 if (mod_wbuf)
727 {
728 PageSetLSN(writepage, lsn);
729 MarkBufferDirty(writebuf);
730 }
731 }
732
733 /* replay the record for initializing overflow buffer */
734 if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
735 {
736 Page ovflpage;
737 HashPageOpaque ovflopaque;
738
739 ovflpage = BufferGetPage(ovflbuf);
740
741 _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
742
743 ovflopaque = HashPageGetOpaque(ovflpage);
744
745 ovflopaque->hasho_prevblkno = InvalidBlockNumber;
746 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
747 ovflopaque->hasho_bucket = InvalidBucket;
748 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
749 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
750
751 PageSetLSN(ovflpage, lsn);
752 MarkBufferDirty(ovflbuf);
753 }
754 if (BufferIsValid(ovflbuf))
755 UnlockReleaseBuffer(ovflbuf);
756
757 /* replay the record for page previous to the freed overflow page */
758 if (!xldata->is_prev_bucket_same_wrt &&
759 XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
760 {
761 Page prevpage = BufferGetPage(prevbuf);
762 HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
763
764 prevopaque->hasho_nextblkno = xldata->nextblkno;
765
766 PageSetLSN(prevpage, lsn);
767 MarkBufferDirty(prevbuf);
768 }
769 if (BufferIsValid(prevbuf))
770 UnlockReleaseBuffer(prevbuf);
771
772 /* replay the record for page next to the freed overflow page */
773 if (XLogRecHasBlockRef(record, 4))
774 {
775 Buffer nextbuf;
776
777 if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
778 {
779 Page nextpage = BufferGetPage(nextbuf);
780 HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
781
782 nextopaque->hasho_prevblkno = xldata->prevblkno;
783
784 PageSetLSN(nextpage, lsn);
785 MarkBufferDirty(nextbuf);
786 }
787 if (BufferIsValid(nextbuf))
788 UnlockReleaseBuffer(nextbuf);
789 }
790
791 if (BufferIsValid(writebuf))
792 UnlockReleaseBuffer(writebuf);
793
794 if (BufferIsValid(bucketbuf))
795 UnlockReleaseBuffer(bucketbuf);
796
797 /*
798 * Note: in normal operation, we'd update the bitmap and meta page while
799 * still holding lock on the primary bucket page and overflow pages. But
800 * during replay it's not necessary to hold those locks, since no other
801 * index updates can be happening concurrently.
802 */
803 /* replay the record for bitmap page */
804 if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
805 {
806 Page mappage = BufferGetPage(mapbuf);
807 uint32 *freep = NULL;
808 char *data;
809 uint32 *bitmap_page_bit;
810 Size datalen;
811
812 freep = HashPageGetBitmap(mappage);
813
814 data = XLogRecGetBlockData(record, 5, &datalen);
815 bitmap_page_bit = (uint32 *) data;
816
817 CLRBIT(freep, *bitmap_page_bit);
818
819 PageSetLSN(mappage, lsn);
820 MarkBufferDirty(mapbuf);
821 }
822 if (BufferIsValid(mapbuf))
823 UnlockReleaseBuffer(mapbuf);
824
825 /* replay the record for meta page */
826 if (XLogRecHasBlockRef(record, 6))
827 {
828 Buffer metabuf;
829
830 if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
831 {
832 HashMetaPage metap;
833 Page page;
834 char *data;
835 uint32 *firstfree_ovflpage;
836 Size datalen;
837
838 data = XLogRecGetBlockData(record, 6, &datalen);
839 firstfree_ovflpage = (uint32 *) data;
840
841 page = BufferGetPage(metabuf);
842 metap = HashPageGetMeta(page);
843 metap->hashm_firstfree = *firstfree_ovflpage;
844
845 PageSetLSN(page, lsn);
846 MarkBufferDirty(metabuf);
847 }
848 if (BufferIsValid(metabuf))
849 UnlockReleaseBuffer(metabuf);
850 }
851}
852
853/*
854 * replay delete operation of hash index
855 */
856static void
857hash_xlog_delete(XLogReaderState *record)
858{
859 XLogRecPtr lsn = record->EndRecPtr;
860 xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
861 Buffer bucketbuf = InvalidBuffer;
862 Buffer deletebuf;
863 Page page;
864 XLogRedoAction action;
865
866 /*
867 * Ensure we have a cleanup lock on primary bucket page before we start
868 * with the actual replay operation. This is to ensure that no scan can
869 * start, and no scan can already be in progress, during the replay of
870 * this operation. If we allow scans during this operation, then they
871 * can miss some records or show the same record multiple times.
872 */
873 if (xldata->is_primary_bucket_page)
874 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
875 else
876 {
877 /*
878 * We don't care about the return value, as the purpose of reading
879 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
880 */
881 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
882
883 action = XLogReadBufferForRedo(record, 1, &deletebuf);
884 }
885
886 /* replay the record for deleting entries in bucket page */
887 if (action == BLK_NEEDS_REDO)
888 {
889 char *ptr;
890 Size len;
891
892 ptr = XLogRecGetBlockData(record, 1, &len);
893
894 page = BufferGetPage(deletebuf);
895
896 if (len > 0)
897 {
898 OffsetNumber *unused;
899 OffsetNumber *unend;
900
901 unused = (OffsetNumber *) ptr;
902 unend = (OffsetNumber *) (ptr + len);
903
904 if ((unend - unused) > 0)
905 PageIndexMultiDelete(page, unused, unend - unused);
906 }
907
908 /*
909 * Mark the page as not containing any LP_DEAD items only if
910 * clear_dead_marking flag is set to true. See comments in
911 * hashbucketcleanup() for details.
912 */
913 if (xldata->clear_dead_marking)
914 {
915 HashPageOpaque pageopaque;
916
917 pageopaque = HashPageGetOpaque(page);
918 pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
919 }
920
921 PageSetLSN(page, lsn);
922 MarkBufferDirty(deletebuf);
923 }
924 if (BufferIsValid(deletebuf))
925 UnlockReleaseBuffer(deletebuf);
926
927 if (BufferIsValid(bucketbuf))
928 UnlockReleaseBuffer(bucketbuf);
929}
930
931/*
932 * replay split cleanup flag operation for primary bucket page.
933 */
934static void
935hash_xlog_split_cleanup(XLogReaderState *record)
936{
937 XLogRecPtr lsn = record->EndRecPtr;
938 Buffer buffer;
939 Page page;
940
941 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
942 {
943 HashPageOpaque bucket_opaque;
944
945 page = BufferGetPage(buffer);
946
947 bucket_opaque = HashPageGetOpaque(page);
948 bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
949 PageSetLSN(page, lsn);
950 MarkBufferDirty(buffer);
951 }
952 if (BufferIsValid(buffer))
953 UnlockReleaseBuffer(buffer);
954}
955
956/*
957 * replay for update meta page
958 */
959static void
960hash_xlog_update_meta_page(XLogReaderState *record)
961{
962 HashMetaPage metap;
963 XLogRecPtr lsn = record->EndRecPtr;
964 xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
965 Buffer metabuf;
966 Page page;
967
968 if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
969 {
970 page = BufferGetPage(metabuf);
971 metap = HashPageGetMeta(page);
972
973 metap->hashm_ntuples = xldata->ntuples;
974
975 PageSetLSN(page, lsn);
976 MarkBufferDirty(metabuf);
977 }
978 if (BufferIsValid(metabuf))
979 UnlockReleaseBuffer(metabuf);
980}
981
982/*
983 * replay delete operation in hash index to remove
984 * tuples marked as DEAD during index tuple insertion.
985 */
986static void
987hash_xlog_vacuum_one_page(XLogReaderState *record)
988{
989 XLogRecPtr lsn = record->EndRecPtr;
990 xl_hash_vacuum_one_page *xldata;
991 Buffer buffer;
992 Buffer metabuf;
993 Page page;
994 XLogRedoAction action;
995 HashPageOpaque pageopaque;
996 OffsetNumber *toDelete;
997
998 xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
999 toDelete = xldata->offsets;
1000
1001 /*
1002 * If we have any conflict processing to do, it must happen before we
1003 * update the page.
1004 *
1005 * Hash index records that are marked as LP_DEAD and being removed during
1006 * hash index tuple insertion can conflict with standby queries. You might
1007 * think that vacuum records would conflict as well, but we've handled
1008 * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1009 * xid cleaned by the vacuum of the heap and so we can resolve any
1010 * conflicts just once when that arrives. After that we know that no
1011 * conflicts exist from individual hash index vacuum records on that
1012 * index.
1013 */
1014 if (InHotStandby)
1015 {
1016 RelFileLocator rlocator;
1017
1018 XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1019 ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1020 xldata->isCatalogRel,
1021 rlocator);
1022 }
1023
1024 action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1025
1026 if (action == BLK_NEEDS_REDO)
1027 {
1028 page = BufferGetPage(buffer);
1029
1030 PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1031
1032 /*
1033 * Mark the page as not containing any LP_DEAD items. See comments in
1034 * _hash_vacuum_one_page() for details.
1035 */
1036 pageopaque = HashPageGetOpaque(page);
1037 pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1038
1039 PageSetLSN(page, lsn);
1040 MarkBufferDirty(buffer);
1041 }
1042 if (BufferIsValid(buffer))
1043 UnlockReleaseBuffer(buffer);
1044
1045 if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1046 {
1047 Page metapage;
1048 HashMetaPage metap;
1049
1050 metapage = BufferGetPage(metabuf);
1051 metap = HashPageGetMeta(metapage);
1052
1053 metap->hashm_ntuples -= xldata->ntuples;
1054
1055 PageSetLSN(metapage, lsn);
1056 MarkBufferDirty(metabuf);
1057 }
1058 if (BufferIsValid(metabuf))
1059 UnlockReleaseBuffer(metabuf);
1060}
1061
1062void
1063hash_redo(XLogReaderState *record)
1064{
1065 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1066
1067 switch (info)
1068 {
1069 case XLOG_HASH_INIT_META_PAGE:
1070 hash_xlog_init_meta_page(record);
1071 break;
1072 case XLOG_HASH_INIT_BITMAP_PAGE:
1073 hash_xlog_init_bitmap_page(record);
1074 break;
1075 case XLOG_HASH_INSERT:
1076 hash_xlog_insert(record);
1077 break;
1078 case XLOG_HASH_ADD_OVFL_PAGE:
1079 hash_xlog_add_ovfl_page(record);
1080 break;
1081 case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1082 hash_xlog_split_allocate_page(record);
1083 break;
1084 case XLOG_HASH_SPLIT_PAGE:
1085 hash_xlog_split_page(record);
1086 break;
1087 case XLOG_HASH_SPLIT_COMPLETE:
1088 hash_xlog_split_complete(record);
1089 break;
1090 case XLOG_HASH_MOVE_PAGE_CONTENTS:
1091 hash_xlog_move_page_contents(record);
1092 break;
1093 case XLOG_HASH_SQUEEZE_PAGE:
1094 hash_xlog_squeeze_page(record);
1095 break;
1096 case XLOG_HASH_DELETE:
1097 hash_xlog_delete(record);
1098 break;
1099 case XLOG_HASH_SPLIT_CLEANUP:
1100 hash_xlog_split_cleanup(record);
1101 break;
1102 case XLOG_HASH_UPDATE_META_PAGE:
1103 hash_xlog_update_meta_page(record);
1104 break;
1105 case XLOG_HASH_VACUUM_ONE_PAGE:
1106 hash_xlog_vacuum_one_page(record);
1107 break;
1108 default:
1109 elog(PANIC, "hash_redo: unknown op code %u", info);
1110 }
1111}
1112
1113/*
1114 * Mask a hash page before performing consistency checks on it.
1115 */
1116void
1117hash_mask(char *pagedata, BlockNumber blkno)
1118{
1119 Page page = (Page) pagedata;
1120 HashPageOpaque opaque;
1121 int pagetype;
1122
1123 mask_page_lsn_and_checksum(page);
1124
1125 mask_page_hint_bits(page);
1126 mask_unused_space(page);
1127
1128 opaque = HashPageGetOpaque(page);
1129
1130 pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1131 if (pagetype == LH_UNUSED_PAGE)
1132 {
1133 /*
1134 * Mask everything on a UNUSED page.
1135 */
1136 mask_page_content(page);
1137 }
1138 else if (pagetype == LH_BUCKET_PAGE ||
1139 pagetype == LH_OVERFLOW_PAGE)
1140 {
1141 /*
1142 * In hash bucket and overflow pages, it is possible to modify the
1143 * LP_FLAGS without emitting any WAL record. Hence, mask the line
1144 * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1145 */
1146 mask_lp_flags(page);
1147 }
1148
1149 /*
1150 * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1151 * unlogged. So, mask it. See _hash_kill_items() for details.
1152 */
1153 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1154}
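
The redo functions above all share the same replay skeleton: look up the block referenced by the WAL record, apply the logged change only when the buffer was not already restored from a full-page image, then stamp the page with the record's LSN, mark the buffer dirty, and release it. The fragment below is a minimal sketch of that pattern; it is not part of hash_xlog.c, and hash_xlog_example and the use of block reference 0 are placeholders.

/*
 * Illustrative sketch of the common redo pattern used by the functions in
 * this file (not part of hash_xlog.c).
 */
static void
hash_xlog_example(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      buffer;

    /* BLK_NEEDS_REDO means the page was not restored from a full-page image */
    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Page        page = BufferGetPage(buffer);

        /* ... apply the change carried by the WAL record to the page ... */

        PageSetLSN(page, lsn);      /* stamp the page with the record's LSN */
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}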