/*-------------------------------------------------------------------------
 *
 * hash_xlog.c
 *	  WAL replay logic for hash index.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hash_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xlogutils.h"
#include "storage/standby.h"

/*
 * replay a hash index meta page
 */
static void
hash_xlog_init_meta_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Page		page;
	Buffer		metabuf;
	ForkNumber	forknum;

	xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);

	/* create the index' metapage */
	metabuf = XLogInitBufferForRedo(record, 0);
	Assert(BufferIsValid(metabuf));
	_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
						  xlrec->ffactor, true);
	page = (Page) BufferGetPage(metabuf);
	PageSetLSN(page, lsn);
	MarkBufferDirty(metabuf);

	/*
	 * Force the on-disk state of init forks to always be in sync with the
	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
	 * special handling for init forks as create index operations don't log a
	 * full page image of the metapage.
	 */
	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(metabuf);

	/* all done */
	UnlockReleaseBuffer(metabuf);
}
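
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: the init-fork
 * flush above (and its twin in hash_xlog_init_bitmap_page below) could be
 * factored as a helper.  For an unlogged index's INIT_FORKNUM, creation
 * does not log a full page image, so replay must itself force the page to
 * disk to keep the on-disk state in sync with shared buffers.  The name
 * "example_sync_init_fork" is hypothetical.
 */
static void
example_sync_init_fork(XLogReaderState *record, uint8 block_id, Buffer buf)
{
	ForkNumber	forknum;

	/* which fork does this block reference point at? */
	XLogRecGetBlockTag(record, block_id, NULL, &forknum, NULL);

	/* init forks get no full-page image, so push the page to disk now */
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(buf);
}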

/*
 * replay a hash index bitmap page
 */
static void
hash_xlog_init_bitmap_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Buffer		bitmapbuf;
	Buffer		metabuf;
	Page		page;
	HashMetaPage metap;
	uint32		num_buckets;
	ForkNumber	forknum;

	xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);

	/*
	 * Initialize bitmap page
	 */
	bitmapbuf = XLogInitBufferForRedo(record, 0);
	_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
	PageSetLSN(BufferGetPage(bitmapbuf), lsn);
	MarkBufferDirty(bitmapbuf);

	/*
	 * Force the on-disk state of init forks to always be in sync with the
	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
	 * special handling for init forks as create index operations don't log a
	 * full page image of the metapage.
	 */
	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(bitmapbuf);
	UnlockReleaseBuffer(bitmapbuf);

	/* add the new bitmap page to the metapage's list of bitmaps */
	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		/*
		 * Note: in normal operation, we'd update the metapage while still
		 * holding lock on the bitmap page.  But during replay it's not
		 * necessary to hold that lock, since nobody can see it yet; the
		 * creating transaction hasn't yet committed.
		 */
		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);

		num_buckets = metap->hashm_maxbucket + 1;
		metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
		metap->hashm_nmaps++;

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);

		XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
		if (forknum == INIT_FORKNUM)
			FlushOneBuffer(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
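
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: the arithmetic
 * behind "num_buckets + 1" above.  When this record is replayed, the index
 * consists of the metapage at block 0 followed by the initial buckets at
 * blocks 1..num_buckets, so the first bitmap page lands at block
 * num_buckets + 1.  The helper name is hypothetical.
 */
static inline BlockNumber
example_first_bitmap_block(uint32 num_buckets)
{
	/* block 0 is the metapage, blocks 1..num_buckets are bucket pages */
	return (BlockNumber) (num_buckets + 1);
}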

/*
 * replay a hash index insert without split
 */
static void
hash_xlog_insert(XLogReaderState *record)
{
	HashMetaPage metap;
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Size		datalen;
		char	   *datapos = XLogRecGetBlockData(record, 0, &datalen);

		page = BufferGetPage(buffer);

		if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
						false, false) == InvalidOffsetNumber)
			elog(PANIC, "hash_xlog_insert: failed to add item");

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
	{
		/*
		 * Note: in normal operation, we'd update the metapage while still
		 * holding lock on the page we inserted into.  But during replay it's
		 * not necessary to hold that lock, since no other index updates can
		 * be happening concurrently.
		 */
		page = BufferGetPage(buffer);
		metap = HashPageGetMeta(page);
		metap->hashm_ntuples += 1;

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
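
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: the
 * buffer-handling protocol that hash_xlog_insert() above (and most redo
 * routines in this file) repeat for each block referenced by a WAL record.
 * A block is re-modified only when XLogReadBufferForRedo() reports
 * BLK_NEEDS_REDO; either way the buffer, if valid, must be unlocked and
 * released.  The function name is hypothetical.
 */
static void
example_redo_one_block(XLogReaderState *record, uint8 block_id)
{
	Buffer		buf;

	if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
	{
		Page		page = BufferGetPage(buf);

		/* ... re-apply the logged change to "page" here ... */

		/* stamp the page with the record's end LSN, then dirty it */
		PageSetLSN(page, record->EndRecPtr);
		MarkBufferDirty(buf);
	}

	/* the buffer can be invalid, e.g. if the block was since truncated away */
	if (BufferIsValid(buf))
		UnlockReleaseBuffer(buf);
}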

/*
 * replay addition of overflow page for hash index
 */
static void
hash_xlog_add_ovfl_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
	Buffer		leftbuf;
	Buffer		ovflbuf;
	Buffer		metabuf;
	BlockNumber leftblk;
	BlockNumber rightblk;
	BlockNumber newmapblk = InvalidBlockNumber;
	Page		ovflpage;
	HashPageOpaque ovflopaque;
	uint32	   *num_bucket;
	char	   *data;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	bool		new_bmpage = false;

	XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
	XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);

	ovflbuf = XLogInitBufferForRedo(record, 0);
	Assert(BufferIsValid(ovflbuf));

	data = XLogRecGetBlockData(record, 0, &datalen);
	num_bucket = (uint32 *) data;
	Assert(datalen == sizeof(uint32));
	_hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
				  true);
	/* update backlink */
	ovflpage = BufferGetPage(ovflbuf);
	ovflopaque = HashPageGetOpaque(ovflpage);
	ovflopaque->hasho_prevblkno = leftblk;

	PageSetLSN(ovflpage, lsn);
	MarkBufferDirty(ovflbuf);

	if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
	{
		Page		leftpage;
		HashPageOpaque leftopaque;

		leftpage = BufferGetPage(leftbuf);
		leftopaque = HashPageGetOpaque(leftpage);
		leftopaque->hasho_nextblkno = rightblk;

		PageSetLSN(leftpage, lsn);
		MarkBufferDirty(leftbuf);
	}

	if (BufferIsValid(leftbuf))
		UnlockReleaseBuffer(leftbuf);
	UnlockReleaseBuffer(ovflbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the overflow pages.  But during replay it's not
	 * necessary to hold those locks, since no other index updates can be
	 * happening concurrently.
	 */
	if (XLogRecHasBlockRef(record, 2))
	{
		Buffer		mapbuffer;

		if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
		{
			Page		mappage = (Page) BufferGetPage(mapbuffer);
			uint32	   *freep = NULL;
			uint32	   *bitmap_page_bit;

			freep = HashPageGetBitmap(mappage);

			data = XLogRecGetBlockData(record, 2, &datalen);
			bitmap_page_bit = (uint32 *) data;

			SETBIT(freep, *bitmap_page_bit);

			PageSetLSN(mappage, lsn);
			MarkBufferDirty(mapbuffer);
		}
		if (BufferIsValid(mapbuffer))
			UnlockReleaseBuffer(mapbuffer);
	}

	if (XLogRecHasBlockRef(record, 3))
	{
		Buffer		newmapbuf;

		newmapbuf = XLogInitBufferForRedo(record, 3);

		_hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);

		new_bmpage = true;
		newmapblk = BufferGetBlockNumber(newmapbuf);

		MarkBufferDirty(newmapbuf);
		PageSetLSN(BufferGetPage(newmapbuf), lsn);

		UnlockReleaseBuffer(newmapbuf);
	}

	if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
	{
		HashMetaPage metap;
		Page		page;
		uint32	   *firstfree_ovflpage;

		data = XLogRecGetBlockData(record, 4, &datalen);
		firstfree_ovflpage = (uint32 *) data;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_firstfree = *firstfree_ovflpage;

		if (!xlrec->bmpage_found)
		{
			metap->hashm_spares[metap->hashm_ovflpoint]++;

			if (new_bmpage)
			{
				Assert(BlockNumberIsValid(newmapblk));

				metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
				metap->hashm_nmaps++;
				metap->hashm_spares[metap->hashm_ovflpoint]++;
			}
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
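
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: a hash bitmap page
 * tracks one bit per overflow page; the SETBIT() above marks the recycled
 * overflow page as in use, and hash_xlog_squeeze_page() below uses CLRBIT()
 * to mark a freed page as available again.  A minimal stand-in for the set
 * operation on a word-addressed bitmap (the real macros may address the
 * array differently):
 */
static inline void
example_set_ovfl_bit(uint32 *bitmap, uint32 bitnum)
{
	/* word index, then bit position within the 32-bit word */
	bitmap[bitnum / 32] |= ((uint32) 1) << (bitnum % 32);
}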

/*
 * replay allocation of page for split operation
 */
static void
hash_xlog_split_allocate_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	Buffer		metabuf;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	char	   *data;
	XLogRedoAction action;

	/*
	 * To be consistent with normal operation, here we take cleanup locks on
	 * both the old and new buckets even though there can't be any concurrent
	 * inserts.
	 */

	/* replay the record for old bucket */
	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the special space is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		oldopaque->hasho_flag = xlrec->old_bucket_flag;
		oldopaque->hasho_prevblkno = xlrec->new_bucket;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}

	/* replay the record for new bucket */
	XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
								  &newbuf);
	_hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
				  xlrec->new_bucket_flag, true);
	MarkBufferDirty(newbuf);
	PageSetLSN(BufferGetPage(newbuf), lsn);

	/*
	 * We could release the lock on the old bucket earlier as well, but doing
	 * it here keeps replay consistent with normal operation.
	 */
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);

	/*
	 * Note: in normal operation, we'd update the meta page while still
	 * holding lock on the old and new bucket pages.  But during replay it's
	 * not necessary to hold those locks, since no other bucket splits can be
	 * happening concurrently.
	 */

	/* replay the record for metapage changes */
	if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		HashMetaPage metap;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_maxbucket = xlrec->new_bucket;

		data = XLogRecGetBlockData(record, 2, &datalen);

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
		{
			uint32		lowmask;
			uint32	   *highmask;

			/* extract low and high masks. */
			memcpy(&lowmask, data, sizeof(uint32));
			highmask = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_lowmask = lowmask;
			metap->hashm_highmask = *highmask;

			data += sizeof(uint32) * 2;
		}

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
		{
			uint32		ovflpoint;
			uint32	   *ovflpages;

			/* extract information of overflow pages. */
			memcpy(&ovflpoint, data, sizeof(uint32));
			ovflpages = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_spares[ovflpoint] = *ovflpages;
			metap->hashm_ovflpoint = ovflpoint;
		}

		MarkBufferDirty(metabuf);
		PageSetLSN(BufferGetPage(metabuf), lsn);
	}

	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
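
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: how the masks
 * replayed above are used once recovery finishes.  This mirrors the logic
 * of _hash_hashkey2bucket(): a hash value is first masked with the high
 * mask, and if that lands beyond the last bucket created so far
 * (hashm_maxbucket), it is wrapped back into the lower half of the table
 * with the low mask.  The helper name is hypothetical.
 */
static inline uint32
example_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
					   uint32 highmask, uint32 lowmask)
{
	uint32		bucket = hashkey & highmask;

	/* not split that far yet?  Wrap into the existing buckets. */
	if (bucket > maxbucket)
		bucket = bucket & lowmask;

	return bucket;
}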

/*
 * replay of split operation
 */
static void
hash_xlog_split_page(XLogReaderState *record)
{
	Buffer		buf;

	if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
		elog(ERROR, "Hash split record did not contain a full-page image");

	UnlockReleaseBuffer(buf);
}

/*
 * replay completion of split operation
 */
static void
hash_xlog_split_complete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	XLogRedoAction action;

	/* replay the record for old bucket */
	action = XLogReadBufferForRedo(record, 0, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		oldopaque->hasho_flag = xlrec->old_bucket_flag;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);

	/* replay the record for new bucket */
	action = XLogReadBufferForRedo(record, 1, &newbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		newpage;
		HashPageOpaque nopaque;

		newpage = BufferGetPage(newbuf);
		nopaque = HashPageGetOpaque(newpage);

		nopaque->hasho_flag = xlrec->new_bucket_flag;

		PageSetLSN(newpage, lsn);
		MarkBufferDirty(newbuf);
	}
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);
}

/*
 * replay move of page contents for squeeze operation of hash index
 */
static void
hash_xlog_move_page_contents(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		deletebuf = InvalidBuffer;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation.  This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation.  If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &writebuf);
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;

		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}
		}

		/*
		 * number of tuples inserted must be same as requested in REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		PageSetLSN(writepage, lsn);
		MarkBufferDirty(writebuf);
	}

	/* replay the record for deleting entries from overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		char	   *ptr;
		Size		len;

		ptr = XLogRecGetBlockData(record, 2, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}

	/*
	 * Replay is complete, now we can release the buffers.  We release locks
	 * at end of replay operation to ensure that we hold lock on primary
	 * bucket page till end of operation.  We can optimize by releasing the
	 * lock on write buffer as soon as the operation for same is complete, if
	 * it is not same as primary bucket page, but that doesn't seem to be
	 * worth complicating the code.
	 */
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}
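
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: the layout of the
 * block-1 payload decoded above.  The record data is an array of ntups
 * target OffsetNumbers followed by the MAXALIGNed index tuples themselves,
 * so a reader first skips the offset array and then advances tuple by tuple
 * until datalen bytes are consumed.  The function name is hypothetical.
 */
static uint16
example_count_payload_tuples(char *data, Size datalen, uint16 ntups)
{
	char	   *begin = data;
	uint16		nfound = 0;

	/* skip the leading array of target offsets */
	data += sizeof(OffsetNumber) * ntups;

	/* walk the MAXALIGNed tuples that follow it */
	while (data - begin < datalen)
	{
		data += MAXALIGN(IndexTupleSize((IndexTuple) data));
		nfound++;
	}

	/* should match ntups, as the Assert in the replay loop checks */
	return nfound;
}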

/*
 * replay squeeze page operation of hash index
 */
static void
hash_xlog_squeeze_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		ovflbuf;
	Buffer		prevbuf = InvalidBuffer;
	Buffer		mapbuf;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation.  This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation.  If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
			action = XLogReadBufferForRedo(record, 1, &writebuf);
		else
			action = BLK_NOTFOUND;
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;
		bool		mod_wbuf = false;

		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}

			mod_wbuf = true;
		}
		else
		{
			/*
			 * Ensure that the required flags are set when there are no
			 * tuples.  See _hash_freeovflpage().
			 */
			Assert(xldata->is_prim_bucket_same_wrt ||
				   xldata->is_prev_bucket_same_wrt);
		}

		/*
		 * number of tuples inserted must be same as requested in REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		/*
		 * if the page on which we are adding tuples is a page previous to
		 * the freed overflow page, then update its nextblkno.
		 */
		if (xldata->is_prev_bucket_same_wrt)
		{
			HashPageOpaque writeopaque = HashPageGetOpaque(writepage);

			writeopaque->hasho_nextblkno = xldata->nextblkno;
			mod_wbuf = true;
		}

		/* Set LSN and mark writebuf dirty iff it is modified */
		if (mod_wbuf)
		{
			PageSetLSN(writepage, lsn);
			MarkBufferDirty(writebuf);
		}
	}

	/* replay the record for initializing overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
	{
		Page		ovflpage;
		HashPageOpaque ovflopaque;

		ovflpage = BufferGetPage(ovflbuf);

		_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));

		ovflopaque = HashPageGetOpaque(ovflpage);

		ovflopaque->hasho_prevblkno = InvalidBlockNumber;
		ovflopaque->hasho_nextblkno = InvalidBlockNumber;
		ovflopaque->hasho_bucket = InvalidBucket;
		ovflopaque->hasho_flag = LH_UNUSED_PAGE;
		ovflopaque->hasho_page_id = HASHO_PAGE_ID;

		PageSetLSN(ovflpage, lsn);
		MarkBufferDirty(ovflbuf);
	}
	if (BufferIsValid(ovflbuf))
		UnlockReleaseBuffer(ovflbuf);

	/* replay the record for page previous to the freed overflow page */
	if (!xldata->is_prev_bucket_same_wrt &&
		XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
	{
		Page		prevpage = BufferGetPage(prevbuf);
		HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);

		prevopaque->hasho_nextblkno = xldata->nextblkno;

		PageSetLSN(prevpage, lsn);
		MarkBufferDirty(prevbuf);
	}
	if (BufferIsValid(prevbuf))
		UnlockReleaseBuffer(prevbuf);

	/* replay the record for page next to the freed overflow page */
	if (XLogRecHasBlockRef(record, 4))
	{
		Buffer		nextbuf;

		if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
		{
			Page		nextpage = BufferGetPage(nextbuf);
			HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);

			nextopaque->hasho_prevblkno = xldata->prevblkno;

			PageSetLSN(nextpage, lsn);
			MarkBufferDirty(nextbuf);
		}
		if (BufferIsValid(nextbuf))
			UnlockReleaseBuffer(nextbuf);
	}

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the primary bucket page and overflow pages.  But
	 * during replay it's not necessary to hold those locks, since no other
	 * index updates can be happening concurrently.
	 */
	/* replay the record for bitmap page */
	if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
	{
		Page		mappage = (Page) BufferGetPage(mapbuf);
		uint32	   *freep = NULL;
		char	   *data;
		uint32	   *bitmap_page_bit;
		Size		datalen;

		freep = HashPageGetBitmap(mappage);

		data = XLogRecGetBlockData(record, 5, &datalen);
		bitmap_page_bit = (uint32 *) data;

		CLRBIT(freep, *bitmap_page_bit);

		PageSetLSN(mappage, lsn);
		MarkBufferDirty(mapbuf);
	}
	if (BufferIsValid(mapbuf))
		UnlockReleaseBuffer(mapbuf);

	/* replay the record for meta page */
	if (XLogRecHasBlockRef(record, 6))
	{
		Buffer		metabuf;

		if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
		{
			HashMetaPage metap;
			Page		page;
			char	   *data;
			uint32	   *firstfree_ovflpage;
			Size		datalen;

			data = XLogRecGetBlockData(record, 6, &datalen);
			firstfree_ovflpage = (uint32 *) data;

			page = BufferGetPage(metabuf);
			metap = HashPageGetMeta(page);
			metap->hashm_firstfree = *firstfree_ovflpage;

			PageSetLSN(page, lsn);
			MarkBufferDirty(metabuf);
		}
		if (BufferIsValid(metabuf))
			UnlockReleaseBuffer(metabuf);
	}
}

/*
 * replay delete operation of hash index
 */
static void
hash_xlog_delete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		deletebuf;
	Page		page;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation.  This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation.  If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_primary_bucket_page)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &deletebuf);
	}

	/* replay the record for deleting entries in bucket page */
	if (action == BLK_NEEDS_REDO)
	{
		char	   *ptr;
		Size		len;

		ptr = XLogRecGetBlockData(record, 1, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		/*
		 * Mark the page as not containing any LP_DEAD items only if
		 * clear_dead_marking flag is set to true.  See comments in
		 * hashbucketcleanup() for details.
		 */
		if (xldata->clear_dead_marking)
		{
			HashPageOpaque pageopaque;

			pageopaque = HashPageGetOpaque(page);
			pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}

/*
 * replay split cleanup flag operation for primary bucket page.
 */
static void
hash_xlog_split_cleanup(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Buffer		buffer;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		HashPageOpaque bucket_opaque;

		page = (Page) BufferGetPage(buffer);

		bucket_opaque = HashPageGetOpaque(page);
		bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}

/*
 * replay for update meta page
 */
static void
hash_xlog_update_meta_page(XLogReaderState *record)
{
	HashMetaPage metap;
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
	Buffer		metabuf;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);

		metap->hashm_ntuples = xldata->ntuples;

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}

/*
 * replay delete operation in hash index to remove
 * tuples marked as DEAD during index tuple insertion.
 */
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_vacuum_one_page *xldata;
	Buffer		buffer;
	Buffer		metabuf;
	Page		page;
	XLogRedoAction action;
	HashPageOpaque pageopaque;
	OffsetNumber *toDelete;

	xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
	toDelete = xldata->offsets;

	/*
	 * If we have any conflict processing to do, it must happen before we
	 * update the page.
	 *
	 * Hash index records that are marked as LP_DEAD and being removed during
	 * hash index tuple insertion can conflict with standby queries. You
	 * might think that vacuum records would conflict as well, but we've
	 * handled that already.  XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide
	 * the highest xid cleaned by the vacuum of the heap and so we can
	 * resolve any conflicts just once when that arrives.  After that we know
	 * that no conflicts exist from individual hash index vacuum records on
	 * that index.
	 */
	if (InHotStandby)
	{
		RelFileLocator rlocator;

		XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
		ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
											xldata->isCatalogRel,
											rlocator);
	}

	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = (Page) BufferGetPage(buffer);

		PageIndexMultiDelete(page, toDelete, xldata->ntuples);

		/*
		 * Mark the page as not containing any LP_DEAD items.  See comments
		 * in _hash_vacuum_one_page() for details.
		 */
		pageopaque = HashPageGetOpaque(page);
		pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		metapage;
		HashMetaPage metap;

		metapage = BufferGetPage(metabuf);
		metap = HashPageGetMeta(metapage);

		metap->hashm_ntuples -= xldata->ntuples;

		PageSetLSN(metapage, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}

void
hash_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	switch (info)
	{
		case XLOG_HASH_INIT_META_PAGE:
			hash_xlog_init_meta_page(record);
			break;
		case XLOG_HASH_INIT_BITMAP_PAGE:
			hash_xlog_init_bitmap_page(record);
			break;
		case XLOG_HASH_INSERT:
			hash_xlog_insert(record);
			break;
		case XLOG_HASH_ADD_OVFL_PAGE:
			hash_xlog_add_ovfl_page(record);
			break;
		case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
			hash_xlog_split_allocate_page(record);
			break;
		case XLOG_HASH_SPLIT_PAGE:
			hash_xlog_split_page(record);
			break;
		case XLOG_HASH_SPLIT_COMPLETE:
			hash_xlog_split_complete(record);
			break;
		case XLOG_HASH_MOVE_PAGE_CONTENTS:
			hash_xlog_move_page_contents(record);
			break;
		case XLOG_HASH_SQUEEZE_PAGE:
			hash_xlog_squeeze_page(record);
			break;
		case XLOG_HASH_DELETE:
			hash_xlog_delete(record);
			break;
		case XLOG_HASH_SPLIT_CLEANUP:
			hash_xlog_split_cleanup(record);
			break;
		case XLOG_HASH_UPDATE_META_PAGE:
			hash_xlog_update_meta_page(record);
			break;
		case XLOG_HASH_VACUUM_ONE_PAGE:
			hash_xlog_vacuum_one_page(record);
			break;
		default:
			elog(PANIC, "hash_redo: unknown op code %u", info);
	}
}

/*
 * Mask a hash page before performing consistency checks on it.
 */
void
hash_mask(char *pagedata, BlockNumber blkno)
{
	Page		page = (Page) pagedata;
	HashPageOpaque opaque;
	int			pagetype;

	mask_page_lsn_and_checksum(page);

	mask_page_hint_bits(page);
	mask_unused_space(page);

	opaque = HashPageGetOpaque(page);

	pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
	if (pagetype == LH_UNUSED_PAGE)
	{
		/*
		 * Mask everything on a UNUSED page.
		 */
		mask_page_content(page);
	}
	else if (pagetype == LH_BUCKET_PAGE ||
			 pagetype == LH_OVERFLOW_PAGE)
	{
		/*
		 * In hash bucket and overflow pages, it is possible to modify the
		 * LP_FLAGS without emitting any WAL record.  Hence, mask the line
		 * pointer flags.  See hashgettuple(), _hash_kill_items() for
		 * details.
		 */
		mask_lp_flags(page);
	}

	/*
	 * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
	 * unlogged.  So, mask it.  See _hash_kill_items() for details.
	 */
	opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
}
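
/*
 * Illustrative sketch, not part of upstream hash_xlog.c: how a mask routine
 * such as hash_mask() is meant to be used by WAL consistency checking
 * (wal_consistency_checking).  Both the page image logged on the primary
 * and the page reconstructed by replay are masked, so fields that may
 * legitimately differ (LSN/checksum, hint bits, unused space) cannot cause
 * false mismatches.  The function name is hypothetical.
 */
static void
example_check_page_consistency(char *primary_img, char *replayed_img,
							   BlockNumber blkno)
{
	hash_mask(primary_img, blkno);
	hash_mask(replayed_img, blkno);

	/* after masking, any remaining difference is a real inconsistency */
	if (memcmp(primary_img, replayed_img, BLCKSZ) != 0)
		elog(FATAL, "inconsistent page found, block %u", blkno);
}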