hash_xlog.c
/*-------------------------------------------------------------------------
 *
 * hash_xlog.c
 *	  WAL replay logic for hash index.
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hash_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "storage/procarray.h"

/*
 * replay a hash index meta page
 */
static void
hash_xlog_init_meta_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Page		page;
	Buffer		metabuf;
	ForkNumber	forknum;

	xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);

	/* create the index's metapage */
	metabuf = XLogInitBufferForRedo(record, 0);
	Assert(BufferIsValid(metabuf));
	_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
						  xlrec->ffactor, true);
	page = (Page) BufferGetPage(metabuf);
	PageSetLSN(page, lsn);
	MarkBufferDirty(metabuf);

	/*
	 * Force the on-disk state of init forks to always be in sync with the
	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
	 * special handling for init forks as create index operations don't log a
	 * full page image of the metapage.
	 */
	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(metabuf);

	/* all done */
	UnlockReleaseBuffer(metabuf);
}
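
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): the redo routines in this file all follow the same
 * idempotency rule -- a change is applied only when the page's LSN is older
 * than the WAL record's end LSN, and PageSetLSN() stamps the page afterwards
 * so replaying the same record twice is a no-op.  The toy types and
 * needs_redo() helper below are invented stand-ins for that rule, not
 * PostgreSQL APIs.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t ToyLsn;

typedef struct ToyPage
{
	ToyLsn		lsn;			/* LSN of the last record applied */
	int			payload;		/* stand-in for page contents */
} ToyPage;

static bool
needs_redo(const ToyPage *page, ToyLsn record_end_lsn)
{
	/* mirrors the BLK_NEEDS_REDO decision: older page => apply the record */
	return page->lsn < record_end_lsn;
}

static void
toy_replay(ToyPage *page, ToyLsn record_end_lsn, int new_payload)
{
	if (needs_redo(page, record_end_lsn))
	{
		page->payload = new_payload;
		page->lsn = record_end_lsn; /* the moral equivalent of PageSetLSN() */
	}
}

int
main(void)
{
	ToyPage		p = {100, 0};

	toy_replay(&p, 200, 42);	/* applied: page LSN 100 < record LSN 200 */
	toy_replay(&p, 200, 99);	/* replaying the same record changes nothing */
	assert(p.lsn == 200 && p.payload == 42);
	return 0;
}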

/*
 * replay a hash index bitmap page
 */
static void
hash_xlog_init_bitmap_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Buffer		bitmapbuf;
	Buffer		metabuf;
	Page		page;
	HashMetaPage metap;
	uint32		num_buckets;
	ForkNumber	forknum;

	xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);

	/*
	 * Initialize bitmap page
	 */
	bitmapbuf = XLogInitBufferForRedo(record, 0);
	_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
	PageSetLSN(BufferGetPage(bitmapbuf), lsn);
	MarkBufferDirty(bitmapbuf);

	/*
	 * Force the on-disk state of init forks to always be in sync with the
	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
	 * special handling for init forks as create index operations don't log a
	 * full page image of the metapage.
	 */
	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(bitmapbuf);
	UnlockReleaseBuffer(bitmapbuf);

	/* add the new bitmap page to the metapage's list of bitmaps */
	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		/*
		 * Note: in normal operation, we'd update the metapage while still
		 * holding lock on the bitmap page.  But during replay it's not
		 * necessary to hold that lock, since nobody can see it yet; the
		 * creating transaction hasn't yet committed.
		 */
		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);

		num_buckets = metap->hashm_maxbucket + 1;
		metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
		metap->hashm_nmaps++;

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);

		XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
		if (forknum == INIT_FORKNUM)
			FlushOneBuffer(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}

/*
 * replay a hash index insert without split
 */
static void
hash_xlog_insert(XLogReaderState *record)
{
	HashMetaPage metap;
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Size		datalen;
		char	   *datapos = XLogRecGetBlockData(record, 0, &datalen);

		page = BufferGetPage(buffer);

		if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
						false, false) == InvalidOffsetNumber)
			elog(PANIC, "hash_xlog_insert: failed to add item");

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
	{
		/*
		 * Note: in normal operation, we'd update the metapage while still
		 * holding lock on the page we inserted into.  But during replay it's
		 * not necessary to hold that lock, since no other index updates can
		 * be happening concurrently.
		 */
		page = BufferGetPage(buffer);
		metap = HashPageGetMeta(page);
		metap->hashm_ntuples += 1;

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
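
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): replay must reproduce the page exactly, so the tuple is
 * re-added at the offset recorded at do-time (xlrec->offnum above), and
 * failure is treated as corruption rather than retried elsewhere.  The toy
 * fixed-capacity "page" below illustrates that insert-at-recorded-slot
 * contract.
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>

#define TOY_MAX_ITEMS 8

typedef struct ToyItemPage
{
	int			nitems;
	int			items[TOY_MAX_ITEMS];
} ToyItemPage;

/* insert at a 0-based slot recorded in the WAL record; false = "PANIC" */
static bool
toy_add_item_at(ToyItemPage *page, int slot, int value)
{
	if (page->nitems >= TOY_MAX_ITEMS || slot < 0 || slot > page->nitems)
		return false;			/* the caller would elog(PANIC, ...) here */
	memmove(&page->items[slot + 1], &page->items[slot],
			(page->nitems - slot) * sizeof(int));
	page->items[slot] = value;
	page->nitems++;
	return true;
}

int
main(void)
{
	ToyItemPage page = {0, {0}};
	bool		ok;

	ok = toy_add_item_at(&page, 0, 10);
	ok = ok && toy_add_item_at(&page, 1, 30);
	ok = ok && toy_add_item_at(&page, 1, 20);	/* replayed into the middle */
	assert(ok && page.nitems == 3 && page.items[1] == 20);
	return 0;
}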

/*
 * replay addition of overflow page for hash index
 */
static void
hash_xlog_add_ovfl_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
	Buffer		leftbuf;
	Buffer		ovflbuf;
	Buffer		metabuf;
	BlockNumber leftblk;
	BlockNumber rightblk;
	BlockNumber newmapblk = InvalidBlockNumber;
	Page		ovflpage;
	HashPageOpaque ovflopaque;
	uint32	   *num_bucket;
	char	   *data;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	bool		new_bmpage = false;

	XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
	XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);

	ovflbuf = XLogInitBufferForRedo(record, 0);
	Assert(BufferIsValid(ovflbuf));

	data = XLogRecGetBlockData(record, 0, &datalen);
	num_bucket = (uint32 *) data;
	Assert(datalen == sizeof(uint32));
	_hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
				  true);
	/* update backlink */
	ovflpage = BufferGetPage(ovflbuf);
	ovflopaque = HashPageGetOpaque(ovflpage);
	ovflopaque->hasho_prevblkno = leftblk;

	PageSetLSN(ovflpage, lsn);
	MarkBufferDirty(ovflbuf);

	if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
	{
		Page		leftpage;
		HashPageOpaque leftopaque;

		leftpage = BufferGetPage(leftbuf);
		leftopaque = HashPageGetOpaque(leftpage);
		leftopaque->hasho_nextblkno = rightblk;

		PageSetLSN(leftpage, lsn);
		MarkBufferDirty(leftbuf);
	}

	if (BufferIsValid(leftbuf))
		UnlockReleaseBuffer(leftbuf);
	UnlockReleaseBuffer(ovflbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the overflow pages.  But during replay it's not
	 * necessary to hold those locks, since no other index updates can be
	 * happening concurrently.
	 */
	if (XLogRecHasBlockRef(record, 2))
	{
		Buffer		mapbuffer;

		if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
		{
			Page		mappage = (Page) BufferGetPage(mapbuffer);
			uint32	   *freep = NULL;
			uint32	   *bitmap_page_bit;

			freep = HashPageGetBitmap(mappage);

			data = XLogRecGetBlockData(record, 2, &datalen);
			bitmap_page_bit = (uint32 *) data;

			SETBIT(freep, *bitmap_page_bit);

			PageSetLSN(mappage, lsn);
			MarkBufferDirty(mapbuffer);
		}
		if (BufferIsValid(mapbuffer))
			UnlockReleaseBuffer(mapbuffer);
	}

	if (XLogRecHasBlockRef(record, 3))
	{
		Buffer		newmapbuf;

		newmapbuf = XLogInitBufferForRedo(record, 3);

		_hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);

		new_bmpage = true;
		newmapblk = BufferGetBlockNumber(newmapbuf);

		MarkBufferDirty(newmapbuf);
		PageSetLSN(BufferGetPage(newmapbuf), lsn);

		UnlockReleaseBuffer(newmapbuf);
	}

	if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
	{
		HashMetaPage metap;
		Page		page;
		uint32	   *firstfree_ovflpage;

		data = XLogRecGetBlockData(record, 4, &datalen);
		firstfree_ovflpage = (uint32 *) data;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_firstfree = *firstfree_ovflpage;

		if (!xlrec->bmpage_found)
		{
			metap->hashm_spares[metap->hashm_ovflpoint]++;

			if (new_bmpage)
			{
				Assert(BlockNumberIsValid(newmapblk));

				metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
				metap->hashm_nmaps++;
				metap->hashm_spares[metap->hashm_ovflpoint]++;
			}
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
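
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): SETBIT above (and CLRBIT in hash_xlog_squeeze_page below)
 * treat the bitmap page as an array of 32-bit words, where bit i tracks
 * whether overflow page i is in use.  The TOY_* macros below restate that
 * word-addressed bitmap technique; they are not the actual PostgreSQL
 * macros.
 */
#include <assert.h>
#include <stdint.h>

#define TOY_BITS_PER_WORD 32
#define TOY_SETBIT(w, i) ((w)[(i) / TOY_BITS_PER_WORD] |= (uint32_t) 1 << ((i) % TOY_BITS_PER_WORD))
#define TOY_CLRBIT(w, i) ((w)[(i) / TOY_BITS_PER_WORD] &= ~((uint32_t) 1 << ((i) % TOY_BITS_PER_WORD)))
#define TOY_ISSET(w, i)  (((w)[(i) / TOY_BITS_PER_WORD] >> ((i) % TOY_BITS_PER_WORD)) & 1)

int
main(void)
{
	uint32_t	freep[4] = {0};	/* room for 128 overflow-page bits */

	TOY_SETBIT(freep, 37);		/* mark overflow page 37 in use */
	assert(TOY_ISSET(freep, 37));
	TOY_CLRBIT(freep, 37);		/* free it again, as squeeze replay does */
	assert(!TOY_ISSET(freep, 37));
	return 0;
}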

/*
 * replay allocation of page for split operation
 */
static void
hash_xlog_split_allocate_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	Buffer		metabuf;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	char	   *data;
	XLogRedoAction action;

	/*
	 * To be consistent with normal operation, here we take cleanup locks on
	 * both the old and new buckets even though there can't be any concurrent
	 * inserts.
	 */

	/* replay the record for old bucket */
	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the special space is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		oldopaque->hasho_flag = xlrec->old_bucket_flag;
		oldopaque->hasho_prevblkno = xlrec->new_bucket;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}

	/* replay the record for new bucket */
	XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
								  &newbuf);
	_hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
				  xlrec->new_bucket_flag, true);
	MarkBufferDirty(newbuf);
	PageSetLSN(BufferGetPage(newbuf), lsn);

	/*
	 * We could release the lock on the old bucket earlier as well, but doing
	 * it here is consistent with normal operation.
	 */
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);

	/*
	 * Note: in normal operation, we'd update the meta page while still
	 * holding lock on the old and new bucket pages.  But during replay it's
	 * not necessary to hold those locks, since no other bucket splits can be
	 * happening concurrently.
	 */

	/* replay the record for metapage changes */
	if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		HashMetaPage metap;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_maxbucket = xlrec->new_bucket;

		data = XLogRecGetBlockData(record, 2, &datalen);

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
		{
			uint32		lowmask;
			uint32	   *highmask;

			/* extract low and high masks. */
			memcpy(&lowmask, data, sizeof(uint32));
			highmask = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_lowmask = lowmask;
			metap->hashm_highmask = *highmask;

			data += sizeof(uint32) * 2;
		}

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
		{
			uint32		ovflpoint;
			uint32	   *ovflpages;

			/* extract information of overflow pages. */
			memcpy(&ovflpoint, data, sizeof(uint32));
			ovflpages = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_spares[ovflpoint] = *ovflpages;
			metap->hashm_ovflpoint = ovflpoint;
		}

		MarkBufferDirty(metabuf);
		PageSetLSN(BufferGetPage(metabuf), lsn);
	}

	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
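
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): the metapage payload above is a packed byte stream --
 * optional (lowmask, highmask) and (ovflpoint, ovflpages) uint32 pairs laid
 * end to end, consumed by advancing a char pointer.  The toy_read_u32()
 * helper below restates that technique, using memcpy for every field to
 * sidestep alignment concerns.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* read one uint32 from a byte stream and advance the cursor */
static uint32_t
toy_read_u32(const char **cursor)
{
	uint32_t	v;

	memcpy(&v, *cursor, sizeof(uint32_t));
	*cursor += sizeof(uint32_t);
	return v;
}

int
main(void)
{
	char		payload[2 * sizeof(uint32_t)];
	uint32_t	lowmask = 7,
				highmask = 15;
	const char *cursor = payload;

	/* pack the two fields the way the do-time code would */
	memcpy(payload, &lowmask, sizeof(uint32_t));
	memcpy(payload + sizeof(uint32_t), &highmask, sizeof(uint32_t));

	/* unpack them at "replay" */
	assert(toy_read_u32(&cursor) == 7);
	assert(toy_read_u32(&cursor) == 15);
	return 0;
}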

/*
 * replay of split operation
 */
static void
hash_xlog_split_page(XLogReaderState *record)
{
	Buffer		buf;

	if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
		elog(ERROR, "Hash split record did not contain a full-page image");

	UnlockReleaseBuffer(buf);
}

/*
 * replay completion of split operation
 */
static void
hash_xlog_split_complete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	XLogRedoAction action;

	/* replay the record for old bucket */
	action = XLogReadBufferForRedo(record, 0, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		oldopaque->hasho_flag = xlrec->old_bucket_flag;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);

	/* replay the record for new bucket */
	action = XLogReadBufferForRedo(record, 1, &newbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		newpage;
		HashPageOpaque nopaque;

		newpage = BufferGetPage(newbuf);
		nopaque = HashPageGetOpaque(newpage);

		nopaque->hasho_flag = xlrec->new_bucket_flag;

		PageSetLSN(newpage, lsn);
		MarkBufferDirty(newbuf);
	}
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);
}

/*
 * replay move of page contents for squeeze operation of hash index
 */
static void
hash_xlog_move_page_contents(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		deletebuf = InvalidBuffer;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on the primary bucket page before we
	 * start the actual replay operation.  This ensures that no scan can
	 * start, and no scan can already be in progress, during the replay of
	 * this operation.  If we allowed scans during this operation, they could
	 * miss some records or see the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * We don't care about the return value, as the purpose of reading
		 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &writebuf);
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;

		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}
		}

		/*
		 * The number of tuples inserted must be the same as requested in the
		 * REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		PageSetLSN(writepage, lsn);
		MarkBufferDirty(writebuf);
	}

	/* replay the record for deleting entries from overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		char	   *ptr;
		Size		len;

		ptr = XLogRecGetBlockData(record, 2, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}

	/*
	 * Replay is complete; now we can release the buffers.  We release the
	 * locks at the end of the replay operation to ensure that we hold the
	 * lock on the primary bucket page until the end of the operation.  We
	 * could optimize by releasing the lock on the write buffer as soon as
	 * its part of the operation is complete, when it is not the same as the
	 * primary bucket page, but that doesn't seem worth complicating the
	 * code.
	 */
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}
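
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): the block-1 payload parsed above is laid out as an array of
 * ntups OffsetNumbers followed by the variable-length tuples themselves,
 * each stepped over by its (aligned) size until the payload is exhausted.
 * The toy parser below restates that layout with uint16_t slots and
 * length-prefixed items.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Walk a payload of the form: uint16_t slots[ntups], then ntups items, each
 * a uint16_t length followed by that many bytes.  Returns the item count.
 */
static int
toy_parse_payload(const char *begin, size_t datalen, uint16_t ntups)
{
	const char *data = begin + ntups * sizeof(uint16_t); /* skip slot array */
	int			nparsed = 0;

	while ((size_t) (data - begin) < datalen)
	{
		uint16_t	itemlen;

		memcpy(&itemlen, data, sizeof(uint16_t));
		data += sizeof(uint16_t) + itemlen; /* step over one item */
		nparsed++;
	}
	return nparsed;
}

int
main(void)
{
	char		payload[16];
	uint16_t	slot = 5,
				itemlen = 3;
	size_t		off = 0;

	memcpy(payload + off, &slot, sizeof(slot));			/* slot array, ntups = 1 */
	off += sizeof(slot);
	memcpy(payload + off, &itemlen, sizeof(itemlen));	/* item length */
	off += sizeof(itemlen);
	memcpy(payload + off, "abc", 3);					/* item body */
	off += 3;

	assert(toy_parse_payload(payload, off, 1) == 1);
	return 0;
}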

/*
 * replay squeeze page operation of hash index
 */
static void
hash_xlog_squeeze_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		ovflbuf;
	Buffer		prevbuf = InvalidBuffer;
	Buffer		mapbuf;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on the primary bucket page before we
	 * start the actual replay operation.  This ensures that no scan can
	 * start, and no scan can already be in progress, during the replay of
	 * this operation.  If we allowed scans during this operation, they could
	 * miss some records or see the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * We don't care about the return value, as the purpose of reading
		 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
			action = XLogReadBufferForRedo(record, 1, &writebuf);
		else
			action = BLK_NOTFOUND;
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;

		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}
		}

		/*
		 * The number of tuples inserted must be the same as requested in the
		 * REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		/*
		 * If the page to which we are adding tuples is the page previous to
		 * the freed overflow page, update its nextblkno.
		 */
		if (xldata->is_prev_bucket_same_wrt)
		{
			HashPageOpaque writeopaque = HashPageGetOpaque(writepage);

			writeopaque->hasho_nextblkno = xldata->nextblkno;
		}

		PageSetLSN(writepage, lsn);
		MarkBufferDirty(writebuf);
	}

	/* replay the record for initializing overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
	{
		Page		ovflpage;
		HashPageOpaque ovflopaque;

		ovflpage = BufferGetPage(ovflbuf);

		_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));

		ovflopaque = HashPageGetOpaque(ovflpage);

		ovflopaque->hasho_prevblkno = InvalidBlockNumber;
		ovflopaque->hasho_nextblkno = InvalidBlockNumber;
		ovflopaque->hasho_bucket = InvalidBucket;
		ovflopaque->hasho_flag = LH_UNUSED_PAGE;
		ovflopaque->hasho_page_id = HASHO_PAGE_ID;

		PageSetLSN(ovflpage, lsn);
		MarkBufferDirty(ovflbuf);
	}
	if (BufferIsValid(ovflbuf))
		UnlockReleaseBuffer(ovflbuf);

	/* replay the record for page previous to the freed overflow page */
	if (!xldata->is_prev_bucket_same_wrt &&
		XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
	{
		Page		prevpage = BufferGetPage(prevbuf);
		HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);

		prevopaque->hasho_nextblkno = xldata->nextblkno;

		PageSetLSN(prevpage, lsn);
		MarkBufferDirty(prevbuf);
	}
	if (BufferIsValid(prevbuf))
		UnlockReleaseBuffer(prevbuf);

	/* replay the record for page next to the freed overflow page */
	if (XLogRecHasBlockRef(record, 4))
	{
		Buffer		nextbuf;

		if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
		{
			Page		nextpage = BufferGetPage(nextbuf);
			HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);

			nextopaque->hasho_prevblkno = xldata->prevblkno;

			PageSetLSN(nextpage, lsn);
			MarkBufferDirty(nextbuf);
		}
		if (BufferIsValid(nextbuf))
			UnlockReleaseBuffer(nextbuf);
	}

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the primary bucket page and overflow pages.  But
	 * during replay it's not necessary to hold those locks, since no other
	 * index updates can be happening concurrently.
	 */
	/* replay the record for bitmap page */
	if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
	{
		Page		mappage = (Page) BufferGetPage(mapbuf);
		uint32	   *freep = NULL;
		char	   *data;
		uint32	   *bitmap_page_bit;
		Size		datalen;

		freep = HashPageGetBitmap(mappage);

		data = XLogRecGetBlockData(record, 5, &datalen);
		bitmap_page_bit = (uint32 *) data;

		CLRBIT(freep, *bitmap_page_bit);

		PageSetLSN(mappage, lsn);
		MarkBufferDirty(mapbuf);
	}
	if (BufferIsValid(mapbuf))
		UnlockReleaseBuffer(mapbuf);

	/* replay the record for meta page */
	if (XLogRecHasBlockRef(record, 6))
	{
		Buffer		metabuf;

		if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
		{
			HashMetaPage metap;
			Page		page;
			char	   *data;
			uint32	   *firstfree_ovflpage;
			Size		datalen;

			data = XLogRecGetBlockData(record, 6, &datalen);
			firstfree_ovflpage = (uint32 *) data;

			page = BufferGetPage(metabuf);
			metap = HashPageGetMeta(page);
			metap->hashm_firstfree = *firstfree_ovflpage;

			PageSetLSN(page, lsn);
			MarkBufferDirty(metabuf);
		}
		if (BufferIsValid(metabuf))
			UnlockReleaseBuffer(metabuf);
	}
}
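
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): freeing an overflow page above is an unlink from a doubly
 * linked chain -- the previous page's nextblkno and the next page's
 * prevblkno are rewired around the freed page, whose own links are reset to
 * InvalidBlockNumber.  The toy list below restates that rewiring with
 * in-memory pointers instead of block numbers.
 */
#include <assert.h>
#include <stddef.h>

typedef struct ToyOvflPage
{
	struct ToyOvflPage *prev;
	struct ToyOvflPage *next;
} ToyOvflPage;

/* unlink "victim" from its chain, mirroring the prev/next updates above */
static void
toy_unlink(ToyOvflPage *victim)
{
	if (victim->prev)
		victim->prev->next = victim->next;	/* prevopaque->hasho_nextblkno */
	if (victim->next)
		victim->next->prev = victim->prev;	/* nextopaque->hasho_prevblkno */
	victim->prev = victim->next = NULL;		/* freed page links invalidated */
}

int
main(void)
{
	ToyOvflPage a = {NULL, NULL},
				b = {NULL, NULL},
				c = {NULL, NULL};

	a.next = &b;
	b.prev = &a;
	b.next = &c;
	c.prev = &b;

	toy_unlink(&b);				/* squeeze frees the middle page */
	assert(a.next == &c && c.prev == &a);
	assert(b.prev == NULL && b.next == NULL);
	return 0;
}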

/*
 * replay delete operation of hash index
 */
static void
hash_xlog_delete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		deletebuf;
	Page		page;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on the primary bucket page before we
	 * start the actual replay operation.  This ensures that no scan can
	 * start, and no scan can already be in progress, during the replay of
	 * this operation.  If we allowed scans during this operation, they could
	 * miss some records or see the same record multiple times.
	 */
	if (xldata->is_primary_bucket_page)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
	else
	{
		/*
		 * We don't care about the return value, as the purpose of reading
		 * bucketbuf is to ensure a cleanup lock on the primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &deletebuf);
	}

	/* replay the record for deleting entries in bucket page */
	if (action == BLK_NEEDS_REDO)
	{
		char	   *ptr;
		Size		len;

		ptr = XLogRecGetBlockData(record, 1, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		/*
		 * Mark the page as not containing any LP_DEAD items only if the
		 * clear_dead_marking flag is set to true.  See comments in
		 * hashbucketcleanup() for details.
		 */
		if (xldata->clear_dead_marking)
		{
			HashPageOpaque pageopaque;

			pageopaque = HashPageGetOpaque(page);
			pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}
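
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): PageIndexMultiDelete, used above, removes a whole sorted
 * batch of line pointers in one pass rather than deleting items one at a
 * time.  The toy_multi_delete() helper below restates that single-pass
 * compaction over a plain array, given a sorted list of 0-based positions
 * to drop.
 */
#include <assert.h>

/* compact "items" in place, skipping the sorted positions in "drop" */
static int
toy_multi_delete(int *items, int nitems, const int *drop, int ndrop)
{
	int			src,
				dst = 0,
				d = 0;

	for (src = 0; src < nitems; src++)
	{
		if (d < ndrop && drop[d] == src)
		{
			d++;				/* this slot is deleted; don't copy it */
			continue;
		}
		items[dst++] = items[src];
	}
	return dst;					/* new item count */
}

int
main(void)
{
	int			items[] = {10, 20, 30, 40, 50};
	int			drop[] = {1, 3};	/* delete 20 and 40 */
	int			n = toy_multi_delete(items, 5, drop, 2);

	assert(n == 3);
	assert(items[0] == 10 && items[1] == 30 && items[2] == 50);
	return 0;
}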

/*
 * replay split cleanup flag operation for primary bucket page.
 */
static void
hash_xlog_split_cleanup(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Buffer		buffer;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		HashPageOpaque bucket_opaque;

		page = (Page) BufferGetPage(buffer);

		bucket_opaque = HashPageGetOpaque(page);
		bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}

/*
 * replay for update meta page
 */
static void
hash_xlog_update_meta_page(XLogReaderState *record)
{
	HashMetaPage metap;
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
	Buffer		metabuf;
	Page		page;

	if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);

		metap->hashm_ntuples = xldata->ntuples;

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}

/*
 * replay delete operation in hash index to remove
 * tuples marked as DEAD during index tuple insertion.
 */
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_vacuum_one_page *xldata;
	Buffer		buffer;
	Buffer		metabuf;
	Page		page;
	XLogRedoAction action;
	HashPageOpaque pageopaque;
	OffsetNumber *toDelete;

	xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
	toDelete = xldata->offsets;

	/*
	 * If we have any conflict processing to do, it must happen before we
	 * update the page.
	 *
	 * Hash index records that are marked as LP_DEAD and being removed during
	 * hash index tuple insertion can conflict with standby queries.  You
	 * might think that vacuum records would conflict as well, but we've
	 * handled that already.  XLOG_HEAP2_PRUNE records provide the highest
	 * xid cleaned by the vacuum of the heap, so we can resolve any conflicts
	 * just once when that arrives.  After that we know that no conflicts
	 * exist from individual hash index vacuum records on that index.
	 */
	if (InHotStandby)
	{
		RelFileLocator rlocator;

		XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
		ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
											xldata->isCatalogRel,
											rlocator);
	}

	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = (Page) BufferGetPage(buffer);

		PageIndexMultiDelete(page, toDelete, xldata->ntuples);

		/*
		 * Mark the page as not containing any LP_DEAD items.  See comments
		 * in _hash_vacuum_one_page() for details.
		 */
		pageopaque = HashPageGetOpaque(page);
		pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		metapage;
		HashMetaPage metap;

		metapage = BufferGetPage(metabuf);
		metap = HashPageGetMeta(metapage);

		metap->hashm_ntuples -= xldata->ntuples;

		PageSetLSN(metapage, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}

void
hash_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	switch (info)
	{
		case XLOG_HASH_INIT_META_PAGE:
			hash_xlog_init_meta_page(record);
			break;
		case XLOG_HASH_INIT_BITMAP_PAGE:
			hash_xlog_init_bitmap_page(record);
			break;
		case XLOG_HASH_INSERT:
			hash_xlog_insert(record);
			break;
		case XLOG_HASH_ADD_OVFL_PAGE:
			hash_xlog_add_ovfl_page(record);
			break;
		case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
			hash_xlog_split_allocate_page(record);
			break;
		case XLOG_HASH_SPLIT_PAGE:
			hash_xlog_split_page(record);
			break;
		case XLOG_HASH_SPLIT_COMPLETE:
			hash_xlog_split_complete(record);
			break;
		case XLOG_HASH_MOVE_PAGE_CONTENTS:
			hash_xlog_move_page_contents(record);
			break;
		case XLOG_HASH_SQUEEZE_PAGE:
			hash_xlog_squeeze_page(record);
			break;
		case XLOG_HASH_DELETE:
			hash_xlog_delete(record);
			break;
		case XLOG_HASH_SPLIT_CLEANUP:
			hash_xlog_split_cleanup(record);
			break;
		case XLOG_HASH_UPDATE_META_PAGE:
			hash_xlog_update_meta_page(record);
			break;
		case XLOG_HASH_VACUUM_ONE_PAGE:
			hash_xlog_vacuum_one_page(record);
			break;
		default:
			elog(PANIC, "hash_redo: unknown op code %u", info);
	}
}
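
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): hash_redo() is the access method's single entry point for
 * recovery -- the info bits of each WAL record select one redo routine, and
 * an unknown opcode is a PANIC because the WAL stream is then unreadable.
 * The toy dispatcher below restates that pattern with invented opcodes and
 * handlers.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum
{
	TOY_OP_INSERT = 0x10,
	TOY_OP_DELETE = 0x20
};

static void toy_redo_insert(void) { puts("redo insert"); }
static void toy_redo_delete(void) { puts("redo delete"); }

static void
toy_redo(uint8_t info)
{
	switch (info)
	{
		case TOY_OP_INSERT:
			toy_redo_insert();
			break;
		case TOY_OP_DELETE:
			toy_redo_delete();
			break;
		default:
			fprintf(stderr, "toy_redo: unknown op code %u\n", info);
			abort();			/* the analogue of elog(PANIC, ...) */
	}
}

int
main(void)
{
	toy_redo(TOY_OP_INSERT);
	toy_redo(TOY_OP_DELETE);
	return 0;
}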

/*
 * Mask a hash page before performing consistency checks on it.
 */
void
hash_mask(char *pagedata, BlockNumber blkno)
{
	Page		page = (Page) pagedata;
	HashPageOpaque opaque;
	int			pagetype;

	mask_page_lsn_and_checksum(page);

	mask_page_hint_bits(page);
	mask_unused_space(page);

	opaque = HashPageGetOpaque(page);

	pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
	if (pagetype == LH_UNUSED_PAGE)
	{
		/*
		 * Mask everything on a UNUSED page.
		 */
		mask_page_content(page);
	}
	else if (pagetype == LH_BUCKET_PAGE ||
			 pagetype == LH_OVERFLOW_PAGE)
	{
		/*
		 * In hash bucket and overflow pages, it is possible to modify the
		 * LP_FLAGS without emitting any WAL record.  Hence, mask the line
		 * pointer flags.  See hashgettuple(), _hash_kill_items() for
		 * details.
		 */
		mask_lp_flags(page);
	}

	/*
	 * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
	 * unlogged.  So, mask it.  See _hash_kill_items() for details.
	 */
	opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
}
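
/*
 * Illustrative sketch (standalone, hypothetical; not part of the original
 * hash_xlog.c): masking supports WAL consistency checking -- fields that may
 * legitimately differ between primary and standby (LSN, checksum, hint bits)
 * are overwritten with a fixed value in both copies before the pages are
 * compared byte by byte.  The toy page and toy_mask() below restate that
 * idea.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef struct ToyMaskPage
{
	uint64_t	lsn;			/* may differ; masked */
	uint16_t	checksum;		/* may differ; masked */
	uint16_t	hint_flags;		/* set without WAL; masked */
	char		contents[32];	/* must match exactly */
} ToyMaskPage;

static void
toy_mask(ToyMaskPage *p)
{
	p->lsn = 0;
	p->checksum = 0;
	p->hint_flags = 0;
}

int
main(void)
{
	ToyMaskPage primary,
				standby;

	memset(&primary, 0, sizeof(primary));	/* zero padding bytes too */
	memset(&standby, 0, sizeof(standby));
	strcpy(primary.contents, "same data");
	strcpy(standby.contents, "same data");
	primary.hint_flags = 0x8000;	/* hint bit set on primary only */
	standby.lsn = 12345;			/* different replay position */

	toy_mask(&primary);
	toy_mask(&standby);
	assert(memcmp(&primary, &standby, sizeof(ToyMaskPage)) == 0);
	return 0;
}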