PostgreSQL Source Code  git master
hash_xlog.c
1 /*-------------------------------------------------------------------------
2  *
3  * hash_xlog.c
4  * WAL replay logic for hash index.
5  *
6  *
7  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hash_xlog.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/bufmask.h"
18 #include "access/hash.h"
19 #include "access/hash_xlog.h"
20 #include "access/transam.h"
21 #include "access/xlog.h"
22 #include "access/xlogutils.h"
23 #include "miscadmin.h"
24 #include "storage/procarray.h"
25 
26 /*
27  * replay a hash index meta page
28  */
29 static void
30 hash_xlog_init_meta_page(XLogReaderState *record)
31 {
32  XLogRecPtr lsn = record->EndRecPtr;
33  Page page;
34  Buffer metabuf;
35  ForkNumber forknum;
36 
37  xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
38 
39  /* create the index's metapage */
40  metabuf = XLogInitBufferForRedo(record, 0);
41  Assert(BufferIsValid(metabuf));
42  _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
43  xlrec->ffactor, true);
44  page = (Page) BufferGetPage(metabuf);
45  PageSetLSN(page, lsn);
46  MarkBufferDirty(metabuf);
47 
48  /*
49  * Force the on-disk state of init forks to always be in sync with the
50  * state in shared buffers. See XLogReadBufferForRedoExtended. We need
51  * special handling for init forks as create index operations don't log a
52  * full page image of the metapage.
53  */
54  XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
55  if (forknum == INIT_FORKNUM)
56  FlushOneBuffer(metabuf);
57 
58  /* all done */
59  UnlockReleaseBuffer(metabuf);
60 }
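
For orientation, the record replayed above is assembled on the do side when the index is created. The following is a condensed sketch modeled on the logging in _hash_init() (hashpage.c); it is simplified rather than verbatim, so treat names and flags as illustrative. The REGBUF_WILL_INIT flag is what entitles replay to reinitialize the page via XLogInitBufferForRedo().

    /* sketch of the do-side logging (simplified from _hash_init) */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_init_meta_page xlrec;
        XLogRecPtr  recptr;

        xlrec.num_tuples = num_tuples;
        xlrec.procid = metap->hashm_procid;
        xlrec.ffactor = metap->hashm_ffactor;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfHashInitMetaPage);
        XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }
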
61 
62 /*
63  * replay a hash index bitmap page
64  */
65 static void
66 hash_xlog_init_bitmap_page(XLogReaderState *record)
67 {
68  XLogRecPtr lsn = record->EndRecPtr;
69  Buffer bitmapbuf;
70  Buffer metabuf;
71  Page page;
72  HashMetaPage metap;
73  uint32 num_buckets;
74  ForkNumber forknum;
75 
76  xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
77 
78  /*
79  * Initialize bitmap page
80  */
81  bitmapbuf = XLogInitBufferForRedo(record, 0);
82  _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
83  PageSetLSN(BufferGetPage(bitmapbuf), lsn);
84  MarkBufferDirty(bitmapbuf);
85 
86  /*
87  * Force the on-disk state of init forks to always be in sync with the
88  * state in shared buffers. See XLogReadBufferForRedoExtended. We need
89  * special handling for init forks as create index operations don't log a
90  * full page image of the metapage.
91  */
92  XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
93  if (forknum == INIT_FORKNUM)
94  FlushOneBuffer(bitmapbuf);
95  UnlockReleaseBuffer(bitmapbuf);
96 
97  /* add the new bitmap page to the metapage's list of bitmaps */
98  if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
99  {
100  /*
101  * Note: in normal operation, we'd update the metapage while still
102  * holding lock on the bitmap page. But during replay it's not
103  * necessary to hold that lock, since nobody can see it yet; the
104  * creating transaction hasn't yet committed.
105  */
106  page = BufferGetPage(metabuf);
107  metap = HashPageGetMeta(page);
108 
109  num_buckets = metap->hashm_maxbucket + 1;
110  metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
111  metap->hashm_nmaps++;
112 
113  PageSetLSN(page, lsn);
114  MarkBufferDirty(metabuf);
115 
116  XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
117  if (forknum == INIT_FORKNUM)
118  FlushOneBuffer(metabuf);
119  }
120  if (BufferIsValid(metabuf))
121  UnlockReleaseBuffer(metabuf);
122 }
123 
124 /*
125  * replay a hash index insert without split
126  */
127 static void
128 hash_xlog_insert(XLogReaderState *record)
129 {
130  HashMetaPage metap;
131  XLogRecPtr lsn = record->EndRecPtr;
132  xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
133  Buffer buffer;
134  Page page;
135 
136  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
137  {
138  Size datalen;
139  char *datapos = XLogRecGetBlockData(record, 0, &datalen);
140 
141  page = BufferGetPage(buffer);
142 
143  if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
144  false, false) == InvalidOffsetNumber)
145  elog(PANIC, "hash_xlog_insert: failed to add item");
146 
147  PageSetLSN(page, lsn);
148  MarkBufferDirty(buffer);
149  }
150  if (BufferIsValid(buffer))
151  UnlockReleaseBuffer(buffer);
152 
153  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
154  {
155  /*
156  * Note: in normal operation, we'd update the metapage while still
157  * holding lock on the page we inserted into. But during replay it's
158  * not necessary to hold that lock, since no other index updates can
159  * be happening concurrently.
160  */
161  page = BufferGetPage(buffer);
162  metap = HashPageGetMeta(page);
163  metap->hashm_ntuples += 1;
164 
165  PageSetLSN(page, lsn);
166  MarkBufferDirty(buffer);
167  }
168  if (BufferIsValid(buffer))
169  UnlockReleaseBuffer(buffer);
170 }
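
The layout consumed here mirrors what the insertion path registers. Below is a condensed sketch modeled on _hash_doinsert() in hashinsert.c; it is simplified, and the local names (buf, metabuf, itup, itup_off) are assumed from that context. The tuple travels as registered buffer data on block 0, which is why replay fetches it with XLogRecGetBlockData(); block 1 is the metapage, of which replay adjusts only hashm_ntuples.

    /* sketch of the do-side logging (simplified from _hash_doinsert) */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;    /* read back as xlrec->offnum on replay */

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfHashInsert);

        /* block 1: metapage, so replay can bump hashm_ntuples */
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        /* block 0: target page; the tuple rides along as block data */
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }
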
171 
172 /*
173  * replay addition of overflow page for hash index
174  */
175 static void
176 hash_xlog_add_ovfl_page(XLogReaderState *record)
177 {
178  XLogRecPtr lsn = record->EndRecPtr;
179  xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
180  Buffer leftbuf;
181  Buffer ovflbuf;
182  Buffer metabuf;
183  BlockNumber leftblk;
184  BlockNumber rightblk;
185  BlockNumber newmapblk = InvalidBlockNumber;
186  Page ovflpage;
187  HashPageOpaque ovflopaque;
188  uint32 *num_bucket;
189  char *data;
190  Size datalen PG_USED_FOR_ASSERTS_ONLY;
191  bool new_bmpage = false;
192 
193  XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
194  XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
195 
196  ovflbuf = XLogInitBufferForRedo(record, 0);
197  Assert(BufferIsValid(ovflbuf));
198 
199  data = XLogRecGetBlockData(record, 0, &datalen);
200  num_bucket = (uint32 *) data;
201  Assert(datalen == sizeof(uint32));
202  _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
203  true);
204  /* update backlink */
205  ovflpage = BufferGetPage(ovflbuf);
206  ovflopaque = HashPageGetOpaque(ovflpage);
207  ovflopaque->hasho_prevblkno = leftblk;
208 
209  PageSetLSN(ovflpage, lsn);
210  MarkBufferDirty(ovflbuf);
211 
212  if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
213  {
214  Page leftpage;
215  HashPageOpaque leftopaque;
216 
217  leftpage = BufferGetPage(leftbuf);
218  leftopaque = HashPageGetOpaque(leftpage);
219  leftopaque->hasho_nextblkno = rightblk;
220 
221  PageSetLSN(leftpage, lsn);
222  MarkBufferDirty(leftbuf);
223  }
224 
225  if (BufferIsValid(leftbuf))
226  UnlockReleaseBuffer(leftbuf);
227  UnlockReleaseBuffer(ovflbuf);
228 
229  /*
230  * Note: in normal operation, we'd update the bitmap and meta page while
231  * still holding lock on the overflow pages. But during replay it's not
232  * necessary to hold those locks, since no other index updates can be
233  * happening concurrently.
234  */
235  if (XLogRecHasBlockRef(record, 2))
236  {
237  Buffer mapbuffer;
238 
239  if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
240  {
241  Page mappage = (Page) BufferGetPage(mapbuffer);
242  uint32 *freep = NULL;
243  uint32 *bitmap_page_bit;
244 
245  freep = HashPageGetBitmap(mappage);
246 
247  data = XLogRecGetBlockData(record, 2, &datalen);
248  bitmap_page_bit = (uint32 *) data;
249 
250  SETBIT(freep, *bitmap_page_bit);
251 
252  PageSetLSN(mappage, lsn);
253  MarkBufferDirty(mapbuffer);
254  }
255  if (BufferIsValid(mapbuffer))
256  UnlockReleaseBuffer(mapbuffer);
257  }
258 
259  if (XLogRecHasBlockRef(record, 3))
260  {
261  Buffer newmapbuf;
262 
263  newmapbuf = XLogInitBufferForRedo(record, 3);
264 
265  _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
266 
267  new_bmpage = true;
268  newmapblk = BufferGetBlockNumber(newmapbuf);
269 
270  MarkBufferDirty(newmapbuf);
271  PageSetLSN(BufferGetPage(newmapbuf), lsn);
272 
273  UnlockReleaseBuffer(newmapbuf);
274  }
275 
276  if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
277  {
278  HashMetaPage metap;
279  Page page;
280  uint32 *firstfree_ovflpage;
281 
282  data = XLogRecGetBlockData(record, 4, &datalen);
283  firstfree_ovflpage = (uint32 *) data;
284 
285  page = BufferGetPage(metabuf);
286  metap = HashPageGetMeta(page);
287  metap->hashm_firstfree = *firstfree_ovflpage;
288 
289  if (!xlrec->bmpage_found)
290  {
291  metap->hashm_spares[metap->hashm_ovflpoint]++;
292 
293  if (new_bmpage)
294  {
295  Assert(BlockNumberIsValid(newmapblk));
296 
297  metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
298  metap->hashm_nmaps++;
299  metap->hashm_spares[metap->hashm_ovflpoint]++;
300  }
301  }
302 
303  PageSetLSN(page, lsn);
304  MarkBufferDirty(metabuf);
305  }
306  if (BufferIsValid(metabuf))
307  UnlockReleaseBuffer(metabuf);
308 }
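
Since the function above touches up to five distinct blocks, a recap of the record's block references, derived from the replay logic itself, may help:

    /*
     * Block references of an XLOG_HASH_ADD_OVFL_PAGE record, as consumed by
     * hash_xlog_add_ovfl_page():
     *   0: the new overflow page (always reinitialized; its block data is
     *      the uint32 bucket number)
     *   1: the page preceding it in the bucket chain, whose hasho_nextblkno
     *      is pointed at the new page
     *   2: an existing bitmap page whose free bit gets set (optional)
     *   3: a freshly allocated bitmap page (optional, used only when the
     *      old bitmap was full)
     *   4: the metapage (hashm_firstfree, and possibly hashm_spares[] and
     *      hashm_mapp[]/hashm_nmaps)
     */
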
309 
310 /*
311  * replay allocation of page for split operation
312  */
313 static void
314 hash_xlog_split_allocate_page(XLogReaderState *record)
315 {
316  XLogRecPtr lsn = record->EndRecPtr;
317  xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
318  Buffer oldbuf;
319  Buffer newbuf;
320  Buffer metabuf;
321  Size datalen PG_USED_FOR_ASSERTS_ONLY;
322  char *data;
323  XLogRedoAction action;
324 
325  /*
326  * To be consistent with normal operation, here we take cleanup locks on
327  * both the old and new buckets even though there can't be any concurrent
328  * inserts.
329  */
330 
331  /* replay the record for old bucket */
332  action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
333 
334  /*
335  * Note that we still update the page even if it was restored from a full
336  * page image, because the special space is not included in the image.
337  */
338  if (action == BLK_NEEDS_REDO)
339  {
340  Page oldpage;
341  HashPageOpaque oldopaque;
342 
343  oldpage = BufferGetPage(oldbuf);
344  oldopaque = HashPageGetOpaque(oldpage);
345 
346  oldopaque->hasho_flag = xlrec->old_bucket_flag;
347  oldopaque->hasho_prevblkno = xlrec->new_bucket;
348 
349  PageSetLSN(oldpage, lsn);
350  MarkBufferDirty(oldbuf);
351  }
352 
353  /* replay the record for new bucket */
354  XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
355  &newbuf);
356  _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
357  xlrec->new_bucket_flag, true);
358  MarkBufferDirty(newbuf);
359  PageSetLSN(BufferGetPage(newbuf), lsn);
360 
361  /*
362  * We could release the lock on the old bucket earlier as well, but doing
363  * it here keeps the replay consistent with normal operation.
364  */
365  if (BufferIsValid(oldbuf))
366  UnlockReleaseBuffer(oldbuf);
367  if (BufferIsValid(newbuf))
368  UnlockReleaseBuffer(newbuf);
369 
370  /*
371  * Note: in normal operation, we'd update the meta page while still
372  * holding lock on the old and new bucket pages. But during replay it's
373  * not necessary to hold those locks, since no other bucket splits can be
374  * happening concurrently.
375  */
376 
377  /* replay the record for metapage changes */
378  if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
379  {
380  Page page;
381  HashMetaPage metap;
382 
383  page = BufferGetPage(metabuf);
384  metap = HashPageGetMeta(page);
385  metap->hashm_maxbucket = xlrec->new_bucket;
386 
387  data = XLogRecGetBlockData(record, 2, &datalen);
388 
389  if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
390  {
391  uint32 lowmask;
392  uint32 *highmask;
393 
394  /* extract low and high masks. */
395  memcpy(&lowmask, data, sizeof(uint32));
396  highmask = (uint32 *) ((char *) data + sizeof(uint32));
397 
398  /* update metapage */
399  metap->hashm_lowmask = lowmask;
400  metap->hashm_highmask = *highmask;
401 
402  data += sizeof(uint32) * 2;
403  }
404 
405  if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
406  {
407  uint32 ovflpoint;
408  uint32 *ovflpages;
409 
410  /* extract information of overflow pages. */
411  memcpy(&ovflpoint, data, sizeof(uint32));
412  ovflpages = (uint32 *) ((char *) data + sizeof(uint32));
413 
414  /* update metapage */
415  metap->hashm_spares[ovflpoint] = *ovflpages;
416  metap->hashm_ovflpoint = ovflpoint;
417  }
418 
419  MarkBufferDirty(metabuf);
420  PageSetLSN(BufferGetPage(metabuf), lsn);
421  }
422 
423  if (BufferIsValid(metabuf))
424  UnlockReleaseBuffer(metabuf);
425 }
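
A recap of this record's blocks and flags, again derived from the replay code above:

    /*
     * Block references of an XLOG_HASH_SPLIT_ALLOCATE_PAGE record:
     *   0: old bucket's primary page (cleanup-locked; bucket flags and
     *      hasho_prevblkno are refreshed even over a full-page image,
     *      because the special space is not part of the image)
     *   1: new bucket's primary page (zeroed and rebuilt via _hash_initbuf)
     *   2: the metapage; its block data carries the new masks and/or the
     *      new splitpoint, announced by XLH_SPLIT_META_UPDATE_MASKS and
     *      XLH_SPLIT_META_UPDATE_SPLITPOINT in xlrec->flags
     */
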
426 
427 /*
428  * replay of split operation
429  */
430 static void
431 hash_xlog_split_page(XLogReaderState *record)
432 {
433  Buffer buf;
434 
435  if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
436  elog(ERROR, "Hash split record did not contain a full-page image");
437 
438  UnlockReleaseBuffer(buf);
439 }
440 
441 /*
442  * replay completion of split operation
443  */
444 static void
445 hash_xlog_split_complete(XLogReaderState *record)
446 {
447  XLogRecPtr lsn = record->EndRecPtr;
448  xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
449  Buffer oldbuf;
450  Buffer newbuf;
451  XLogRedoAction action;
452 
453  /* replay the record for old bucket */
454  action = XLogReadBufferForRedo(record, 0, &oldbuf);
455 
456  /*
457  * Note that we still update the page even if it was restored from a full
458  * page image, because the bucket flag is not included in the image.
459  */
460  if (action == BLK_NEEDS_REDO)
461  {
462  Page oldpage;
463  HashPageOpaque oldopaque;
464 
465  oldpage = BufferGetPage(oldbuf);
466  oldopaque = HashPageGetOpaque(oldpage);
467 
468  oldopaque->hasho_flag = xlrec->old_bucket_flag;
469 
470  PageSetLSN(oldpage, lsn);
471  MarkBufferDirty(oldbuf);
472  }
473  if (BufferIsValid(oldbuf))
474  UnlockReleaseBuffer(oldbuf);
475 
476  /* replay the record for new bucket */
477  action = XLogReadBufferForRedo(record, 1, &newbuf);
478 
479  /*
480  * Note that we still update the page even if it was restored from a full
481  * page image, because the bucket flag is not included in the image.
482  */
483  if (action == BLK_NEEDS_REDO)
484  {
485  Page newpage;
486  HashPageOpaque nopaque;
487 
488  newpage = BufferGetPage(newbuf);
489  nopaque = HashPageGetOpaque(newpage);
490 
491  nopaque->hasho_flag = xlrec->new_bucket_flag;
492 
493  PageSetLSN(newpage, lsn);
494  MarkBufferDirty(newbuf);
495  }
496  if (BufferIsValid(newbuf))
497  UnlockReleaseBuffer(newbuf);
498 }
499 
500 /*
501  * replay move of page contents for squeeze operation of hash index
502  */
503 static void
504 hash_xlog_move_page_contents(XLogReaderState *record)
505 {
506  XLogRecPtr lsn = record->EndRecPtr;
507  xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
508  Buffer bucketbuf = InvalidBuffer;
509  Buffer writebuf = InvalidBuffer;
510  Buffer deletebuf = InvalidBuffer;
511  XLogRedoAction action;
512 
513  /*
514  * Ensure we have a cleanup lock on primary bucket page before we start
515  * with the actual replay operation. This ensures that no scan can start,
516  * and that no scan is already in progress, during the replay of this
517  * operation. If we allowed scans during this operation, they could miss
518  * some records or return the same record multiple times.
519  */
520  if (xldata->is_prim_bucket_same_wrt)
521  action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
522  else
523  {
524  /*
525  * We don't care about the return value, as the purpose of reading
526  * bucketbuf is only to acquire a cleanup lock on the primary bucket page.
527  */
528  (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
529 
530  action = XLogReadBufferForRedo(record, 1, &writebuf);
531  }
532 
533  /* replay the record for adding entries in overflow buffer */
534  if (action == BLK_NEEDS_REDO)
535  {
536  Page writepage;
537  char *begin;
538  char *data;
539  Size datalen;
540  uint16 ninserted = 0;
541 
542  data = begin = XLogRecGetBlockData(record, 1, &datalen);
543 
544  writepage = (Page) BufferGetPage(writebuf);
545 
546  if (xldata->ntups > 0)
547  {
548  OffsetNumber *towrite = (OffsetNumber *) data;
549 
550  data += sizeof(OffsetNumber) * xldata->ntups;
551 
552  while (data - begin < datalen)
553  {
554  IndexTuple itup = (IndexTuple) data;
555  Size itemsz;
556  OffsetNumber l;
557 
558  itemsz = IndexTupleSize(itup);
559  itemsz = MAXALIGN(itemsz);
560 
561  data += itemsz;
562 
563  l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
564  if (l == InvalidOffsetNumber)
565  elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
566  (int) itemsz);
567 
568  ninserted++;
569  }
570  }
571 
572  /*
573  * The number of tuples inserted must be the same as requested in the REDO record.
574  */
575  Assert(ninserted == xldata->ntups);
576 
577  PageSetLSN(writepage, lsn);
578  MarkBufferDirty(writebuf);
579  }
580 
581  /* replay the record for deleting entries from overflow buffer */
582  if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
583  {
584  Page page;
585  char *ptr;
586  Size len;
587 
588  ptr = XLogRecGetBlockData(record, 2, &len);
589 
590  page = (Page) BufferGetPage(deletebuf);
591 
592  if (len > 0)
593  {
594  OffsetNumber *unused;
595  OffsetNumber *unend;
596 
597  unused = (OffsetNumber *) ptr;
598  unend = (OffsetNumber *) ((char *) ptr + len);
599 
600  if ((unend - unused) > 0)
601  PageIndexMultiDelete(page, unused, unend - unused);
602  }
603 
604  PageSetLSN(page, lsn);
605  MarkBufferDirty(deletebuf);
606  }
607 
608  /*
609  * Replay is complete, so now we can release the buffers. We release the
610  * locks at the end of the replay operation to ensure that we hold the
611  * lock on the primary bucket page until the end of the operation. We
612  * could release the lock on the write buffer as soon as its part of the
613  * operation is complete, when it is not the primary bucket page, but
614  * that doesn't seem worth complicating the code.
615  */
616  if (BufferIsValid(deletebuf))
617  UnlockReleaseBuffer(deletebuf);
618 
619  if (BufferIsValid(writebuf))
620  UnlockReleaseBuffer(writebuf);
621 
622  if (BufferIsValid(bucketbuf))
623  UnlockReleaseBuffer(bucketbuf);
624 }
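
The block-1 payload parsed by the insertion loop above has a simple wire format, reconstructed here from the reader code (hash_xlog_squeeze_page() below consumes the same layout):

    /*
     * Wire format of the block-1 data for move/squeeze records:
     *
     *   OffsetNumber towrite[ntups];   target offsets, in insertion order
     *   <IndexTuple>                   each MAXALIGN'd to the next boundary
     *   <IndexTuple>
     *   ...
     *
     * The loop advances through the tuple area until datalen is exhausted,
     * pairing the i-th tuple with towrite[i]; hence the final
     * Assert(ninserted == xldata->ntups).
     */
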
625 
626 /*
627  * replay squeeze page operation of hash index
628  */
629 static void
630 hash_xlog_squeeze_page(XLogReaderState *record)
631 {
632  XLogRecPtr lsn = record->EndRecPtr;
633  xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
634  Buffer bucketbuf = InvalidBuffer;
635  Buffer writebuf;
636  Buffer ovflbuf;
637  Buffer prevbuf = InvalidBuffer;
638  Buffer mapbuf;
639  XLogRedoAction action;
640 
641  /*
642  * Ensure we have a cleanup lock on primary bucket page before we start
643  * with the actual replay operation. This ensures that no scan can start,
644  * and that no scan is already in progress, during the replay of this
645  * operation. If we allowed scans during this operation, they could miss
646  * some records or return the same record multiple times.
647  */
648  if (xldata->is_prim_bucket_same_wrt)
649  action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
650  else
651  {
652  /*
653  * We don't care about the return value, as the purpose of reading
654  * bucketbuf is only to acquire a cleanup lock on the primary bucket page.
655  */
656  (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
657 
658  action = XLogReadBufferForRedo(record, 1, &writebuf);
659  }
660 
661  /* replay the record for adding entries in overflow buffer */
662  if (action == BLK_NEEDS_REDO)
663  {
664  Page writepage;
665  char *begin;
666  char *data;
667  Size datalen;
668  uint16 ninserted = 0;
669 
670  data = begin = XLogRecGetBlockData(record, 1, &datalen);
671 
672  writepage = (Page) BufferGetPage(writebuf);
673 
674  if (xldata->ntups > 0)
675  {
676  OffsetNumber *towrite = (OffsetNumber *) data;
677 
678  data += sizeof(OffsetNumber) * xldata->ntups;
679 
680  while (data - begin < datalen)
681  {
682  IndexTuple itup = (IndexTuple) data;
683  Size itemsz;
684  OffsetNumber l;
685 
686  itemsz = IndexTupleSize(itup);
687  itemsz = MAXALIGN(itemsz);
688 
689  data += itemsz;
690 
691  l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
692  if (l == InvalidOffsetNumber)
693  elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
694  (int) itemsz);
695 
696  ninserted++;
697  }
698  }
699 
700  /*
701  * The number of tuples inserted must be the same as requested in the REDO record.
702  */
703  Assert(ninserted == xldata->ntups);
704 
705  /*
706  * If the page to which we are adding tuples is the page previous to the
707  * freed overflow page, then update its nextblkno.
708  */
709  if (xldata->is_prev_bucket_same_wrt)
710  {
711  HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
712 
713  writeopaque->hasho_nextblkno = xldata->nextblkno;
714  }
715 
716  PageSetLSN(writepage, lsn);
717  MarkBufferDirty(writebuf);
718  }
719 
720  /* replay the record for initializing overflow buffer */
721  if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
722  {
723  Page ovflpage;
724  HashPageOpaque ovflopaque;
725 
726  ovflpage = BufferGetPage(ovflbuf);
727 
728  _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
729 
730  ovflopaque = HashPageGetOpaque(ovflpage);
731 
732  ovflopaque->hasho_prevblkno = InvalidBlockNumber;
733  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
734  ovflopaque->hasho_bucket = InvalidBucket;
735  ovflopaque->hasho_flag = LH_UNUSED_PAGE;
736  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
737 
738  PageSetLSN(ovflpage, lsn);
739  MarkBufferDirty(ovflbuf);
740  }
741  if (BufferIsValid(ovflbuf))
742  UnlockReleaseBuffer(ovflbuf);
743 
744  /* replay the record for page previous to the freed overflow page */
745  if (!xldata->is_prev_bucket_same_wrt &&
746  XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
747  {
748  Page prevpage = BufferGetPage(prevbuf);
749  HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
750 
751  prevopaque->hasho_nextblkno = xldata->nextblkno;
752 
753  PageSetLSN(prevpage, lsn);
754  MarkBufferDirty(prevbuf);
755  }
756  if (BufferIsValid(prevbuf))
757  UnlockReleaseBuffer(prevbuf);
758 
759  /* replay the record for page next to the freed overflow page */
760  if (XLogRecHasBlockRef(record, 4))
761  {
762  Buffer nextbuf;
763 
764  if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
765  {
766  Page nextpage = BufferGetPage(nextbuf);
767  HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
768 
769  nextopaque->hasho_prevblkno = xldata->prevblkno;
770 
771  PageSetLSN(nextpage, lsn);
772  MarkBufferDirty(nextbuf);
773  }
774  if (BufferIsValid(nextbuf))
775  UnlockReleaseBuffer(nextbuf);
776  }
777 
778  if (BufferIsValid(writebuf))
779  UnlockReleaseBuffer(writebuf);
780 
781  if (BufferIsValid(bucketbuf))
782  UnlockReleaseBuffer(bucketbuf);
783 
784  /*
785  * Note: in normal operation, we'd update the bitmap and meta page while
786  * still holding lock on the primary bucket page and overflow pages. But
787  * during replay it's not necessary to hold those locks, since no other
788  * index updates can be happening concurrently.
789  */
790  /* replay the record for bitmap page */
791  if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
792  {
793  Page mappage = (Page) BufferGetPage(mapbuf);
794  uint32 *freep = NULL;
795  char *data;
796  uint32 *bitmap_page_bit;
797  Size datalen;
798 
799  freep = HashPageGetBitmap(mappage);
800 
801  data = XLogRecGetBlockData(record, 5, &datalen);
802  bitmap_page_bit = (uint32 *) data;
803 
804  CLRBIT(freep, *bitmap_page_bit);
805 
806  PageSetLSN(mappage, lsn);
807  MarkBufferDirty(mapbuf);
808  }
809  if (BufferIsValid(mapbuf))
810  UnlockReleaseBuffer(mapbuf);
811 
812  /* replay the record for meta page */
813  if (XLogRecHasBlockRef(record, 6))
814  {
815  Buffer metabuf;
816 
817  if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
818  {
819  HashMetaPage metap;
820  Page page;
821  char *data;
822  uint32 *firstfree_ovflpage;
823  Size datalen;
824 
825  data = XLogRecGetBlockData(record, 6, &datalen);
826  firstfree_ovflpage = (uint32 *) data;
827 
828  page = BufferGetPage(metabuf);
829  metap = HashPageGetMeta(page);
830  metap->hashm_firstfree = *firstfree_ovflpage;
831 
832  PageSetLSN(page, lsn);
833  MarkBufferDirty(metabuf);
834  }
835  if (BufferIsValid(metabuf))
836  UnlockReleaseBuffer(metabuf);
837  }
838 }
839 
840 /*
841  * replay delete operation of hash index
842  */
843 static void
844 hash_xlog_delete(XLogReaderState *record)
845 {
846  XLogRecPtr lsn = record->EndRecPtr;
847  xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
848  Buffer bucketbuf = InvalidBuffer;
849  Buffer deletebuf;
850  Page page;
851  XLogRedoAction action;
852 
853  /*
854  * Ensure we have a cleanup lock on primary bucket page before we start
855  * with the actual replay operation. This ensures that no scan can start,
856  * and that no scan is already in progress, during the replay of this
857  * operation. If we allowed scans during this operation, they could miss
858  * some records or return the same record multiple times.
859  */
860  if (xldata->is_primary_bucket_page)
861  action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
862  else
863  {
864  /*
865  * We don't care about the return value, as the purpose of reading
866  * bucketbuf is only to acquire a cleanup lock on the primary bucket page.
867  */
868  (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
869 
870  action = XLogReadBufferForRedo(record, 1, &deletebuf);
871  }
872 
873  /* replay the record for deleting entries in bucket page */
874  if (action == BLK_NEEDS_REDO)
875  {
876  char *ptr;
877  Size len;
878 
879  ptr = XLogRecGetBlockData(record, 1, &len);
880 
881  page = (Page) BufferGetPage(deletebuf);
882 
883  if (len > 0)
884  {
885  OffsetNumber *unused;
886  OffsetNumber *unend;
887 
888  unused = (OffsetNumber *) ptr;
889  unend = (OffsetNumber *) ((char *) ptr + len);
890 
891  if ((unend - unused) > 0)
892  PageIndexMultiDelete(page, unused, unend - unused);
893  }
894 
895  /*
896  * Mark the page as not containing any LP_DEAD items only if
897  * clear_dead_marking flag is set to true. See comments in
898  * hashbucketcleanup() for details.
899  */
900  if (xldata->clear_dead_marking)
901  {
902  HashPageOpaque pageopaque;
903 
904  pageopaque = HashPageGetOpaque(page);
905  pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
906  }
907 
908  PageSetLSN(page, lsn);
909  MarkBufferDirty(deletebuf);
910  }
911  if (BufferIsValid(deletebuf))
912  UnlockReleaseBuffer(deletebuf);
913 
914  if (BufferIsValid(bucketbuf))
915  UnlockReleaseBuffer(bucketbuf);
916 }
917 
918 /*
919  * replay split cleanup flag operation for primary bucket page.
920  */
921 static void
922 hash_xlog_split_cleanup(XLogReaderState *record)
923 {
924  XLogRecPtr lsn = record->EndRecPtr;
925  Buffer buffer;
926  Page page;
927 
928  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
929  {
930  HashPageOpaque bucket_opaque;
931 
932  page = (Page) BufferGetPage(buffer);
933 
934  bucket_opaque = HashPageGetOpaque(page);
935  bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
936  PageSetLSN(page, lsn);
937  MarkBufferDirty(buffer);
938  }
939  if (BufferIsValid(buffer))
940  UnlockReleaseBuffer(buffer);
941 }
942 
943 /*
944  * replay for update meta page
945  */
946 static void
947 hash_xlog_update_meta_page(XLogReaderState *record)
948 {
949  HashMetaPage metap;
950  XLogRecPtr lsn = record->EndRecPtr;
951  xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
952  Buffer metabuf;
953  Page page;
954 
955  if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
956  {
957  page = BufferGetPage(metabuf);
958  metap = HashPageGetMeta(page);
959 
960  metap->hashm_ntuples = xldata->ntuples;
961 
962  PageSetLSN(page, lsn);
963  MarkBufferDirty(metabuf);
964  }
965  if (BufferIsValid(metabuf))
966  UnlockReleaseBuffer(metabuf);
967 }
968 
969 /*
970  * replay delete operation in hash index to remove
971  * tuples marked as DEAD during index tuple insertion.
972  */
973 static void
974 hash_xlog_vacuum_one_page(XLogReaderState *record)
975 {
976  XLogRecPtr lsn = record->EndRecPtr;
977  xl_hash_vacuum_one_page *xldata;
978  Buffer buffer;
979  Buffer metabuf;
980  Page page;
981  XLogRedoAction action;
982  HashPageOpaque pageopaque;
983  OffsetNumber *toDelete;
984 
985  xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
986  toDelete = xldata->offsets;
987 
988  /*
989  * If we have any conflict processing to do, it must happen before we
990  * update the page.
991  *
992  * Hash index records that are marked as LP_DEAD and being removed during
993  * hash index tuple insertion can conflict with standby queries. You might
994  * think that vacuum records would conflict as well, but we've handled
995  * that already. XLOG_HEAP2_PRUNE records provide the highest xid cleaned
996  * by the vacuum of the heap and so we can resolve any conflicts just once
997  * when that arrives. After that we know that no conflicts exist from
998  * individual hash index vacuum records on that index.
999  */
1000  if (InHotStandby)
1001  {
1002  RelFileLocator rlocator;
1003 
1004  XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1005  ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1006  xldata->isCatalogRel,
1007  rlocator);
1008  }
1009 
1010  action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1011 
1012  if (action == BLK_NEEDS_REDO)
1013  {
1014  page = (Page) BufferGetPage(buffer);
1015 
1016  PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1017 
1018  /*
1019  * Mark the page as not containing any LP_DEAD items. See comments in
1020  * _hash_vacuum_one_page() for details.
1021  */
1022  pageopaque = HashPageGetOpaque(page);
1023  pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1024 
1025  PageSetLSN(page, lsn);
1026  MarkBufferDirty(buffer);
1027  }
1028  if (BufferIsValid(buffer))
1029  UnlockReleaseBuffer(buffer);
1030 
1031  if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1032  {
1033  Page metapage;
1034  HashMetaPage metap;
1035 
1036  metapage = BufferGetPage(metabuf);
1037  metap = HashPageGetMeta(metapage);
1038 
1039  metap->hashm_ntuples -= xldata->ntuples;
1040 
1041  PageSetLSN(metapage, lsn);
1042  MarkBufferDirty(metabuf);
1043  }
1044  if (BufferIsValid(metabuf))
1045  UnlockReleaseBuffer(metabuf);
1046 }
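
For context, here is a condensed, illustrative sketch of the do side of this record, modeled on _hash_vacuum_one_page() in hashinsert.c (simplified; local names such as deletable, ndeletable, and hrel are assumed). The offsets array is registered as main record data, right behind the fixed-size struct whose last member is a flexible array, which is how replay reaches it via xldata->offsets.

    /* sketch of the do-side logging (simplified from _hash_vacuum_one_page) */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_vacuum_one_page xlrec;
        XLogRecPtr  recptr;

        xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel);
        xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
        xlrec.ntuples = ndeletable;

        XLogBeginInsert();
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);
        /* the target offsets land directly behind the fixed-size struct */
        XLogRegisterData((char *) deletable,
                         ndeletable * sizeof(OffsetNumber));
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);

        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }
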
1047 
1048 void
1049 hash_redo(XLogReaderState *record)
1050 {
1051  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1052 
1053  switch (info)
1054  {
1055  case XLOG_HASH_INIT_META_PAGE:
1056  hash_xlog_init_meta_page(record);
1057  break;
1058  case XLOG_HASH_INIT_BITMAP_PAGE:
1059  hash_xlog_init_bitmap_page(record);
1060  break;
1061  case XLOG_HASH_INSERT:
1062  hash_xlog_insert(record);
1063  break;
1064  case XLOG_HASH_ADD_OVFL_PAGE:
1065  hash_xlog_add_ovfl_page(record);
1066  break;
1067  case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1068  hash_xlog_split_allocate_page(record);
1069  break;
1070  case XLOG_HASH_SPLIT_PAGE:
1071  hash_xlog_split_page(record);
1072  break;
1073  case XLOG_HASH_SPLIT_COMPLETE:
1074  hash_xlog_split_complete(record);
1075  break;
1076  case XLOG_HASH_MOVE_PAGE_CONTENTS:
1077  hash_xlog_move_page_contents(record);
1078  break;
1079  case XLOG_HASH_SQUEEZE_PAGE:
1080  hash_xlog_squeeze_page(record);
1081  break;
1082  case XLOG_HASH_DELETE:
1083  hash_xlog_delete(record);
1084  break;
1085  case XLOG_HASH_SPLIT_CLEANUP:
1086  hash_xlog_split_cleanup(record);
1087  break;
1088  case XLOG_HASH_UPDATE_META_PAGE:
1089  hash_xlog_update_meta_page(record);
1090  break;
1091  case XLOG_HASH_VACUUM_ONE_PAGE:
1092  hash_xlog_vacuum_one_page(record);
1093  break;
1094  default:
1095  elog(PANIC, "hash_redo: unknown op code %u", info);
1096  }
1097 }
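
hash_redo() is not called directly; it is dispatched through the resource-manager table. The hash entry in src/include/access/rmgrlist.h looks roughly like this (abridged; the exact argument list varies across major versions):

    /* rmgrlist.h (abridged): redo, desc, identify, startup, cleanup, mask, decode */
    PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify,
            NULL, NULL, hash_mask, NULL)
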
1098 
1099 /*
1100  * Mask a hash page before performing consistency checks on it.
1101  */
1102 void
1103 hash_mask(char *pagedata, BlockNumber blkno)
1104 {
1105  Page page = (Page) pagedata;
1106  HashPageOpaque opaque;
1107  int pagetype;
1108 
1109  mask_page_lsn_and_checksum(page);
1110 
1111  mask_page_hint_bits(page);
1112  mask_unused_space(page);
1113 
1114  opaque = HashPageGetOpaque(page);
1115 
1116  pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1117  if (pagetype == LH_UNUSED_PAGE)
1118  {
1119  /*
1120  * Mask everything on an UNUSED page.
1121  */
1122  mask_page_content(page);
1123  }
1124  else if (pagetype == LH_BUCKET_PAGE ||
1125  pagetype == LH_OVERFLOW_PAGE)
1126  {
1127  /*
1128  * In hash bucket and overflow pages, it is possible to modify the
1129  * LP_FLAGS without emitting any WAL record. Hence, mask the line
1130  * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1131  */
1132  mask_lp_flags(page);
1133  }
1134 
1135  /*
1136  * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1137  * unlogged. So, mask it. See _hash_kill_items() for details.
1138  */
1139  opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1140 }
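
hash_mask() runs only when wal_consistency_checking covers the hash resource manager. Conceptually, the checker masks both the page produced by redo and the full-page image carried in the WAL record before comparing them byte for byte; the sketch below is purely illustrative (buffer names are invented; see the consistency-check code in xlogrecovery.c for the real harness):

    /* illustrative only: how a consistency check uses the rm_mask callback */
    memcpy(replay_image, page_after_redo, BLCKSZ);
    memcpy(primary_image, fpi_from_record, BLCKSZ);
    hash_mask(replay_image, blkno);
    hash_mask(primary_image, blkno);
    if (memcmp(replay_image, primary_image, BLCKSZ) != 0)
        elog(FATAL, "inconsistent page found, blkno %u", blkno);
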