PostgreSQL Source Code (git master)
hash_xlog.c
/*-------------------------------------------------------------------------
 *
 * hash_xlog.c
 *	  WAL replay logic for hash index.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hash_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xlogutils.h"
#include "storage/standby.h"

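/*
 * Each hash_xlog_* routine below replays a single hash WAL record type;
 * hash_redo() near the bottom of this file dispatches to them based on the
 * record's info bits.
 */
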
/*
 * replay a hash index meta page
 */
static void
hash_xlog_init_meta_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Page        page;
    Buffer      metabuf;
    ForkNumber  forknum;

    xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);

    /* create the index's metapage */
    metabuf = XLogInitBufferForRedo(record, 0);
    Assert(BufferIsValid(metabuf));
    _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
                          xlrec->ffactor, true);
    page = (Page) BufferGetPage(metabuf);
    PageSetLSN(page, lsn);
    MarkBufferDirty(metabuf);

    /*
     * Force the on-disk state of init forks to always be in sync with the
     * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
     * special handling for init forks as create index operations don't log a
     * full page image of the metapage.
     */
    XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
    if (forknum == INIT_FORKNUM)
        FlushOneBuffer(metabuf);

    /* all done */
    UnlockReleaseBuffer(metabuf);
}

/*
 * replay a hash index bitmap page
 */
static void
hash_xlog_init_bitmap_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      bitmapbuf;
    Buffer      metabuf;
    Page        page;
    HashMetaPage metap;
    uint32      num_buckets;
    ForkNumber  forknum;

    xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);

    /*
     * Initialize bitmap page
     */
    bitmapbuf = XLogInitBufferForRedo(record, 0);
    _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
    PageSetLSN(BufferGetPage(bitmapbuf), lsn);
    MarkBufferDirty(bitmapbuf);

    /*
     * Force the on-disk state of init forks to always be in sync with the
     * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
     * special handling for init forks as create index operations don't log a
     * full page image of the metapage.
     */
    XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
    if (forknum == INIT_FORKNUM)
        FlushOneBuffer(bitmapbuf);
    UnlockReleaseBuffer(bitmapbuf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
    {
        /*
         * Note: in normal operation, we'd update the metapage while still
         * holding lock on the bitmap page.  But during replay it's not
         * necessary to hold that lock, since nobody can see it yet; the
         * creating transaction hasn't yet committed.
         */
        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);

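        /*
         * In a freshly built index the metapage is block 0 and the buckets
         * occupy blocks 1 .. num_buckets, so the first bitmap page follows
         * at block num_buckets + 1; that is the block number recorded below.
         */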
        num_buckets = metap->hashm_maxbucket + 1;
        metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
        metap->hashm_nmaps++;

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);

        XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
        if (forknum == INIT_FORKNUM)
            FlushOneBuffer(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay a hash index insert without split
 */
static void
hash_xlog_insert(XLogReaderState *record)
{
    HashMetaPage metap;
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

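    /*
     * Block 0 is the bucket or overflow page that received the tuple; block
     * 1 is the metapage, whose tuple count is bumped below.
     */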
    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *datapos = XLogRecGetBlockData(record, 0, &datalen);

        page = BufferGetPage(buffer);

        if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
                        false, false) == InvalidOffsetNumber)
            elog(PANIC, "hash_xlog_insert: failed to add item");

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
    {
        /*
         * Note: in normal operation, we'd update the metapage while still
         * holding lock on the page we inserted into.  But during replay it's
         * not necessary to hold that lock, since no other index updates can
         * be happening concurrently.
         */
        page = BufferGetPage(buffer);
        metap = HashPageGetMeta(page);
        metap->hashm_ntuples += 1;

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * replay addition of overflow page for hash index
 */
static void
hash_xlog_add_ovfl_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
    Buffer      leftbuf;
    Buffer      ovflbuf;
    Buffer      metabuf;
    BlockNumber leftblk;
    BlockNumber rightblk;
    BlockNumber newmapblk = InvalidBlockNumber;
    Page        ovflpage;
    HashPageOpaque ovflopaque;
    uint32     *num_bucket;
    char       *data;
    Size        datalen PG_USED_FOR_ASSERTS_ONLY;
    bool        new_bmpage = false;

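    /*
     * Block references: 0 is the new overflow page, 1 is the page it is
     * linked after, 2 is the bitmap page whose free bit gets set (if any),
     * 3 is a newly added bitmap page (if any), and 4 is the metapage.
     */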
    XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
    XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);

    ovflbuf = XLogInitBufferForRedo(record, 0);
    Assert(BufferIsValid(ovflbuf));

    data = XLogRecGetBlockData(record, 0, &datalen);
    num_bucket = (uint32 *) data;
    Assert(datalen == sizeof(uint32));
    _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
                  true);
    /* update backlink */
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = HashPageGetOpaque(ovflpage);
    ovflopaque->hasho_prevblkno = leftblk;

    PageSetLSN(ovflpage, lsn);
    MarkBufferDirty(ovflbuf);

    if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
    {
        Page        leftpage;
        HashPageOpaque leftopaque;

        leftpage = BufferGetPage(leftbuf);
        leftopaque = HashPageGetOpaque(leftpage);
        leftopaque->hasho_nextblkno = rightblk;

        PageSetLSN(leftpage, lsn);
        MarkBufferDirty(leftbuf);
    }

    if (BufferIsValid(leftbuf))
        UnlockReleaseBuffer(leftbuf);
    UnlockReleaseBuffer(ovflbuf);

    /*
     * Note: in normal operation, we'd update the bitmap and meta page while
     * still holding lock on the overflow pages.  But during replay it's not
     * necessary to hold those locks, since no other index updates can be
     * happening concurrently.
     */
    if (XLogRecHasBlockRef(record, 2))
    {
        Buffer      mapbuffer;

        if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
        {
            Page        mappage = (Page) BufferGetPage(mapbuffer);
            uint32     *freep = NULL;
            uint32     *bitmap_page_bit;

            freep = HashPageGetBitmap(mappage);

            data = XLogRecGetBlockData(record, 2, &datalen);
            bitmap_page_bit = (uint32 *) data;

            SETBIT(freep, *bitmap_page_bit);

            PageSetLSN(mappage, lsn);
            MarkBufferDirty(mapbuffer);
        }
        if (BufferIsValid(mapbuffer))
            UnlockReleaseBuffer(mapbuffer);
    }

    if (XLogRecHasBlockRef(record, 3))
    {
        Buffer      newmapbuf;

        newmapbuf = XLogInitBufferForRedo(record, 3);

        _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);

        new_bmpage = true;
        newmapblk = BufferGetBlockNumber(newmapbuf);

        MarkBufferDirty(newmapbuf);
        PageSetLSN(BufferGetPage(newmapbuf), lsn);

        UnlockReleaseBuffer(newmapbuf);
    }

    if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
    {
        HashMetaPage metap;
        Page        page;
        uint32     *firstfree_ovflpage;

        data = XLogRecGetBlockData(record, 4, &datalen);
        firstfree_ovflpage = (uint32 *) data;

        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);
        metap->hashm_firstfree = *firstfree_ovflpage;

        if (!xlrec->bmpage_found)
        {
            metap->hashm_spares[metap->hashm_ovflpoint]++;

            if (new_bmpage)
            {
                Assert(BlockNumberIsValid(newmapblk));

                metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
                metap->hashm_nmaps++;
                metap->hashm_spares[metap->hashm_ovflpoint]++;
            }
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay allocation of page for split operation
 */
static void
hash_xlog_split_allocate_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
    Buffer      oldbuf;
    Buffer      newbuf;
    Buffer      metabuf;
    Size        datalen PG_USED_FOR_ASSERTS_ONLY;
    char       *data;
    XLogRedoAction action;

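    /*
     * Block references: 0 is the bucket being split, 1 is the new bucket,
     * and 2 is the metapage recording the changed masks and spares.
     */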
    /*
     * To be consistent with normal operation, here we take cleanup locks on
     * both the old and new buckets even though there can't be any concurrent
     * inserts.
     */

    /* replay the record for old bucket */
    action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the special space is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        oldpage;
        HashPageOpaque oldopaque;

        oldpage = BufferGetPage(oldbuf);
        oldopaque = HashPageGetOpaque(oldpage);

        oldopaque->hasho_flag = xlrec->old_bucket_flag;
        oldopaque->hasho_prevblkno = xlrec->new_bucket;

        PageSetLSN(oldpage, lsn);
        MarkBufferDirty(oldbuf);
    }

    /* replay the record for new bucket */
    XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
                                  &newbuf);
    _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
                  xlrec->new_bucket_flag, true);
    MarkBufferDirty(newbuf);
    PageSetLSN(BufferGetPage(newbuf), lsn);

    /*
     * We could release the lock on the old bucket early, but we release it
     * here to stay consistent with normal operation.
     */
    if (BufferIsValid(oldbuf))
        UnlockReleaseBuffer(oldbuf);
    if (BufferIsValid(newbuf))
        UnlockReleaseBuffer(newbuf);

    /*
     * Note: in normal operation, we'd update the meta page while still
     * holding lock on the old and new bucket pages.  But during replay it's
     * not necessary to hold those locks, since no other bucket splits can be
     * happening concurrently.
     */

    /* replay the record for metapage changes */
    if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
    {
        Page        page;
        HashMetaPage metap;

        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);
        metap->hashm_maxbucket = xlrec->new_bucket;

        data = XLogRecGetBlockData(record, 2, &datalen);

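        /*
         * The payload carries a (lowmask, highmask) pair and then an
         * (ovflpoint, spares) pair; xlrec->flags says which are present.
         */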
        if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
        {
            uint32      lowmask;
            uint32     *highmask;

            /* extract low and high masks. */
            memcpy(&lowmask, data, sizeof(uint32));
            highmask = (uint32 *) ((char *) data + sizeof(uint32));

            /* update metapage */
            metap->hashm_lowmask = lowmask;
            metap->hashm_highmask = *highmask;

            data += sizeof(uint32) * 2;
        }

        if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
        {
            uint32      ovflpoint;
            uint32     *ovflpages;

            /* extract information of overflow pages. */
            memcpy(&ovflpoint, data, sizeof(uint32));
            ovflpages = (uint32 *) ((char *) data + sizeof(uint32));

            /* update metapage */
            metap->hashm_spares[ovflpoint] = *ovflpages;
            metap->hashm_ovflpoint = ovflpoint;
        }

        MarkBufferDirty(metabuf);
        PageSetLSN(BufferGetPage(metabuf), lsn);
    }

    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay of split operation
 */
static void
hash_xlog_split_page(XLogReaderState *record)
{
    Buffer      buf;

    if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
        elog(ERROR, "Hash split record did not contain a full-page image");

    UnlockReleaseBuffer(buf);
}

/*
 * replay completion of split operation
 */
static void
hash_xlog_split_complete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
    Buffer      oldbuf;
    Buffer      newbuf;
    XLogRedoAction action;

    /* replay the record for old bucket */
    action = XLogReadBufferForRedo(record, 0, &oldbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the bucket flag is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        oldpage;
        HashPageOpaque oldopaque;

        oldpage = BufferGetPage(oldbuf);
        oldopaque = HashPageGetOpaque(oldpage);

        oldopaque->hasho_flag = xlrec->old_bucket_flag;

        PageSetLSN(oldpage, lsn);
        MarkBufferDirty(oldbuf);
    }
    if (BufferIsValid(oldbuf))
        UnlockReleaseBuffer(oldbuf);

    /* replay the record for new bucket */
    action = XLogReadBufferForRedo(record, 1, &newbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the bucket flag is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        newpage;
        HashPageOpaque nopaque;

        newpage = BufferGetPage(newbuf);
        nopaque = HashPageGetOpaque(newpage);

        nopaque->hasho_flag = xlrec->new_bucket_flag;

        PageSetLSN(newpage, lsn);
        MarkBufferDirty(newbuf);
    }
    if (BufferIsValid(newbuf))
        UnlockReleaseBuffer(newbuf);
}

/*
 * replay move of page contents for squeeze operation of hash index
 */
static void
hash_xlog_move_page_contents(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      writebuf = InvalidBuffer;
    Buffer      deletebuf = InvalidBuffer;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and no scan can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_prim_bucket_same_wrt)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
    else
    {
        /*
         * We don't care about the return value: the point of reading
         * bucketbuf is just to take a cleanup lock on the primary bucket
         * page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        action = XLogReadBufferForRedo(record, 1, &writebuf);
    }

    /* replay the record for adding entries in overflow buffer */
    if (action == BLK_NEEDS_REDO)
    {
        Page        writepage;
        char       *begin;
        char       *data;
        Size        datalen;
        uint16      ninserted = 0;

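        /*
         * The block-1 payload is an array of ntups target offsets followed
         * by the tuples themselves; the loop below re-adds each tuple at its
         * recorded offset.
         */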
        data = begin = XLogRecGetBlockData(record, 1, &datalen);

        writepage = (Page) BufferGetPage(writebuf);

        if (xldata->ntups > 0)
        {
            OffsetNumber *towrite = (OffsetNumber *) data;

            data += sizeof(OffsetNumber) * xldata->ntups;

            while (data - begin < datalen)
            {
                IndexTuple  itup = (IndexTuple) data;
                Size        itemsz;
                OffsetNumber l;

                itemsz = IndexTupleSize(itup);
                itemsz = MAXALIGN(itemsz);

                data += itemsz;

                l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
                if (l == InvalidOffsetNumber)
                    elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
                         (int) itemsz);

                ninserted++;
            }
        }

        /*
         * The number of tuples inserted must match the number requested in
         * the REDO record.
         */
        Assert(ninserted == xldata->ntups);

        PageSetLSN(writepage, lsn);
        MarkBufferDirty(writebuf);
    }

    /* replay the record for deleting entries from overflow buffer */
    if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
    {
        Page        page;
        char       *ptr;
        Size        len;

        ptr = XLogRecGetBlockData(record, 2, &len);

        page = (Page) BufferGetPage(deletebuf);

        if (len > 0)
        {
            OffsetNumber *unused;
            OffsetNumber *unend;

            unused = (OffsetNumber *) ptr;
            unend = (OffsetNumber *) ((char *) ptr + len);

            if ((unend - unused) > 0)
                PageIndexMultiDelete(page, unused, unend - unused);
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(deletebuf);
    }

    /*
     * Replay is complete, so we can release the buffers.  We hold the lock
     * on the primary bucket page until the end of the operation.  We could
     * release the lock on the write buffer as soon as we are done with it,
     * when it is not the primary bucket page, but that doesn't seem worth
     * complicating the code for.
     */
    if (BufferIsValid(deletebuf))
        UnlockReleaseBuffer(deletebuf);

    if (BufferIsValid(writebuf))
        UnlockReleaseBuffer(writebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);
}

/*
 * replay squeeze page operation of hash index
 */
static void
hash_xlog_squeeze_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      writebuf = InvalidBuffer;
    Buffer      ovflbuf;
    Buffer      prevbuf = InvalidBuffer;
    Buffer      mapbuf;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and no scan can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_prim_bucket_same_wrt)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
    else
    {
        /*
         * We don't care about the return value: the point of reading
         * bucketbuf is just to take a cleanup lock on the primary bucket
         * page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
            action = XLogReadBufferForRedo(record, 1, &writebuf);
        else
            action = BLK_NOTFOUND;
    }

    /* replay the record for adding entries in overflow buffer */
    if (action == BLK_NEEDS_REDO)
    {
        Page        writepage;
        char       *begin;
        char       *data;
        Size        datalen;
        uint16      ninserted = 0;
        bool        mod_wbuf = false;

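        /*
         * As in hash_xlog_move_page_contents() above, the block-1 payload is
         * an array of ntups target offsets followed by the tuples being
         * moved off the freed overflow page.
         */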
        data = begin = XLogRecGetBlockData(record, 1, &datalen);

        writepage = (Page) BufferGetPage(writebuf);

        if (xldata->ntups > 0)
        {
            OffsetNumber *towrite = (OffsetNumber *) data;

            data += sizeof(OffsetNumber) * xldata->ntups;

            while (data - begin < datalen)
            {
                IndexTuple  itup = (IndexTuple) data;
                Size        itemsz;
                OffsetNumber l;

                itemsz = IndexTupleSize(itup);
                itemsz = MAXALIGN(itemsz);

                data += itemsz;

                l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
                if (l == InvalidOffsetNumber)
                    elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
                         (int) itemsz);

                ninserted++;
            }

            mod_wbuf = true;
        }
        else
        {
            /*
             * Ensure that the required flags are set when there are no
             * tuples.  See _hash_freeovflpage().
             */
            Assert(xldata->is_prim_bucket_same_wrt ||
                   xldata->is_prev_bucket_same_wrt);
        }

        /*
         * The number of tuples inserted must match the number requested in
         * the REDO record.
         */
        Assert(ninserted == xldata->ntups);

        /*
         * If the page we are adding tuples to precedes the freed overflow
         * page, update its nextblkno.
         */
        if (xldata->is_prev_bucket_same_wrt)
        {
            HashPageOpaque writeopaque = HashPageGetOpaque(writepage);

            writeopaque->hasho_nextblkno = xldata->nextblkno;
            mod_wbuf = true;
        }

        /* Set LSN and mark writebuf dirty iff it is modified */
        if (mod_wbuf)
        {
            PageSetLSN(writepage, lsn);
            MarkBufferDirty(writebuf);
        }
    }

    /* replay the record for initializing overflow buffer */
    if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
    {
        Page        ovflpage;
        HashPageOpaque ovflopaque;

        ovflpage = BufferGetPage(ovflbuf);

        _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));

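        /*
         * Reset the special space the way _hash_freeovflpage() leaves a
         * freed overflow page: unlinked, unowned, and flagged LH_UNUSED_PAGE.
         */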
        ovflopaque = HashPageGetOpaque(ovflpage);

        ovflopaque->hasho_prevblkno = InvalidBlockNumber;
        ovflopaque->hasho_nextblkno = InvalidBlockNumber;
        ovflopaque->hasho_bucket = InvalidBucket;
        ovflopaque->hasho_flag = LH_UNUSED_PAGE;
        ovflopaque->hasho_page_id = HASHO_PAGE_ID;

        PageSetLSN(ovflpage, lsn);
        MarkBufferDirty(ovflbuf);
    }
    if (BufferIsValid(ovflbuf))
        UnlockReleaseBuffer(ovflbuf);

    /* replay the record for page previous to the freed overflow page */
    if (!xldata->is_prev_bucket_same_wrt &&
        XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
    {
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);

        prevopaque->hasho_nextblkno = xldata->nextblkno;

        PageSetLSN(prevpage, lsn);
        MarkBufferDirty(prevbuf);
    }
    if (BufferIsValid(prevbuf))
        UnlockReleaseBuffer(prevbuf);

    /* replay the record for page next to the freed overflow page */
    if (XLogRecHasBlockRef(record, 4))
    {
        Buffer      nextbuf;

        if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
        {
            Page        nextpage = BufferGetPage(nextbuf);
            HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);

            nextopaque->hasho_prevblkno = xldata->prevblkno;

            PageSetLSN(nextpage, lsn);
            MarkBufferDirty(nextbuf);
        }
        if (BufferIsValid(nextbuf))
            UnlockReleaseBuffer(nextbuf);
    }

    if (BufferIsValid(writebuf))
        UnlockReleaseBuffer(writebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);

    /*
     * Note: in normal operation, we'd update the bitmap and meta page while
     * still holding lock on the primary bucket page and overflow pages.  But
     * during replay it's not necessary to hold those locks, since no other
     * index updates can be happening concurrently.
     */
    /* replay the record for bitmap page */
    if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
    {
        Page        mappage = (Page) BufferGetPage(mapbuf);
        uint32     *freep = NULL;
        char       *data;
        uint32     *bitmap_page_bit;
        Size        datalen;

        freep = HashPageGetBitmap(mappage);

        data = XLogRecGetBlockData(record, 5, &datalen);
        bitmap_page_bit = (uint32 *) data;

        CLRBIT(freep, *bitmap_page_bit);

        PageSetLSN(mappage, lsn);
        MarkBufferDirty(mapbuf);
    }
    if (BufferIsValid(mapbuf))
        UnlockReleaseBuffer(mapbuf);

    /* replay the record for meta page */
    if (XLogRecHasBlockRef(record, 6))
    {
        Buffer      metabuf;

        if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
        {
            HashMetaPage metap;
            Page        page;
            char       *data;
            uint32     *firstfree_ovflpage;
            Size        datalen;

            data = XLogRecGetBlockData(record, 6, &datalen);
            firstfree_ovflpage = (uint32 *) data;

            page = BufferGetPage(metabuf);
            metap = HashPageGetMeta(page);
            metap->hashm_firstfree = *firstfree_ovflpage;

            PageSetLSN(page, lsn);
            MarkBufferDirty(metabuf);
        }
        if (BufferIsValid(metabuf))
            UnlockReleaseBuffer(metabuf);
    }
}

/*
 * replay delete operation of hash index
 */
static void
hash_xlog_delete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      deletebuf;
    Page        page;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and no scan can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_primary_bucket_page)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
    else
    {
        /*
         * We don't care about the return value: the point of reading
         * bucketbuf is just to take a cleanup lock on the primary bucket
         * page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        action = XLogReadBufferForRedo(record, 1, &deletebuf);
    }

    /* replay the record for deleting entries in bucket page */
    if (action == BLK_NEEDS_REDO)
    {
        char       *ptr;
        Size        len;

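        /* Block 1's payload is the array of item offsets to remove. */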
        ptr = XLogRecGetBlockData(record, 1, &len);

        page = (Page) BufferGetPage(deletebuf);

        if (len > 0)
        {
            OffsetNumber *unused;
            OffsetNumber *unend;

            unused = (OffsetNumber *) ptr;
            unend = (OffsetNumber *) ((char *) ptr + len);

            if ((unend - unused) > 0)
                PageIndexMultiDelete(page, unused, unend - unused);
        }

        /*
         * Mark the page as not containing any LP_DEAD items only if
         * clear_dead_marking flag is set to true.  See comments in
         * hashbucketcleanup() for details.
         */
        if (xldata->clear_dead_marking)
        {
            HashPageOpaque pageopaque;

            pageopaque = HashPageGetOpaque(page);
            pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(deletebuf);
    }
    if (BufferIsValid(deletebuf))
        UnlockReleaseBuffer(deletebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);
}

/*
 * replay split cleanup flag operation for primary bucket page.
 */
static void
hash_xlog_split_cleanup(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      buffer;
    Page        page;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        HashPageOpaque bucket_opaque;

        page = (Page) BufferGetPage(buffer);

        bucket_opaque = HashPageGetOpaque(page);
        bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * replay for update meta page
 */
static void
hash_xlog_update_meta_page(XLogReaderState *record)
{
    HashMetaPage metap;
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
    Buffer      metabuf;
    Page        page;

    if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);

        metap->hashm_ntuples = xldata->ntuples;

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay delete operation in hash index to remove
 * tuples marked as DEAD during index tuple insertion.
 */
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_vacuum_one_page *xldata;
    Buffer      buffer;
    Buffer      metabuf;
    Page        page;
    XLogRedoAction action;
    HashPageOpaque pageopaque;
    OffsetNumber *toDelete;

    xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
    toDelete = xldata->offsets;

    /*
     * If we have any conflict processing to do, it must happen before we
     * update the page.
     *
     * Hash index records that are marked as LP_DEAD and are removed during
     * hash index tuple insertion can conflict with standby queries.  You
     * might think that vacuum records would conflict as well, but we've
     * handled that already.  XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide
     * the highest xid cleaned by the vacuum of the heap, so we can resolve
     * all conflicts at once when that record arrives.  After that we know
     * that no conflicts can arise from individual hash index vacuum records
     * on that index.
     */
    if (InHotStandby)
    {
        RelFileLocator rlocator;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
        ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
                                            xldata->isCatalogRel,
                                            rlocator);
    }

    action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);

    if (action == BLK_NEEDS_REDO)
    {
        page = (Page) BufferGetPage(buffer);

        PageIndexMultiDelete(page, toDelete, xldata->ntuples);

        /*
         * Mark the page as not containing any LP_DEAD items.  See comments
         * in _hash_vacuum_one_page() for details.
         */
        pageopaque = HashPageGetOpaque(page);
        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
    {
        Page        metapage;
        HashMetaPage metap;

        metapage = BufferGetPage(metabuf);
        metap = HashPageGetMeta(metapage);

        metap->hashm_ntuples -= xldata->ntuples;

        PageSetLSN(metapage, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

void
hash_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info)
    {
        case XLOG_HASH_INIT_META_PAGE:
            hash_xlog_init_meta_page(record);
            break;
        case XLOG_HASH_INIT_BITMAP_PAGE:
            hash_xlog_init_bitmap_page(record);
            break;
        case XLOG_HASH_INSERT:
            hash_xlog_insert(record);
            break;
        case XLOG_HASH_ADD_OVFL_PAGE:
            hash_xlog_add_ovfl_page(record);
            break;
        case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
            hash_xlog_split_allocate_page(record);
            break;
        case XLOG_HASH_SPLIT_PAGE:
            hash_xlog_split_page(record);
            break;
        case XLOG_HASH_SPLIT_COMPLETE:
            hash_xlog_split_complete(record);
            break;
        case XLOG_HASH_MOVE_PAGE_CONTENTS:
            hash_xlog_move_page_contents(record);
            break;
        case XLOG_HASH_SQUEEZE_PAGE:
            hash_xlog_squeeze_page(record);
            break;
        case XLOG_HASH_DELETE:
            hash_xlog_delete(record);
            break;
        case XLOG_HASH_SPLIT_CLEANUP:
            hash_xlog_split_cleanup(record);
            break;
        case XLOG_HASH_UPDATE_META_PAGE:
            hash_xlog_update_meta_page(record);
            break;
        case XLOG_HASH_VACUUM_ONE_PAGE:
            hash_xlog_vacuum_one_page(record);
            break;
        default:
            elog(PANIC, "hash_redo: unknown op code %u", info);
    }
}

/*
 * Mask a hash page before performing consistency checks on it.
 */
void
hash_mask(char *pagedata, BlockNumber blkno)
{
    Page        page = (Page) pagedata;
    HashPageOpaque opaque;
    int         pagetype;

    mask_page_lsn_and_checksum(page);

    mask_page_hint_bits(page);
    mask_unused_space(page);

    opaque = HashPageGetOpaque(page);

    pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
    if (pagetype == LH_UNUSED_PAGE)
    {
        /*
         * Mask everything on an UNUSED page.
         */
        mask_page_content(page);
    }
    else if (pagetype == LH_BUCKET_PAGE ||
             pagetype == LH_OVERFLOW_PAGE)
    {
        /*
         * In hash bucket and overflow pages, it is possible to modify the
         * LP_FLAGS without emitting any WAL record.  Hence, mask the line
         * pointer flags.  See hashgettuple(), _hash_kill_items() for
         * details.
         */
        mask_lp_flags(page);
    }

    /*
     * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
     * unlogged.  So, mask it.  See _hash_kill_items() for details.
     */
    opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
}
Definition: xlogutils.h:74