PostgreSQL Source Code  git master
hashinsert.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashinsert.c
4  * Item insertion in hash tables for Postgres.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashinsert.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/hash.h"
19 #include "access/hash_xlog.h"
20 #include "miscadmin.h"
21 #include "utils/rel.h"
22 #include "storage/lwlock.h"
23 #include "storage/buf_internals.h"
24 #include "storage/predicate.h"
25 
26 static void _hash_vacuum_one_page(Relation rel, Relation hrel,
27  Buffer metabuf, Buffer buf);
28 
29 /*
30  * _hash_doinsert() -- Handle insertion of a single index tuple.
31  *
32  * This routine is called by the public interface routines, hashbuild
33  * and hashinsert. By here, itup is completely filled in.
34  */
35 void
37 {
39  Buffer bucket_buf;
40  Buffer metabuf;
41  HashMetaPage metap;
42  HashMetaPage usedmetap = NULL;
43  Page metapage;
44  Page page;
45  HashPageOpaque pageopaque;
46  Size itemsz;
47  bool do_expand;
48  uint32 hashkey;
49  Bucket bucket;
50  OffsetNumber itup_off;
51 
52  /*
53  * Get the hash key for the item (it's stored in the index tuple itself).
54  */
55  hashkey = _hash_get_indextuple_hashkey(itup);
56 
57  /* compute item size too */
58  itemsz = IndexTupleSize(itup);
59  itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
60  * need to be consistent */
61 
62 restart_insert:
63 
64  /*
65  * Read the metapage. We don't lock it yet; HashMaxItemSize() will
66  * examine pd_pagesize_version, but that can't change so we can examine it
67  * without a lock.
68  */
70  metapage = BufferGetPage(metabuf);
71 
72  /*
73  * Check whether the item can fit on a hash page at all. (Eventually, we
74  * ought to try to apply TOAST methods if not.) Note that at this point,
75  * itemsz doesn't include the ItemId.
76  *
77  * XXX this is useless code if we are only storing hash keys.
78  */
79  if (itemsz > HashMaxItemSize(metapage))
80  ereport(ERROR,
81  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
82  errmsg("index row size %zu exceeds hash maximum %zu",
83  itemsz, HashMaxItemSize(metapage)),
84  errhint("Values larger than a buffer page cannot be indexed.")));
85 
86  /* Lock the primary bucket page for the target bucket. */
87  buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
88  &usedmetap);
89  Assert(usedmetap != NULL);
90 
91  CheckForSerializableConflictIn(rel, NULL, buf);
92 
93  /* remember the primary bucket buffer to release the pin on it at end. */
94  bucket_buf = buf;
95 
96  page = BufferGetPage(buf);
97  pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
98  bucket = pageopaque->hasho_bucket;
99 
100  /*
101  * If this bucket is in the process of being split, try to finish the
102  * split before inserting, because that might create room for the
103  * insertion to proceed without allocating an additional overflow page.
104  * It's only interesting to finish the split if we're trying to insert
105  * into the bucket from which we're removing tuples (the "old" bucket),
106  * not if we're trying to insert into the bucket into which tuples are
107  * being moved (the "new" bucket).
108  */
109  if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
110  {
111  /* release the lock on bucket buffer, before completing the split. */
113 
114  _hash_finish_split(rel, metabuf, buf, bucket,
115  usedmetap->hashm_maxbucket,
116  usedmetap->hashm_highmask,
117  usedmetap->hashm_lowmask);
118 
119  /* release the pin on old and meta buffer. retry for insert. */
120  _hash_dropbuf(rel, buf);
121  _hash_dropbuf(rel, metabuf);
122  goto restart_insert;
123  }
124 
125  /* Do the insertion */
126  while (PageGetFreeSpace(page) < itemsz)
127  {
128  BlockNumber nextblkno;
129 
130  /*
131  * Check if current page has any DEAD tuples. If yes, delete these
132  * tuples and see if we can get a space for the new item to be
133  * inserted before moving to the next page in the bucket chain.
134  */
135  if (H_HAS_DEAD_TUPLES(pageopaque))
136  {
137 
138  if (IsBufferCleanupOK(buf))
139  {
140  _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
141 
142  if (PageGetFreeSpace(page) >= itemsz)
143  break; /* OK, now we have enough space */
144  }
145  }
146 
147  /*
148  * no space on this page; check for an overflow page
149  */
150  nextblkno = pageopaque->hasho_nextblkno;
151 
152  if (BlockNumberIsValid(nextblkno))
153  {
154  /*
155  * ovfl page exists; go get it. if it doesn't have room, we'll
156  * find out next pass through the loop test above. we always
157  * release both the lock and pin if this is an overflow page, but
158  * only the lock if this is the primary bucket page, since the pin
159  * on the primary bucket must be retained throughout the scan.
160  */
161  if (buf != bucket_buf)
162  _hash_relbuf(rel, buf);
163  else
165  buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
166  page = BufferGetPage(buf);
167  }
168  else
169  {
170  /*
171  * we're at the end of the bucket chain and we haven't found a
172  * page with enough room. allocate a new overflow page.
173  */
174 
175  /* release our write lock without modifying buffer */
177 
178  /* chain to a new overflow page */
179  buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf) ? true : false);
180  page = BufferGetPage(buf);
181 
182  /* should fit now, given test above */
183  Assert(PageGetFreeSpace(page) >= itemsz);
184  }
185  pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
186  Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
187  Assert(pageopaque->hasho_bucket == bucket);
188  }
189 
190  /*
191  * Write-lock the metapage so we can increment the tuple count. After
192  * incrementing it, check to see if it's time for a split.
193  */
195 
196  /* Do the update. No ereport(ERROR) until changes are logged */
198 
199  /* found page with enough space, so add the item here */
200  itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
201  MarkBufferDirty(buf);
202 
203  /* metapage operations */
204  metap = HashPageGetMeta(metapage);
205  metap->hashm_ntuples += 1;
206 
207  /* Make sure this stays in sync with _hash_expandtable() */
208  do_expand = metap->hashm_ntuples >
209  (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
210 
211  MarkBufferDirty(metabuf);
212 
213  /* XLOG stuff */
214  if (RelationNeedsWAL(rel))
215  {
216  xl_hash_insert xlrec;
217  XLogRecPtr recptr;
218 
219  xlrec.offnum = itup_off;
220 
221  XLogBeginInsert();
222  XLogRegisterData((char *) &xlrec, SizeOfHashInsert);
223 
224  XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
225 
227  XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
228 
229  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
230 
231  PageSetLSN(BufferGetPage(buf), recptr);
232  PageSetLSN(BufferGetPage(metabuf), recptr);
233  }
234 
236 
237  /* drop lock on metapage, but keep pin */
238  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
239 
240  /*
241  * Release the modified page and ensure to release the pin on primary
242  * page.
243  */
244  _hash_relbuf(rel, buf);
245  if (buf != bucket_buf)
246  _hash_dropbuf(rel, bucket_buf);
247 
248  /* Attempt to split if a split is needed */
249  if (do_expand)
250  _hash_expandtable(rel, metabuf);
251 
252  /* Finally drop our pin on the metapage */
253  _hash_dropbuf(rel, metabuf);
254 }
255 
256 /*
257  * _hash_pgaddtup() -- add a tuple to a particular page in the index.
258  *
259  * This routine adds the tuple to the page as requested; it does not write out
260  * the page. It is an error to call this function without pin and write lock
261  * on the target buffer.
262  *
263  * Returns the offset number at which the tuple was inserted. This function
264  * is responsible for preserving the condition that tuples in a hash index
265  * page are sorted by hashkey value.
266  */
269 {
270  OffsetNumber itup_off;
271  Page page;
272  uint32 hashkey;
273 
275  page = BufferGetPage(buf);
276 
277  /* Find where to insert the tuple (preserving page's hashkey ordering) */
278  hashkey = _hash_get_indextuple_hashkey(itup);
279  itup_off = _hash_binsearch(page, hashkey);
280 
281  if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
283  elog(ERROR, "failed to add index item to \"%s\"",
285 
286  return itup_off;
287 }
288 
289 /*
290  * _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
291  * index.
292  *
293  * This routine has same requirements for locking and tuple ordering as
294  * _hash_pgaddtup().
295  *
296  * Returns the offset number array at which the tuples were inserted.
297  */
298 void
300  OffsetNumber *itup_offsets, uint16 nitups)
301 {
302  OffsetNumber itup_off;
303  Page page;
304  uint32 hashkey;
305  int i;
306 
308  page = BufferGetPage(buf);
309 
310  for (i = 0; i < nitups; i++)
311  {
312  Size itemsize;
313 
314  itemsize = IndexTupleSize(itups[i]);
315  itemsize = MAXALIGN(itemsize);
316 
317  /* Find where to insert the tuple (preserving page's hashkey ordering) */
318  hashkey = _hash_get_indextuple_hashkey(itups[i]);
319  itup_off = _hash_binsearch(page, hashkey);
320 
321  itup_offsets[i] = itup_off;
322 
323  if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
325  elog(ERROR, "failed to add index item to \"%s\"",
327  }
328 }
329 
330 /*
331  * _hash_vacuum_one_page - vacuum just one index page.
332  *
333  * Try to remove LP_DEAD items from the given page. We must acquire cleanup
334  * lock on the page being modified before calling this function.
335  */
336 
337 static void
339 {
340  OffsetNumber deletable[MaxOffsetNumber];
341  int ndeletable = 0;
342  OffsetNumber offnum,
343  maxoff;
344  Page page = BufferGetPage(buf);
345  HashPageOpaque pageopaque;
346  HashMetaPage metap;
347 
348  /* Scan each tuple in page to see if it is marked as LP_DEAD */
349  maxoff = PageGetMaxOffsetNumber(page);
350  for (offnum = FirstOffsetNumber;
351  offnum <= maxoff;
352  offnum = OffsetNumberNext(offnum))
353  {
354  ItemId itemId = PageGetItemId(page, offnum);
355 
356  if (ItemIdIsDead(itemId))
357  deletable[ndeletable++] = offnum;
358  }
359 
360  if (ndeletable > 0)
361  {
362  TransactionId latestRemovedXid;
363 
364  latestRemovedXid =
366  deletable, ndeletable);
367 
368  /*
369  * Write-lock the meta page so that we can decrement tuple count.
370  */
372 
373  /* No ereport(ERROR) until changes are logged */
375 
376  PageIndexMultiDelete(page, deletable, ndeletable);
377 
378  /*
379  * Mark the page as not containing any LP_DEAD items. This is not
380  * certainly true (there might be some that have recently been marked,
381  * but weren't included in our target-item list), but it will almost
382  * always be true and it doesn't seem worth an additional page scan to
383  * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
384  * anyway.
385  */
386  pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
387  pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
388 
389  metap = HashPageGetMeta(BufferGetPage(metabuf));
390  metap->hashm_ntuples -= ndeletable;
391 
392  MarkBufferDirty(buf);
393  MarkBufferDirty(metabuf);
394 
395  /* XLOG stuff */
396  if (RelationNeedsWAL(rel))
397  {
399  XLogRecPtr recptr;
400 
401  xlrec.latestRemovedXid = latestRemovedXid;
402  xlrec.ntuples = ndeletable;
403 
404  XLogBeginInsert();
406  XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);
407 
408  /*
409  * We need the target-offsets array whether or not we store the
410  * whole buffer, to allow us to find the latestRemovedXid on a
411  * standby server.
412  */
413  XLogRegisterData((char *) deletable,
414  ndeletable * sizeof(OffsetNumber));
415 
416  XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
417 
418  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);
419 
420  PageSetLSN(BufferGetPage(buf), recptr);
421  PageSetLSN(BufferGetPage(metabuf), recptr);
422  }
423 
425 
426  /*
427  * Releasing write lock on meta page as we have updated the tuple
428  * count.
429  */
430  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
431  }
432 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
#define HashMaxItemSize(page)
Definition: hash.h:269
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:86
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition: hashinsert.c:299
#define XLOG_HASH_INSERT
Definition: hash_xlog.h:29
void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
Definition: hashinsert.c:36
int errhint(const char *fmt,...)
Definition: elog.c:974
uint32 TransactionId
Definition: c.h:507
#define LH_META_PAGE
Definition: hash.h:56
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define MaxOffsetNumber
Definition: off.h:28
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
uint16 hashm_ffactor
Definition: hash.h:246
Pointer Item
Definition: item.h:17
uint32 hashm_highmask
Definition: hash.h:252
#define InvalidBuffer
Definition: buf.h:25
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:570
Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)
Definition: hashpage.c:1558
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
uint32 BlockNumber
Definition: block.h:31
void _hash_dropbuf(Relation rel, Buffer buf)
Definition: hashpage.c:276
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition: hashpage.c:69
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:88
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
uint32 hashm_lowmask
Definition: hash.h:253
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer)
Definition: predicate.c:4442
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:581
uint16 OffsetNumber
Definition: off.h:24
#define LH_PAGE_TYPE
Definition: hash.h:62
uint32 Bucket
Definition: hash.h:34
unsigned short uint16
Definition: c.h:357
#define SizeOfHashInsert
Definition: hash_xlog.h:67
#define ERROR
Definition: elog.h:43
uint32 _hash_get_indextuple_hashkey(IndexTuple itup)
Definition: hashutil.c:299
void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition: hashpage.c:1354
static char * buf
Definition: pg_test_fsync.c:68
#define HASH_WRITE
Definition: hash.h:322
#define HASH_NOLOCK
Definition: hash.h:323
#define FirstOffsetNumber
Definition: off.h:27
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:450
unsigned int uint32
Definition: c.h:358
void _hash_expandtable(Relation rel, Buffer metabuf)
Definition: hashpage.c:616
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
#define ereport(elevel, rest)
Definition: elog.h:141
bool IsBufferCleanupOK(Buffer buffer)
Definition: bufmgr.c:3818
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define SizeOfHashVacuumOnePage
Definition: hash_xlog.h:259
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition: hashutil.c:225
#define HASH_METAPAGE
Definition: hash.h:195
double hashm_ntuples
Definition: hash.h:245
#define LH_OVERFLOW_PAGE
Definition: hash.h:53
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3590
#define InvalidOffsetNumber
Definition: off.h:26
void _hash_relbuf(Relation rel, Buffer buf)
Definition: hashpage.c:265
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition: hashutil.c:358
#define LH_BUCKET_PAGE
Definition: hash.h:54
#define H_BUCKET_BEING_SPLIT(opaque)
Definition: hash.h:88
#define XLOG_HASH_VACUUM_ONE_PAGE
Definition: hash_xlog.h:45
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
Bucket hasho_bucket
Definition: hash.h:80
TransactionId latestRemovedXid
Definition: hash_xlog.h:253
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:835
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:466
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
HashPageOpaqueData * HashPageOpaque
Definition: hash.h:85
OffsetNumber offnum
Definition: hash_xlog.h:64
Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
Definition: hashovfl.c:111
#define MAXALIGN(LEN)
Definition: c.h:685
#define RelationNeedsWAL(relation)
Definition: rel.h:518
uint32 hashm_maxbucket
Definition: hash.h:251
OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
Definition: hashinsert.c:268
uint16 hasho_flag
Definition: hash.h:81
#define HashPageGetMeta(page)
Definition: hash.h:305
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define elog(elevel,...)
Definition: elog.h:226
int i
BlockNumber hasho_nextblkno
Definition: hash.h:79
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define H_HAS_DEAD_TUPLES(opaque)
Definition: hash.h:90
static void _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
Definition: hashinsert.c:338
#define LH_PAGE_HAS_DEAD_TUPLES
Definition: hash.h:60
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition: genam.c:281