PostgreSQL Source Code git master
hashinsert.c File Reference
#include "postgres.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/predicate.h"
#include "utils/rel.h"
Include dependency graph for hashinsert.c:

Go to the source code of this file.

Functions

static void _hash_vacuum_one_page (Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
 
void _hash_doinsert (Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
 
OffsetNumber _hash_pgaddtup (Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool appendtup)
 
void _hash_pgaddmultitup (Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
 

Function Documentation

◆ _hash_doinsert()

void _hash_doinsert ( Relation  rel,
IndexTuple  itup,
Relation  heapRel,
bool  sorted 
)

Definition at line 38 of file hashinsert.c.

39{
41 Buffer bucket_buf;
42 Buffer metabuf;
43 HashMetaPage metap;
44 HashMetaPage usedmetap = NULL;
45 Page metapage;
46 Page page;
47 HashPageOpaque pageopaque;
48 Size itemsz;
49 bool do_expand;
50 uint32 hashkey;
51 Bucket bucket;
52 OffsetNumber itup_off;
53
54 /*
55 * Get the hash key for the item (it's stored in the index tuple itself).
56 */
57 hashkey = _hash_get_indextuple_hashkey(itup);
58
59 /* compute item size too */
60 itemsz = IndexTupleSize(itup);
61 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
62 * need to be consistent */
63
64restart_insert:
65
66 /*
67 * Read the metapage. We don't lock it yet; HashMaxItemSize() will
68 * examine pd_pagesize_version, but that can't change so we can examine it
69 * without a lock.
70 */
72 metapage = BufferGetPage(metabuf);
73
74 /*
75 * Check whether the item can fit on a hash page at all. (Eventually, we
76 * ought to try to apply TOAST methods if not.) Note that at this point,
77 * itemsz doesn't include the ItemId.
78 *
79 * XXX this is useless code if we are only storing hash keys.
80 */
81 if (itemsz > HashMaxItemSize(metapage))
83 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
84 errmsg("index row size %zu exceeds hash maximum %zu",
85 itemsz, HashMaxItemSize(metapage)),
86 errhint("Values larger than a buffer page cannot be indexed.")));
87
88 /* Lock the primary bucket page for the target bucket. */
90 &usedmetap);
91 Assert(usedmetap != NULL);
92
94
95 /* remember the primary bucket buffer to release the pin on it at end. */
96 bucket_buf = buf;
97
98 page = BufferGetPage(buf);
99 pageopaque = HashPageGetOpaque(page);
100 bucket = pageopaque->hasho_bucket;
101
102 /*
103 * If this bucket is in the process of being split, try to finish the
104 * split before inserting, because that might create room for the
105 * insertion to proceed without allocating an additional overflow page.
106 * It's only interesting to finish the split if we're trying to insert
107 * into the bucket from which we're removing tuples (the "old" bucket),
108 * not if we're trying to insert into the bucket into which tuples are
109 * being moved (the "new" bucket).
110 */
111 if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
112 {
113 /* release the lock on bucket buffer, before completing the split. */
115
116 _hash_finish_split(rel, metabuf, buf, bucket,
117 usedmetap->hashm_maxbucket,
118 usedmetap->hashm_highmask,
119 usedmetap->hashm_lowmask);
120
121 /* release the pin on old and meta buffer. retry for insert. */
122 _hash_dropbuf(rel, buf);
123 _hash_dropbuf(rel, metabuf);
124 goto restart_insert;
125 }
126
127 /* Do the insertion */
128 while (PageGetFreeSpace(page) < itemsz)
129 {
130 BlockNumber nextblkno;
131
132 /*
133 * Check if current page has any DEAD tuples. If yes, delete these
134 * tuples and see if we can get a space for the new item to be
135 * inserted before moving to the next page in the bucket chain.
136 */
137 if (H_HAS_DEAD_TUPLES(pageopaque))
138 {
139
141 {
142 _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
143
144 if (PageGetFreeSpace(page) >= itemsz)
145 break; /* OK, now we have enough space */
146 }
147 }
148
149 /*
150 * no space on this page; check for an overflow page
151 */
152 nextblkno = pageopaque->hasho_nextblkno;
153
154 if (BlockNumberIsValid(nextblkno))
155 {
156 /*
157 * ovfl page exists; go get it. if it doesn't have room, we'll
158 * find out next pass through the loop test above. we always
159 * release both the lock and pin if this is an overflow page, but
160 * only the lock if this is the primary bucket page, since the pin
161 * on the primary bucket must be retained throughout the scan.
162 */
163 if (buf != bucket_buf)
164 _hash_relbuf(rel, buf);
165 else
167 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
168 page = BufferGetPage(buf);
169 }
170 else
171 {
172 /*
173 * we're at the end of the bucket chain and we haven't found a
174 * page with enough room. allocate a new overflow page.
175 */
176
177 /* release our write lock without modifying buffer */
179
180 /* chain to a new overflow page */
181 buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
182 page = BufferGetPage(buf);
183
184 /* should fit now, given test above */
185 Assert(PageGetFreeSpace(page) >= itemsz);
186 }
187 pageopaque = HashPageGetOpaque(page);
189 Assert(pageopaque->hasho_bucket == bucket);
190 }
191
192 /*
193 * Write-lock the metapage so we can increment the tuple count. After
194 * incrementing it, check to see if it's time for a split.
195 */
197
198 /* Do the update. No ereport(ERROR) until changes are logged */
200
201 /* found page with enough space, so add the item here */
202 itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
204
205 /* metapage operations */
206 metap = HashPageGetMeta(metapage);
207 metap->hashm_ntuples += 1;
208
209 /* Make sure this stays in sync with _hash_expandtable() */
210 do_expand = metap->hashm_ntuples >
211 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
212
213 MarkBufferDirty(metabuf);
214
215 /* XLOG stuff */
216 if (RelationNeedsWAL(rel))
217 {
218 xl_hash_insert xlrec;
219 XLogRecPtr recptr;
220
221 xlrec.offnum = itup_off;
222
225
227
229 XLogRegisterBufData(0, itup, IndexTupleSize(itup));
230
231 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
232
233 PageSetLSN(BufferGetPage(buf), recptr);
234 PageSetLSN(BufferGetPage(metabuf), recptr);
235 }
236
238
239 /* drop lock on metapage, but keep pin */
241
242 /*
243 * Release the modified page and ensure to release the pin on primary
244 * page.
245 */
246 _hash_relbuf(rel, buf);
247 if (buf != bucket_buf)
248 _hash_dropbuf(rel, bucket_buf);
249
250 /* Attempt to split if a split is needed */
251 if (do_expand)
252 _hash_expandtable(rel, metabuf);
253
254 /* Finally drop our pin on the metapage */
255 _hash_dropbuf(rel, metabuf);
256}
uint32 BlockNumber
Definition: block.h:31
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3730
bool IsBufferCleanupOK(Buffer buffer)
Definition: bufmgr.c:5383
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2531
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5086
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
Size PageGetFreeSpace(const PageData *page)
Definition: bufpage.c:896
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
PageData * Page
Definition: bufpage.h:82
#define MAXALIGN(LEN)
Definition: c.h:782
uint32_t uint32
Definition: c.h:502
size_t Size
Definition: c.h:576
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define HASH_NOLOCK
Definition: hash.h:341
#define HashPageGetOpaque(page)
Definition: hash.h:88
#define HASH_WRITE
Definition: hash.h:340
#define H_BUCKET_BEING_SPLIT(opaque)
Definition: hash.h:91
#define LH_META_PAGE
Definition: hash.h:57
#define HashPageGetMeta(page)
Definition: hash.h:323
#define HASH_METAPAGE
Definition: hash.h:198
#define H_HAS_DEAD_TUPLES(opaque)
Definition: hash.h:93
#define LH_PAGE_TYPE
Definition: hash.h:63
uint32 Bucket
Definition: hash.h:35
#define HashMaxItemSize(page)
Definition: hash.h:287
#define LH_OVERFLOW_PAGE
Definition: hash.h:54
#define SizeOfHashInsert
Definition: hash_xlog.h:61
#define XLOG_HASH_INSERT
Definition: hash_xlog.h:29
Assert(PointerIsAligned(start, uint64))
OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool appendtup)
Definition: hashinsert.c:274
static void _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
Definition: hashinsert.c:370
Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
Definition: hashovfl.c:112
void _hash_relbuf(Relation rel, Buffer buf)
Definition: hashpage.c:266
void _hash_dropbuf(Relation rel, Buffer buf)
Definition: hashpage.c:277
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition: hashpage.c:70
void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition: hashpage.c:1356
Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)
Definition: hashpage.c:1559
void _hash_expandtable(Relation rel, Buffer metabuf)
Definition: hashpage.c:614
uint32 _hash_get_indextuple_hashkey(IndexTuple itup)
Definition: hashutil.c:291
static Size IndexTupleSize(const IndexTupleData *itup)
Definition: itup.h:71
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:72
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4326
#define RelationNeedsWAL(relation)
Definition: rel.h:635
uint32 hashm_lowmask
Definition: hash.h:256
uint32 hashm_maxbucket
Definition: hash.h:254
double hashm_ntuples
Definition: hash.h:248
uint32 hashm_highmask
Definition: hash.h:255
uint16 hashm_ffactor
Definition: hash.h:249
BlockNumber hasho_nextblkno
Definition: hash.h:80
uint16 hasho_flag
Definition: hash.h:82
Bucket hasho_bucket
Definition: hash.h:81
OffsetNumber offnum
Definition: hash_xlog.h:58
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:35

References _hash_addovflpage(), _hash_dropbuf(), _hash_expandtable(), _hash_finish_split(), _hash_get_indextuple_hashkey(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_pgaddtup(), _hash_relbuf(), _hash_vacuum_one_page(), Assert(), BlockNumberIsValid(), buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictIn(), END_CRIT_SECTION, ereport, errcode(), errhint(), errmsg(), ERROR, H_BUCKET_BEING_SPLIT, H_HAS_DEAD_TUPLES, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMaxItemSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetMeta, HashPageGetOpaque, IndexTupleSize(), InvalidBuffer, IsBufferCleanupOK(), LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MarkBufferDirty(), MAXALIGN, xl_hash_insert::offnum, PageGetFreeSpace(), PageSetLSN(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashInsert, START_CRIT_SECTION, XLOG_HASH_INSERT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _h_indexbuild(), hashbuildCallback(), and hashinsert().

◆ _hash_pgaddmultitup()

void _hash_pgaddmultitup ( Relation  rel,
Buffer  buf,
IndexTuple *  itups,
OffsetNumber *  itup_offsets,
uint16  nitups 
)

Definition at line 331 of file hashinsert.c.

333{
334 OffsetNumber itup_off;
335 Page page;
336 uint32 hashkey;
337 int i;
338
340 page = BufferGetPage(buf);
341
342 for (i = 0; i < nitups; i++)
343 {
344 Size itemsize;
345
346 itemsize = IndexTupleSize(itups[i]);
347 itemsize = MAXALIGN(itemsize);
348
349 /* Find where to insert the tuple (preserving page's hashkey ordering) */
350 hashkey = _hash_get_indextuple_hashkey(itups[i]);
351 itup_off = _hash_binsearch(page, hashkey);
352
353 itup_offsets[i] = itup_off;
354
355 if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
357 elog(ERROR, "failed to add index item to \"%s\"",
359 }
360}
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
#define elog(elevel,...)
Definition: elog.h:225
#define LH_BUCKET_PAGE
Definition: hash.h:55
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition: hashutil.c:350
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition: hashutil.c:210
int i
Definition: isn.c:74
Pointer Item
Definition: item.h:17
#define InvalidOffsetNumber
Definition: off.h:26
#define RelationGetRelationName(relation)
Definition: rel.h:546

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), buf, BufferGetPage(), elog, ERROR, i, IndexTupleSize(), InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAXALIGN, PageAddItem, and RelationGetRelationName.

Referenced by _hash_freeovflpage(), _hash_splitbucket(), and _hash_squeezebucket().

◆ _hash_pgaddtup()

OffsetNumber _hash_pgaddtup ( Relation  rel,
Buffer  buf,
Size  itemsize,
IndexTuple  itup,
bool  appendtup 
)

Definition at line 274 of file hashinsert.c.

276{
277 OffsetNumber itup_off;
278 Page page;
279
281 page = BufferGetPage(buf);
282
283 /*
284 * Find where to insert the tuple (preserving page's hashkey ordering). If
285 * 'appendtup' is true then we just insert it at the end.
286 */
287 if (appendtup)
288 {
289 itup_off = PageGetMaxOffsetNumber(page) + 1;
290
291#ifdef USE_ASSERT_CHECKING
292 /* ensure this tuple's hashkey is >= the final existing tuple */
293 if (PageGetMaxOffsetNumber(page) > 0)
294 {
295 IndexTuple lasttup;
296 ItemId itemid;
297
298 itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
299 lasttup = (IndexTuple) PageGetItem(page, itemid);
300
303 }
304#endif
305 }
306 else
307 {
308 uint32 hashkey = _hash_get_indextuple_hashkey(itup);
309
310 itup_off = _hash_binsearch(page, hashkey);
311 }
312
313 if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
315 elog(ERROR, "failed to add index item to \"%s\"",
317
318 return itup_off;
319}
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
IndexTupleData * IndexTuple
Definition: itup.h:53

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), Assert(), buf, BufferGetPage(), elog, ERROR, InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, PageAddItem, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and RelationGetRelationName.

Referenced by _hash_doinsert().

◆ _hash_vacuum_one_page()

static void _hash_vacuum_one_page ( Relation  rel,
Relation  hrel,
Buffer  metabuf,
Buffer  buf 
)
static

Definition at line 370 of file hashinsert.c.

371{
372 OffsetNumber deletable[MaxOffsetNumber];
373 int ndeletable = 0;
374 OffsetNumber offnum,
375 maxoff;
376 Page page = BufferGetPage(buf);
377 HashPageOpaque pageopaque;
378 HashMetaPage metap;
379
380 /* Scan each tuple in page to see if it is marked as LP_DEAD */
381 maxoff = PageGetMaxOffsetNumber(page);
382 for (offnum = FirstOffsetNumber;
383 offnum <= maxoff;
384 offnum = OffsetNumberNext(offnum))
385 {
386 ItemId itemId = PageGetItemId(page, offnum);
387
388 if (ItemIdIsDead(itemId))
389 deletable[ndeletable++] = offnum;
390 }
391
392 if (ndeletable > 0)
393 {
394 TransactionId snapshotConflictHorizon;
395
396 snapshotConflictHorizon =
398 deletable, ndeletable);
399
400 /*
401 * Write-lock the meta page so that we can decrement tuple count.
402 */
404
405 /* No ereport(ERROR) until changes are logged */
407
408 PageIndexMultiDelete(page, deletable, ndeletable);
409
410 /*
411 * Mark the page as not containing any LP_DEAD items. This is not
412 * certainly true (there might be some that have recently been marked,
413 * but weren't included in our target-item list), but it will almost
414 * always be true and it doesn't seem worth an additional page scan to
415 * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
416 * anyway.
417 */
418 pageopaque = HashPageGetOpaque(page);
419 pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
420
421 metap = HashPageGetMeta(BufferGetPage(metabuf));
422 metap->hashm_ntuples -= ndeletable;
423
425 MarkBufferDirty(metabuf);
426
427 /* XLOG stuff */
428 if (RelationNeedsWAL(rel))
429 {
431 XLogRecPtr recptr;
432
434 xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
435 xlrec.ntuples = ndeletable;
436
440
441 /*
442 * We need the target-offsets array whether or not we store the
443 * whole buffer, to allow us to find the snapshotConflictHorizon
444 * on a standby server.
445 */
446 XLogRegisterData(deletable,
447 ndeletable * sizeof(OffsetNumber));
448
450
451 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);
452
453 PageSetLSN(BufferGetPage(buf), recptr);
454 PageSetLSN(BufferGetPage(metabuf), recptr);
455 }
456
458
459 /*
460 * Releasing write lock on meta page as we have updated the tuple
461 * count.
462 */
464 }
465}
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1150
uint32 TransactionId
Definition: c.h:623
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition: genam.c:295
#define XLOG_HASH_VACUUM_ONE_PAGE
Definition: hash_xlog.h:40
#define SizeOfHashVacuumOnePage
Definition: hash_xlog.h:256
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27
#define MaxOffsetNumber
Definition: off.h:28
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:691
TransactionId snapshotConflictHorizon
Definition: hash_xlog.h:247

References buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, FirstOffsetNumber, HashMetaPageData::hashm_ntuples, HashPageOpaqueData::hasho_flag, HashPageGetMeta, HashPageGetOpaque, index_compute_xid_horizon_for_tuples(), xl_hash_vacuum_one_page::isCatalogRel, ItemIdIsDead, LockBuffer(), MarkBufferDirty(), MaxOffsetNumber, xl_hash_vacuum_one_page::ntuples, OffsetNumberNext, PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageSetLSN(), REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHashVacuumOnePage, xl_hash_vacuum_one_page::snapshotConflictHorizon, START_CRIT_SECTION, XLOG_HASH_VACUUM_ONE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert().