PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashinsert.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * hashinsert.c
4 * Item insertion in hash tables for Postgres.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashinsert.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "access/hash.h"
19#include "access/hash_xlog.h"
20#include "access/xloginsert.h"
21#include "miscadmin.h"
22#include "storage/predicate.h"
23#include "utils/rel.h"
24
27
28/*
29 * _hash_doinsert() -- Handle insertion of a single index tuple.
30 *
31 * This routine is called by the public interface routines, hashbuild
32 * and hashinsert. By here, itup is completely filled in.
33 *
34 * 'sorted' must only be passed as 'true' when inserts are done in hashkey
35 * order.
36 */
37void
38_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
39{
43 HashMetaPage metap;
46 Page page;
48 Size itemsz;
49 bool do_expand;
54
55 /*
56 * Get the hash key for the item (it's stored in the index tuple itself).
57 */
59
60 /* compute item size too */
61 itemsz = IndexTupleSize(itup);
62 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
63 * need to be consistent */
64
66
67 /*
68 * Read the metapage. We don't lock it yet; HashMaxItemSize() will
69 * examine pd_pagesize_version, but that can't change so we can examine it
70 * without a lock.
71 */
74
75 /*
76 * Check whether the item can fit on a hash page at all. (Eventually, we
77 * ought to try to apply TOAST methods if not.) Note that at this point,
78 * itemsz doesn't include the ItemId.
79 *
80 * XXX this is useless code if we are only storing hash keys.
81 */
82 if (itemsz > HashMaxItemSize(metapage))
85 errmsg("index row size %zu exceeds hash maximum %zu",
86 itemsz, HashMaxItemSize(metapage)),
87 errhint("Values larger than a buffer page cannot be indexed.")));
88
89 /* Lock the primary bucket page for the target bucket. */
91 &usedmetap);
93
95
96 /* remember the primary bucket buffer to release the pin on it at end. */
98
99 page = BufferGetPage(buf);
101 bucket = pageopaque->hasho_bucket;
102
103 /*
104 * If this bucket is in the process of being split, try to finish the
105 * split before inserting, because that might create room for the
106 * insertion to proceed without allocating an additional overflow page.
107 * It's only interesting to finish the split if we're trying to insert
108 * into the bucket from which we're removing tuples (the "old" bucket),
109 * not if we're trying to insert into the bucket into which tuples are
110 * being moved (the "new" bucket).
111 */
113 {
114 /* release the lock on bucket buffer, before completing the split. */
116
118 usedmetap->hashm_maxbucket,
119 usedmetap->hashm_highmask,
120 usedmetap->hashm_lowmask);
121
122 /* release the pin on old and meta buffer. retry for insert. */
123 _hash_dropbuf(rel, buf);
125 goto restart_insert;
126 }
127
128 /* Do the insertion */
129 while (PageGetFreeSpace(page) < itemsz)
130 {
131 BlockNumber nextblkno;
132
133 /*
134 * Check if current page has any DEAD tuples. If yes, delete these
135 * tuples and see if we can get a space for the new item to be
136 * inserted before moving to the next page in the bucket chain.
137 */
139 {
140
142 {
143 _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
144
145 if (PageGetFreeSpace(page) >= itemsz)
146 break; /* OK, now we have enough space */
147 }
148 }
149
150 /*
151 * no space on this page; check for an overflow page
152 */
153 nextblkno = pageopaque->hasho_nextblkno;
154
155 if (BlockNumberIsValid(nextblkno))
156 {
157 /*
158 * ovfl page exists; go get it. if it doesn't have room, we'll
159 * find out next pass through the loop test above. we always
160 * release both the lock and pin if this is an overflow page, but
161 * only the lock if this is the primary bucket page, since the pin
162 * on the primary bucket must be retained throughout the scan.
163 */
164 if (buf != bucket_buf)
165 _hash_relbuf(rel, buf);
166 else
168 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
169 page = BufferGetPage(buf);
170 }
171 else
172 {
173 /*
174 * we're at the end of the bucket chain and we haven't found a
175 * page with enough room. allocate a new overflow page.
176 */
177
178 /* release our write lock without modifying buffer */
180
181 /* chain to a new overflow page */
183 page = BufferGetPage(buf);
184
185 /* should fit now, given test above */
186 Assert(PageGetFreeSpace(page) >= itemsz);
187 }
190 Assert(pageopaque->hasho_bucket == bucket);
191 }
192
193 /*
194 * Write-lock the metapage so we can increment the tuple count. After
195 * incrementing it, check to see if it's time for a split.
196 */
198
199 /* Do the update. No ereport(ERROR) until changes are logged */
201
202 /* found page with enough space, so add the item here */
203 itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
205
206 /* metapage operations */
207 metap = HashPageGetMeta(metapage);
208 metap->hashm_ntuples += 1;
209
210 /* Make sure this stays in sync with _hash_expandtable() */
211 do_expand = metap->hashm_ntuples >
212 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
213
215
216 /* XLOG stuff */
217 if (RelationNeedsWAL(rel))
218 {
220
222
225
227
229 XLogRegisterBufData(0, itup, IndexTupleSize(itup));
230
232 }
233 else
234 recptr = XLogGetFakeLSN(rel);
235
238
240
241 /* drop lock on metapage, but keep pin */
243
244 /*
245 * Release the modified page and ensure to release the pin on primary
246 * page.
247 */
248 _hash_relbuf(rel, buf);
249 if (buf != bucket_buf)
251
252 /* Attempt to split if a split is needed */
253 if (do_expand)
255
256 /* Finally drop our pin on the metapage */
258}
259
260/*
261 * _hash_pgaddtup() -- add a tuple to a particular page in the index.
262 *
263 * This routine adds the tuple to the page as requested; it does not write out
264 * the page. It is an error to call this function without pin and write lock
265 * on the target buffer.
266 *
267 * Returns the offset number at which the tuple was inserted. This function
268 * is responsible for preserving the condition that tuples in a hash index
269 * page are sorted by hashkey value, however, if the caller is certain that
270 * the hashkey for the tuple being added is >= the hashkeys of all existing
271 * tuples on the page, then the 'appendtup' flag may be passed as true. This
272 * saves from having to binary search for the correct location to insert the
273 * tuple.
274 */
277 bool appendtup)
278{
280 Page page;
281
283 page = BufferGetPage(buf);
284
285 /*
286 * Find where to insert the tuple (preserving page's hashkey ordering). If
287 * 'appendtup' is true then we just insert it at the end.
288 */
289 if (appendtup)
290 {
292
293#ifdef USE_ASSERT_CHECKING
294 /* ensure this tuple's hashkey is >= the final existing tuple */
295 if (PageGetMaxOffsetNumber(page) > 0)
296 {
298 ItemId itemid;
299
300 itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
301 lasttup = (IndexTuple) PageGetItem(page, itemid);
302
305 }
306#endif
307 }
308 else
309 {
311
313 }
314
315 if (PageAddItem(page, itup, itemsize, itup_off, false, false) == InvalidOffsetNumber)
316 elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
317
318 return itup_off;
319}
320
321/*
322 * _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
323 * index.
324 *
325 * This routine has same requirements for locking and tuple ordering as
326 * _hash_pgaddtup().
327 *
328 * Returns the offset number array at which the tuples were inserted.
329 */
330void
333{
335 Page page;
337 int i;
338
340 page = BufferGetPage(buf);
341
342 for (i = 0; i < nitups; i++)
343 {
345
346 itemsize = IndexTupleSize(itups[i]);
348
349 /* Find where to insert the tuple (preserving page's hashkey ordering) */
352
354
355 if (PageAddItem(page, itups[i], itemsize, itup_off, false, false) == InvalidOffsetNumber)
356 elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
357 }
358}
359
360/*
361 * _hash_vacuum_one_page - vacuum just one index page.
362 *
363 * Try to remove LP_DEAD items from the given page. We must acquire cleanup
364 * lock on the page being modified before calling this function.
365 */
366
367static void
369{
371 int ndeletable = 0;
372 OffsetNumber offnum,
373 maxoff;
374 Page page = BufferGetPage(buf);
376 HashMetaPage metap;
378
379 /* Scan each tuple in page to see if it is marked as LP_DEAD */
380 maxoff = PageGetMaxOffsetNumber(page);
381 for (offnum = FirstOffsetNumber;
382 offnum <= maxoff;
383 offnum = OffsetNumberNext(offnum))
384 {
385 ItemId itemId = PageGetItemId(page, offnum);
386
387 if (ItemIdIsDead(itemId))
388 deletable[ndeletable++] = offnum;
389 }
390
391 if (ndeletable > 0)
392 {
393 TransactionId snapshotConflictHorizon;
394
395 snapshotConflictHorizon =
398
399 /*
400 * Write-lock the meta page so that we can decrement tuple count.
401 */
403
404 /* No ereport(ERROR) until changes are logged */
406
408
409 /*
410 * Mark the page as not containing any LP_DEAD items. This is not
411 * certainly true (there might be some that have recently been marked,
412 * but weren't included in our target-item list), but it will almost
413 * always be true and it doesn't seem worth an additional page scan to
414 * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
415 * anyway.
416 */
419
421 metap->hashm_ntuples -= ndeletable;
422
425
426 /* XLOG stuff */
427 if (RelationNeedsWAL(rel))
428 {
430
432 xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
433 xlrec.ntuples = ndeletable;
434
438
439 /*
440 * We need the target-offsets array whether or not we store the
441 * whole buffer, to allow us to find the snapshotConflictHorizon
442 * on a standby server.
443 */
445 ndeletable * sizeof(OffsetNumber));
446
448
450 }
451 else
452 recptr = XLogGetFakeLSN(rel);
453
456
458
459 /*
460 * Releasing write lock on meta page as we have updated the tuple
461 * count.
462 */
464 }
465}
uint32 BlockNumber
Definition block.h:31
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4446
bool IsBufferCleanupOK(Buffer buffer)
Definition bufmgr.c:6901
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3147
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:468
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:222
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:207
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:334
Size PageGetFreeSpace(const PageData *page)
Definition bufpage.c:916
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition bufpage.c:1170
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:268
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:378
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:416
PageData * Page
Definition bufpage.h:81
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition bufpage.h:504
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:396
#define MAXALIGN(LEN)
Definition c.h:896
#define Assert(condition)
Definition c.h:943
uint16_t uint16
Definition c.h:623
uint32_t uint32
Definition c.h:624
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
int errcode(int sqlerrcode)
Definition elog.c:874
int errhint(const char *fmt,...) pg_attribute_printf(1
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition genam.c:295
#define HASH_NOLOCK
Definition hash.h:341
#define HashPageGetOpaque(page)
Definition hash.h:88
#define LH_BUCKET_PAGE
Definition hash.h:55
#define HASH_WRITE
Definition hash.h:340
#define H_BUCKET_BEING_SPLIT(opaque)
Definition hash.h:91
#define LH_META_PAGE
Definition hash.h:57
#define HashPageGetMeta(page)
Definition hash.h:323
#define HASH_METAPAGE
Definition hash.h:198
#define H_HAS_DEAD_TUPLES(opaque)
Definition hash.h:93
#define LH_PAGE_TYPE
Definition hash.h:63
uint32 Bucket
Definition hash.h:35
#define HashMaxItemSize(page)
Definition hash.h:287
#define LH_OVERFLOW_PAGE
Definition hash.h:54
#define SizeOfHashInsert
Definition hash_xlog.h:61
#define XLOG_HASH_INSERT
Definition hash_xlog.h:29
#define XLOG_HASH_VACUUM_ONE_PAGE
Definition hash_xlog.h:40
#define SizeOfHashVacuumOnePage
Definition hash_xlog.h:257
void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
Definition hashinsert.c:38
OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool appendtup)
Definition hashinsert.c:276
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition hashinsert.c:331
static void _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
Definition hashinsert.c:368
Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
Definition hashovfl.c:112
void _hash_relbuf(Relation rel, Buffer buf)
Definition hashpage.c:266
void _hash_dropbuf(Relation rel, Buffer buf)
Definition hashpage.c:277
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition hashpage.c:70
void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition hashpage.c:1360
Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)
Definition hashpage.c:1563
void _hash_expandtable(Relation rel, Buffer metabuf)
Definition hashpage.c:614
uint32 _hash_get_indextuple_hashkey(IndexTuple itup)
Definition hashutil.c:291
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition hashutil.c:350
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition hashutil.c:210
int i
Definition isn.c:77
#define ItemIdIsDead(itemId)
Definition itemid.h:113
IndexTupleData * IndexTuple
Definition itup.h:53
static Size IndexTupleSize(const IndexTupleData *itup)
Definition itup.h:71
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
static char * errmsg
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28
static char buf[DEFAULT_XLOG_SEG_SIZE]
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4266
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:695
#define RelationNeedsWAL(relation)
Definition rel.h:639
uint32 hashm_maxbucket
Definition hash.h:254
double hashm_ntuples
Definition hash.h:248
uint16 hashm_ffactor
Definition hash.h:249
OffsetNumber offnum
Definition hash_xlog.h:58
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:413
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
XLogRecPtr XLogGetFakeLSN(Relation rel)
Definition xloginsert.c:562
#define REGBUF_STANDARD
Definition xloginsert.h:35