PostgreSQL Source Code  git master
hash.h File Reference
#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "catalog/pg_am_d.h"
#include "common/hashfn.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/lockdefs.h"
#include "utils/hsearch.h"
#include "utils/relcache.h"

Data Structures

struct  HashPageOpaqueData
 
struct  HashScanPosItem
 
struct  HashScanPosData
 
struct  HashScanOpaqueData
 
struct  HashMetaPageData
 
struct  HashOptions
 

Macros

#define InvalidBucket   ((Bucket) 0xFFFFFFFF)
 
#define BUCKET_TO_BLKNO(metap, B)    ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)
 
#define LH_UNUSED_PAGE   (0)
 
#define LH_OVERFLOW_PAGE   (1 << 0)
 
#define LH_BUCKET_PAGE   (1 << 1)
 
#define LH_BITMAP_PAGE   (1 << 2)
 
#define LH_META_PAGE   (1 << 3)
 
#define LH_BUCKET_BEING_POPULATED   (1 << 4)
 
#define LH_BUCKET_BEING_SPLIT   (1 << 5)
 
#define LH_BUCKET_NEEDS_SPLIT_CLEANUP   (1 << 6)
 
#define LH_PAGE_HAS_DEAD_TUPLES   (1 << 7)
 
#define LH_PAGE_TYPE    (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)
 
#define HashPageGetOpaque(page)   ((HashPageOpaque) PageGetSpecialPointer(page))
 
#define H_NEEDS_SPLIT_CLEANUP(opaque)   (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)
 
#define H_BUCKET_BEING_SPLIT(opaque)   (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)
 
#define H_BUCKET_BEING_POPULATED(opaque)   (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)
 
#define H_HAS_DEAD_TUPLES(opaque)   (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)
 
#define HASHO_PAGE_ID   0xFF80
 
#define HashScanPosIsPinned(scanpos)
 
#define HashScanPosIsValid(scanpos)
 
#define HashScanPosInvalidate(scanpos)
 
#define HASH_METAPAGE   0 /* metapage is always block 0 */
 
#define HASH_MAGIC   0x6440640
 
#define HASH_VERSION   4
 
#define HASH_MAX_BITMAPS   Min(BLCKSZ / 8, 1024)
 
#define HASH_SPLITPOINT_PHASE_BITS   2
 
#define HASH_SPLITPOINT_PHASES_PER_GRP   (1 << HASH_SPLITPOINT_PHASE_BITS)
 
#define HASH_SPLITPOINT_PHASE_MASK   (HASH_SPLITPOINT_PHASES_PER_GRP - 1)
 
#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE   10
 
#define HASH_MAX_SPLITPOINT_GROUP   32
 
#define HASH_MAX_SPLITPOINTS
 
#define HashGetFillFactor(relation)
 
#define HashGetTargetPageUsage(relation)    (BLCKSZ * HashGetFillFactor(relation) / 100)
 
#define HashMaxItemSize(page)
 
#define INDEX_MOVED_BY_SPLIT_MASK   INDEX_AM_RESERVED_BIT
 
#define HASH_MIN_FILLFACTOR   10
 
#define HASH_DEFAULT_FILLFACTOR   75
 
#define BYTE_TO_BIT   3 /* 2^3 bits/byte */
 
#define ALL_SET   ((uint32) ~0)
 
#define BMPGSZ_BYTE(metap)   ((metap)->hashm_bmsize)
 
#define BMPGSZ_BIT(metap)   ((metap)->hashm_bmsize << BYTE_TO_BIT)
 
#define BMPG_SHIFT(metap)   ((metap)->hashm_bmshift)
 
#define BMPG_MASK(metap)   (BMPGSZ_BIT(metap) - 1)
 
#define HashPageGetBitmap(page)    ((uint32 *) PageGetContents(page))
 
#define HashGetMaxBitmapSize(page)
 
#define HashPageGetMeta(page)    ((HashMetaPage) PageGetContents(page))
 
#define BITS_PER_MAP   32 /* Number of bits in uint32 */
 
#define CLRBIT(A, N)   ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
 
#define SETBIT(A, N)   ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
 
#define ISSET(A, N)   ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
 
#define HASH_READ   BUFFER_LOCK_SHARE
 
#define HASH_WRITE   BUFFER_LOCK_EXCLUSIVE
 
#define HASH_NOLOCK   (-1)
 
#define HASHSTANDARD_PROC   1
 
#define HASHEXTENDED_PROC   2
 
#define HASHOPTIONS_PROC   3
 
#define HASHNProcs   3
 

Typedefs

typedef uint32 Bucket
 
typedef struct HashPageOpaqueData HashPageOpaqueData
 
typedef HashPageOpaqueData *HashPageOpaque
 
typedef struct HashScanPosItem HashScanPosItem
 
typedef struct HashScanPosData HashScanPosData
 
typedef struct HashScanOpaqueData HashScanOpaqueData
 
typedef HashScanOpaqueData *HashScanOpaque
 
typedef struct HashMetaPageData HashMetaPageData
 
typedef HashMetaPageData *HashMetaPage
 
typedef struct HashOptions HashOptions
 
typedef struct HSpool HSpool
 

Functions

IndexBuildResult * hashbuild (Relation heap, Relation index, struct IndexInfo *indexInfo)
 
void hashbuildempty (Relation index)
 
bool hashinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo)
 
bool hashgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 hashgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc hashbeginscan (Relation rel, int nkeys, int norderbys)
 
void hashrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void hashendscan (IndexScanDesc scan)
 
IndexBulkDeleteResult * hashbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * hashvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bytea * hashoptions (Datum reloptions, bool validate)
 
bool hashvalidate (Oid opclassoid)
 
void hashadjustmembers (Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
 
void _hash_doinsert (Relation rel, IndexTuple itup, Relation heapRel)
 
OffsetNumber _hash_pgaddtup (Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
 
void _hash_pgaddmultitup (Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
 
Buffer _hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
BlockNumber _hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
 
void _hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)
 
void _hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
 
uint32 _hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)
 
Buffer _hash_getbuf (Relation rel, BlockNumber blkno, int access, int flags)
 
Buffer _hash_getbuf_with_condlock_cleanup (Relation rel, BlockNumber blkno, int flags)
 
HashMetaPage _hash_getcachedmetap (Relation rel, Buffer *metabuf, bool force_refresh)
 
Buffer _hash_getbucketbuf_from_hashkey (Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)
 
Buffer _hash_getinitbuf (Relation rel, BlockNumber blkno)
 
void _hash_initbuf (Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, bool initpage)
 
Buffer _hash_getnewbuf (Relation rel, BlockNumber blkno, ForkNumber forkNum)
 
Buffer _hash_getbuf_with_strategy (Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
 
void _hash_relbuf (Relation rel, Buffer buf)
 
void _hash_dropbuf (Relation rel, Buffer buf)
 
void _hash_dropscanbuf (Relation rel, HashScanOpaque so)
 
uint32 _hash_init (Relation rel, double num_tuples, ForkNumber forkNum)
 
void _hash_init_metabuffer (Buffer buf, double num_tuples, RegProcedure procid, uint16 ffactor, bool initpage)
 
void _hash_pageinit (Page page, Size size)
 
void _hash_expandtable (Relation rel, Buffer metabuf)
 
void _hash_finish_split (Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)
 
bool _hash_next (IndexScanDesc scan, ScanDirection dir)
 
bool _hash_first (IndexScanDesc scan, ScanDirection dir)
 
HSpool * _h_spoolinit (Relation heap, Relation index, uint32 num_buckets)
 
void _h_spooldestroy (HSpool *hspool)
 
void _h_spool (HSpool *hspool, ItemPointer self, Datum *values, bool *isnull)
 
void _h_indexbuild (HSpool *hspool, Relation heapRel)
 
bool _hash_checkqual (IndexScanDesc scan, IndexTuple itup)
 
uint32 _hash_datum2hashkey (Relation rel, Datum key)
 
uint32 _hash_datum2hashkey_type (Relation rel, Datum key, Oid keytype)
 
Bucket _hash_hashkey2bucket (uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask)
 
uint32 _hash_spareindex (uint32 num_bucket)
 
uint32 _hash_get_totalbuckets (uint32 splitpoint_phase)
 
void _hash_checkpage (Relation rel, Buffer buf, int flags)
 
uint32 _hash_get_indextuple_hashkey (IndexTuple itup)
 
bool _hash_convert_tuple (Relation index, Datum *user_values, bool *user_isnull, Datum *index_values, bool *index_isnull)
 
OffsetNumber _hash_binsearch (Page page, uint32 hash_value)
 
OffsetNumber _hash_binsearch_last (Page page, uint32 hash_value)
 
BlockNumber _hash_get_oldblock_from_newbucket (Relation rel, Bucket new_bucket)
 
BlockNumber _hash_get_newblock_from_oldbucket (Relation rel, Bucket old_bucket)
 
Bucket _hash_get_newbucket_from_oldbucket (Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)
 
void _hash_kill_items (IndexScanDesc scan)
 
void hashbucketcleanup (Relation rel, Bucket cur_bucket, Buffer bucket_buf, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy, uint32 maxbucket, uint32 highmask, uint32 lowmask, double *tuples_removed, double *num_index_tuples, bool split_cleanup, IndexBulkDeleteCallback callback, void *callback_state)
 

Macro Definition Documentation

◆ ALL_SET

#define ALL_SET   ((uint32) ~0)

Definition at line 302 of file hash.h.

◆ BITS_PER_MAP

#define BITS_PER_MAP   32 /* Number of bits in uint32 */

Definition at line 329 of file hash.h.

◆ BMPG_MASK

#define BMPG_MASK(metap)    (BMPGSZ_BIT(metap) - 1)

Definition at line 314 of file hash.h.

◆ BMPG_SHIFT

#define BMPG_SHIFT(metap)    ((metap)->hashm_bmshift)

Definition at line 313 of file hash.h.

◆ BMPGSZ_BIT

#define BMPGSZ_BIT(metap)    ((metap)->hashm_bmsize << BYTE_TO_BIT)

Definition at line 312 of file hash.h.

◆ BMPGSZ_BYTE

#define BMPGSZ_BYTE(metap)    ((metap)->hashm_bmsize)

Definition at line 311 of file hash.h.
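
Taken together, these four macros give the geometry of an overflow-page bitmap. A worked sketch, assuming the default 8 kB BLCKSZ, for which _hash_init_metabuffer() sets hashm_bmsize to 4096 (the largest power of two that fits on a bitmap page); the concrete numbers below are illustrative of that case only:

/* Illustrative values, 8 kB block size assumed. */
uint32 bmbytes = BMPGSZ_BYTE(metap);    /* 4096 bytes of bitmap space */
uint32 bmbits  = BMPGSZ_BIT(metap);     /* 4096 << 3 = 32768 bits */
uint32 bmshift = BMPG_SHIFT(metap);     /* 15, since 1 << 15 == 32768 */
uint32 bmmask  = BMPG_MASK(metap);      /* 32767 */

/* The overflow-page code splits an overflow bit number into a bitmap
 * page index and a bit within that page: */
uint32 mappage  = ovflbitnum >> bmshift;
uint32 bitinmap = ovflbitnum & bmmask;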

◆ BUCKET_TO_BLKNO

#define BUCKET_TO_BLKNO(metap, B)     ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)

Definition at line 39 of file hash.h.
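
The macro maps a bucket number to the block holding the bucket's primary page: block 0 is the metapage, so bucket 0 always lands in block 1, and later buckets are displaced by the overflow pages recorded in hashm_spares[] for earlier splitpoints. A minimal sketch (hypothetical metap):

/* Bucket 0 is always block 1, because block 0 is the metapage. */
BlockNumber blkno = BUCKET_TO_BLKNO(metap, 0);  /* == 1 */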

◆ BYTE_TO_BIT

#define BYTE_TO_BIT   3 /* 2^3 bits/byte */

Definition at line 301 of file hash.h.

◆ CLRBIT

#define CLRBIT(A, N)    ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))

Definition at line 332 of file hash.h.

◆ H_BUCKET_BEING_POPULATED

#define H_BUCKET_BEING_POPULATED(opaque)    (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)

Definition at line 92 of file hash.h.

◆ H_BUCKET_BEING_SPLIT

#define H_BUCKET_BEING_SPLIT(opaque)    (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)

Definition at line 91 of file hash.h.

◆ H_HAS_DEAD_TUPLES

#define H_HAS_DEAD_TUPLES(opaque)    (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)

Definition at line 93 of file hash.h.

◆ H_NEEDS_SPLIT_CLEANUP

#define H_NEEDS_SPLIT_CLEANUP(opaque)    (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)

Definition at line 90 of file hash.h.

◆ HASH_DEFAULT_FILLFACTOR

#define HASH_DEFAULT_FILLFACTOR   75

Definition at line 296 of file hash.h.

◆ HASH_MAGIC

#define HASH_MAGIC   0x6440640

Definition at line 200 of file hash.h.

◆ HASH_MAX_BITMAPS

#define HASH_MAX_BITMAPS   Min(BLCKSZ / 8, 1024)

Definition at line 230 of file hash.h.

◆ HASH_MAX_SPLITPOINT_GROUP

#define HASH_MAX_SPLITPOINT_GROUP   32

Definition at line 238 of file hash.h.

◆ HASH_MAX_SPLITPOINTS

#define HASH_MAX_SPLITPOINTS
Value:
((HASH_MAX_SPLITPOINT_GROUP - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) * \
HASH_SPLITPOINT_PHASES_PER_GRP + \
HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)

Definition at line 239 of file hash.h.
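
With the constants above, this works out to (32 - 10) * 4 + 10 = 98 splitpoint phases: the first 10 groups have a single phase each and the remaining 22 groups have 4 phases apiece. This is the number of elements needed in hashm_spares[].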

◆ HASH_METAPAGE

#define HASH_METAPAGE   0 /* metapage is always block 0 */

Definition at line 198 of file hash.h.

◆ HASH_MIN_FILLFACTOR

#define HASH_MIN_FILLFACTOR   10

Definition at line 295 of file hash.h.

◆ HASH_NOLOCK

#define HASH_NOLOCK   (-1)

Definition at line 341 of file hash.h.

◆ HASH_READ

#define HASH_READ   BUFFER_LOCK_SHARE

Definition at line 339 of file hash.h.

◆ HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE

#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE   10

Definition at line 235 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_BITS

#define HASH_SPLITPOINT_PHASE_BITS   2

Definition at line 232 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_MASK

#define HASH_SPLITPOINT_PHASE_MASK   (HASH_SPLITPOINT_PHASES_PER_GRP - 1)

Definition at line 234 of file hash.h.

◆ HASH_SPLITPOINT_PHASES_PER_GRP

#define HASH_SPLITPOINT_PHASES_PER_GRP   (1 << HASH_SPLITPOINT_PHASE_BITS)

Definition at line 233 of file hash.h.

◆ HASH_VERSION

#define HASH_VERSION   4

Definition at line 201 of file hash.h.

◆ HASH_WRITE

#define HASH_WRITE   BUFFER_LOCK_EXCLUSIVE

Definition at line 340 of file hash.h.

◆ HASHEXTENDED_PROC

#define HASHEXTENDED_PROC   2

Definition at line 356 of file hash.h.

◆ HashGetFillFactor

#define HashGetFillFactor(relation)
Value:
(AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
relation->rd_rel->relam == HASH_AM_OID), \
(relation)->rd_options ? \
((HashOptions *) (relation)->rd_options)->fillfactor : \
HASH_DEFAULT_FILLFACTOR)

Definition at line 275 of file hash.h.

◆ HashGetMaxBitmapSize

#define HashGetMaxBitmapSize(page)
Value:
(PageGetPageSize((Page) page) - \
(MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))

Definition at line 319 of file hash.h.

◆ HashGetTargetPageUsage

#define HashGetTargetPageUsage(relation)     (BLCKSZ * HashGetFillFactor(relation) / 100)

Definition at line 281 of file hash.h.
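
For example, with the default BLCKSZ of 8192 and the default fillfactor of 75, the target page usage is 8192 * 75 / 100 = 6144 bytes.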

◆ HashMaxItemSize

#define HashMaxItemSize(page)
Value:
MAXALIGN_DOWN(PageGetPageSize(page) - \
SizeOfPageHeaderData - \
sizeof(ItemIdData) - \
MAXALIGN(sizeof(HashPageOpaqueData)))

Definition at line 287 of file hash.h.
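
A worked example, assuming 8 kB pages and 8-byte maximum alignment: 8192 - 24 (page header) - 4 (one ItemId) - 16 (MAXALIGN'd HashPageOpaqueData) = 8148, which MAXALIGN_DOWN rounds to 8144 bytes. _hash_doinsert() raises an error for index tuples larger than this.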

◆ HASHNProcs

#define HASHNProcs   3

Definition at line 358 of file hash.h.

◆ HASHO_PAGE_ID

#define HASHO_PAGE_ID   0xFF80

Definition at line 101 of file hash.h.

◆ HASHOPTIONS_PROC

#define HASHOPTIONS_PROC   3

Definition at line 357 of file hash.h.

◆ HashPageGetBitmap

#define HashPageGetBitmap(page)     ((uint32 *) PageGetContents(page))

Definition at line 316 of file hash.h.

◆ HashPageGetMeta

#define HashPageGetMeta(page)     ((HashMetaPage) PageGetContents(page))

Definition at line 323 of file hash.h.

◆ HashPageGetOpaque

#define HashPageGetOpaque(page)    ((HashPageOpaque) PageGetSpecialPointer(page))

Definition at line 88 of file hash.h.

◆ HashScanPosInvalidate

#define HashScanPosInvalidate(scanpos)
Value:
do { \
(scanpos).buf = InvalidBuffer; \
(scanpos).currPage = InvalidBlockNumber; \
(scanpos).nextPage = InvalidBlockNumber; \
(scanpos).prevPage = InvalidBlockNumber; \
(scanpos).firstItem = 0; \
(scanpos).lastItem = 0; \
(scanpos).itemIndex = 0; \
} while (0)

Definition at line 144 of file hash.h.
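
Once invalidated, a position fails both HashScanPosIsValid() and HashScanPosIsPinned(); the scan code resets so->currPos this way. A minimal sketch:

HashScanOpaque so = (HashScanOpaque) scan->opaque;

HashScanPosInvalidate(so->currPos);         /* forget any cached page */
Assert(!HashScanPosIsValid(so->currPos));   /* currPage is InvalidBlockNumber */
Assert(!HashScanPosIsPinned(so->currPos));  /* buf is InvalidBuffer */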

◆ HashScanPosIsPinned

#define HashScanPosIsPinned(scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BufferIsValid((scanpos).buf) \
)

Definition at line 130 of file hash.h.

◆ HashScanPosIsValid

#define HashScanPosIsValid(scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BlockNumberIsValid((scanpos).currPage) \
)

Definition at line 137 of file hash.h.

◆ HASHSTANDARD_PROC

#define HASHSTANDARD_PROC   1

Definition at line 355 of file hash.h.

◆ INDEX_MOVED_BY_SPLIT_MASK

#define INDEX_MOVED_BY_SPLIT_MASK   INDEX_AM_RESERVED_BIT

Definition at line 293 of file hash.h.

◆ InvalidBucket

#define InvalidBucket   ((Bucket) 0xFFFFFFFF)

Definition at line 37 of file hash.h.

◆ ISSET

#define ISSET(A, N)    ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))

Definition at line 334 of file hash.h.

◆ LH_BITMAP_PAGE

#define LH_BITMAP_PAGE   (1 << 2)

Definition at line 56 of file hash.h.

◆ LH_BUCKET_BEING_POPULATED

#define LH_BUCKET_BEING_POPULATED   (1 << 4)

Definition at line 58 of file hash.h.

◆ LH_BUCKET_BEING_SPLIT

#define LH_BUCKET_BEING_SPLIT   (1 << 5)

Definition at line 59 of file hash.h.

◆ LH_BUCKET_NEEDS_SPLIT_CLEANUP

#define LH_BUCKET_NEEDS_SPLIT_CLEANUP   (1 << 6)

Definition at line 60 of file hash.h.

◆ LH_BUCKET_PAGE

#define LH_BUCKET_PAGE   (1 << 1)

Definition at line 55 of file hash.h.

◆ LH_META_PAGE

#define LH_META_PAGE   (1 << 3)

Definition at line 57 of file hash.h.

◆ LH_OVERFLOW_PAGE

#define LH_OVERFLOW_PAGE   (1 << 0)

Definition at line 54 of file hash.h.

◆ LH_PAGE_HAS_DEAD_TUPLES

#define LH_PAGE_HAS_DEAD_TUPLES   (1 << 7)

Definition at line 61 of file hash.h.

◆ LH_PAGE_TYPE

#define LH_PAGE_TYPE    (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)

Definition at line 63 of file hash.h.

◆ LH_UNUSED_PAGE

#define LH_UNUSED_PAGE   (0)

Definition at line 53 of file hash.h.

◆ SETBIT

#define SETBIT(A, N)    ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))

Definition at line 333 of file hash.h.
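
SETBIT, CLRBIT and ISSET treat an array of uint32 words as one flat bit array; the overflow-page bitmap code addresses free-page bits this way. A self-contained sketch (a local array, not a real bitmap page):

uint32 freep[4] = {0};      /* 4 * BITS_PER_MAP = 128 bits, all clear */

SETBIT(freep, 37);          /* sets bit 37 % 32 = 5 of word 37 / 32 = 1 */
if (ISSET(freep, 37))
    CLRBIT(freep, 37);      /* ...and clears it again */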

Typedef Documentation

◆ Bucket

typedef uint32 Bucket

Definition at line 35 of file hash.h.

◆ HashMetaPage

typedef HashMetaPageData *HashMetaPage

Definition at line 267 of file hash.h.

◆ HashMetaPageData

typedef struct HashMetaPageData HashMetaPageData

◆ HashOptions

typedef struct HashOptions HashOptions

◆ HashPageOpaque

typedef HashPageOpaqueData *HashPageOpaque

Definition at line 86 of file hash.h.

◆ HashPageOpaqueData

typedef struct HashPageOpaqueData HashPageOpaqueData

◆ HashScanOpaque

typedef HashScanOpaqueData *HashScanOpaque

Definition at line 192 of file hash.h.

◆ HashScanOpaqueData

typedef struct HashScanOpaqueData HashScanOpaqueData

◆ HashScanPosData

typedef struct HashScanPosData HashScanPosData

◆ HashScanPosItem

typedef struct HashScanPosItem HashScanPosItem

◆ HSpool

typedef struct HSpool HSpool

Definition at line 444 of file hash.h.

Function Documentation

◆ _h_indexbuild()

void _h_indexbuild ( HSpool *  hspool,
Relation  heapRel 
)

Definition at line 119 of file hashsort.c.

120 {
121  IndexTuple itup;
122  int64 tups_done = 0;
123 #ifdef USE_ASSERT_CHECKING
124  uint32 hashkey = 0;
125 #endif
126 
127  tuplesort_performsort(hspool->sortstate);
128 
129  while ((itup = tuplesort_getindextuple(hspool->sortstate, true)) != NULL)
130  {
131  /*
132  * Technically, it isn't critical that hash keys be found in sorted
133  * order, since this sorting is only used to increase locality of
134  * access as a performance optimization. It still seems like a good
135  * idea to test tuplesort.c's handling of hash index tuple sorts
136  * through an assertion, though.
137  */
138 #ifdef USE_ASSERT_CHECKING
139  uint32 lasthashkey = hashkey;
140 
141  hashkey = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
142  hspool->max_buckets, hspool->high_mask,
143  hspool->low_mask);
144  Assert(hashkey >= lasthashkey);
145 #endif
146 
147  _hash_doinsert(hspool->index, itup, heapRel);
148 
149  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
150  ++tups_done);
151  }
152 }

References _hash_doinsert(), _hash_get_indextuple_hashkey(), _hash_hashkey2bucket(), Assert(), HSpool::high_mask, HSpool::index, HSpool::low_mask, HSpool::max_buckets, pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_DONE, HSpool::sortstate, tuplesort_getindextuple(), and tuplesort_performsort().

Referenced by hashbuild().

◆ _h_spool()

void _h_spool ( HSpool *  hspool,
ItemPointer  self,
Datum *  values,
bool *  isnull 
)

Definition at line 108 of file hashsort.c.

109 {
110  tuplesort_putindextuplevalues(hspool->sortstate, hspool->index,
111  self, values, isnull);
112 }

References HSpool::index, HSpool::sortstate, tuplesort_putindextuplevalues(), and values.

Referenced by hashbuildCallback().

◆ _h_spooldestroy()

void _h_spooldestroy ( HSpool *  hspool)

Definition at line 98 of file hashsort.c.

99 {
100  tuplesort_end(hspool->sortstate);
101  pfree(hspool);
102 }

References pfree(), HSpool::sortstate, and tuplesort_end().

Referenced by hashbuild().

◆ _h_spoolinit()

HSpool* _h_spoolinit ( Relation  heap,
Relation  index,
uint32  num_buckets 
)

Definition at line 59 of file hashsort.c.

60 {
61  HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
62 
63  hspool->index = index;
64 
65  /*
66  * Determine the bitmask for hash code values. Since there are currently
67  * num_buckets buckets in the index, the appropriate mask can be computed
68  * as follows.
69  *
70  * NOTE : This hash mask calculation should be in sync with similar
71  * calculation in _hash_init_metabuffer.
72  */
73  hspool->high_mask = pg_nextpower2_32(num_buckets + 1) - 1;
74  hspool->low_mask = (hspool->high_mask >> 1);
75  hspool->max_buckets = num_buckets - 1;
76 
77  /*
78  * We size the sort area as maintenance_work_mem rather than work_mem to
79  * speed index creation. This should be OK since a single backend can't
80  * run multiple index creations in parallel.
81  */
82  hspool->sortstate = tuplesort_begin_index_hash(heap,
83  index,
84  hspool->high_mask,
85  hspool->low_mask,
86  hspool->max_buckets,
87  maintenance_work_mem,
88  NULL,
89  TUPLESORT_NONE);
90 
91  return hspool;
92 }

References HSpool::high_mask, HSpool::index, HSpool::low_mask, maintenance_work_mem, HSpool::max_buckets, palloc0(), pg_nextpower2_32(), HSpool::sortstate, tuplesort_begin_index_hash(), and TUPLESORT_NONE.

Referenced by hashbuild().
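
The mask computation above mirrors _hash_init_metabuffer(). For instance, with num_buckets = 1000, pg_nextpower2_32(1001) is 1024, so high_mask = 1023, low_mask = 511 and max_buckets = 999. _hash_hashkey2bucket() then reduces each 32-bit hash code with these values; a sketch of its logic:

uint32 bucket = hashkey & high_mask;    /* 0 .. 1023 */
if (bucket > max_buckets)
    bucket = bucket & low_mask;         /* fold back into 0 .. 511 */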

◆ _hash_addovflpage()

Buffer _hash_addovflpage ( Relation  rel,
Buffer  metabuf,
Buffer  buf,
bool  retain_pin 
)

Definition at line 112 of file hashovfl.c.

113 {
114  Buffer ovflbuf;
115  Page page;
116  Page ovflpage;
117  HashPageOpaque pageopaque;
118  HashPageOpaque ovflopaque;
119  HashMetaPage metap;
120  Buffer mapbuf = InvalidBuffer;
121  Buffer newmapbuf = InvalidBuffer;
122  BlockNumber blkno;
123  uint32 orig_firstfree;
124  uint32 splitnum;
125  uint32 *freep = NULL;
126  uint32 max_ovflpg;
127  uint32 bit;
128  uint32 bitmap_page_bit;
129  uint32 first_page;
130  uint32 last_bit;
131  uint32 last_page;
132  uint32 i,
133  j;
134  bool page_found = false;
135 
136  /*
137  * Write-lock the tail page. Here, we need to maintain locking order such
138  * that, first acquire the lock on tail page of bucket, then on meta page
139  * to find and lock the bitmap page and if it is found, then lock on meta
140  * page is released, then finally acquire the lock on new overflow buffer.
141  * We need this locking order to avoid deadlock with backends that are
142  * doing inserts.
143  *
144  * Note: We could have avoided locking many buffers here if we made two
145  * WAL records for acquiring an overflow page (one to allocate an overflow
146  * page and another to add it to overflow bucket chain). However, doing
147  * so can leak an overflow page, if the system crashes after allocation.
148  * Needless to say, it is better to have a single record from a
149  * performance point of view as well.
150  */
151  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
152 
153  /* probably redundant... */
154  _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
155 
156  /* loop to find current tail page, in case someone else inserted too */
157  for (;;)
158  {
159  BlockNumber nextblkno;
160 
161  page = BufferGetPage(buf);
162  pageopaque = HashPageGetOpaque(page);
163  nextblkno = pageopaque->hasho_nextblkno;
164 
165  if (!BlockNumberIsValid(nextblkno))
166  break;
167 
168  /* we assume we do not need to write the unmodified page */
169  if (retain_pin)
170  {
171  /* pin will be retained only for the primary bucket page */
172  Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
173  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
174  }
175  else
176  _hash_relbuf(rel, buf);
177 
178  retain_pin = false;
179 
180  buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
181  }
182 
183  /* Get exclusive lock on the meta page */
184  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
185 
186  _hash_checkpage(rel, metabuf, LH_META_PAGE);
187  metap = HashPageGetMeta(BufferGetPage(metabuf));
188 
189  /* start search at hashm_firstfree */
190  orig_firstfree = metap->hashm_firstfree;
191  first_page = orig_firstfree >> BMPG_SHIFT(metap);
192  bit = orig_firstfree & BMPG_MASK(metap);
193  i = first_page;
194  j = bit / BITS_PER_MAP;
195  bit &= ~(BITS_PER_MAP - 1);
196 
197  /* outer loop iterates once per bitmap page */
198  for (;;)
199  {
200  BlockNumber mapblkno;
201  Page mappage;
202  uint32 last_inpage;
203 
204  /* want to end search with the last existing overflow page */
205  splitnum = metap->hashm_ovflpoint;
206  max_ovflpg = metap->hashm_spares[splitnum] - 1;
207  last_page = max_ovflpg >> BMPG_SHIFT(metap);
208  last_bit = max_ovflpg & BMPG_MASK(metap);
209 
210  if (i > last_page)
211  break;
212 
213  Assert(i < metap->hashm_nmaps);
214  mapblkno = metap->hashm_mapp[i];
215 
216  if (i == last_page)
217  last_inpage = last_bit;
218  else
219  last_inpage = BMPGSZ_BIT(metap) - 1;
220 
221  /* Release exclusive lock on metapage while reading bitmap page */
222  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
223 
224  mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
225  mappage = BufferGetPage(mapbuf);
226  freep = HashPageGetBitmap(mappage);
227 
228  for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
229  {
230  if (freep[j] != ALL_SET)
231  {
232  page_found = true;
233 
234  /* Reacquire exclusive lock on the meta page */
235  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
236 
237  /* convert bit to bit number within page */
238  bit += _hash_firstfreebit(freep[j]);
239  bitmap_page_bit = bit;
240 
241  /* convert bit to absolute bit number */
242  bit += (i << BMPG_SHIFT(metap));
243  /* Calculate address of the recycled overflow page */
244  blkno = bitno_to_blkno(metap, bit);
245 
246  /* Fetch and init the recycled page */
247  ovflbuf = _hash_getinitbuf(rel, blkno);
248 
249  goto found;
250  }
251  }
252 
253  /* No free space here, try to advance to next map page */
254  _hash_relbuf(rel, mapbuf);
255  mapbuf = InvalidBuffer;
256  i++;
257  j = 0; /* scan from start of next map page */
258  bit = 0;
259 
260  /* Reacquire exclusive lock on the meta page */
261  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
262  }
263 
264  /*
265  * No free pages --- have to extend the relation to add an overflow page.
266  * First, check to see if we have to add a new bitmap page too.
267  */
268  if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
269  {
270  /*
271  * We create the new bitmap page with all pages marked "in use".
272  * Actually two pages in the new bitmap's range will exist
273  * immediately: the bitmap page itself, and the following page which
274  * is the one we return to the caller. Both of these are correctly
275  * marked "in use". Subsequent pages do not exist yet, but it is
276  * convenient to pre-mark them as "in use" too.
277  */
278  bit = metap->hashm_spares[splitnum];
279 
280  /* metapage already has a write lock */
281  if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
282  ereport(ERROR,
283  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
284  errmsg("out of overflow pages in hash index \"%s\"",
285  RelationGetRelationName(rel))));
286 
287  newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
288  }
289  else
290  {
291  /*
292  * Nothing to do here; since the page will be past the last used page,
293  * we know its bitmap bit was preinitialized to "in use".
294  */
295  }
296 
297  /* Calculate address of the new overflow page */
298  bit = BufferIsValid(newmapbuf) ?
299  metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
300  blkno = bitno_to_blkno(metap, bit);
301 
302  /*
303  * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
304  * relation length stays in sync with ours. XXX It's annoying to do this
305  * with metapage write lock held; would be better to use a lock that
306  * doesn't block incoming searches.
307  *
308  * It is okay to hold two buffer locks here (one on tail page of bucket
309  * and other on new overflow page) since there cannot be anyone else
310  * contending for access to ovflbuf.
311  */
312  ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
313 
314 found:
315 
316  /*
317  * Do the update. No ereport(ERROR) until changes are logged. We want to
318  * log the changes for bitmap page and overflow page together to avoid
319  * loss of pages in case the new page is added.
320  */
321  START_CRIT_SECTION();
322 
323  if (page_found)
324  {
325  Assert(BufferIsValid(mapbuf));
326 
327  /* mark page "in use" in the bitmap */
328  SETBIT(freep, bitmap_page_bit);
329  MarkBufferDirty(mapbuf);
330  }
331  else
332  {
333  /* update the count to indicate new overflow page is added */
334  metap->hashm_spares[splitnum]++;
335 
336  if (BufferIsValid(newmapbuf))
337  {
338  _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
339  MarkBufferDirty(newmapbuf);
340 
341  /* add the new bitmap page to the metapage's list of bitmaps */
342  metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
343  metap->hashm_nmaps++;
344  metap->hashm_spares[splitnum]++;
345  }
346 
347  MarkBufferDirty(metabuf);
348 
349  /*
350  * for new overflow page, we don't need to explicitly set the bit in
351  * bitmap page, as by default that will be set to "in use".
352  */
353  }
354 
355  /*
356  * Adjust hashm_firstfree to avoid redundant searches. But don't risk
357  * changing it if someone moved it while we were searching bitmap pages.
358  */
359  if (metap->hashm_firstfree == orig_firstfree)
360  {
361  metap->hashm_firstfree = bit + 1;
362  MarkBufferDirty(metabuf);
363  }
364 
365  /* initialize new overflow page */
366  ovflpage = BufferGetPage(ovflbuf);
367  ovflopaque = HashPageGetOpaque(ovflpage);
368  ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
369  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
370  ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
371  ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
372  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
373 
374  MarkBufferDirty(ovflbuf);
375 
376  /* logically chain overflow page to previous page */
377  pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
378 
379  MarkBufferDirty(buf);
380 
381  /* XLOG stuff */
382  if (RelationNeedsWAL(rel))
383  {
384  XLogRecPtr recptr;
385  xl_hash_add_ovfl_page xlrec;
386 
387  xlrec.bmpage_found = page_found;
388  xlrec.bmsize = metap->hashm_bmsize;
389 
390  XLogBeginInsert();
391  XLogRegisterData((char *) &xlrec, SizeOfHashAddOvflPage);
392 
393  XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
394  XLogRegisterBufData(0, (char *) &pageopaque->hasho_bucket, sizeof(Bucket));
395 
396  XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
397 
398  if (BufferIsValid(mapbuf))
399  {
400  XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
401  XLogRegisterBufData(2, (char *) &bitmap_page_bit, sizeof(uint32));
402  }
403 
404  if (BufferIsValid(newmapbuf))
405  XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
406 
407  XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
408  XLogRegisterBufData(4, (char *) &metap->hashm_firstfree, sizeof(uint32));
409 
410  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
411 
412  PageSetLSN(BufferGetPage(ovflbuf), recptr);
413  PageSetLSN(BufferGetPage(buf), recptr);
414 
415  if (BufferIsValid(mapbuf))
416  PageSetLSN(BufferGetPage(mapbuf), recptr);
417 
418  if (BufferIsValid(newmapbuf))
419  PageSetLSN(BufferGetPage(newmapbuf), recptr);
420 
421  PageSetLSN(BufferGetPage(metabuf), recptr);
422  }
423 
424  END_CRIT_SECTION();
425 
426  if (retain_pin)
427  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
428  else
429  _hash_relbuf(rel, buf);
430 
431  if (BufferIsValid(mapbuf))
432  _hash_relbuf(rel, mapbuf);
433 
434  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
435 
436  if (BufferIsValid(newmapbuf))
437  _hash_relbuf(rel, newmapbuf);
438 
439  return ovflbuf;
440 }

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert(), bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid, xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, xl_hash_add_ovfl_page::bmsize, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, HASH_MAX_BITMAPS, HASH_WRITE, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBuffer, j, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().

◆ _hash_binsearch()

OffsetNumber _hash_binsearch ( Page  page,
uint32  hash_value 
)

Definition at line 351 of file hashutil.c.

352 {
353  OffsetNumber upper;
354  OffsetNumber lower;
355 
356  /* Loop invariant: lower <= desired place <= upper */
357  upper = PageGetMaxOffsetNumber(page) + 1;
358  lower = FirstOffsetNumber;
359 
360  while (upper > lower)
361  {
362  OffsetNumber off;
363  IndexTuple itup;
364  uint32 hashkey;
365 
366  off = (upper + lower) / 2;
367  Assert(OffsetNumberIsValid(off));
368 
369  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
370  hashkey = _hash_get_indextuple_hashkey(itup);
371  if (hashkey < hash_value)
372  lower = off + 1;
373  else
374  upper = off;
375  }
376 
377  return lower;
378 }

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and upper().

Referenced by _hash_pgaddmultitup(), _hash_pgaddtup(), and _hash_readpage().
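
In effect this is a lower-bound search over the hash codes stored on the page: it returns the first offset whose hash key is >= hash_value, or PageGetMaxOffsetNumber(page) + 1 if every key is smaller. For example, if a page held hash keys {10, 20, 20, 30} at offsets 1-4 (hypothetical values), _hash_binsearch(page, 20) would return offset 2 and _hash_binsearch(page, 25) would return offset 4.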

◆ _hash_binsearch_last()

OffsetNumber _hash_binsearch_last ( Page  page,
uint32  hash_value 
)

Definition at line 389 of file hashutil.c.

390 {
391  OffsetNumber upper;
392  OffsetNumber lower;
393 
394  /* Loop invariant: lower <= desired place <= upper */
395  upper = PageGetMaxOffsetNumber(page);
396  lower = FirstOffsetNumber - 1;
397 
398  while (upper > lower)
399  {
400  IndexTuple itup;
401  OffsetNumber off;
402  uint32 hashkey;
403 
404  off = (upper + lower + 1) / 2;
405  Assert(OffsetNumberIsValid(off));
406 
407  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
408  hashkey = _hash_get_indextuple_hashkey(itup);
409  if (hashkey > hash_value)
410  upper = off - 1;
411  else
412  lower = off;
413  }
414 
415  return lower;
416 }

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and upper().

Referenced by _hash_readpage().
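
This is the mirror image of _hash_binsearch(): an upper-bound search that returns the last offset whose hash key is <= hash_value, or FirstOffsetNumber - 1 if every key is larger. With the same hypothetical page of keys {10, 20, 20, 30}, _hash_binsearch_last(page, 20) would return offset 3, which is where a backward scan starts.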

◆ _hash_checkpage()

void _hash_checkpage ( Relation  rel,
Buffer  buf,
int  flags 
)

Definition at line 211 of file hashutil.c.

212 {
213  Page page = BufferGetPage(buf);
214 
215  /*
216  * ReadBuffer verifies that every newly-read page passes
217  * PageHeaderIsValid, which means it either contains a reasonably sane
218  * page header or is all-zero. We have to defend against the all-zero
219  * case, however.
220  */
221  if (PageIsNew(page))
222  ereport(ERROR,
223  (errcode(ERRCODE_INDEX_CORRUPTED),
224  errmsg("index \"%s\" contains unexpected zero page at block %u",
225  RelationGetRelationName(rel),
226  BufferGetBlockNumber(buf)),
227  errhint("Please REINDEX it.")));
228 
229  /*
230  * Additionally check that the special area looks sane.
231  */
232  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
233  ereport(ERROR,
234  (errcode(ERRCODE_INDEX_CORRUPTED),
235  errmsg("index \"%s\" contains corrupted page at block %u",
236  RelationGetRelationName(rel),
237  BufferGetBlockNumber(buf)),
238  errhint("Please REINDEX it.")));
239 
240  if (flags)
241  {
242  HashPageOpaque opaque = HashPageGetOpaque(page);
243 
244  if ((opaque->hasho_flag & flags) == 0)
245  ereport(ERROR,
246  (errcode(ERRCODE_INDEX_CORRUPTED),
247  errmsg("index \"%s\" contains corrupted page at block %u",
248  RelationGetRelationName(rel),
249  BufferGetBlockNumber(buf)),
250  errhint("Please REINDEX it.")));
251  }
252 
253  /*
254  * When checking the metapage, also verify magic number and version.
255  */
256  if (flags == LH_META_PAGE)
257  {
258  HashMetaPage metap = HashPageGetMeta(page);
259 
260  if (metap->hashm_magic != HASH_MAGIC)
261  ereport(ERROR,
262  (errcode(ERRCODE_INDEX_CORRUPTED),
263  errmsg("index \"%s\" is not a hash index",
264  RelationGetRelationName(rel))));
265 
266  if (metap->hashm_version != HASH_VERSION)
267  ereport(ERROR,
268  (errcode(ERRCODE_INDEX_CORRUPTED),
269  errmsg("index \"%s\" has wrong hash version",
270  RelationGetRelationName(rel)),
271  errhint("Please REINDEX it.")));
272  }
273 }

References buf, BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, HASH_MAGIC, HASH_VERSION, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_flag, HashPageGetMeta, HashPageGetOpaque, LH_META_PAGE, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_readpage(), and hashbulkdelete().

◆ _hash_checkqual()

bool _hash_checkqual ( IndexScanDesc  scan,
IndexTuple  itup 
)

Definition at line 32 of file hashutil.c.

33 {
34  /*
35  * Currently, we can't check any of the scan conditions since we do not
36  * have the original index entry value to supply to the sk_func. Always
37  * return true; we expect that hashgettuple already set the recheck flag
38  * to make the main indexscan code do it.
39  */
40 #ifdef NOT_USED
41  TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
42  ScanKey key = scan->keyData;
43  int scanKeySize = scan->numberOfKeys;
44 
45  while (scanKeySize > 0)
46  {
47  Datum datum;
48  bool isNull;
49  Datum test;
50 
51  datum = index_getattr(itup,
52  key->sk_attno,
53  tupdesc,
54  &isNull);
55 
56  /* assume sk_func is strict */
57  if (isNull)
58  return false;
59  if (key->sk_flags & SK_ISNULL)
60  return false;
61 
62  test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
63  datum, key->sk_argument);
64 
65  if (!DatumGetBool(test))
66  return false;
67 
68  key++;
69  scanKeySize--;
70  }
71 #endif
72 
73  return true;
74 }

References DatumGetBool, FunctionCall2Coll(), index_getattr, IndexScanDescData::indexRelation, sort-test::key, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, RelationGetDescr, SK_ISNULL, and test().

Referenced by _hash_load_qualified_items().

◆ _hash_convert_tuple()

bool _hash_convert_tuple ( Relation  index,
Datum *  user_values,
bool *  user_isnull,
Datum *  index_values,
bool *  index_isnull 
)

Definition at line 319 of file hashutil.c.

322 {
323  uint32 hashkey;
324 
325  /*
326  * We do not insert null values into hash indexes. This is okay because
327  * the only supported search operator is '=', and we assume it is strict.
328  */
329  if (user_isnull[0])
330  return false;
331 
332  hashkey = _hash_datum2hashkey(index, user_values[0]);
333  index_values[0] = UInt32GetDatum(hashkey);
334  index_isnull[0] = false;
335  return true;
336 }

References _hash_datum2hashkey(), and UInt32GetDatum.

Referenced by hashbuildCallback(), and hashinsert().
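
This is why a hash index stores only the 32-bit hash code, never the original column value. A minimal caller-side sketch (hypothetical key value; compare hashinsert()):

Datum user_values[1] = {Int32GetDatum(42)};     /* original key */
bool  user_isnull[1] = {false};
Datum index_values[1];
bool  index_isnull[1];

if (_hash_convert_tuple(index, user_values, user_isnull,
                        index_values, index_isnull))
{
    /* index_values[0] now holds the uint32 hash code of the key */
}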

◆ _hash_datum2hashkey()

uint32 _hash_datum2hashkey ( Relation  rel,
Datum  key 
)

Definition at line 83 of file hashutil.c.

84 {
85  FmgrInfo *procinfo;
86  Oid collation;
87 
88  /* XXX assumes index has only one attribute */
89  procinfo = index_getprocinfo(rel, 1, HASHSTANDARD_PROC);
90  collation = rel->rd_indcollation[0];
91 
92  return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
93 }

References DatumGetUInt32, FunctionCall1Coll(), HASHSTANDARD_PROC, index_getprocinfo(), sort-test::key, and RelationData::rd_indcollation.

Referenced by _hash_convert_tuple(), and _hash_first().

◆ _hash_datum2hashkey_type()

uint32 _hash_datum2hashkey_type ( Relation  rel,
Datum  key,
Oid  keytype 
)

Definition at line 103 of file hashutil.c.

104 {
105  RegProcedure hash_proc;
106  Oid collation;
107 
108  /* XXX assumes index has only one attribute */
109  hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
110  keytype,
111  keytype,
112  HASHSTANDARD_PROC);
113  if (!RegProcedureIsValid(hash_proc))
114  elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
115  HASHSTANDARD_PROC, keytype, keytype,
116  RelationGetRelationName(rel));
117  collation = rel->rd_indcollation[0];
118 
119  return DatumGetUInt32(OidFunctionCall1Coll(hash_proc, collation, key));
120 }

References DatumGetUInt32, elog, ERROR, get_opfamily_proc(), HASHSTANDARD_PROC, sort-test::key, OidFunctionCall1Coll(), RelationData::rd_indcollation, RelationData::rd_opfamily, RegProcedureIsValid, and RelationGetRelationName.

Referenced by _hash_first().

◆ _hash_doinsert()

void _hash_doinsert ( Relation  rel,
IndexTuple  itup,
Relation  heapRel 
)

Definition at line 37 of file hashinsert.c.

38 {
39  Buffer buf = InvalidBuffer;
40  Buffer bucket_buf;
41  Buffer metabuf;
42  HashMetaPage metap;
43  HashMetaPage usedmetap = NULL;
44  Page metapage;
45  Page page;
46  HashPageOpaque pageopaque;
47  Size itemsz;
48  bool do_expand;
49  uint32 hashkey;
50  Bucket bucket;
51  OffsetNumber itup_off;
52 
53  /*
54  * Get the hash key for the item (it's stored in the index tuple itself).
55  */
56  hashkey = _hash_get_indextuple_hashkey(itup);
57 
58  /* compute item size too */
59  itemsz = IndexTupleSize(itup);
60  itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
61  * need to be consistent */
62 
63 restart_insert:
64 
65  /*
66  * Read the metapage. We don't lock it yet; HashMaxItemSize() will
67  * examine pd_pagesize_version, but that can't change so we can examine it
68  * without a lock.
69  */
70  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
71  metapage = BufferGetPage(metabuf);
72 
73  /*
74  * Check whether the item can fit on a hash page at all. (Eventually, we
75  * ought to try to apply TOAST methods if not.) Note that at this point,
76  * itemsz doesn't include the ItemId.
77  *
78  * XXX this is useless code if we are only storing hash keys.
79  */
80  if (itemsz > HashMaxItemSize(metapage))
81  ereport(ERROR,
82  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
83  errmsg("index row size %zu exceeds hash maximum %zu",
84  itemsz, HashMaxItemSize(metapage)),
85  errhint("Values larger than a buffer page cannot be indexed.")));
86 
87  /* Lock the primary bucket page for the target bucket. */
88  buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
89  &usedmetap);
90  Assert(usedmetap != NULL);
91 
92  CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));
93 
94  /* remember the primary bucket buffer to release the pin on it at end. */
95  bucket_buf = buf;
96 
97  page = BufferGetPage(buf);
98  pageopaque = HashPageGetOpaque(page);
99  bucket = pageopaque->hasho_bucket;
100 
101  /*
102  * If this bucket is in the process of being split, try to finish the
103  * split before inserting, because that might create room for the
104  * insertion to proceed without allocating an additional overflow page.
105  * It's only interesting to finish the split if we're trying to insert
106  * into the bucket from which we're removing tuples (the "old" bucket),
107  * not if we're trying to insert into the bucket into which tuples are
108  * being moved (the "new" bucket).
109  */
110  if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
111  {
112  /* release the lock on bucket buffer, before completing the split. */
113  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
114 
115  _hash_finish_split(rel, metabuf, buf, bucket,
116  usedmetap->hashm_maxbucket,
117  usedmetap->hashm_highmask,
118  usedmetap->hashm_lowmask);
119 
120  /* release the pin on old and meta buffer. retry for insert. */
121  _hash_dropbuf(rel, buf);
122  _hash_dropbuf(rel, metabuf);
123  goto restart_insert;
124  }
125 
126  /* Do the insertion */
127  while (PageGetFreeSpace(page) < itemsz)
128  {
129  BlockNumber nextblkno;
130 
131  /*
132  * Check if current page has any DEAD tuples. If yes, delete these
133  * tuples and see if we can get a space for the new item to be
134  * inserted before moving to the next page in the bucket chain.
135  */
136  if (H_HAS_DEAD_TUPLES(pageopaque))
137  {
138 
139  if (IsBufferCleanupOK(buf))
140  {
141  _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
142 
143  if (PageGetFreeSpace(page) >= itemsz)
144  break; /* OK, now we have enough space */
145  }
146  }
147 
148  /*
149  * no space on this page; check for an overflow page
150  */
151  nextblkno = pageopaque->hasho_nextblkno;
152 
153  if (BlockNumberIsValid(nextblkno))
154  {
155  /*
156  * ovfl page exists; go get it. if it doesn't have room, we'll
157  * find out next pass through the loop test above. we always
158  * release both the lock and pin if this is an overflow page, but
159  * only the lock if this is the primary bucket page, since the pin
160  * on the primary bucket must be retained throughout the scan.
161  */
162  if (buf != bucket_buf)
163  _hash_relbuf(rel, buf);
164  else
165  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
166  buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
167  page = BufferGetPage(buf);
168  }
169  else
170  {
171  /*
172  * we're at the end of the bucket chain and we haven't found a
173  * page with enough room. allocate a new overflow page.
174  */
175 
176  /* release our write lock without modifying buffer */
177  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
178 
179  /* chain to a new overflow page */
180  buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
181  page = BufferGetPage(buf);
182 
183  /* should fit now, given test above */
184  Assert(PageGetFreeSpace(page) >= itemsz);
185  }
186  pageopaque = HashPageGetOpaque(page);
187  Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
188  Assert(pageopaque->hasho_bucket == bucket);
189  }
190 
191  /*
192  * Write-lock the metapage so we can increment the tuple count. After
193  * incrementing it, check to see if it's time for a split.
194  */
195  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
196 
197  /* Do the update. No ereport(ERROR) until changes are logged */
198  START_CRIT_SECTION();
199 
200  /* found page with enough space, so add the item here */
201  itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
202  MarkBufferDirty(buf);
203 
204  /* metapage operations */
205  metap = HashPageGetMeta(metapage);
206  metap->hashm_ntuples += 1;
207 
208  /* Make sure this stays in sync with _hash_expandtable() */
209  do_expand = metap->hashm_ntuples >
210  (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
211 
212  MarkBufferDirty(metabuf);
213 
214  /* XLOG stuff */
215  if (RelationNeedsWAL(rel))
216  {
217  xl_hash_insert xlrec;
218  XLogRecPtr recptr;
219 
220  xlrec.offnum = itup_off;
221 
222  XLogBeginInsert();
223  XLogRegisterData((char *) &xlrec, SizeOfHashInsert);
224 
225  XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
226 
227  XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
228  XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
229 
230  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
231 
232  PageSetLSN(BufferGetPage(buf), recptr);
233  PageSetLSN(BufferGetPage(metabuf), recptr);
234  }
235 
236  END_CRIT_SECTION();
237 
238  /* drop lock on metapage, but keep pin */
239  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
240 
241  /*
242  * Release the modified page and ensure to release the pin on primary
243  * page.
244  */
245  _hash_relbuf(rel, buf);
246  if (buf != bucket_buf)
247  _hash_dropbuf(rel, bucket_buf);
248 
249  /* Attempt to split if a split is needed */
250  if (do_expand)
251  _hash_expandtable(rel, metabuf);
252 
253  /* Finally drop our pin on the metapage */
254  _hash_dropbuf(rel, metabuf);
255 }

References _hash_addovflpage(), _hash_dropbuf(), _hash_expandtable(), _hash_finish_split(), _hash_get_indextuple_hashkey(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_pgaddtup(), _hash_relbuf(), _hash_vacuum_one_page(), Assert(), BlockNumberIsValid, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, CheckForSerializableConflictIn(), END_CRIT_SECTION, ereport, errcode(), errhint(), errmsg(), ERROR, H_BUCKET_BEING_SPLIT, H_HAS_DEAD_TUPLES, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMaxItemSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetMeta, HashPageGetOpaque, IndexTupleSize, InvalidBuffer, IsBufferCleanupOK(), LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MarkBufferDirty(), MAXALIGN, xl_hash_insert::offnum, PageGetFreeSpace(), PageSetLSN, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashInsert, START_CRIT_SECTION, XLOG_HASH_INSERT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _h_indexbuild(), hashbuildCallback(), and hashinsert().

◆ _hash_dropbuf()

void _hash_dropbuf ( Relation  rel,
Buffer  buf 
)

◆ _hash_dropscanbuf()

void _hash_dropscanbuf ( Relation  rel,
HashScanOpaque  so 
)

Definition at line 289 of file hashpage.c.

290 {
291  /* release pin we hold on primary bucket page */
292  if (BufferIsValid(so->hashso_bucket_buf) &&
293  so->hashso_bucket_buf != so->currPos.buf)
294  _hash_dropbuf(rel, so->hashso_bucket_buf);
295  so->hashso_bucket_buf = InvalidBuffer;
296 
297  /* release pin we hold on primary bucket page of bucket being split */
298  if (BufferIsValid(so->hashso_split_bucket_buf) &&
299  so->hashso_split_bucket_buf != so->currPos.buf)
300  _hash_dropbuf(rel, so->hashso_split_bucket_buf);
301  so->hashso_split_bucket_buf = InvalidBuffer;
302 
303  /* release any pin we still hold */
304  if (BufferIsValid(so->currPos.buf))
305  _hash_dropbuf(rel, so->currPos.buf);
306  so->currPos.buf = InvalidBuffer;
307 
308  /* reset split scan */
309  so->hashso_buc_populated = false;
310  so->hashso_buc_split = false;
311 }
bool hashso_buc_split
Definition: hash.h:180
HashScanPosData currPos
Definition: hash.h:189
bool hashso_buc_populated
Definition: hash.h:174
Buffer hashso_split_bucket_buf
Definition: hash.h:171
Buffer hashso_bucket_buf
Definition: hash.h:164
Buffer buf
Definition: hash.h:111

References _hash_dropbuf(), HashScanPosData::buf, BufferIsValid, HashScanOpaqueData::currPos, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, and InvalidBuffer.

Referenced by _hash_next(), hashendscan(), and hashrescan().
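
The subtlety above is that so->hashso_bucket_buf or so->hashso_split_bucket_buf may alias so->currPos.buf, and the shared pin must be dropped exactly once. A standalone toy sketch of that bookkeeping (not PostgreSQL code; the pin counters and buffer ids are invented for illustration):

#include <stdio.h>

#define InvalidBuffer 0

/* Hypothetical pin counters indexed by buffer id; the real code calls
 * _hash_dropbuf(), which releases a buffer-manager pin. */
static int pins[16];

static void drop(int buf) { pins[buf]--; }

int
main(void)
{
    int bucket_buf = 3;          /* pin on primary bucket page */
    int split_bucket_buf = 5;    /* pin on bucket being split */
    int curr = 3;                /* currPos.buf aliases bucket_buf here */

    pins[3] = 1;                 /* one pin per distinct buffer */
    pins[5] = 1;

    /* Mirror _hash_dropscanbuf: skip buffers that alias currPos.buf so
     * the shared pin is dropped exactly once. */
    if (bucket_buf != InvalidBuffer && bucket_buf != curr)
        drop(bucket_buf);
    if (split_bucket_buf != InvalidBuffer && split_bucket_buf != curr)
        drop(split_bucket_buf);
    if (curr != InvalidBuffer)
        drop(curr);

    printf("pins: buf3=%d buf5=%d\n", pins[3], pins[5]);   /* both 0 */
    return 0;
}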

◆ _hash_expandtable()

void _hash_expandtable ( Relation  rel,
Buffer  metabuf 
)

Definition at line 614 of file hashpage.c.

615 {
616  HashMetaPage metap;
617  Bucket old_bucket;
618  Bucket new_bucket;
619  uint32 spare_ndx;
620  BlockNumber start_oblkno;
621  BlockNumber start_nblkno;
622  Buffer buf_nblkno;
623  Buffer buf_oblkno;
624  Page opage;
625  Page npage;
626  HashPageOpaque oopaque;
627  HashPageOpaque nopaque;
628  uint32 maxbucket;
629  uint32 highmask;
630  uint32 lowmask;
631  bool metap_update_masks = false;
632  bool metap_update_splitpoint = false;
633 
634 restart_expand:
635 
636  /*
637  * Write-lock the meta page. It used to be necessary to acquire a
638  * heavyweight lock to begin a split, but that is no longer required.
639  */
640  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
641 
642  _hash_checkpage(rel, metabuf, LH_META_PAGE);
643  metap = HashPageGetMeta(BufferGetPage(metabuf));
644 
645  /*
646  * Check to see if split is still needed; someone else might have already
647  * done one while we waited for the lock.
648  *
649  * Make sure this stays in sync with _hash_doinsert()
650  */
651  if (metap->hashm_ntuples <=
652  (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
653  goto fail;
654 
655  /*
656  * Can't split anymore if maxbucket has reached its maximum possible
657  * value.
658  *
659  * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
660  * the calculation maxbucket+1 mustn't overflow). Currently we restrict
661  * to half that to prevent failure of pg_ceil_log2_32() and insufficient
662  * space in hashm_spares[]. It's moot anyway because an index with 2^32
663  * buckets would certainly overflow BlockNumber and hence
664  * _hash_alloc_buckets() would fail, but if we supported buckets smaller
665  * than a disk block then this would be an independent constraint.
666  *
667  * If you change this, see also the maximum initial number of buckets in
668  * _hash_init().
669  */
670  if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
671  goto fail;
672 
673  /*
674  * Determine which bucket is to be split, and attempt to take cleanup lock
675  * on the old bucket. If we can't get the lock, give up.
676  *
677  * The cleanup lock protects us not only against other backends, but
678  * against our own backend as well.
679  *
680  * The cleanup lock is mainly to protect the split from concurrent
681  * inserts. See src/backend/access/hash/README, Lock Definitions for
682  * further details. Due to this locking restriction, if there is any
683  * pending scan, the split will give up, which is not good but is harmless.
684  */
685  new_bucket = metap->hashm_maxbucket + 1;
686 
687  old_bucket = (new_bucket & metap->hashm_lowmask);
688 
689  start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
690 
691  buf_oblkno = _hash_getbuf_with_condlock_cleanup(rel, start_oblkno, LH_BUCKET_PAGE);
692  if (!buf_oblkno)
693  goto fail;
694 
695  opage = BufferGetPage(buf_oblkno);
696  oopaque = HashPageGetOpaque(opage);
697 
698  /*
699  * We want to finish any incomplete split of the old bucket before
700  * proceeding: there is no apparent benefit in deferring it, and handling
701  * a pending split that involves multiple buckets (should the new split
702  * also fail) would complicate the code. We need not consider the new
703  * bucket for completing a split here, as a re-split of the new bucket
704  * cannot start while there is still a pending split from the old bucket.
705  */
706  if (H_BUCKET_BEING_SPLIT(oopaque))
707  {
708  /*
709  * Copy bucket mapping info now; refer to the comment in code below where
710  * we copy this information before calling _hash_splitbucket to see
711  * why this is okay.
712  */
713  maxbucket = metap->hashm_maxbucket;
714  highmask = metap->hashm_highmask;
715  lowmask = metap->hashm_lowmask;
716 
717  /*
718  * Release the lock on metapage and old_bucket, before completing the
719  * split.
720  */
721  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
722  LockBuffer(buf_oblkno, BUFFER_LOCK_UNLOCK);
723 
724  _hash_finish_split(rel, metabuf, buf_oblkno, old_bucket, maxbucket,
725  highmask, lowmask);
726 
727  /* release the pin on old buffer and retry for expand. */
728  _hash_dropbuf(rel, buf_oblkno);
729 
730  goto restart_expand;
731  }
732 
733  /*
734  * Clean up the tuples remaining from the previous split. This operation
735  * requires a cleanup lock, and we already have one on the old bucket, so
736  * let's do it. We also don't want to allow further splits from the bucket
737  * until the garbage of the previous split is cleaned. This has two
738  * advantages: first, it helps to avoid bloat due to garbage, and second,
739  * during cleanup of the bucket we are always sure that the garbage tuples
740  * belong to the most recently split bucket. By contrast, if we allowed
741  * cleanup of a bucket after the meta page had been updated to indicate
742  * the new split but before the actual split, the cleanup operation would
743  * not be able to decide whether a tuple had been moved to the newly
744  * created bucket, and might end up deleting such tuples.
745  */
746  if (H_NEEDS_SPLIT_CLEANUP(oopaque))
747  {
748  /*
749  * Copy bucket mapping info now; refer to the comment in code below
750  * where we copy this information before calling _hash_splitbucket to
751  * see why this is okay.
752  */
753  maxbucket = metap->hashm_maxbucket;
754  highmask = metap->hashm_highmask;
755  lowmask = metap->hashm_lowmask;
756 
757  /* Release the metapage lock. */
758  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
759 
760  hashbucketcleanup(rel, old_bucket, buf_oblkno, start_oblkno, NULL,
761  maxbucket, highmask, lowmask, NULL, NULL, true,
762  NULL, NULL);
763 
764  _hash_dropbuf(rel, buf_oblkno);
765 
766  goto restart_expand;
767  }
768 
769  /*
770  * There shouldn't be any active scan on new bucket.
771  *
772  * Note: it is safe to compute the new bucket's blkno here, even though we
773  * may still need to update the BUCKET_TO_BLKNO mapping. This is because
774  * the current value of hashm_spares[hashm_ovflpoint] correctly shows
775  * where we are going to put a new splitpoint's worth of buckets.
776  */
777  start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
778 
779  /*
780  * If the split point is increasing we need to allocate a new batch of
781  * bucket pages.
782  */
783  spare_ndx = _hash_spareindex(new_bucket + 1);
784  if (spare_ndx > metap->hashm_ovflpoint)
785  {
786  uint32 buckets_to_add;
787 
788  Assert(spare_ndx == metap->hashm_ovflpoint + 1);
789 
790  /*
791  * We treat allocation of buckets as a separate WAL-logged action. Even
792  * if we fail after this operation, we won't leak bucket pages; rather,
793  * the next split will consume this space. In any case, even without
794  * failure we don't use all the space in one split operation.
795  */
796  buckets_to_add = _hash_get_totalbuckets(spare_ndx) - new_bucket;
797  if (!_hash_alloc_buckets(rel, start_nblkno, buckets_to_add))
798  {
799  /* can't split due to BlockNumber overflow */
800  _hash_relbuf(rel, buf_oblkno);
801  goto fail;
802  }
803  }
804 
805  /*
806  * Physically allocate the new bucket's primary page. We want to do this
807  * before changing the metapage's mapping info, in case we can't get the
808  * disk space. Ideally, we wouldn't need a cleanup lock on the new
809  * bucket, as no other backend can find it until the meta page is
810  * updated; however, it is good to be consistent with old bucket locking.
811  */
812  buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
813  if (!IsBufferCleanupOK(buf_nblkno))
814  {
815  _hash_relbuf(rel, buf_oblkno);
816  _hash_relbuf(rel, buf_nblkno);
817  goto fail;
818  }
819 
820  /*
821  * Since we are scribbling on the pages in the shared buffers, establish a
822  * critical section. Any failure in this next code leaves us with a big
823  * problem: the metapage is effectively corrupt but could get written back
824  * to disk.
825  */
826  START_CRIT_SECTION();
827 
828  /*
829  * Okay to proceed with split. Update the metapage bucket mapping info.
830  */
831  metap->hashm_maxbucket = new_bucket;
832 
833  if (new_bucket > metap->hashm_highmask)
834  {
835  /* Starting a new doubling */
836  metap->hashm_lowmask = metap->hashm_highmask;
837  metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
838  metap_update_masks = true;
839  }
840 
841  /*
842  * If the split point is increasing we need to adjust the hashm_spares[]
843  * array and hashm_ovflpoint so that future overflow pages will be created
844  * beyond this new batch of bucket pages.
845  */
846  if (spare_ndx > metap->hashm_ovflpoint)
847  {
848  metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
849  metap->hashm_ovflpoint = spare_ndx;
850  metap_update_splitpoint = true;
851  }
852 
853  MarkBufferDirty(metabuf);
854 
855  /*
856  * Copy bucket mapping info now; this saves re-accessing the meta page
857  * inside _hash_splitbucket's inner loop. Note that once we drop the
858  * split lock, other splits could begin, so these values might be out of
859  * date before _hash_splitbucket finishes. That's okay, since all it
860  * needs is to tell which of these two buckets to map hashkeys into.
861  */
862  maxbucket = metap->hashm_maxbucket;
863  highmask = metap->hashm_highmask;
864  lowmask = metap->hashm_lowmask;
865 
866  opage = BufferGetPage(buf_oblkno);
867  oopaque = HashPageGetOpaque(opage);
868 
869  /*
870  * Mark the old bucket to indicate that split is in progress. (At
871  * operation end, we will clear the split-in-progress flag.) Also, for a
872  * primary bucket page, hasho_prevblkno stores the number of buckets that
873  * existed as of the last split, so we must update that value here.
874  */
875  oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT;
876  oopaque->hasho_prevblkno = maxbucket;
877 
878  MarkBufferDirty(buf_oblkno);
879 
880  npage = BufferGetPage(buf_nblkno);
881 
882  /*
883  * initialize the new bucket's primary page and mark it to indicate that
884  * split is in progress.
885  */
886  nopaque = HashPageGetOpaque(npage);
887  nopaque->hasho_prevblkno = maxbucket;
888  nopaque->hasho_nextblkno = InvalidBlockNumber;
889  nopaque->hasho_bucket = new_bucket;
890  nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED;
891  nopaque->hasho_page_id = HASHO_PAGE_ID;
892 
893  MarkBufferDirty(buf_nblkno);
894 
895  /* XLOG stuff */
896  if (RelationNeedsWAL(rel))
897  {
898  xl_hash_split_allocate_page xlrec;
899  XLogRecPtr recptr;
900 
901  xlrec.new_bucket = maxbucket;
902  xlrec.old_bucket_flag = oopaque->hasho_flag;
903  xlrec.new_bucket_flag = nopaque->hasho_flag;
904  xlrec.flags = 0;
905 
906  XLogBeginInsert();
907 
908  XLogRegisterBuffer(0, buf_oblkno, REGBUF_STANDARD);
909  XLogRegisterBuffer(1, buf_nblkno, REGBUF_WILL_INIT);
910  XLogRegisterBuffer(2, metabuf, REGBUF_STANDARD);
911 
912  if (metap_update_masks)
913  {
914  xlrec.flags |= XLH_SPLIT_META_UPDATE_MASKS;
915  XLogRegisterBufData(2, (char *) &metap->hashm_lowmask, sizeof(uint32));
916  XLogRegisterBufData(2, (char *) &metap->hashm_highmask, sizeof(uint32));
917  }
918 
919  if (metap_update_splitpoint)
920  {
921  xlrec.flags |= XLH_SPLIT_META_UPDATE_SPLITPOINT;
922  XLogRegisterBufData(2, (char *) &metap->hashm_ovflpoint,
923  sizeof(uint32));
924  XLogRegisterBufData(2,
925  (char *) &metap->hashm_spares[metap->hashm_ovflpoint],
926  sizeof(uint32));
927  }
928 
929  XLogRegisterData((char *) &xlrec, SizeOfHashSplitAllocPage);
930 
931  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_ALLOCATE_PAGE);
932 
933  PageSetLSN(BufferGetPage(buf_oblkno), recptr);
934  PageSetLSN(BufferGetPage(buf_nblkno), recptr);
935  PageSetLSN(BufferGetPage(metabuf), recptr);
936  }
937 
938  END_CRIT_SECTION();
939 
940  /* drop lock, but keep pin */
941  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
942 
943  /* Relocate records to the new bucket */
944  _hash_splitbucket(rel, metabuf,
945  old_bucket, new_bucket,
946  buf_oblkno, buf_nblkno, NULL,
947  maxbucket, highmask, lowmask);
948 
949  /* all done, now release the pins on primary buckets. */
950  _hash_dropbuf(rel, buf_oblkno);
951  _hash_dropbuf(rel, buf_nblkno);
952 
953  return;
954 
955  /* Here if we decide not to split, or fail to acquire the old bucket lock */
956 fail:
957 
958  /* We didn't write the metapage, so just drop lock */
959  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
960 }
void hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy, uint32 maxbucket, uint32 highmask, uint32 lowmask, double *tuples_removed, double *num_index_tuples, bool split_cleanup, IndexBulkDeleteCallback callback, void *callback_state)
Definition: hash.c:685
#define LH_BUCKET_BEING_POPULATED
Definition: hash.h:58
#define BUCKET_TO_BLKNO(metap, B)
Definition: hash.h:39
#define H_NEEDS_SPLIT_CLEANUP(opaque)
Definition: hash.h:90
#define LH_BUCKET_BEING_SPLIT
Definition: hash.h:59
#define XLOG_HASH_SPLIT_ALLOCATE_PAGE
Definition: hash_xlog.h:31
#define SizeOfHashSplitAllocPage
Definition: hash_xlog.h:100
#define XLH_SPLIT_META_UPDATE_SPLITPOINT
Definition: hash_xlog.h:46
#define XLH_SPLIT_META_UPDATE_MASKS
Definition: hash_xlog.h:45
Buffer _hash_getbuf_with_condlock_cleanup(Relation rel, BlockNumber blkno, int flags)
Definition: hashpage.c:96
static void _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, Buffer obuf, Buffer nbuf, HTAB *htab, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition: hashpage.c:1069
static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
Definition: hashpage.c:988
uint32 _hash_spareindex(uint32 num_bucket)
Definition: hashutil.c:143
uint32 _hash_get_totalbuckets(uint32 splitpoint_phase)
Definition: hashutil.c:175

References _hash_alloc_buckets(), _hash_checkpage(), _hash_dropbuf(), _hash_finish_split(), _hash_get_totalbuckets(), _hash_getbuf_with_condlock_cleanup(), _hash_getnewbuf(), _hash_relbuf(), _hash_spareindex(), _hash_splitbucket(), Assert(), BUCKET_TO_BLKNO, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage, END_CRIT_SECTION, xl_hash_split_allocate_page::flags, H_BUCKET_BEING_SPLIT, H_NEEDS_SPLIT_CLEANUP, hashbucketcleanup(), HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, InvalidBlockNumber, IsBufferCleanupOK(), LH_BUCKET_BEING_POPULATED, LH_BUCKET_BEING_SPLIT, LH_BUCKET_PAGE, LH_META_PAGE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), xl_hash_split_allocate_page::new_bucket, xl_hash_split_allocate_page::new_bucket_flag, xl_hash_split_allocate_page::old_bucket_flag, PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, SizeOfHashSplitAllocPage, START_CRIT_SECTION, XLH_SPLIT_META_UPDATE_MASKS, XLH_SPLIT_META_UPDATE_SPLITPOINT, XLOG_HASH_SPLIT_ALLOCATE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert().
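
The decision to expand is pure metapage arithmetic: a split is attempted once hashm_ntuples exceeds hashm_ffactor tuples per bucket, and _hash_expandtable() rechecks the same condition under the metapage lock. A minimal standalone sketch of that check (the values are assumptions; the real fields live in HashMetaPageData):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the check shared by _hash_doinsert() and _hash_expandtable():
 * split once the average tuples per bucket exceeds the fill factor. */
static int
split_needed(double ntuples, uint16_t ffactor, uint32_t maxbucket)
{
    return ntuples > (double) ffactor * (maxbucket + 1);
}

int
main(void)
{
    /* assumed values: 75 tuples/bucket target, 4 buckets (maxbucket == 3) */
    printf("%d\n", split_needed(299.0, 75, 3));   /* 0: 299 <= 300 */
    printf("%d\n", split_needed(301.0, 75, 3));   /* 1: 301 >  300 */
    return 0;
}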

◆ _hash_finish_split()

void _hash_finish_split ( Relation  rel,
Buffer  metabuf,
Buffer  obuf,
Bucket  obucket,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask 
)

Definition at line 1352 of file hashpage.c.

1354 {
1355  HASHCTL hash_ctl;
1356  HTAB *tidhtab;
1357  Buffer bucket_nbuf = InvalidBuffer;
1358  Buffer nbuf;
1359  Page npage;
1360  BlockNumber nblkno;
1361  BlockNumber bucket_nblkno;
1362  HashPageOpaque npageopaque;
1363  Bucket nbucket;
1364  bool found;
1365 
1366  /* Initialize the hash table used to track TIDs */
1367  hash_ctl.keysize = sizeof(ItemPointerData);
1368  hash_ctl.entrysize = sizeof(ItemPointerData);
1369  hash_ctl.hcxt = CurrentMemoryContext;
1370 
1371  tidhtab =
1372  hash_create("bucket ctids",
1373  256, /* arbitrary initial size */
1374  &hash_ctl,
1375  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1376 
1377  bucket_nblkno = nblkno = _hash_get_newblock_from_oldbucket(rel, obucket);
1378 
1379  /*
1380  * Scan the new bucket and build hash table of TIDs
1381  */
1382  for (;;)
1383  {
1384  OffsetNumber noffnum;
1385  OffsetNumber nmaxoffnum;
1386 
1387  nbuf = _hash_getbuf(rel, nblkno, HASH_READ,
1388  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
1389 
1390  /* remember the primary bucket buffer to acquire cleanup lock on it. */
1391  if (nblkno == bucket_nblkno)
1392  bucket_nbuf = nbuf;
1393 
1394  npage = BufferGetPage(nbuf);
1395  npageopaque = HashPageGetOpaque(npage);
1396 
1397  /* Scan each tuple in new page */
1398  nmaxoffnum = PageGetMaxOffsetNumber(npage);
1399  for (noffnum = FirstOffsetNumber;
1400  noffnum <= nmaxoffnum;
1401  noffnum = OffsetNumberNext(noffnum))
1402  {
1403  IndexTuple itup;
1404 
1405  /* Fetch the item's TID and insert it in hash table. */
1406  itup = (IndexTuple) PageGetItem(npage,
1407  PageGetItemId(npage, noffnum));
1408 
1409  (void) hash_search(tidhtab, &itup->t_tid, HASH_ENTER, &found);
1410 
1411  Assert(!found);
1412  }
1413 
1414  nblkno = npageopaque->hasho_nextblkno;
1415 
1416  /*
1417  * Release our write lock without modifying the buffer, making sure to
1418  * retain the pin on the primary bucket.
1419  */
1420  if (nbuf == bucket_nbuf)
1421  LockBuffer(nbuf, BUFFER_LOCK_UNLOCK);
1422  else
1423  _hash_relbuf(rel, nbuf);
1424 
1425  /* Exit loop if no more overflow pages in new bucket */
1426  if (!BlockNumberIsValid(nblkno))
1427  break;
1428  }
1429 
1430  /*
1431  * Conditionally get the cleanup lock on the old and new buckets to
1432  * perform the split operation. If we don't get the cleanup locks,
1433  * silently give up; the next insertion on the old bucket will try
1434  * again to complete the split.
1435  */
1436  if (!ConditionalLockBufferForCleanup(obuf))
1437  {
1438  hash_destroy(tidhtab);
1439  return;
1440  }
1441  if (!ConditionalLockBufferForCleanup(bucket_nbuf))
1442  {
1443  LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
1444  hash_destroy(tidhtab);
1445  return;
1446  }
1447 
1448  npage = BufferGetPage(bucket_nbuf);
1449  npageopaque = HashPageGetOpaque(npage);
1450  nbucket = npageopaque->hasho_bucket;
1451 
1452  _hash_splitbucket(rel, metabuf, obucket,
1453  nbucket, obuf, bucket_nbuf, tidhtab,
1454  maxbucket, highmask, lowmask);
1455 
1456  _hash_dropbuf(rel, bucket_nbuf);
1457  hash_destroy(tidhtab);
1458 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4390
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:862
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:349
#define HASH_READ
Definition: hash.h:339
BlockNumber _hash_get_newblock_from_oldbucket(Relation rel, Bucket old_bucket)
Definition: hashutil.c:462
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
struct ItemPointerData ItemPointerData
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
Definition: dynahash.c:220
ItemPointerData t_tid
Definition: itup.h:37

References _hash_dropbuf(), _hash_get_newblock_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), _hash_splitbucket(), Assert(), BlockNumberIsValid, BUFFER_LOCK_UNLOCK, BufferGetPage, ConditionalLockBufferForCleanup(), CurrentMemoryContext, HASHCTL::entrysize, FirstOffsetNumber, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, HASH_READ, hash_search(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, HASHCTL::hcxt, InvalidBuffer, HASHCTL::keysize, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, LockBuffer(), OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and IndexTupleData::t_tid.

Referenced by _hash_doinsert(), and _hash_expandtable().
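
The TID hash table built above makes completing the split idempotent: tuples that already made it into the new bucket before the earlier split failed must be skipped when the old bucket is re-scanned. A toy sketch of that dedup logic, with a linear-search set standing in for dynahash and a simplified TID type (all names here are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for ItemPointerData: just enough to identify a tuple. */
typedef struct { unsigned block; unsigned offset; } Tid;

/* Linear-search set; the real code uses a dynahash table keyed on TIDs. */
static bool
tid_seen(const Tid *set, int n, Tid t)
{
    for (int i = 0; i < n; i++)
        if (set[i].block == t.block && set[i].offset == t.offset)
            return true;
    return false;
}

int
main(void)
{
    /* TIDs that already reached the new bucket before the old split died */
    Tid already[] = {{10, 1}, {10, 3}};
    /* tuples found while re-scanning the old bucket */
    Tid old_bucket[] = {{10, 1}, {10, 2}, {10, 3}, {10, 4}};

    for (int i = 0; i < 4; i++)
        if (!tid_seen(already, 2, old_bucket[i]))
            printf("move (%u,%u)\n",
                   old_bucket[i].block, old_bucket[i].offset);
    return 0;   /* moves only (10,2) and (10,4) */
}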

◆ _hash_first()

bool _hash_first ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 292 of file hashsearch.c.

293 {
294  Relation rel = scan->indexRelation;
295  HashScanOpaque so = (HashScanOpaque) scan->opaque;
296  ScanKey cur;
297  uint32 hashkey;
298  Bucket bucket;
299  Buffer buf;
300  Page page;
301  HashPageOpaque opaque;
302  HashScanPosItem *currItem;
303 
304  pgstat_count_index_scan(rel);
305 
306  /*
307  * We do not support hash scans with no index qualification, because we
308  * would have to read the whole index rather than just one bucket. That
309  * creates a whole raft of problems, since we haven't got a practical way
310  * to lock all the buckets against splits or compactions.
311  */
312  if (scan->numberOfKeys < 1)
313  ereport(ERROR,
314  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
315  errmsg("hash indexes do not support whole-index scans")));
316 
317  /* There may be more than one index qual, but we hash only the first */
318  cur = &scan->keyData[0];
319 
320  /* We support only single-column hash indexes */
321  Assert(cur->sk_attno == 1);
322  /* And there's only one operator strategy, too */
323  Assert(cur->sk_strategy == HTEqualStrategyNumber);
324 
325  /*
326  * If the constant in the index qual is NULL, assume it cannot match any
327  * items in the index.
328  */
329  if (cur->sk_flags & SK_ISNULL)
330  return false;
331 
332  /*
333  * Okay to compute the hash key. We want to do this before acquiring any
334  * locks, in case a user-defined hash function happens to be slow.
335  *
336  * If scankey operator is not a cross-type comparison, we can use the
337  * cached hash function; otherwise gotta look it up in the catalogs.
338  *
339  * We support the convention that sk_subtype == InvalidOid means the
340  * opclass input type; this is a hack to simplify life for ScanKeyInit().
341  */
342  if (cur->sk_subtype == rel->rd_opcintype[0] ||
343  cur->sk_subtype == InvalidOid)
344  hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
345  else
346  hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
347  cur->sk_subtype);
348 
349  so->hashso_sk_hash = hashkey;
350 
351  buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
352  PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
353  page = BufferGetPage(buf);
354  TestForOldSnapshot(scan->xs_snapshot, rel, page);
355  opaque = HashPageGetOpaque(page);
356  bucket = opaque->hasho_bucket;
357 
358  so->hashso_bucket_buf = buf;
359 
360  /*
361  * If a bucket split is in progress, then while scanning the bucket being
362  * populated we need to skip tuples that were copied from the bucket being
363  * split. We also need to maintain a pin on the bucket being split to
364  * ensure that the split-cleanup work done by vacuum doesn't remove tuples
365  * from it until this scan is done. We need to maintain a pin on the
366  * bucket being populated to ensure that vacuum doesn't squeeze that
367  * bucket until this scan is complete; otherwise, the ordering of tuples
368  * can't be maintained during forward and backward scans. Here, we have
369  * to be careful about the locking order: first, acquire the lock on the
370  * bucket being split; then, release the lock on it but not the pin; then,
371  * acquire a lock on the bucket being populated and re-verify whether the
372  * bucket split is still in progress. Acquiring the lock on the bucket
373  * being split first ensures that the vacuum waits for this scan to
374  * finish.
375  */
376  if (H_BUCKET_BEING_POPULATED(opaque))
377  {
378  BlockNumber old_blkno;
379  Buffer old_buf;
380 
381  old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
382 
383  /*
384  * release the lock on new bucket and re-acquire it after acquiring
385  * the lock on old bucket.
386  */
387  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
388 
389  old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
390  TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));
391 
392  /*
393  * remember the split bucket buffer so as to use it later for
394  * scanning.
395  */
396  so->hashso_split_bucket_buf = old_buf;
397  LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
398 
399  LockBuffer(buf, BUFFER_LOCK_SHARE);
400  page = BufferGetPage(buf);
401  opaque = HashPageGetOpaque(page);
402  Assert(opaque->hasho_bucket == bucket);
403 
404  if (H_BUCKET_BEING_POPULATED(opaque))
405  so->hashso_buc_populated = true;
406  else
407  {
408  _hash_dropbuf(rel, so->hashso_split_bucket_buf);
409  so->hashso_split_bucket_buf = InvalidBuffer;
410  }
411  }
412 
413  /* If a backwards scan is requested, move to the end of the chain */
414  if (ScanDirectionIsBackward(dir))
415  {
416  /*
417  * Backward scans that start during a split need to start from the end
418  * of the bucket being split.
419  */
420  while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
421  (so->hashso_buc_populated && !so->hashso_buc_split))
422  _hash_readnext(scan, &buf, &page, &opaque);
423  }
424 
425  /* remember which buffer we have pinned, if any */
426  Assert(BufferIsInvalid(so->currPos.buf));
427  so->currPos.buf = buf;
428 
429  /* Now find all the tuples satisfying the qualification from a page */
430  if (!_hash_readpage(scan, &buf, dir))
431  return false;
432 
433  /* OK, itemIndex says what to return */
434  currItem = &so->currPos.items[so->currPos.itemIndex];
435  scan->xs_heaptid = currItem->heapTid;
436 
437  /* if we're here, _hash_readpage found a valid tuple */
438  return true;
439 }
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:282
struct cursor * cur
Definition: ecpg.c:28
#define H_BUCKET_BEING_POPULATED(opaque)
Definition: hash.h:92
HashScanOpaqueData * HashScanOpaque
Definition: hash.h:192
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
Definition: hashsearch.c:133
static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
Definition: hashsearch.c:452
BlockNumber _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
Definition: hashutil.c:423
uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
Definition: hashutil.c:103
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:540
#define InvalidOid
Definition: postgres_ext.h:36
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2594
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
#define HTEqualStrategyNumber
Definition: stratnum.h:41
uint32 hashso_sk_hash
Definition: hash.h:161
HashScanPosItem items[MaxIndexTuplesPerPage]
Definition: hash.h:127
int itemIndex
Definition: hash.h:125
ItemPointerData xs_heaptid
Definition: relscan.h:147
struct SnapshotData * xs_snapshot
Definition: relscan.h:119
Oid * rd_opcintype
Definition: rel.h:204

References _hash_datum2hashkey(), _hash_datum2hashkey_type(), _hash_dropbuf(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_readnext(), _hash_readpage(), Assert(), BlockNumberIsValid, buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsInvalid, cur, HashScanOpaqueData::currPos, ereport, errcode(), errmsg(), ERROR, H_BUCKET_BEING_POPULATED, HASH_READ, HashPageGetOpaque, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_sk_hash, HashScanOpaqueData::hashso_split_bucket_buf, HTEqualStrategyNumber, IndexScanDescData::indexRelation, InvalidBuffer, InvalidOid, HashScanPosData::itemIndex, HashScanPosData::items, IndexScanDescData::keyData, LH_BUCKET_PAGE, LockBuffer(), IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, pgstat_count_index_scan, PredicateLockPage(), RelationData::rd_opcintype, ScanDirectionIsBackward, SK_ISNULL, TestForOldSnapshot(), IndexScanDescData::xs_heaptid, and IndexScanDescData::xs_snapshot.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_freeovflpage()

BlockNumber _hash_freeovflpage ( Relation  rel,
Buffer  bucketbuf,
Buffer  ovflbuf,
Buffer  wbuf,
IndexTuple itups,
OffsetNumber itup_offsets,
Size tups_size,
uint16  nitups,
BufferAccessStrategy  bstrategy 
)

Definition at line 490 of file hashovfl.c.

494 {
495  HashMetaPage metap;
496  Buffer metabuf;
497  Buffer mapbuf;
498  BlockNumber ovflblkno;
499  BlockNumber prevblkno;
500  BlockNumber blkno;
501  BlockNumber nextblkno;
502  BlockNumber writeblkno;
503  HashPageOpaque ovflopaque;
504  Page ovflpage;
505  Page mappage;
506  uint32 *freep;
507  uint32 ovflbitno;
508  int32 bitmappage,
509  bitmapbit;
510  Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
511  Buffer prevbuf = InvalidBuffer;
512  Buffer nextbuf = InvalidBuffer;
513  bool update_metap = false;
514 
515  /* Get information from the doomed page */
516  _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
517  ovflblkno = BufferGetBlockNumber(ovflbuf);
518  ovflpage = BufferGetPage(ovflbuf);
519  ovflopaque = HashPageGetOpaque(ovflpage);
520  nextblkno = ovflopaque->hasho_nextblkno;
521  prevblkno = ovflopaque->hasho_prevblkno;
522  writeblkno = BufferGetBlockNumber(wbuf);
523  bucket = ovflopaque->hasho_bucket;
524 
525  /*
526  * Fix up the bucket chain. This is a doubly-linked list, so we must fix
527  * up the bucket chain members behind and ahead of the overflow page being
528  * deleted. Concurrency issues are avoided by using lock chaining as
529  * described atop hashbucketcleanup.
530  */
531  if (BlockNumberIsValid(prevblkno))
532  {
533  if (prevblkno == writeblkno)
534  prevbuf = wbuf;
535  else
536  prevbuf = _hash_getbuf_with_strategy(rel,
537  prevblkno,
538  HASH_WRITE,
539  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
540  bstrategy);
541  }
542  if (BlockNumberIsValid(nextblkno))
543  nextbuf = _hash_getbuf_with_strategy(rel,
544  nextblkno,
545  HASH_WRITE,
546  LH_OVERFLOW_PAGE,
547  bstrategy);
548 
549  /* Note: bstrategy is intentionally not used for metapage and bitmap */
550 
551  /* Read the metapage so we can determine which bitmap page to use */
552  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
553  metap = HashPageGetMeta(BufferGetPage(metabuf));
554 
555  /* Identify which bit to set */
556  ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
557 
558  bitmappage = ovflbitno >> BMPG_SHIFT(metap);
559  bitmapbit = ovflbitno & BMPG_MASK(metap);
560 
561  if (bitmappage >= metap->hashm_nmaps)
562  elog(ERROR, "invalid overflow bit number %u", ovflbitno);
563  blkno = metap->hashm_mapp[bitmappage];
564 
565  /* Release metapage lock while we access the bitmap page */
566  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
567 
568  /* read the bitmap page to clear the bitmap bit */
569  mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
570  mappage = BufferGetPage(mapbuf);
571  freep = HashPageGetBitmap(mappage);
572  Assert(ISSET(freep, bitmapbit));
573 
574  /* Get write-lock on metapage to update firstfree */
575  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
576 
577  /* This operation needs to log multiple tuples, prepare WAL for that */
578  if (RelationNeedsWAL(rel))
579  XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
580 
581  START_CRIT_SECTION();
582 
583  /*
584  * we have to insert tuples on the "write" page, being careful to preserve
585  * hashkey ordering. (If we insert many tuples into the same "write" page
586  * it would be worth qsort'ing them).
587  */
588  if (nitups > 0)
589  {
590  _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
591  MarkBufferDirty(wbuf);
592  }
593 
594  /*
595  * Reinitialize the freed overflow page. Just zeroing the page won't
596  * work, because WAL replay routines expect pages to be initialized. See
597  * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
598  * careful to make the special space valid here so that tools like
599  * pageinspect won't get confused.
600  */
601  _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
602 
603  ovflopaque = HashPageGetOpaque(ovflpage);
604 
605  ovflopaque->hasho_prevblkno = InvalidBlockNumber;
606  ovflopaque->hasho_nextblkno = InvalidBlockNumber;
607  ovflopaque->hasho_bucket = InvalidBucket;
608  ovflopaque->hasho_flag = LH_UNUSED_PAGE;
609  ovflopaque->hasho_page_id = HASHO_PAGE_ID;
610 
611  MarkBufferDirty(ovflbuf);
612 
613  if (BufferIsValid(prevbuf))
614  {
615  Page prevpage = BufferGetPage(prevbuf);
616  HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
617 
618  Assert(prevopaque->hasho_bucket == bucket);
619  prevopaque->hasho_nextblkno = nextblkno;
620  MarkBufferDirty(prevbuf);
621  }
622  if (BufferIsValid(nextbuf))
623  {
624  Page nextpage = BufferGetPage(nextbuf);
625  HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
626 
627  Assert(nextopaque->hasho_bucket == bucket);
628  nextopaque->hasho_prevblkno = prevblkno;
629  MarkBufferDirty(nextbuf);
630  }
631 
632  /* Clear the bitmap bit to indicate that this overflow page is free */
633  CLRBIT(freep, bitmapbit);
634  MarkBufferDirty(mapbuf);
635 
636  /* if this is now the first free page, update hashm_firstfree */
637  if (ovflbitno < metap->hashm_firstfree)
638  {
639  metap->hashm_firstfree = ovflbitno;
640  update_metap = true;
641  MarkBufferDirty(metabuf);
642  }
643 
644  /* XLOG stuff */
645  if (RelationNeedsWAL(rel))
646  {
647  xl_hash_squeeze_page xlrec;
648  XLogRecPtr recptr;
649  int i;
650 
651  xlrec.prevblkno = prevblkno;
652  xlrec.nextblkno = nextblkno;
653  xlrec.ntups = nitups;
654  xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
655  xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
656 
657  XLogBeginInsert();
658  XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
659 
660  /*
661  * bucket buffer needs to be registered to ensure that we can acquire
662  * a cleanup lock on it during replay.
663  */
664  if (!xlrec.is_prim_bucket_same_wrt)
665  XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
666 
667  XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
668  if (xlrec.ntups > 0)
669  {
670  XLogRegisterBufData(1, (char *) itup_offsets,
671  nitups * sizeof(OffsetNumber));
672  for (i = 0; i < nitups; i++)
673  XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
674  }
675 
676  XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
677 
678  /*
679  * If prevpage and the writepage (block in which we are moving tuples
680  * from overflow) are same, then no need to separately register
681  * prevpage. During replay, we can directly update the nextblock in
682  * writepage.
683  */
684  if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
685  XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
686 
687  if (BufferIsValid(nextbuf))
688  XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
689 
690  XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
691  XLogRegisterBufData(5, (char *) &bitmapbit, sizeof(uint32));
692 
693  if (update_metap)
694  {
695  XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
696  XLogRegisterBufData(6, (char *) &metap->hashm_firstfree, sizeof(uint32));
697  }
698 
699  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
700 
701  PageSetLSN(BufferGetPage(wbuf), recptr);
702  PageSetLSN(BufferGetPage(ovflbuf), recptr);
703 
704  if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
705  PageSetLSN(BufferGetPage(prevbuf), recptr);
706  if (BufferIsValid(nextbuf))
707  PageSetLSN(BufferGetPage(nextbuf), recptr);
708 
709  PageSetLSN(BufferGetPage(mapbuf), recptr);
710 
711  if (update_metap)
712  PageSetLSN(BufferGetPage(metabuf), recptr);
713  }
714 
715  END_CRIT_SECTION();
716 
717  /* release previous bucket if it is not same as write bucket */
718  if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
719  _hash_relbuf(rel, prevbuf);
720 
721  if (BufferIsValid(ovflbuf))
722  _hash_relbuf(rel, ovflbuf);
723 
724  if (BufferIsValid(nextbuf))
725  _hash_relbuf(rel, nextbuf);
726 
727  _hash_relbuf(rel, mapbuf);
728  _hash_relbuf(rel, metabuf);
729 
730  return nextblkno;
731 }
#define CLRBIT(x, i)
Definition: blutils.c:31
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
signed int int32
Definition: c.h:429
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
#define LH_UNUSED_PAGE
Definition: hash.h:53
#define ISSET(A, N)
Definition: hash.h:334
#define InvalidBucket
Definition: hash.h:37
#define HASH_XLOG_FREE_OVFL_BUFS
Definition: hash_xlog.h:22
#define XLOG_HASH_SQUEEZE_PAGE
Definition: hash_xlog.h:35
#define SizeOfHashSqueezePage
Definition: hash_xlog.h:167
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition: hashinsert.c:300
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Definition: hashovfl.c:62
void _hash_pageinit(Page page, Size size)
Definition: hashpage.c:596
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition: hashpage.c:239
BlockNumber prevblkno
Definition: hash_xlog.h:155
bool is_prim_bucket_same_wrt
Definition: hash_xlog.h:158
bool is_prev_bucket_same_wrt
Definition: hash_xlog.h:161
BlockNumber nextblkno
Definition: hash_xlog.h:156
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition: xloginsert.c:176
#define REGBUF_NO_IMAGE
Definition: xloginsert.h:32

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid, BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, BufferIsValid, CLRBIT, elog, END_CRIT_SECTION, ERROR, HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBucket, InvalidBuffer, xl_hash_squeeze_page::is_prev_bucket_same_wrt, xl_hash_squeeze_page::is_prim_bucket_same_wrt, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), xl_hash_squeeze_page::nextblkno, xl_hash_squeeze_page::ntups, PageSetLSN, PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().
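
The bitmap manipulation above is plain bit arithmetic: the overflow page's bit number is split into a bitmap-page index (BMPG_SHIFT) and a bit within that page (BMPG_MASK), and SETBIT/CLRBIT/ISSET act on an array of uint32 words. A standalone sketch, assuming 4096-bit bitmap pages (the real sizes are derived from BLCKSZ):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_MAP 32   /* number of bits in uint32, as in hash.h */
#define SETBIT(A, N)  ((A)[(N)/BITS_PER_MAP] |=  ((uint32_t) 1 << ((N)%BITS_PER_MAP)))
#define CLRBIT(A, N)  ((A)[(N)/BITS_PER_MAP] &= ~((uint32_t) 1 << ((N)%BITS_PER_MAP)))
#define ISSET(A, N)   ((A)[(N)/BITS_PER_MAP] &   ((uint32_t) 1 << ((N)%BITS_PER_MAP)))

int
main(void)
{
    /* Assume 4096 bits per bitmap page (hashm_bmshift == 12). */
    unsigned bmshift = 12;
    unsigned bmmask = (1u << bmshift) - 1;        /* plays BMPG_MASK(metap) */
    uint32_t ovflbitno = 5000;

    unsigned bitmappage = ovflbitno >> bmshift;   /* bitmap page 1 */
    unsigned bitmapbit = ovflbitno & bmmask;      /* bit 904 within it */
    printf("page %u, bit %u\n", bitmappage, bitmapbit);

    uint32_t freep[4096 / BITS_PER_MAP] = {0};
    SETBIT(freep, bitmapbit);                     /* overflow page in use */
    printf("in use: %d\n", ISSET(freep, bitmapbit) != 0);
    CLRBIT(freep, bitmapbit);                     /* freed again */
    printf("in use: %d\n", ISSET(freep, bitmapbit) != 0);
    return 0;
}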

◆ _hash_get_indextuple_hashkey()

uint32 _hash_get_indextuple_hashkey ( IndexTuple  itup)

Definition at line 292 of file hashutil.c.

293 {
294  char *attp;
295 
296  /*
297  * We assume the hash key is the first attribute and can't be null, so
298  * this can be done crudely but very very cheaply ...
299  */
300  attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
301  return *((uint32 *) attp);
302 }
#define IndexInfoFindDataOffset(t_info)
Definition: itup.h:79
unsigned short t_info
Definition: itup.h:49

References IndexInfoFindDataOffset, and IndexTupleData::t_info.

Referenced by _h_indexbuild(), _hash_binsearch(), _hash_binsearch_last(), _hash_doinsert(), _hash_load_qualified_items(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_splitbucket(), hash_page_items(), and hashbucketcleanup().

◆ _hash_get_newblock_from_oldbucket()

BlockNumber _hash_get_newblock_from_oldbucket ( Relation  rel,
Bucket  old_bucket 
)

Definition at line 462 of file hashutil.c.

463 {
464  Bucket new_bucket;
465  Buffer metabuf;
466  HashMetaPage metap;
467  BlockNumber blkno;
468 
469  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
470  metap = HashPageGetMeta(BufferGetPage(metabuf));
471 
472  new_bucket = _hash_get_newbucket_from_oldbucket(rel, old_bucket,
473  metap->hashm_lowmask,
474  metap->hashm_maxbucket);
475  blkno = BUCKET_TO_BLKNO(metap, new_bucket);
476 
477  _hash_relbuf(rel, metabuf);
478 
479  return blkno;
480 }
Bucket _hash_get_newbucket_from_oldbucket(Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)
Definition: hashutil.c:495

References _hash_get_newbucket_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage, HASH_METAPAGE, HASH_READ, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageGetMeta, and LH_META_PAGE.

Referenced by _hash_finish_split().

◆ _hash_get_newbucket_from_oldbucket()

Bucket _hash_get_newbucket_from_oldbucket ( Relation  rel,
Bucket  old_bucket,
uint32  lowmask,
uint32  maxbucket 
)

Definition at line 495 of file hashutil.c.

497 {
498  Bucket new_bucket;
499 
500  new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
501  if (new_bucket > maxbucket)
502  {
503  lowmask = lowmask >> 1;
504  new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
505  }
506 
507  return new_bucket;
508 }
#define CALC_NEW_BUCKET(old_bucket, lowmask)
Definition: hashutil.c:25

References CALC_NEW_BUCKET.

Referenced by _hash_get_newblock_from_oldbucket(), and hashbucketcleanup().
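
CALC_NEW_BUCKET simply turns on the next-higher hash bit: hashutil.c defines it as ((old_bucket) | ((lowmask) + 1)). The fallback with lowmask >> 1 handles old buckets whose most recent split happened in the previous doubling. A standalone sketch with made-up mask values:

#include <stdint.h>
#include <stdio.h>

/* Same formula as CALC_NEW_BUCKET in hashutil.c */
#define CALC_NEW_BUCKET(old_bucket, lowmask) ((old_bucket) | ((lowmask) + 1))

int
main(void)
{
    uint32_t lowmask = 7;     /* 8 buckets existed at the last doubling */
    uint32_t maxbucket = 9;   /* buckets 0..9 exist now */
    uint32_t old_bucket = 2;

    uint32_t new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);   /* 2|8 = 10 */
    if (new_bucket > maxbucket)
    {
        /* bucket 10 doesn't exist yet; bucket 2's most recent split
         * happened during the previous doubling */
        lowmask = lowmask >> 1;                                   /* 3 */
        new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);        /* 2|4 = 6 */
    }
    printf("bucket %u last split into bucket %u\n", old_bucket, new_bucket);
    return 0;
}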

◆ _hash_get_oldblock_from_newbucket()

BlockNumber _hash_get_oldblock_from_newbucket ( Relation  rel,
Bucket  new_bucket 
)

Definition at line 423 of file hashutil.c.

424 {
425  Bucket old_bucket;
426  uint32 mask;
427  Buffer metabuf;
428  HashMetaPage metap;
429  BlockNumber blkno;
430 
431  /*
432  * To get the old bucket from the current bucket, we need a mask to modulo
433  * into the lower half of the table. This mask is stored in the meta page
434  * as hashm_lowmask, but here we can't rely on it, because we need the
435  * value of lowmask that prevailed at the time the bucket split was
436  * started. Masking off the most significant bit of the new bucket gives
437  * us the old bucket.
438  */
439  mask = (((uint32) 1) << (fls(new_bucket) - 1)) - 1;
440  old_bucket = new_bucket & mask;
441 
442  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
443  metap = HashPageGetMeta(BufferGetPage(metabuf));
444 
445  blkno = BUCKET_TO_BLKNO(metap, old_bucket);
446 
447  _hash_relbuf(rel, metabuf);
448 
449  return blkno;
450 }
int fls(int mask)
Definition: fls.c:55

References _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage, fls(), HASH_METAPAGE, HASH_READ, HashPageGetMeta, and LH_META_PAGE.

Referenced by _hash_first().
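
The mask computation is the inverse of CALC_NEW_BUCKET: clearing the most significant set bit of the new bucket number recovers the bucket it was split from. A standalone sketch (fls32 is a hypothetical portable stand-in for fls()):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for fls(): 1-based position of the most
 * significant set bit. */
static int
fls32(uint32_t x)
{
    int pos = 0;
    while (x) { pos++; x >>= 1; }
    return pos;
}

int
main(void)
{
    uint32_t new_bucket = 11;   /* binary 1011 */
    uint32_t mask = (((uint32_t) 1) << (fls32(new_bucket) - 1)) - 1;  /* 0111 */
    uint32_t old_bucket = new_bucket & mask;   /* 3: the top bit dropped */
    printf("bucket %u was split from bucket %u\n", new_bucket, old_bucket);
    return 0;
}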

◆ _hash_get_totalbuckets()

uint32 _hash_get_totalbuckets ( uint32  splitpoint_phase)

Definition at line 175 of file hashutil.c.

176 {
177  uint32 splitpoint_group;
178  uint32 total_buckets;
179  uint32 phases_within_splitpoint_group;
180 
181  if (splitpoint_phase < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
182  return (1 << splitpoint_phase);
183 
184  /* get splitpoint's group */
185  splitpoint_group = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;
186  splitpoint_group +=
187  ((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) >>
188  HASH_SPLITPOINT_PHASE_BITS);
189 
190  /* account for buckets before splitpoint_group */
191  total_buckets = (1 << (splitpoint_group - 1));
192 
193  /* account for buckets within splitpoint_group */
194  phases_within_splitpoint_group =
195  (((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) &
196  HASH_SPLITPOINT_PHASE_MASK) + 1); /* from 0-based to 1-based */
197  total_buckets +=
198  (((1 << (splitpoint_group - 1)) >> HASH_SPLITPOINT_PHASE_BITS) *
199  phases_within_splitpoint_group);
200 
201  return total_buckets;
202 }
#define HASH_SPLITPOINT_PHASE_MASK
Definition: hash.h:234
#define HASH_SPLITPOINT_PHASE_BITS
Definition: hash.h:232

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, and HASH_SPLITPOINT_PHASE_MASK.

Referenced by _hash_expandtable(), _hash_init_metabuffer(), _hash_ovflblkno_to_bitno(), and bitno_to_blkno().
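
The arithmetic is easiest to see with concrete numbers: up to splitpoint group 10 each phase doubles the table; after that each group is spread over four phases, each adding a quarter of the group's buckets. A standalone re-statement of the function for experimentation (same constants as hash.h):

#include <stdint.h>
#include <stdio.h>

#define HASH_SPLITPOINT_PHASE_BITS            2
#define HASH_SPLITPOINT_PHASES_PER_GRP        (1 << HASH_SPLITPOINT_PHASE_BITS)
#define HASH_SPLITPOINT_PHASE_MASK            (HASH_SPLITPOINT_PHASES_PER_GRP - 1)
#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE 10

/* Re-statement of _hash_get_totalbuckets() */
static uint32_t
total_buckets(uint32_t splitpoint_phase)
{
    if (splitpoint_phase < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
        return 1u << splitpoint_phase;

    uint32_t group = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE +
        ((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) >>
         HASH_SPLITPOINT_PHASE_BITS);
    uint32_t buckets = 1u << (group - 1);   /* buckets before this group */
    uint32_t phases = ((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) &
                       HASH_SPLITPOINT_PHASE_MASK) + 1;
    return buckets + ((1u << (group - 1)) >> HASH_SPLITPOINT_PHASE_BITS) * phases;
}

int
main(void)
{
    /* phase 9 -> 512, 10 -> 640, 11 -> 768, 12 -> 896, 13 -> 1024 */
    for (uint32_t p = 9; p <= 13; p++)
        printf("phase %u -> %u buckets\n", p, total_buckets(p));
    return 0;
}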

◆ _hash_getbucketbuf_from_hashkey()

Buffer _hash_getbucketbuf_from_hashkey ( Relation  rel,
uint32  hashkey,
int  access,
HashMetaPage cachedmetap 
)

Definition at line 1555 of file hashpage.c.

1557 {
1558  HashMetaPage metap;
1559  Buffer buf;
1560  Buffer metabuf = InvalidBuffer;
1561  Page page;
1562  Bucket bucket;
1563  BlockNumber blkno;
1564  HashPageOpaque opaque;
1565 
1566  /* We read from the target bucket buffer, hence locking is a must. */
1567  Assert(access == HASH_READ || access == HASH_WRITE);
1568 
1569  metap = _hash_getcachedmetap(rel, &metabuf, false);
1570  Assert(metap != NULL);
1571 
1572  /*
1573  * Loop until we get a lock on the correct target bucket.
1574  */
1575  for (;;)
1576  {
1577  /*
1578  * Compute the target bucket number, and convert to block number.
1579  */
1580  bucket = _hash_hashkey2bucket(hashkey,
1581  metap->hashm_maxbucket,
1582  metap->hashm_highmask,
1583  metap->hashm_lowmask);
1584 
1585  blkno = BUCKET_TO_BLKNO(metap, bucket);
1586 
1587  /* Fetch the primary bucket page for the bucket */
1588  buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
1589  page = BufferGetPage(buf);
1590  opaque = HashPageGetOpaque(page);
1591  Assert(opaque->hasho_bucket == bucket);
1592  Assert(opaque->hasho_prevblkno != InvalidBlockNumber);
1593 
1594  /*
1595  * If this bucket hasn't been split, we're done.
1596  */
1597  if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
1598  break;
1599 
1600  /* Drop lock on this buffer, update cached metapage, and retry. */
1601  _hash_relbuf(rel, buf);
1602  metap = _hash_getcachedmetap(rel, &metabuf, true);
1603  Assert(metap != NULL);
1604  }
1605 
1606  if (BufferIsValid(metabuf))
1607  _hash_dropbuf(rel, metabuf);
1608 
1609  if (cachedmetap)
1610  *cachedmetap = metap;
1611 
1612  return buf;
1613 }
HashMetaPage _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
Definition: hashpage.c:1497

References _hash_dropbuf(), _hash_getbuf(), _hash_getcachedmetap(), _hash_hashkey2bucket(), _hash_relbuf(), Assert(), BUCKET_TO_BLKNO, buf, BufferGetPage, BufferIsValid, HASH_READ, HASH_WRITE, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, InvalidBlockNumber, InvalidBuffer, and LH_BUCKET_PAGE.

Referenced by _hash_doinsert(), and _hash_first().
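
The retry loop works because a primary bucket page records, in hasho_prevblkno, the number of buckets that existed when it was last split; if that exceeds the cached hashm_maxbucket, the cached metapage predates a split and must be refreshed. A standalone toy model of that staleness check (all values invented for illustration):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t maxbucket, highmask, lowmask; } Meta;

/* Same arithmetic as _hash_hashkey2bucket() */
static uint32_t
map_bucket(uint32_t hashkey, Meta m)
{
    uint32_t b = hashkey & m.highmask;
    return (b > m.maxbucket) ? (b & m.lowmask) : b;
}

int
main(void)
{
    Meta cached = {3, 7, 3};   /* stale cache: believes 4 buckets exist */
    Meta fresh  = {4, 7, 3};   /* bucket 4 was created meanwhile */
    uint32_t key = 0x2c;       /* hashes to 4 under the fresh metapage */

    uint32_t b = map_bucket(key, cached);    /* 4 > 3 -> 4 & 3 = 0 */
    uint32_t page_prevblkno = 4;             /* bucket 0 was split when
                                              * bucket 4 was created */
    if (page_prevblkno > cached.maxbucket)   /* 4 > 3: cache is stale */
        b = map_bucket(key, fresh);          /* retry: bucket 4 */
    printf("bucket %u\n", b);
    return 0;
}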

◆ _hash_getbuf()

Buffer _hash_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags 
)

Definition at line 70 of file hashpage.c.

71 {
72  Buffer buf;
73 
74  if (blkno == P_NEW)
75  elog(ERROR, "hash AM does not use P_NEW");
76 
77  buf = ReadBuffer(rel, blkno);
78 
79  if (access != HASH_NOLOCK)
80  LockBuffer(buf, access);
81 
82  /* ref count and lock type are correct */
83 
84  _hash_checkpage(rel, buf, flags);
85 
86  return buf;
87 }
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:702
#define P_NEW
Definition: bufmgr.h:91

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), P_NEW, and ReadBuffer().

Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_kill_items(), _hash_next(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), hash_bitmap_info(), hashbulkdelete(), and pgstathashindex().

◆ _hash_getbuf_with_condlock_cleanup()

Buffer _hash_getbuf_with_condlock_cleanup ( Relation  rel,
BlockNumber  blkno,
int  flags 
)

Definition at line 96 of file hashpage.c.

97 {
98  Buffer buf;
99 
100  if (blkno == P_NEW)
101  elog(ERROR, "hash AM does not use P_NEW");
102 
103  buf = ReadBuffer(rel, blkno);
104 
105  if (!ConditionalLockBufferForCleanup(buf))
106  {
107  ReleaseBuffer(buf);
108  return InvalidBuffer;
109  }
110 
111  /* ref count and lock type are correct */
112 
113  _hash_checkpage(rel, buf, flags);
114 
115  return buf;
116 }

References _hash_checkpage(), buf, ConditionalLockBufferForCleanup(), elog, ERROR, InvalidBuffer, P_NEW, ReadBuffer(), and ReleaseBuffer().

Referenced by _hash_expandtable().

◆ _hash_getbuf_with_strategy()

Buffer _hash_getbuf_with_strategy ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags,
BufferAccessStrategy  bstrategy 
)

Definition at line 239 of file hashpage.c.

242 {
243  Buffer buf;
244 
245  if (blkno == P_NEW)
246  elog(ERROR, "hash AM does not use P_NEW");
247 
248  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
249 
250  if (access != HASH_NOLOCK)
251  LockBuffer(buf, access);
252 
253  /* ref count and lock type are correct */
254 
255  _hash_checkpage(rel, buf, flags);
256 
257  return buf;
258 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:749
@ RBM_NORMAL
Definition: bufmgr.h:39

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), MAIN_FORKNUM, P_NEW, RBM_NORMAL, and ReadBufferExtended().

Referenced by _hash_freeovflpage(), _hash_squeezebucket(), hashbucketcleanup(), and pgstat_hash_page().

◆ _hash_getcachedmetap()

HashMetaPage _hash_getcachedmetap ( Relation  rel,
Buffer metabuf,
bool  force_refresh 
)

Definition at line 1497 of file hashpage.c.

1498 {
1499  Page page;
1500 
1501  Assert(metabuf);
1502  if (force_refresh || rel->rd_amcache == NULL)
1503  {
1504  char *cache = NULL;
1505 
1506  /*
1507  * It's important that we don't set rd_amcache to an invalid value.
1508  * Either MemoryContextAlloc or _hash_getbuf could fail, so don't
1509  * install a pointer to the newly-allocated storage in the actual
1510  * relcache entry until both have succeeded.
1511  */
1512  if (rel->rd_amcache == NULL)
1513  cache = MemoryContextAlloc(rel->rd_indexcxt,
1514  sizeof(HashMetaPageData));
1515 
1516  /* Read the metapage. */
1517  if (BufferIsValid(*metabuf))
1518  LockBuffer(*metabuf, BUFFER_LOCK_SHARE);
1519  else
1520  *metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ,
1521  LH_META_PAGE);
1522  page = BufferGetPage(*metabuf);
1523 
1524  /* Populate the cache. */
1525  if (rel->rd_amcache == NULL)
1526  rel->rd_amcache = cache;
1527  memcpy(rel->rd_amcache, HashPageGetMeta(page),
1528  sizeof(HashMetaPageData));
1529 
1530  /* Release metapage lock, but keep the pin. */
1531  LockBuffer(*metabuf, BUFFER_LOCK_UNLOCK);
1532  }
1533 
1534  return (HashMetaPage) rel->rd_amcache;
1535 }
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void * rd_amcache
Definition: rel.h:225
MemoryContext rd_indexcxt
Definition: rel.h:200

References _hash_getbuf(), Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage, BufferIsValid, HASH_METAPAGE, HASH_READ, HashPageGetMeta, LH_META_PAGE, LockBuffer(), MemoryContextAlloc(), RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _hash_getbucketbuf_from_hashkey(), and hashbulkdelete().

◆ _hash_getinitbuf()

Buffer _hash_getinitbuf ( Relation  rel,
BlockNumber  blkno 
)

Definition at line 135 of file hashpage.c.

136 {
137  Buffer buf;
138 
139  if (blkno == P_NEW)
140  elog(ERROR, "hash AM does not use P_NEW");
141 
142  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK,
143  NULL);
144 
145  /* ref count and lock type are correct */
146 
147  /* initialize the page */
148  _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
149 
150  return buf;
151 }
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:40

References _hash_pageinit(), buf, BufferGetPage, BufferGetPageSize, elog, ERROR, MAIN_FORKNUM, P_NEW, RBM_ZERO_AND_LOCK, and ReadBufferExtended().

Referenced by _hash_addovflpage().

◆ _hash_getnewbuf()

Buffer _hash_getnewbuf ( Relation  rel,
BlockNumber  blkno,
ForkNumber  forkNum 
)

Definition at line 198 of file hashpage.c.

199 {
200  BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum);
201  Buffer buf;
202 
203  if (blkno == P_NEW)
204  elog(ERROR, "hash AM does not use P_NEW");
205  if (blkno > nblocks)
206  elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
207  RelationGetRelationName(rel));
208 
209  /* smgr insists we use P_NEW to extend the relation */
210  if (blkno == nblocks)
211  {
212  buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
213  if (BufferGetBlockNumber(buf) != blkno)
214  elog(ERROR, "unexpected hash relation size: %u, should be %u",
215  BufferGetBlockNumber(buf), blkno);
216  LockBuffer(buf, HASH_WRITE);
217  }
218  else
219  {
220  buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
221  NULL);
222  }
223 
224  /* ref count and lock type are correct */
225 
226  /* initialize the page */
227  _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
228 
229  return buf;
230 }
BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
Definition: bufmgr.c:2942

References _hash_pageinit(), buf, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, elog, ERROR, HASH_WRITE, LockBuffer(), P_NEW, RBM_NORMAL, RBM_ZERO_AND_LOCK, ReadBufferExtended(), RelationGetNumberOfBlocksInFork(), and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), and _hash_init().

◆ _hash_hashkey2bucket()

Bucket _hash_hashkey2bucket ( uint32  hashkey,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask 
)

Definition at line 126 of file hashutil.c.

128 {
129  Bucket bucket;
130 
131  bucket = hashkey & highmask;
132  if (bucket > maxbucket)
133  bucket = bucket & lowmask;
134 
135  return bucket;
136 }

Referenced by _h_indexbuild(), _hash_getbucketbuf_from_hashkey(), _hash_splitbucket(), comparetup_index_hash(), and hashbucketcleanup().
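
The two masks implement linear hashing's incomplete-doubling trick: a key is first masked with highmask, and if that lands beyond the last bucket that actually exists, it is re-masked with lowmask into the lower half of the table. A standalone sketch with a 6-bucket table:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as _hash_hashkey2bucket() */
static uint32_t
hashkey2bucket(uint32_t hashkey, uint32_t maxbucket,
               uint32_t highmask, uint32_t lowmask)
{
    uint32_t bucket = hashkey & highmask;
    if (bucket > maxbucket)
        bucket &= lowmask;
    return bucket;
}

int
main(void)
{
    /* 6 buckets exist (maxbucket == 5): highmask 7, lowmask 3. Keys that
     * map above bucket 5 fall back to the low half. */
    uint32_t maxbucket = 5, highmask = 7, lowmask = 3;
    printf("%u\n", hashkey2bucket(0x2c, maxbucket, highmask, lowmask)); /* 0x2c & 7 = 4 */
    printf("%u\n", hashkey2bucket(0x2f, maxbucket, highmask, lowmask)); /* 7 > 5 -> 7 & 3 = 3 */
    return 0;
}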

◆ _hash_init()

uint32 _hash_init ( Relation  rel,
double  num_tuples,
ForkNumber  forkNum 
)

Definition at line 327 of file hashpage.c.

328 {
329  Buffer metabuf;
330  Buffer buf;
331  Buffer bitmapbuf;
332  Page pg;
333  HashMetaPage metap;
334  RegProcedure procid;
335  int32 data_width;
336  int32 item_width;
337  int32 ffactor;
338  uint32 num_buckets;
339  uint32 i;
340  bool use_wal;
341 
342  /* safety check */
343  if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
344  elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
345  RelationGetRelationName(rel));
346 
347  /*
348  * WAL log creation of pages if the relation is persistent, or this is the
349  * init fork. Init forks for unlogged relations always need to be WAL
350  * logged.
351  */
352  use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
353 
354  /*
355  * Determine the target fill factor (in tuples per bucket) for this index.
356  * The idea is to make the fill factor correspond to pages about as full
357  * as the user-settable fillfactor parameter says. We can compute it
358  * exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
359  */
360  data_width = sizeof(uint32);
361  item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
362  sizeof(ItemIdData); /* include the line pointer */
363  ffactor = HashGetTargetPageUsage(rel) / item_width;
364  /* keep to a sane range */
365  if (ffactor < 10)
366  ffactor = 10;
367 
368  procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);
369 
370  /*
371  * We initialize the metapage, the first N bucket pages, and the first
372  * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
373  * calls to occur. This ensures that the smgr level has the right idea of
374  * the physical index length.
375  *
376  * Critical section not required, because on error the creation of the
377  * whole relation will be rolled back.
378  */
379  metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
380  _hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
381  MarkBufferDirty(metabuf);
382 
383  pg = BufferGetPage(metabuf);
384  metap = HashPageGetMeta(pg);
385 
386  /* XLOG stuff */
387  if (use_wal)
388  {
389  xl_hash_init_meta_page xlrec;
390  XLogRecPtr recptr;
391 
392  xlrec.num_tuples = num_tuples;
393  xlrec.procid = metap->hashm_procid;
394  xlrec.ffactor = metap->hashm_ffactor;
395 
396  XLogBeginInsert();
397  XLogRegisterData((char *) &xlrec, SizeOfHashInitMetaPage);
398  XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);
399 
400  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE);
401 
402  PageSetLSN(BufferGetPage(metabuf), recptr);
403  }
404 
405  num_buckets = metap->hashm_maxbucket + 1;
406 
407  /*
408  * Release buffer lock on the metapage while we initialize buckets.
409  * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
410  * won't accomplish anything. It's a bad idea to hold buffer locks for
411  * long intervals in any case, since that can block the bgwriter.
412  */
413  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
414 
415  /*
416  * Initialize and WAL Log the first N buckets
417  */
418  for (i = 0; i < num_buckets; i++)
419  {
420  BlockNumber blkno;
421 
422  /* Allow interrupts, in case N is huge */
423  CHECK_FOR_INTERRUPTS();
424 
425  blkno = BUCKET_TO_BLKNO(metap, i);
426  buf = _hash_getnewbuf(rel, blkno, forkNum);
427  _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
428  MarkBufferDirty(buf);
429 
430  if (use_wal)
431  log_newpage(&rel->rd_node,
432  forkNum,
433  blkno,
434  BufferGetPage(buf),
435  true);
436  _hash_relbuf(rel, buf);
437  }
438 
439  /* Now reacquire buffer lock on metapage */
440  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
441 
442  /*
443  * Initialize bitmap page
444  */
445  bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
446  _hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
447  MarkBufferDirty(bitmapbuf);
448 
449  /* add the new bitmap page to the metapage's list of bitmaps */
450  /* metapage already has a write lock */
451  if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
452  ereport(ERROR,
453  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
454  errmsg("out of overflow pages in hash index \"%s\"",
455  RelationGetRelationName(rel))));
456 
457  metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
458 
459  metap->hashm_nmaps++;
460  MarkBufferDirty(metabuf);
461 
462  /* XLOG stuff */
463  if (use_wal)
464  {
465  xl_hash_init_bitmap_page xlrec;
466  XLogRecPtr recptr;
467 
468  xlrec.bmsize = metap->hashm_bmsize;
469 
470  XLogBeginInsert();
471  XLogRegisterData((char *) &xlrec, SizeOfHashInitBitmapPage);
472  XLogRegisterBuffer(0, bitmapbuf, REGBUF_WILL_INIT);
473 
474  /*
475  * This is safe only because nobody else can be modifying the index at
476  * this stage; it's only visible to the transaction that is creating
477  * it.
478  */
479  XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
480 
481  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_BITMAP_PAGE);
482 
483  PageSetLSN(BufferGetPage(bitmapbuf), recptr);
484  PageSetLSN(BufferGetPage(metabuf), recptr);
485  }
486 
487  /* all done */
488  _hash_relbuf(rel, bitmapbuf);
489  _hash_relbuf(rel, metabuf);
490 
491  return num_buckets;
492 }
#define HashGetTargetPageUsage(relation)
Definition: hash.h:281
#define SizeOfHashInitBitmapPage
Definition: hash_xlog.h:233
#define XLOG_HASH_INIT_BITMAP_PAGE
Definition: hash_xlog.h:28
#define XLOG_HASH_INIT_META_PAGE
Definition: hash_xlog.h:27
#define SizeOfHashInitMetaPage
Definition: hash_xlog.h:217
void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, bool initpage)
Definition: hashpage.c:157
void _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, uint16 ffactor, bool initpage)
Definition: hashpage.c:498
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:769
struct ItemIdData ItemIdData
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
@ INIT_FORKNUM
Definition: relpath.h:46
RegProcedure hashm_procid
Definition: hash.h:261
RelFileNode rd_node
Definition: rel.h:56
RegProcedure procid
Definition: hash_xlog.h:213
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:1083

References _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), _hash_relbuf(), xl_hash_init_bitmap_page::bmsize, BUCKET_TO_BLKNO, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage, CHECK_FOR_INTERRUPTS, elog, ereport, errcode(), errmsg(), ERROR, xl_hash_init_meta_page::ffactor, HASH_MAX_BITMAPS, HASH_METAPAGE, HashGetTargetPageUsage, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_procid, HashPageGetMeta, HASHSTANDARD_PROC, i, index_getprocid(), INIT_FORKNUM, LH_BUCKET_PAGE, LockBuffer(), log_newpage(), MarkBufferDirty(), MAXALIGN, xl_hash_init_meta_page::num_tuples, PageSetLSN, xl_hash_init_meta_page::procid, RelationData::rd_node, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetNumberOfBlocksInFork(), RelationGetRelationName, RelationNeedsWAL, SizeOfHashInitBitmapPage, SizeOfHashInitMetaPage, XLOG_HASH_INIT_BITMAP_PAGE, XLOG_HASH_INIT_META_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbuild(), and hashbuildempty().
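
At creation time every hashm_spares[] entry that BUCKET_TO_BLKNO consults is still zero, so the physical layout _hash_init leaves behind is strictly sequential. A sketch of the resulting block map (illustrative, not part of the source):

/*
 * block 0                    metapage (HASH_METAPAGE)
 * blocks 1 .. num_buckets    primary bucket pages (bucket B at block B + 1)
 * block num_buckets + 1      first bitmap page
 */
BlockNumber
toy_initial_bucket_blkno(uint32 bucket)
{
    /* holds only right after _hash_init, before any split adds spare pages */
    return (BlockNumber) bucket + 1;
}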

◆ _hash_init_metabuffer()

void _hash_init_metabuffer ( Buffer  buf,
double  num_tuples,
RegProcedure  procid,
uint16  ffactor,
bool  initpage 
)

Definition at line 498 of file hashpage.c.

500 {
501  HashMetaPage metap;
502  HashPageOpaque pageopaque;
503  Page page;
504  double dnumbuckets;
505  uint32 num_buckets;
506  uint32 spare_index;
507  uint32 lshift;
508 
509  /*
510  * Choose the number of initial bucket pages to match the fill factor
511  * given the estimated number of tuples. We round up the result to the
512  * total number of buckets which has to be allocated before using its
513  * hashm_spares element. However always force at least 2 bucket pages. The
514  * upper limit is determined by considerations explained in
515  * _hash_expandtable().
516  */
517  dnumbuckets = num_tuples / ffactor;
518  if (dnumbuckets <= 2.0)
519  num_buckets = 2;
520  else if (dnumbuckets >= (double) 0x40000000)
521  num_buckets = 0x40000000;
522  else
523  num_buckets = _hash_get_totalbuckets(_hash_spareindex(dnumbuckets));
524 
525  spare_index = _hash_spareindex(num_buckets);
526  Assert(spare_index < HASH_MAX_SPLITPOINTS);
527 
528  page = BufferGetPage(buf);
529  if (initpage)
530  _hash_pageinit(page, BufferGetPageSize(buf));
531 
532  pageopaque = HashPageGetOpaque(page);
533  pageopaque->hasho_prevblkno = InvalidBlockNumber;
534  pageopaque->hasho_nextblkno = InvalidBlockNumber;
535  pageopaque->hasho_bucket = InvalidBucket;
536  pageopaque->hasho_flag = LH_META_PAGE;
537  pageopaque->hasho_page_id = HASHO_PAGE_ID;
538 
539  metap = HashPageGetMeta(page);
540 
541  metap->hashm_magic = HASH_MAGIC;
542  metap->hashm_version = HASH_VERSION;
543  metap->hashm_ntuples = 0;
544  metap->hashm_nmaps = 0;
545  metap->hashm_ffactor = ffactor;
546  metap->hashm_bsize = HashGetMaxBitmapSize(page);
547 
548  /* find largest bitmap array size that will fit in page size */
549  lshift = pg_leftmost_one_pos32(metap->hashm_bsize);
550  Assert(lshift > 0);
551  metap->hashm_bmsize = 1 << lshift;
552  metap->hashm_bmshift = lshift + BYTE_TO_BIT;
553  Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
554 
555  /*
556  * Label the index with its primary hash support function's OID. This is
557  * pretty useless for normal operation (in fact, hashm_procid is not used
558  * anywhere), but it might be handy for forensic purposes so we keep it.
559  */
560  metap->hashm_procid = procid;
561 
562  /*
563  * We initialize the index with N buckets, 0 .. N-1, occupying physical
564  * blocks 1 to N. The first freespace bitmap page is in block N+1.
565  */
566  metap->hashm_maxbucket = num_buckets - 1;
567 
568  /*
569  * Set highmask as next immediate ((2 ^ x) - 1), which should be
570  * sufficient to cover num_buckets.
571  */
572  metap->hashm_highmask = pg_nextpower2_32(num_buckets + 1) - 1;
573  metap->hashm_lowmask = (metap->hashm_highmask >> 1);
574 
575  MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
576  MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
577 
578  /* Set up mapping for one spare page after the initial splitpoints */
579  metap->hashm_spares[spare_index] = 1;
580  metap->hashm_ovflpoint = spare_index;
581  metap->hashm_firstfree = 0;
582 
583  /*
584  * Set pd_lower just past the end of the metadata. This is essential,
585  * because without doing so, metadata will be lost if xlog.c compresses
586  * the page.
587  */
588  ((PageHeader) page)->pd_lower =
589  ((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
590 }
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define MemSet(start, val, len)
Definition: c.h:1008
#define HASH_MAX_SPLITPOINTS
Definition: hash.h:239
#define BYTE_TO_BIT
Definition: hash.h:301
#define HashGetMaxBitmapSize(page)
Definition: hash.h:319
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:26
uint16 hashm_bsize
Definition: hash.h:250
uint16 hashm_bmshift
Definition: hash.h:253

References _hash_get_totalbuckets(), _hash_pageinit(), _hash_spareindex(), Assert(), BMPG_MASK, BMPG_SHIFT, buf, BufferGetPage, BufferGetPageSize, BYTE_TO_BIT, HASH_MAGIC, HASH_MAX_SPLITPOINTS, HASH_VERSION, HashGetMaxBitmapSize, HashMetaPageData::hashm_bmshift, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_bsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_procid, HashMetaPageData::hashm_spares, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, LH_META_PAGE, MemSet, pg_leftmost_one_pos32(), and pg_nextpower2_32().

Referenced by _hash_init(), and hash_xlog_init_meta_page().
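
The bucket-count and mask arithmetic is easiest to follow with concrete numbers. A worked example (assuming, purely for illustration, ffactor = 75 and an estimate of 10,000 tuples):

/*
 * dnumbuckets     = 10000 / 75 = 133.3
 * num_buckets     = _hash_get_totalbuckets(_hash_spareindex(133)) = 256
 *                   (rounded up to the end of splitpoint group 8)
 * hashm_maxbucket = 256 - 1                        = 255
 * hashm_highmask  = pg_nextpower2_32(256 + 1) - 1  = 511
 * hashm_lowmask   = 511 >> 1                       = 255
 */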

◆ _hash_initbitmapbuffer()

void _hash_initbitmapbuffer ( Buffer  buf,
uint16  bmsize,
bool  initpage 
)

Definition at line 741 of file hashovfl.c.

742 {
743  Page pg;
744  HashPageOpaque op;
745  uint32 *freep;
746 
747  pg = BufferGetPage(buf);
748 
749  /* initialize the page */
750  if (initpage)
751  _hash_pageinit(pg, BufferGetPageSize(buf));
752 
753  /* initialize the page's special space */
754  op = HashPageGetOpaque(pg);
755  op->hasho_prevblkno = InvalidBlockNumber;
756  op->hasho_nextblkno = InvalidBlockNumber;
757  op->hasho_bucket = InvalidBucket;
758  op->hasho_flag = LH_BITMAP_PAGE;
759  op->hasho_page_id = HASHO_PAGE_ID;
760 
761  /* set all of the bits to 1 */
762  freep = HashPageGetBitmap(pg);
763  MemSet(freep, 0xFF, bmsize);
764 
765  /*
766  * Set pd_lower just past the end of the bitmap page data. We could even
767  * set pd_lower equal to pd_upper, but this is more precise and makes the
768  * page look compressible to xlog.c.
769  */
770  ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
771 }

References _hash_pageinit(), buf, BufferGetPage, BufferGetPageSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, LH_BITMAP_PAGE, and MemSet.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().
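
The all-ones initialization means a fresh bitmap page advertises no reusable pages: a set bit marks an overflow page as in use (or never allocated), and a bit is cleared only when _hash_freeovflpage recycles a page. A fragment-level sketch of the bit traffic, using the CLRBIT/SETBIT/ISSET macros from hash.h (illustrative; assumes a locked bitmap page pg and an arbitrary bit number 5):

uint32 *freep = HashPageGetBitmap(pg);

CLRBIT(freep, 5);       /* what freeing the overflow page behind bit 5 does */
if (!ISSET(freep, 5))
{
    /* a clear bit is what _hash_addovflpage hunts for ... */
    SETBIT(freep, 5);   /* ... and sets again when it recycles the page */
}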

◆ _hash_initbuf()

void _hash_initbuf ( Buffer  buf,
uint32  max_bucket,
uint32  num_bucket,
uint32  flag,
bool  initpage 
)

Definition at line 157 of file hashpage.c.

159 {
160  HashPageOpaque pageopaque;
161  Page page;
162 
163  page = BufferGetPage(buf);
164 
165  /* initialize the page */
166  if (initpage)
167  _hash_pageinit(page, BufferGetPageSize(buf));
168 
169  pageopaque = HashPageGetOpaque(page);
170 
171  /*
172  * Set hasho_prevblkno with current hashm_maxbucket. This value will be
173  * used to validate cached HashMetaPageData. See
174  * _hash_getbucketbuf_from_hashkey().
175  */
176  pageopaque->hasho_prevblkno = max_bucket;
177  pageopaque->hasho_nextblkno = InvalidBlockNumber;
178  pageopaque->hasho_bucket = num_bucket;
179  pageopaque->hasho_flag = flag;
180  pageopaque->hasho_page_id = HASHO_PAGE_ID;
181 }

References _hash_pageinit(), buf, BufferGetPage, BufferGetPageSize, flag(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, and InvalidBlockNumber.

Referenced by _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_split_allocate_page().

◆ _hash_kill_items()

void _hash_kill_items ( IndexScanDesc  scan)

Definition at line 537 of file hashutil.c.

538 {
539  HashScanOpaque so = (HashScanOpaque) scan->opaque;
540  Relation rel = scan->indexRelation;
541  BlockNumber blkno;
542  Buffer buf;
543  Page page;
544  HashPageOpaque opaque;
545  OffsetNumber offnum,
546  maxoff;
547  int numKilled = so->numKilled;
548  int i;
549  bool killedsomething = false;
550  bool havePin = false;
551 
552  Assert(so->numKilled > 0);
553  Assert(so->killedItems != NULL);
554  Assert(HashScanPosIsValid(so->currPos));
555 
556  /*
557  * Always reset the scan state, so we don't look for same items on other
558  * pages.
559  */
560  so->numKilled = 0;
561 
562  blkno = so->currPos.currPage;
563  if (HashScanPosIsPinned(so->currPos))
564  {
565  /*
566  * We already have pin on this buffer, so, all we need to do is
567  * acquire lock on it.
568  */
569  havePin = true;
570  buf = so->currPos.buf;
571  LockBuffer(buf, BUFFER_LOCK_SHARE);
572  }
573  else
574  buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
575 
576  page = BufferGetPage(buf);
577  opaque = HashPageGetOpaque(page);
578  maxoff = PageGetMaxOffsetNumber(page);
579 
580  for (i = 0; i < numKilled; i++)
581  {
582  int itemIndex = so->killedItems[i];
583  HashScanPosItem *currItem = &so->currPos.items[itemIndex];
584 
585  offnum = currItem->indexOffset;
586 
587  Assert(itemIndex >= so->currPos.firstItem &&
588  itemIndex <= so->currPos.lastItem);
589 
590  while (offnum <= maxoff)
591  {
592  ItemId iid = PageGetItemId(page, offnum);
593  IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
594 
595  if (ItemPointerEquals(&ituple->t_tid, &currItem->heapTid))
596  {
597  /* found the item */
598  ItemIdMarkDead(iid);
599  killedsomething = true;
600  break; /* out of inner search loop */
601  }
602  offnum = OffsetNumberNext(offnum);
603  }
604  }
605 
606  /*
607  * Since this can be redone later if needed, mark as dirty hint. Whenever
608  * we mark anything LP_DEAD, we also set the page's
609  * LH_PAGE_HAS_DEAD_TUPLES flag, which is likewise just a hint.
610  */
611  if (killedsomething)
612  {
613  opaque->hasho_flag |= LH_PAGE_HAS_DEAD_TUPLES;
614  MarkBufferDirtyHint(buf, true);
615  }
616 
617  if (so->hashso_bucket_buf == so->currPos.buf ||
618  havePin)
619  LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
620  else
621  _hash_relbuf(rel, buf);
622 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3985
#define HashScanPosIsPinned(scanpos)
Definition: hash.h:130
#define HashScanPosIsValid(scanpos)
Definition: hash.h:137
#define LH_PAGE_HAS_DEAD_TUPLES
Definition: hash.h:61
#define ItemIdMarkDead(itemId)
Definition: itemid.h:179
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
int * killedItems
Definition: hash.h:182
BlockNumber currPage
Definition: hash.h:112
int firstItem
Definition: hash.h:123
ItemPointerData heapTid
Definition: hash.h:105
OffsetNumber indexOffset
Definition: hash.h:106

References _hash_getbuf(), _hash_relbuf(), Assert(), buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage, HashScanPosData::currPage, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashPageGetOpaque, HashScanPosIsPinned, HashScanPosIsValid, HashScanOpaqueData::hashso_bucket_buf, HashScanPosItem::heapTid, i, HashScanPosItem::indexOffset, IndexScanDescData::indexRelation, ItemIdMarkDead, ItemPointerEquals(), HashScanPosData::items, HashScanOpaqueData::killedItems, LH_OVERFLOW_PAGE, LH_PAGE_HAS_DEAD_TUPLES, LockBuffer(), MarkBufferDirtyHint(), HashScanOpaqueData::numKilled, OffsetNumberNext, IndexScanDescData::opaque, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and IndexTupleData::t_tid.

Referenced by _hash_next(), _hash_readpage(), hashendscan(), and hashrescan().

◆ _hash_next()

bool _hash_next ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 48 of file hashsearch.c.

49 {
50  Relation rel = scan->indexRelation;
51  HashScanOpaque so = (HashScanOpaque) scan->opaque;
52  HashScanPosItem *currItem;
53  BlockNumber blkno;
54  Buffer buf;
55  bool end_of_scan = false;
56 
57  /*
58  * Advance to the next tuple on the current page; or if done, try to read
59  * data from the next or previous page based on the scan direction. Before
60  * moving to the next or previous page make sure that we deal with all the
61  * killed items.
62  */
63  if (ScanDirectionIsForward(dir))
64  {
65  if (++so->currPos.itemIndex > so->currPos.lastItem)
66  {
67  if (so->numKilled > 0)
68  _hash_kill_items(scan);
69 
70  blkno = so->currPos.nextPage;
71  if (BlockNumberIsValid(blkno))
72  {
73  buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
74  TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
75  if (!_hash_readpage(scan, &buf, dir))
76  end_of_scan = true;
77  }
78  else
79  end_of_scan = true;
80  }
81  }
82  else
83  {
84  if (--so->currPos.itemIndex < so->currPos.firstItem)
85  {
86  if (so->numKilled > 0)
87  _hash_kill_items(scan);
88 
89  blkno = so->currPos.prevPage;
90  if (BlockNumberIsValid(blkno))
91  {
92  buf = _hash_getbuf(rel, blkno, HASH_READ,
93  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
94  TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
95 
96  /*
97  * We always maintain the pin on bucket page for whole scan
98  * operation, so releasing the additional pin we have acquired
99  * here.
100  */
101  if (buf == so->hashso_bucket_buf ||
102  buf == so->hashso_split_bucket_buf)
103  _hash_dropbuf(rel, buf);
104 
105  if (!_hash_readpage(scan, &buf, dir))
106  end_of_scan = true;
107  }
108  else
109  end_of_scan = true;
110  }
111  }
112 
113  if (end_of_scan)
114  {
115  _hash_dropscanbuf(rel, so);
116  HashScanPosInvalidate(so->currPos);
117  return false;
118  }
119 
120  /* OK, itemIndex says what to return */
121  currItem = &so->currPos.items[so->currPos.itemIndex];
122  scan->xs_heaptid = currItem->heapTid;
123 
124  return true;
125 }
#define HashScanPosInvalidate(scanpos)
Definition: hash.h:144
void _hash_dropscanbuf(Relation rel, HashScanOpaque so)
Definition: hashpage.c:289
void _hash_kill_items(IndexScanDesc scan)
Definition: hashutil.c:537
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
BlockNumber prevPage
Definition: hash.h:114
BlockNumber nextPage
Definition: hash.h:113
int lastItem
Definition: hash.h:124

References _hash_dropbuf(), _hash_dropscanbuf(), _hash_getbuf(), _hash_kill_items(), _hash_readpage(), BlockNumberIsValid, buf, BufferGetPage, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashScanPosInvalidate, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, IndexScanDescData::indexRelation, HashScanPosData::itemIndex, HashScanPosData::items, HashScanPosData::lastItem, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, HashScanPosData::nextPage, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, HashScanPosData::prevPage, ScanDirectionIsForward, TestForOldSnapshot(), IndexScanDescData::xs_heaptid, and IndexScanDescData::xs_snapshot.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_ovflblkno_to_bitno()

uint32 _hash_ovflblkno_to_bitno ( HashMetaPage  metap,
BlockNumber  ovflblkno 
)

Definition at line 62 of file hashovfl.c.

63 {
64  uint32 splitnum = metap->hashm_ovflpoint;
65  uint32 i;
66  uint32 bitnum;
67 
68  /* Determine the split number containing this page */
69  for (i = 1; i <= splitnum; i++)
70  {
71  if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
72  break; /* oops */
73  bitnum = ovflblkno - _hash_get_totalbuckets(i);
74 
75  /*
76  * bitnum has to be greater than number of overflow page added in
77  * previous split point. The overflow page at this splitnum (i) if any
78  * should start from (_hash_get_totalbuckets(i) +
79  * metap->hashm_spares[i - 1] + 1).
80  */
81  if (bitnum > metap->hashm_spares[i - 1] &&
82  bitnum <= metap->hashm_spares[i])
83  return bitnum - 1; /* -1 to convert 1-based to 0-based */
84  }
85 
86  ereport(ERROR,
87  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
88  errmsg("invalid overflow block number %u", ovflblkno)));
89  return 0; /* keep compiler quiet */
90 }

References _hash_get_totalbuckets(), ereport, errcode(), errmsg(), ERROR, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().
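
A worked example (illustrative, using the state of a freshly created 4-bucket index, where hashm_spares[] = {0, 0, 1}, hashm_ovflpoint = 2, and the only page counted as a "spare" so far is the initial bitmap page at block 5):

/*
 * _hash_ovflblkno_to_bitno(metap, 5):
 *   i = 1: 5 > _hash_get_totalbuckets(1) = 2; bitnum = 5 - 2 = 3,
 *          but 3 > hashm_spares[1] = 0, so not in this splitpoint
 *   i = 2: 5 > _hash_get_totalbuckets(2) = 4; bitnum = 5 - 4 = 1,
 *          and hashm_spares[1] = 0 < 1 <= hashm_spares[2] = 1: match
 *   returns 1 - 1 = 0, i.e. block 5 owns bit number 0
 */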

◆ _hash_pageinit()

void _hash_pageinit ( Page  page,
Size  size 
)

Definition at line 596 of file hashpage.c.

597 {
598  PageInit(page, size, sizeof(HashPageOpaqueData));
599 }
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

References PageInit().

Referenced by _hash_alloc_buckets(), _hash_freeovflpage(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), and hash_xlog_squeeze_page().

◆ _hash_pgaddmultitup()

void _hash_pgaddmultitup ( Relation  rel,
Buffer  buf,
IndexTuple itups,
OffsetNumber itup_offsets,
uint16  nitups 
)

Definition at line 300 of file hashinsert.c.

302 {
303  OffsetNumber itup_off;
304  Page page;
305  uint32 hashkey;
306  int i;
307 
308  _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
309  page = BufferGetPage(buf);
310 
311  for (i = 0; i < nitups; i++)
312  {
313  Size itemsize;
314 
315  itemsize = IndexTupleSize(itups[i]);
316  itemsize = MAXALIGN(itemsize);
317 
318  /* Find where to insert the tuple (preserving page's hashkey ordering) */
319  hashkey = _hash_get_indextuple_hashkey(itups[i]);
320  itup_off = _hash_binsearch(page, hashkey);
321 
322  itup_offsets[i] = itup_off;
323 
324  if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
325  == InvalidOffsetNumber)
326  elog(ERROR, "failed to add index item to \"%s\"",
327  RelationGetRelationName(rel));
328  }
329 }
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:415
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition: hashutil.c:351
Pointer Item
Definition: item.h:17
#define InvalidOffsetNumber
Definition: off.h:26

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), buf, BufferGetPage, elog, ERROR, i, IndexTupleSize, InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAXALIGN, PageAddItem, and RelationGetRelationName.

Referenced by _hash_freeovflpage(), _hash_splitbucket(), and _hash_squeezebucket().

◆ _hash_pgaddtup()

OffsetNumber _hash_pgaddtup ( Relation  rel,
Buffer  buf,
Size  itemsize,
IndexTuple  itup 
)

Definition at line 269 of file hashinsert.c.

270 {
271  OffsetNumber itup_off;
272  Page page;
273  uint32 hashkey;
274 
275  _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
276  page = BufferGetPage(buf);
277 
278  /* Find where to insert the tuple (preserving page's hashkey ordering) */
279  hashkey = _hash_get_indextuple_hashkey(itup);
280  itup_off = _hash_binsearch(page, hashkey);
281 
282  if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
283  == InvalidOffsetNumber)
284  elog(ERROR, "failed to add index item to \"%s\"",
285  RelationGetRelationName(rel));
286 
287  return itup_off;
288 }

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), buf, BufferGetPage, elog, ERROR, InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, PageAddItem, and RelationGetRelationName.

Referenced by _hash_doinsert().
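
Both insertion paths depend on the invariant that tuples within a page stay sorted by hash key, which is what lets _hash_binsearch find the insertion point by binary search instead of walking every line pointer. Schematically (illustrative hash keys):

/*
 * page before:  offset   1   2   3   4
 *               hashkey  17  42  42  97
 *
 * inserting a tuple with hashkey 42:
 *   _hash_binsearch(page, 42) = 2   (first offset with hashkey >= 42)
 *   PageAddItem shifts offsets 2..4 up and places the tuple at offset 2
 *
 * page after:   offset   1   2   3   4   5
 *               hashkey  17  42  42  42  97
 */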

◆ _hash_relbuf()

◆ _hash_spareindex()

uint32 _hash_spareindex ( uint32  num_bucket)

Definition at line 143 of file hashutil.c.

144 {
145  uint32 splitpoint_group;
146  uint32 splitpoint_phases;
147 
148  splitpoint_group = pg_ceil_log2_32(num_bucket);
149 
150  if (splitpoint_group < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
151  return splitpoint_group;
152 
153  /* account for single-phase groups */
154  splitpoint_phases = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;
155 
156  /* account for multi-phase groups before splitpoint_group */
157  splitpoint_phases +=
158  ((splitpoint_group - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) <<
159  HASH_SPLITPOINT_PHASE_BITS);
160 
161  /* account for phases within current group */
162  splitpoint_phases +=
163  (((num_bucket - 1) >>
164  (splitpoint_group - (HASH_SPLITPOINT_PHASE_BITS + 1))) &
165  HASH_SPLITPOINT_PHASE_MASK); /* to 0-based value. */
166 
167  return splitpoint_phases;
168 }
static uint32 pg_ceil_log2_32(uint32 num)
Definition: pg_bitutils.h:229

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, HASH_SPLITPOINT_PHASE_MASK, and pg_ceil_log2_32().

Referenced by _hash_expandtable(), and _hash_init_metabuffer().
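
A worked example of the phase arithmetic (illustrative):

/*
 * _hash_spareindex(1500):
 *   splitpoint_group = pg_ceil_log2_32(1500) = 11   (1024 < 1500 <= 2048)
 *   phases  = 10                                    (groups 0-9, one phase each)
 *   phases += (11 - 10) << 2 = 4                    (group 10 has four phases)
 *   phases += ((1500 - 1) >> (11 - 3)) & 3 = 1      (second quarter of group 11)
 *   returns 10 + 4 + 1 = 15
 */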

◆ _hash_squeezebucket()

void _hash_squeezebucket ( Relation  rel,
Bucket  bucket,
BlockNumber  bucket_blkno,
Buffer  bucket_buf,
BufferAccessStrategy  bstrategy 
)

Definition at line 806 of file hashovfl.c.

811 {
812  BlockNumber wblkno;
813  BlockNumber rblkno;
814  Buffer wbuf;
815  Buffer rbuf;
816  Page wpage;
817  Page rpage;
818  HashPageOpaque wopaque;
819  HashPageOpaque ropaque;
820 
821  /*
822  * start squeezing into the primary bucket page.
823  */
824  wblkno = bucket_blkno;
825  wbuf = bucket_buf;
826  wpage = BufferGetPage(wbuf);
827  wopaque = HashPageGetOpaque(wpage);
828 
829  /*
830  * if there aren't any overflow pages, there's nothing to squeeze. caller
831  * is responsible for releasing the pin on primary bucket page.
832  */
833  if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
834  {
835  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
836  return;
837  }
838 
839  /*
840  * Find the last page in the bucket chain by starting at the base bucket
841  * page and working forward. Note: we assume that a hash bucket chain is
842  * usually smaller than the buffer ring being used by VACUUM, else using
843  * the access strategy here would be counterproductive.
844  */
845  rbuf = InvalidBuffer;
846  ropaque = wopaque;
847  do
848  {
849  rblkno = ropaque->hasho_nextblkno;
850  if (rbuf != InvalidBuffer)
851  _hash_relbuf(rel, rbuf);
852  rbuf = _hash_getbuf_with_strategy(rel,
853  rblkno,
854  HASH_WRITE,
855  LH_OVERFLOW_PAGE,
856  bstrategy);
857  rpage = BufferGetPage(rbuf);
858  ropaque = HashPageGetOpaque(rpage);
859  Assert(ropaque->hasho_bucket == bucket);
860  } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
861 
862  /*
863  * squeeze the tuples.
864  */
865  for (;;)
866  {
867  OffsetNumber roffnum;
868  OffsetNumber maxroffnum;
869  OffsetNumber deletable[MaxOffsetNumber];
870  IndexTuple itups[MaxIndexTuplesPerPage];
871  Size tups_size[MaxIndexTuplesPerPage];
872  OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
873  uint16 ndeletable = 0;
874  uint16 nitups = 0;
875  Size all_tups_size = 0;
876  int i;
877  bool retain_pin = false;
878 
879 readpage:
880  /* Scan each tuple in "read" page */
881  maxroffnum = PageGetMaxOffsetNumber(rpage);
882  for (roffnum = FirstOffsetNumber;
883  roffnum <= maxroffnum;
884  roffnum = OffsetNumberNext(roffnum))
885  {
886  IndexTuple itup;
887  Size itemsz;
888 
889  /* skip dead tuples */
890  if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
891  continue;
892 
893  itup = (IndexTuple) PageGetItem(rpage,
894  PageGetItemId(rpage, roffnum));
895  itemsz = IndexTupleSize(itup);
896  itemsz = MAXALIGN(itemsz);
897 
898  /*
899  * Walk up the bucket chain, looking for a page big enough for
900  * this item and all other accumulated items. Exit if we reach
901  * the read page.
902  */
903  while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
904  {
905  Buffer next_wbuf = InvalidBuffer;
906  bool tups_moved = false;
907 
908  Assert(!PageIsEmpty(wpage));
909 
910  if (wblkno == bucket_blkno)
911  retain_pin = true;
912 
913  wblkno = wopaque->hasho_nextblkno;
914  Assert(BlockNumberIsValid(wblkno));
915 
916  /* don't need to move to next page if we reached the read page */
917  if (wblkno != rblkno)
918  next_wbuf = _hash_getbuf_with_strategy(rel,
919  wblkno,
920  HASH_WRITE,
921  LH_OVERFLOW_PAGE,
922  bstrategy);
923 
924  if (nitups > 0)
925  {
926  Assert(nitups == ndeletable);
927 
928  /*
929  * This operation needs to log multiple tuples, prepare
930  * WAL for that.
931  */
932  if (RelationNeedsWAL(rel))
933  XLogEnsureRecordSpace(0, 3 + nitups);
934 
935  START_CRIT_SECTION();
936 
937  /*
938  * we have to insert tuples on the "write" page, being
939  * careful to preserve hashkey ordering. (If we insert
940  * many tuples into the same "write" page it would be
941  * worth qsort'ing them).
942  */
943  _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
944  MarkBufferDirty(wbuf);
945 
946  /* Delete tuples we already moved off read page */
947  PageIndexMultiDelete(rpage, deletable, ndeletable);
948  MarkBufferDirty(rbuf);
949 
950  /* XLOG stuff */
951  if (RelationNeedsWAL(rel))
952  {
953  XLogRecPtr recptr;
954  xl_hash_move_page_contents xlrec;
955 
956  xlrec.ntups = nitups;
957  xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
958 
959  XLogBeginInsert();
960  XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
961 
962  /*
963  * bucket buffer needs to be registered to ensure that
964  * we can acquire a cleanup lock on it during replay.
965  */
966  if (!xlrec.is_prim_bucket_same_wrt)
967  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
968 
969  XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
970  XLogRegisterBufData(1, (char *) itup_offsets,
971  nitups * sizeof(OffsetNumber));
972  for (i = 0; i < nitups; i++)
973  XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
974 
975  XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
976  XLogRegisterBufData(2, (char *) deletable,
977  ndeletable * sizeof(OffsetNumber));
978 
979  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
980 
981  PageSetLSN(BufferGetPage(wbuf), recptr);
982  PageSetLSN(BufferGetPage(rbuf), recptr);
983  }
984 
985  END_CRIT_SECTION();
986 
987  tups_moved = true;
988  }
989 
990  /*
991  * release the lock on previous page after acquiring the lock
992  * on next page
993  */
994  if (retain_pin)
995  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
996  else
997  _hash_relbuf(rel, wbuf);
998 
999  /* nothing more to do if we reached the read page */
1000  if (rblkno == wblkno)
1001  {
1002  _hash_relbuf(rel, rbuf);
1003  return;
1004  }
1005 
1006  wbuf = next_wbuf;
1007  wpage = BufferGetPage(wbuf);
1008  wopaque = HashPageGetOpaque(wpage);
1009  Assert(wopaque->hasho_bucket == bucket);
1010  retain_pin = false;
1011 
1012  /* be tidy */
1013  for (i = 0; i < nitups; i++)
1014  pfree(itups[i]);
1015  nitups = 0;
1016  all_tups_size = 0;
1017  ndeletable = 0;
1018 
1019  /*
1020  * after moving the tuples, rpage would have been compacted,
1021  * so we need to rescan it.
1022  */
1023  if (tups_moved)
1024  goto readpage;
1025  }
1026 
1027  /* remember tuple for deletion from "read" page */
1028  deletable[ndeletable++] = roffnum;
1029 
1030  /*
1031  * we need a copy of index tuples as they can be freed as part of
1032  * overflow page, however we need them to write a WAL record in
1033  * _hash_freeovflpage.
1034  */
1035  itups[nitups] = CopyIndexTuple(itup);
1036  tups_size[nitups++] = itemsz;
1037  all_tups_size += itemsz;
1038  }
1039 
1040  /*
1041  * If we reach here, there are no live tuples on the "read" page ---
1042  * it was empty when we got to it, or we moved them all. So we can
1043  * just free the page without bothering with deleting tuples
1044  * individually. Then advance to the previous "read" page.
1045  *
1046  * Tricky point here: if our read and write pages are adjacent in the
1047  * bucket chain, our write lock on wbuf will conflict with
1048  * _hash_freeovflpage's attempt to update the sibling links of the
1049  * removed page. In that case, we don't need to lock it again.
1050  */
1051  rblkno = ropaque->hasho_prevblkno;
1052  Assert(BlockNumberIsValid(rblkno));
1053 
1054  /* free this overflow page (releases rbuf) */
1055  _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
1056  tups_size, nitups, bstrategy);
1057 
1058  /* be tidy */
1059  for (i = 0; i < nitups; i++)
1060  pfree(itups[i]);
1061 
1062  /* are we freeing the page adjacent to wbuf? */
1063  if (rblkno == wblkno)
1064  {
1065  /* retain the pin on primary bucket page till end of bucket scan */
1066  if (wblkno == bucket_blkno)
1067  LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
1068  else
1069  _hash_relbuf(rel, wbuf);
1070  return;
1071  }
1072 
1073  rbuf = _hash_getbuf_with_strategy(rel,
1074  rblkno,
1075  HASH_WRITE,
1076  LH_OVERFLOW_PAGE,
1077  bstrategy);
1078  rpage = BufferGetPage(rbuf);
1079  ropaque = HashPageGetOpaque(rpage);
1080  Assert(ropaque->hasho_bucket == bucket);
1081  }
1082 
1083  /* NOTREACHED */
1084 }
Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
Definition: bufpage.c:934
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
#define PageIsEmpty(page)
Definition: bufpage.h:221
unsigned short uint16
Definition: c.h:440
#define SizeOfHashMovePageContents
Definition: hash_xlog.h:138
#define XLOG_HASH_MOVE_PAGE_CONTENTS
Definition: hash_xlog.h:34
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
Definition: hashovfl.c:490
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:528
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define MaxIndexTuplesPerPage
Definition: itup.h:144
#define MaxOffsetNumber
Definition: off.h:28

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid, BUFFER_LOCK_UNLOCK, BufferGetPage, CopyIndexTuple(), END_CRIT_SECTION, FirstOffsetNumber, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, i, IndexTupleSize, InvalidBuffer, xl_hash_move_page_contents::is_prim_bucket_same_wrt, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageIndexMultiDelete(), PageIsEmpty, PageSetLSN, pfree(), REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().
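
The control flow above is easier to picture as two cursors converging on the bucket's overflow chain (illustrative sketch):

/*
 *   bucket page -> ovfl 1 -> ovfl 2 -> ovfl 3
 *   w (fill) ----->                      <----- r (drain)
 *
 * Tuples are batch-copied from the "read" page at the tail into the first
 * page with enough free space; each emptied tail page is handed to
 * _hash_freeovflpage() and r steps back to its predecessor.  The loop
 * terminates when the two cursors meet on the same page.
 */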

◆ hashadjustmembers()

void hashadjustmembers ( Oid  opfamilyoid,
Oid  opclassoid,
List operators,
List functions 
)

Definition at line 352 of file hashvalidate.c.

356 {
357  Oid opcintype;
358  ListCell *lc;
359 
360  /*
361  * Hash operators and required support functions are always "loose"
362  * members of the opfamily if they are cross-type. If they are not
363  * cross-type, we prefer to tie them to the appropriate opclass ... but if
364  * the user hasn't created one, we can't do that, and must fall back to
365  * using the opfamily dependency. (We mustn't force creation of an
366  * opclass in such a case, as leaving an incomplete opclass laying about
367  * would be bad. Throwing an error is another undesirable alternative.)
368  *
369  * This behavior results in a bit of a dump/reload hazard, in that the
370  * order of restoring objects could affect what dependencies we end up
371  * with. pg_dump's existing behavior will preserve the dependency choices
372  * in most cases, but not if a cross-type operator has been bound tightly
373  * into an opclass. That's a mistake anyway, so silently "fixing" it
374  * isn't awful.
375  *
376  * Optional support functions are always "loose" family members.
377  *
378  * To avoid repeated lookups, we remember the most recently used opclass's
379  * input type.
380  */
381  if (OidIsValid(opclassoid))
382  {
383  /* During CREATE OPERATOR CLASS, need CCI to see the pg_opclass row */
384  CommandCounterIncrement();
385  opcintype = get_opclass_input_type(opclassoid);
386  }
387  else
388  opcintype = InvalidOid;
389 
390  /*
391  * We handle operators and support functions almost identically, so rather
392  * than duplicate this code block, just join the lists.
393  */
394  foreach(lc, list_concat_copy(operators, functions))
395  {
396  OpFamilyMember *op = (OpFamilyMember *) lfirst(lc);
397 
398  if (op->is_func && op->number != HASHSTANDARD_PROC)
399  {
400  /* Optional support proc, so always a soft family dependency */
401  op->ref_is_hard = false;
402  op->ref_is_family = true;
403  op->refobjid = opfamilyoid;
404  }
405  else if (op->lefttype != op->righttype)
406  {
407  /* Cross-type, so always a soft family dependency */
408  op->ref_is_hard = false;
409  op->ref_is_family = true;
410  op->refobjid = opfamilyoid;
411  }
412  else
413  {
414  /* Not cross-type; is there a suitable opclass? */
415  if (op->lefttype != opcintype)
416  {
417  /* Avoid repeating this expensive lookup, even if it fails */
418  opcintype = op->lefttype;
419  opclassoid = opclass_for_family_datatype(HASH_AM_OID,
420  opfamilyoid,
421  opcintype);
422  }
423  if (OidIsValid(opclassoid))
424  {
425  /* Hard dependency on opclass */
426  op->ref_is_hard = true;
427  op->ref_is_family = false;
428  op->refobjid = opclassoid;
429  }
430  else
431  {
432  /* We're stuck, so make a soft dependency on the opfamily */
433  op->ref_is_hard = false;
434  op->ref_is_family = true;
435  op->refobjid = opfamilyoid;
436  }
437  }
438  }
439 }
Oid opclass_for_family_datatype(Oid amoid, Oid opfamilyoid, Oid datatypeoid)
Definition: amvalidate.c:236
#define OidIsValid(objectId)
Definition: c.h:710
List * list_concat_copy(const List *list1, const List *list2)
Definition: list.c:577
Oid get_opclass_input_type(Oid opclass)
Definition: lsyscache.c:1215
#define lfirst(lc)
Definition: pg_list.h:169
Oid refobjid
Definition: amapi.h:90
Oid lefttype
Definition: amapi.h:85
bool ref_is_family
Definition: amapi.h:89
Oid righttype
Definition: amapi.h:86
int number
Definition: amapi.h:84
bool is_func
Definition: amapi.h:82
bool ref_is_hard
Definition: amapi.h:88
void CommandCounterIncrement(void)
Definition: xact.c:1074

References CommandCounterIncrement(), functions, get_opclass_input_type(), HASHSTANDARD_PROC, InvalidOid, OpFamilyMember::is_func, OpFamilyMember::lefttype, lfirst, list_concat_copy(), OpFamilyMember::number, OidIsValid, opclass_for_family_datatype(), OpFamilyMember::ref_is_family, OpFamilyMember::ref_is_hard, OpFamilyMember::refobjid, and OpFamilyMember::righttype.

Referenced by hashhandler().

◆ hashbeginscan()

IndexScanDesc hashbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 365 of file hash.c.

366 {
367  IndexScanDesc scan;
368  HashScanOpaque so;
369 
370  /* no order by operators allowed */
371  Assert(norderbys == 0);
372 
373  scan = RelationGetIndexScan(rel, nkeys, norderbys);
374 
375  so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
376  HashScanPosInvalidate(so->currPos);
377  so->hashso_bucket_buf = InvalidBuffer;
378  so->hashso_split_bucket_buf = InvalidBuffer;
379 
380  so->hashso_buc_populated = false;
381  so->hashso_buc_split = false;
382 
383  so->killedItems = NULL;
384  so->numKilled = 0;
385 
386  scan->opaque = so;
387 
388  return scan;
389 }
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:81
void * palloc(Size size)
Definition: mcxt.c:1068

References Assert(), HashScanOpaqueData::currPos, HashScanPosInvalidate, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, InvalidBuffer, HashScanOpaqueData::killedItems, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and RelationGetIndexScan().

Referenced by hashhandler().

◆ hashbucketcleanup()

void hashbucketcleanup ( Relation  rel,
Bucket  cur_bucket,
Buffer  bucket_buf,
BlockNumber  bucket_blkno,
BufferAccessStrategy  bstrategy,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask,
double *  tuples_removed,
double *  num_index_tuples,
bool  split_cleanup,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 685 of file hash.c.

691 {
692  BlockNumber blkno;
693  Buffer buf;
694  Bucket new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
695  bool bucket_dirty = false;
696 
697  blkno = bucket_blkno;
698  buf = bucket_buf;
699 
700  if (split_cleanup)
701  new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
702  lowmask, maxbucket);
703 
704  /* Scan each page in bucket */
705  for (;;)
706  {
707  HashPageOpaque opaque;
708  OffsetNumber offno;
709  OffsetNumber maxoffno;
710  Buffer next_buf;
711  Page page;
712  OffsetNumber deletable[MaxOffsetNumber];
713  int ndeletable = 0;
714  bool retain_pin = false;
715  bool clear_dead_marking = false;
716 
717  vacuum_delay_point();
718 
719  page = BufferGetPage(buf);
720  opaque = HashPageGetOpaque(page);
721 
722  /* Scan each tuple in page */
723  maxoffno = PageGetMaxOffsetNumber(page);
724  for (offno = FirstOffsetNumber;
725  offno <= maxoffno;
726  offno = OffsetNumberNext(offno))
727  {
728  ItemPointer htup;
729  IndexTuple itup;
730  Bucket bucket;
731  bool kill_tuple = false;
732 
733  itup = (IndexTuple) PageGetItem(page,
734  PageGetItemId(page, offno));
735  htup = &(itup->t_tid);
736 
737  /*
738  * To remove the dead tuples, we strictly want to rely on results
739  * of callback function. refer btvacuumpage for detailed reason.
740  */
741  if (callback && callback(htup, callback_state))
742  {
743  kill_tuple = true;
744  if (tuples_removed)
745  *tuples_removed += 1;
746  }
747  else if (split_cleanup)
748  {
749  /* delete the tuples that are moved by split. */
750  bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
751  maxbucket,
752  highmask,
753  lowmask);
754  /* mark the item for deletion */
755  if (bucket != cur_bucket)
756  {
757  /*
758  * We expect tuples to either belong to current bucket or
759  * new_bucket. This is ensured because we don't allow
760  * further splits from bucket that contains garbage. See
761  * comments in _hash_expandtable.
762  */
763  Assert(bucket == new_bucket);
764  kill_tuple = true;
765  }
766  }
767 
768  if (kill_tuple)
769  {
770  /* mark the item for deletion */
771  deletable[ndeletable++] = offno;
772  }
773  else
774  {
775  /* we're keeping it, so count it */
776  if (num_index_tuples)
777  *num_index_tuples += 1;
778  }
779  }
780 
781  /* retain the pin on primary bucket page till end of bucket scan */
782  if (blkno == bucket_blkno)
783  retain_pin = true;
784  else
785  retain_pin = false;
786 
787  blkno = opaque->hasho_nextblkno;
788 
789  /*
790  * Apply deletions, advance to next page and write page if needed.
791  */
792  if (ndeletable > 0)
793  {
794  /* No ereport(ERROR) until changes are logged */
795  START_CRIT_SECTION();
796 
797  PageIndexMultiDelete(page, deletable, ndeletable);
798  bucket_dirty = true;
799 
800  /*
801  * Let us mark the page as clean if vacuum removes the DEAD tuples
802  * from an index page. We do this by clearing
803  * LH_PAGE_HAS_DEAD_TUPLES flag.
804  */
805  if (tuples_removed && *tuples_removed > 0 &&
806  H_HAS_DEAD_TUPLES(opaque))
807  {
808  opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
809  clear_dead_marking = true;
810  }
811 
812  MarkBufferDirty(buf);
813 
814  /* XLOG stuff */
815  if (RelationNeedsWAL(rel))
816  {
817  xl_hash_delete xlrec;
818  XLogRecPtr recptr;
819 
820  xlrec.clear_dead_marking = clear_dead_marking;
821  xlrec.is_primary_bucket_page = (buf == bucket_buf);
822 
823  XLogBeginInsert();
824  XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
825 
826  /*
827  * bucket buffer needs to be registered to ensure that we can
828  * acquire a cleanup lock on it during replay.
829  */
830  if (!xlrec.is_primary_bucket_page)
831  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
832 
833  XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
834  XLogRegisterBufData(1, (char *) deletable,
835  ndeletable * sizeof(OffsetNumber));
836 
837  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
838  PageSetLSN(BufferGetPage(buf), recptr);
839  }
840 
841  END_CRIT_SECTION();
842  }
843 
844  /* bail out if there are no more pages to scan. */
845  if (!BlockNumberIsValid(blkno))
846  break;
847 
848  next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
849  LH_OVERFLOW_PAGE,
850  bstrategy);
851 
852  /*
853  * release the lock on previous page after acquiring the lock on next
854  * page
855  */
856  if (retain_pin)
857  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
858  else
859  _hash_relbuf(rel, buf);
860 
861  buf = next_buf;
862  }
863 
864  /*
865  * lock the bucket page to clear the garbage flag and squeeze the bucket.
866  * if the current buffer is same as bucket buffer, then we already have
867  * lock on bucket page.
868  */
869  if (buf != bucket_buf)
870  {
871  _hash_relbuf(rel, buf);
872  LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
873  }
874 
875  /*
876  * Clear the garbage flag from bucket after deleting the tuples that are
877  * moved by split. We purposefully clear the flag before squeeze bucket,
878  * so that after restart, vacuum shouldn't again try to delete the moved
879  * by split tuples.
880  */
881  if (split_cleanup)
882  {
883  HashPageOpaque bucket_opaque;
884  Page page;
885 
886  page = BufferGetPage(bucket_buf);
887  bucket_opaque = HashPageGetOpaque(page);
888 
889  /* No ereport(ERROR) until changes are logged */
890  START_CRIT_SECTION();
891 
892  bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
893  MarkBufferDirty(bucket_buf);
894 
895  /* XLOG stuff */
896  if (RelationNeedsWAL(rel))
897  {
898  XLogRecPtr recptr;
899 
900  XLogBeginInsert();
901  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
902 
903  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
904  PageSetLSN(page, recptr);
905  }
906 
907  END_CRIT_SECTION();
908  }
909 
910  /*
911  * If we have deleted anything, try to compact free space. For squeezing
912  * the bucket, we must have a cleanup lock, else it can impact the
913  * ordering of tuples for a scan that has started before it.
914  */
915  if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
916  _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
917  bstrategy);
918  else
919  LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
920 }
#define LH_BUCKET_NEEDS_SPLIT_CLEANUP
Definition: hash.h:60
#define XLOG_HASH_SPLIT_CLEANUP
Definition: hash_xlog.h:37
#define SizeOfHashDelete
Definition: hash_xlog.h:186
#define XLOG_HASH_DELETE
Definition: hash_xlog.h:36
void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
Definition: hashovfl.c:806
bool clear_dead_marking
Definition: hash_xlog.h:180
bool is_primary_bucket_page
Definition: hash_xlog.h:182
void vacuum_delay_point(void)
Definition: vacuum.c:2207

References _hash_get_indextuple_hashkey(), _hash_get_newbucket_from_oldbucket(), _hash_getbuf_with_strategy(), _hash_hashkey2bucket(), _hash_relbuf(), _hash_squeezebucket(), Assert(), BlockNumberIsValid, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage, callback(), xl_hash_delete::clear_dead_marking, END_CRIT_SECTION, FirstOffsetNumber, H_HAS_DEAD_TUPLES, HASH_WRITE, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, InvalidBucket, xl_hash_delete::is_primary_bucket_page, IsBufferCleanupOK(), LH_BUCKET_NEEDS_SPLIT_CLEANUP, LH_OVERFLOW_PAGE, LH_PAGE_HAS_DEAD_TUPLES, LockBuffer(), MarkBufferDirty(), MaxOffsetNumber, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageIndexMultiDelete(), PageSetLSN, PG_USED_FOR_ASSERTS_ONLY, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashDelete, START_CRIT_SECTION, IndexTupleData::t_tid, vacuum_delay_point(), XLOG_HASH_DELETE, XLOG_HASH_SPLIT_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_expandtable(), _hash_splitbucket(), and hashbulkdelete().

◆ hashbuild()

IndexBuildResult* hashbuild ( Relation  heap,
Relation  index,
struct IndexInfo indexInfo 
)

Definition at line 113 of file hash.c.

114 {
115  IndexBuildResult *result;
116  BlockNumber relpages;
117  double reltuples;
118  double allvisfrac;
119  uint32 num_buckets;
120  long sort_threshold;
121  HashBuildState buildstate;
122 
123  /*
124  * We expect to be called exactly once for any index relation. If that's
125  * not the case, big trouble's what we have.
126  */
127  if (RelationGetNumberOfBlocks(index) != 0)
128  elog(ERROR, "index \"%s\" already contains data",
129  RelationGetRelationName(index));
130 
131  /* Estimate the number of rows currently present in the table */
132  estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
133 
134  /* Initialize the hash index metadata page and initial buckets */
135  num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
136 
137  /*
138  * If we just insert the tuples into the index in scan order, then
139  * (assuming their hash codes are pretty random) there will be no locality
140  * of access to the index, and if the index is bigger than available RAM
141  * then we'll thrash horribly. To prevent that scenario, we can sort the
142  * tuples by (expected) bucket number. However, such a sort is useless
143  * overhead when the index does fit in RAM. We choose to sort if the
144  * initial index size exceeds maintenance_work_mem, or the number of
145  * buffers usable for the index, whichever is less. (Limiting by the
146  * number of buffers should reduce thrashing between PG buffers and kernel
147  * buffers, which seems useful even if no physical I/O results. Limiting
148  * by maintenance_work_mem is useful to allow easy testing of the sort
149  * code path, and may be useful to DBAs as an additional control knob.)
150  *
151  * NOTE: this test will need adjustment if a bucket is ever different from
152  * one page. Also, "initial index size" accounting does not include the
153  * metapage, nor the first bitmap page.
154  */
155  sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
156  if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
157  sort_threshold = Min(sort_threshold, NBuffers);
158  else
159  sort_threshold = Min(sort_threshold, NLocBuffer);
160 
161  if (num_buckets >= (uint32) sort_threshold)
162  buildstate.spool = _h_spoolinit(heap, index, num_buckets);
163  else
164  buildstate.spool = NULL;
165 
166  /* prepare to build the index */
167  buildstate.indtuples = 0;
168  buildstate.heapRel = heap;
169 
170  /* do the heap scan */
171  reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
172  hashbuildCallback,
173  (void *) &buildstate, NULL);
174  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
175  buildstate.indtuples);
176 
177  if (buildstate.spool)
178  {
179  /* sort the tuples and insert them into the index */
180  _h_indexbuild(buildstate.spool, buildstate.heapRel);
181  _h_spooldestroy(buildstate.spool);
182  }
183 
184  /*
185  * Return statistics
186  */
187  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
188 
189  result->heap_tuples = reltuples;
190  result->index_tuples = buildstate.indtuples;
191 
192  return result;
193 }
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:216
#define Min(x, y)
Definition: c.h:986
int NBuffers
Definition: globals.c:136
static void hashbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: hash.c:208
uint32 _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
Definition: hashpage.c:327
void _h_indexbuild(HSpool *hspool, Relation heapRel)
Definition: hashsort.c:119
HSpool * _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
Definition: hashsort.c:59
void _h_spooldestroy(HSpool *hspool)
Definition: hashsort.c:98
int NLocBuffer
Definition: localbuf.c:41
void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: plancat.c:960
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:84
HSpool * spool
Definition: hash.c:39
Relation heapRel
Definition: hash.c:41
double indtuples
Definition: hash.c:40
double heap_tuples
Definition: genam.h:32
double index_tuples
Definition: genam.h:33
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1747

References _h_indexbuild(), _h_spooldestroy(), _h_spoolinit(), _hash_init(), elog, ERROR, estimate_rel_size(), hashbuildCallback(), IndexBuildResult::heap_tuples, HashBuildState::heapRel, IndexBuildResult::index_tuples, HashBuildState::indtuples, MAIN_FORKNUM, maintenance_work_mem, Min, NBuffers, NLocBuffer, palloc(), pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_TOTAL, RelationGetNumberOfBlocks, RelationGetRelationName, HashBuildState::spool, and table_index_build_scan().

Referenced by hashhandler().
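
The sort-versus-insert decision reduces to a page-count comparison. A worked example (assuming, for illustration, maintenance_work_mem = 64MB, BLCKSZ = 8192, and NBuffers = 16384, i.e. shared_buffers = 128MB, on a non-temporary table):

/*
 * sort_threshold = (65536 * 1024L) / 8192      = 8192 pages
 * sort_threshold = Min(8192, NBuffers = 16384) = 8192
 *
 * An index created with fewer than 8192 initial buckets is loaded by plain
 * insertion; at 8192 buckets or more, tuples are spooled through the
 * tuplesort path (_h_spoolinit / _h_indexbuild) first.
 */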

◆ hashbuildempty()

void hashbuildempty ( Relation  index)

Definition at line 199 of file hash.c.

200 {
201  _hash_init(index, 0, INIT_FORKNUM);
202 }

References _hash_init(), and INIT_FORKNUM.

Referenced by hashhandler().

◆ hashbulkdelete()

IndexBulkDeleteResult* hashbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 460 of file hash.c.

462 {
463  Relation rel = info->index;
464  double tuples_removed;
465  double num_index_tuples;
466  double orig_ntuples;
467  Bucket orig_maxbucket;
468  Bucket cur_maxbucket;
469  Bucket cur_bucket;
470  Buffer metabuf = InvalidBuffer;
471  HashMetaPage metap;
472  HashMetaPage cachedmetap;
473 
474  tuples_removed = 0;
475  num_index_tuples = 0;
476 
477  /*
478  * We need a copy of the metapage so that we can use its hashm_spares[]
479  * values to compute bucket page addresses, but a cached copy should be
480  * good enough. (If not, we'll detect that further down and refresh the
481  * cache as necessary.)
482  */
483  cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
484  Assert(cachedmetap != NULL);
485 
486  orig_maxbucket = cachedmetap->hashm_maxbucket;
487  orig_ntuples = cachedmetap->hashm_ntuples;
488 
489  /* Scan the buckets that we know exist */
490  cur_bucket = 0;
491  cur_maxbucket = orig_maxbucket;
492 
493 loop_top:
494  while (cur_bucket <= cur_maxbucket)
495  {
496  BlockNumber bucket_blkno;
497  BlockNumber blkno;
498  Buffer bucket_buf;
499  Buffer buf;
500  HashPageOpaque bucket_opaque;
501  Page page;
502  bool split_cleanup = false;
503 
504  /* Get address of bucket's start page */
505  bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
506 
507  blkno = bucket_blkno;
508 
509  /*
510  * We need to acquire a cleanup lock on the primary bucket page to out
511  * wait concurrent scans before deleting the dead tuples.
512  */
513  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
514  LockBufferForCleanup(buf);
515  _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
516 
517  page = BufferGetPage(buf);
518  bucket_opaque = HashPageGetOpaque(page);
519 
520  /*
521  * If the bucket contains tuples that are moved by split, then we need
522  * to delete such tuples. We can't delete such tuples if the split
523  * operation on bucket is not finished as those are needed by scans.
524  */
525  if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
526  H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
527  {
528  split_cleanup = true;
529 
530  /*
531  * This bucket might have been split since we last held a lock on
532  * the metapage. If so, hashm_maxbucket, hashm_highmask and
533  * hashm_lowmask might be old enough to cause us to fail to remove
534  * tuples left behind by the most recent split. To prevent that,
535  * now that the primary page of the target bucket has been locked
536  * (and thus can't be further split), check whether we need to
537  * update our cached metapage data.
538  */
539  Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
540  if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
541  {
542  cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
543  Assert(cachedmetap != NULL);
544  }
545  }
546 
547  bucket_buf = buf;
548