PostgreSQL Source Code git master
hash.h File Reference
#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "catalog/pg_am_d.h"
#include "common/hashfn.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/lockdefs.h"
#include "utils/hsearch.h"
#include "utils/relcache.h"
Include dependency graph for hash.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HashPageOpaqueData
 
struct  HashScanPosItem
 
struct  HashScanPosData
 
struct  HashScanOpaqueData
 
struct  HashMetaPageData
 
struct  HashOptions
 

Macros

#define InvalidBucket   ((Bucket) 0xFFFFFFFF)
 
#define BUCKET_TO_BLKNO(metap, B)    ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)
 
#define LH_UNUSED_PAGE   (0)
 
#define LH_OVERFLOW_PAGE   (1 << 0)
 
#define LH_BUCKET_PAGE   (1 << 1)
 
#define LH_BITMAP_PAGE   (1 << 2)
 
#define LH_META_PAGE   (1 << 3)
 
#define LH_BUCKET_BEING_POPULATED   (1 << 4)
 
#define LH_BUCKET_BEING_SPLIT   (1 << 5)
 
#define LH_BUCKET_NEEDS_SPLIT_CLEANUP   (1 << 6)
 
#define LH_PAGE_HAS_DEAD_TUPLES   (1 << 7)
 
#define LH_PAGE_TYPE    (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)
 
#define HashPageGetOpaque(page)   ((HashPageOpaque) PageGetSpecialPointer(page))
 
#define H_NEEDS_SPLIT_CLEANUP(opaque)   (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)
 
#define H_BUCKET_BEING_SPLIT(opaque)   (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)
 
#define H_BUCKET_BEING_POPULATED(opaque)   (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)
 
#define H_HAS_DEAD_TUPLES(opaque)   (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)
 
#define HASHO_PAGE_ID   0xFF80
 
#define HashScanPosIsPinned(scanpos)
 
#define HashScanPosIsValid(scanpos)
 
#define HashScanPosInvalidate(scanpos)
 
#define HASH_METAPAGE   0 /* metapage is always block 0 */
 
#define HASH_MAGIC   0x6440640
 
#define HASH_VERSION   4
 
#define HASH_MAX_BITMAPS   Min(BLCKSZ / 8, 1024)
 
#define HASH_SPLITPOINT_PHASE_BITS   2
 
#define HASH_SPLITPOINT_PHASES_PER_GRP   (1 << HASH_SPLITPOINT_PHASE_BITS)
 
#define HASH_SPLITPOINT_PHASE_MASK   (HASH_SPLITPOINT_PHASES_PER_GRP - 1)
 
#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE   10
 
#define HASH_MAX_SPLITPOINT_GROUP   32
 
#define HASH_MAX_SPLITPOINTS
 
#define HashGetFillFactor(relation)
 
#define HashGetTargetPageUsage(relation)    (BLCKSZ * HashGetFillFactor(relation) / 100)
 
#define HashMaxItemSize(page)
 
#define INDEX_MOVED_BY_SPLIT_MASK   INDEX_AM_RESERVED_BIT
 
#define HASH_MIN_FILLFACTOR   10
 
#define HASH_DEFAULT_FILLFACTOR   75
 
#define BYTE_TO_BIT   3 /* 2^3 bits/byte */
 
#define ALL_SET   ((uint32) ~0)
 
#define BMPGSZ_BYTE(metap)   ((metap)->hashm_bmsize)
 
#define BMPGSZ_BIT(metap)   ((metap)->hashm_bmsize << BYTE_TO_BIT)
 
#define BMPG_SHIFT(metap)   ((metap)->hashm_bmshift)
 
#define BMPG_MASK(metap)   (BMPGSZ_BIT(metap) - 1)
 
#define HashPageGetBitmap(page)    ((uint32 *) PageGetContents(page))
 
#define HashGetMaxBitmapSize(page)
 
#define HashPageGetMeta(page)    ((HashMetaPage) PageGetContents(page))
 
#define BITS_PER_MAP   32 /* Number of bits in uint32 */
 
#define CLRBIT(A, N)   ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
 
#define SETBIT(A, N)   ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
 
#define ISSET(A, N)   ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
 
#define HASH_READ   BUFFER_LOCK_SHARE
 
#define HASH_WRITE   BUFFER_LOCK_EXCLUSIVE
 
#define HASH_NOLOCK   (-1)
 
#define HASHSTANDARD_PROC   1
 
#define HASHEXTENDED_PROC   2
 
#define HASHOPTIONS_PROC   3
 
#define HASHNProcs   3
 

Typedefs

typedef uint32 Bucket
 
typedef struct HashPageOpaqueData HashPageOpaqueData
 
typedef HashPageOpaqueData * HashPageOpaque
 
typedef struct HashScanPosItem HashScanPosItem
 
typedef struct HashScanPosData HashScanPosData
 
typedef struct HashScanOpaqueData HashScanOpaqueData
 
typedef HashScanOpaqueData * HashScanOpaque
 
typedef struct HashMetaPageData HashMetaPageData
 
typedef HashMetaPageData * HashMetaPage
 
typedef struct HashOptions HashOptions
 
typedef struct HSpool HSpool
 

Functions

IndexBuildResult * hashbuild (Relation heap, Relation index, struct IndexInfo *indexInfo)
 
void hashbuildempty (Relation index)
 
bool hashinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo)
 
bool hashgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 hashgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc hashbeginscan (Relation rel, int nkeys, int norderbys)
 
void hashrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void hashendscan (IndexScanDesc scan)
 
IndexBulkDeleteResult * hashbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * hashvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bytea * hashoptions (Datum reloptions, bool validate)
 
bool hashvalidate (Oid opclassoid)
 
void hashadjustmembers (Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
 
CompareType hashtranslatestrategy (StrategyNumber strategy, Oid opfamily)
 
StrategyNumber hashtranslatecmptype (CompareType cmptype, Oid opfamily)
 
void _hash_doinsert (Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
 
OffsetNumber _hash_pgaddtup (Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool appendtup)
 
void _hash_pgaddmultitup (Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
 
Buffer _hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
BlockNumber _hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
 
void _hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)
 
void _hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
 
uint32 _hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)
 
Buffer _hash_getbuf (Relation rel, BlockNumber blkno, int access, int flags)
 
Buffer _hash_getbuf_with_condlock_cleanup (Relation rel, BlockNumber blkno, int flags)
 
HashMetaPage _hash_getcachedmetap (Relation rel, Buffer *metabuf, bool force_refresh)
 
Buffer _hash_getbucketbuf_from_hashkey (Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)
 
Buffer _hash_getinitbuf (Relation rel, BlockNumber blkno)
 
void _hash_initbuf (Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, bool initpage)
 
Buffer _hash_getnewbuf (Relation rel, BlockNumber blkno, ForkNumber forkNum)
 
Buffer _hash_getbuf_with_strategy (Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
 
void _hash_relbuf (Relation rel, Buffer buf)
 
void _hash_dropbuf (Relation rel, Buffer buf)
 
void _hash_dropscanbuf (Relation rel, HashScanOpaque so)
 
uint32 _hash_init (Relation rel, double num_tuples, ForkNumber forkNum)
 
void _hash_init_metabuffer (Buffer buf, double num_tuples, RegProcedure procid, uint16 ffactor, bool initpage)
 
void _hash_pageinit (Page page, Size size)
 
void _hash_expandtable (Relation rel, Buffer metabuf)
 
void _hash_finish_split (Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)
 
bool _hash_next (IndexScanDesc scan, ScanDirection dir)
 
bool _hash_first (IndexScanDesc scan, ScanDirection dir)
 
HSpool * _h_spoolinit (Relation heap, Relation index, uint32 num_buckets)
 
void _h_spooldestroy (HSpool *hspool)
 
void _h_spool (HSpool *hspool, ItemPointer self, const Datum *values, const bool *isnull)
 
void _h_indexbuild (HSpool *hspool, Relation heapRel)
 
bool _hash_checkqual (IndexScanDesc scan, IndexTuple itup)
 
uint32 _hash_datum2hashkey (Relation rel, Datum key)
 
uint32 _hash_datum2hashkey_type (Relation rel, Datum key, Oid keytype)
 
Bucket _hash_hashkey2bucket (uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask)
 
uint32 _hash_spareindex (uint32 num_bucket)
 
uint32 _hash_get_totalbuckets (uint32 splitpoint_phase)
 
void _hash_checkpage (Relation rel, Buffer buf, int flags)
 
uint32 _hash_get_indextuple_hashkey (IndexTuple itup)
 
bool _hash_convert_tuple (Relation index, Datum *user_values, bool *user_isnull, Datum *index_values, bool *index_isnull)
 
OffsetNumber _hash_binsearch (Page page, uint32 hash_value)
 
OffsetNumber _hash_binsearch_last (Page page, uint32 hash_value)
 
BlockNumber _hash_get_oldblock_from_newbucket (Relation rel, Bucket new_bucket)
 
BlockNumber _hash_get_newblock_from_oldbucket (Relation rel, Bucket old_bucket)
 
Bucket _hash_get_newbucket_from_oldbucket (Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)
 
void _hash_kill_items (IndexScanDesc scan)
 
void hashbucketcleanup (Relation rel, Bucket cur_bucket, Buffer bucket_buf, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy, uint32 maxbucket, uint32 highmask, uint32 lowmask, double *tuples_removed, double *num_index_tuples, bool split_cleanup, IndexBulkDeleteCallback callback, void *callback_state)
 

Macro Definition Documentation

◆ ALL_SET

#define ALL_SET   ((uint32) ~0)

Definition at line 302 of file hash.h.

◆ BITS_PER_MAP

#define BITS_PER_MAP   32 /* Number of bits in uint32 */

Definition at line 329 of file hash.h.

◆ BMPG_MASK

#define BMPG_MASK (   metap)    (BMPGSZ_BIT(metap) - 1)

Definition at line 314 of file hash.h.

◆ BMPG_SHIFT

#define BMPG_SHIFT (   metap)    ((metap)->hashm_bmshift)

Definition at line 313 of file hash.h.

◆ BMPGSZ_BIT

#define BMPGSZ_BIT (   metap)    ((metap)->hashm_bmsize << BYTE_TO_BIT)

Definition at line 312 of file hash.h.

◆ BMPGSZ_BYTE

#define BMPGSZ_BYTE (   metap)    ((metap)->hashm_bmsize)

Definition at line 311 of file hash.h.

◆ BUCKET_TO_BLKNO

#define BUCKET_TO_BLKNO (   metap, B )    ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)

Definition at line 39 of file hash.h.
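
BUCKET_TO_BLKNO maps a bucket number B to the block number of that bucket's primary page: block 0 is the metapage, and hashm_spares records how many overflow pages were allocated before each splitpoint, so the bucket's block is shifted by those earlier overflow pages plus one. A minimal illustrative sketch of typical usage (metabuf, hashkey, rel and the other locals are hypothetical), assuming the caller already has the metapage contents at hand:

    /* sketch: locate the primary page of the bucket a hash key belongs to */
    HashMetaPage metap  = HashPageGetMeta(BufferGetPage(metabuf));
    Bucket       bucket = _hash_hashkey2bucket(hashkey,
                                               metap->hashm_maxbucket,
                                               metap->hashm_highmask,
                                               metap->hashm_lowmask);
    BlockNumber  blkno  = BUCKET_TO_BLKNO(metap, bucket);
    Buffer       buf    = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);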

◆ BYTE_TO_BIT

#define BYTE_TO_BIT   3 /* 2^3 bits/byte */

Definition at line 301 of file hash.h.

◆ CLRBIT

#define CLRBIT (   A, N )    ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))

Definition at line 332 of file hash.h.

◆ H_BUCKET_BEING_POPULATED

#define H_BUCKET_BEING_POPULATED (   opaque)    (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)

Definition at line 92 of file hash.h.

◆ H_BUCKET_BEING_SPLIT

#define H_BUCKET_BEING_SPLIT (   opaque)    (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)

Definition at line 91 of file hash.h.

◆ H_HAS_DEAD_TUPLES

#define H_HAS_DEAD_TUPLES (   opaque)    (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)

Definition at line 93 of file hash.h.

◆ H_NEEDS_SPLIT_CLEANUP

#define H_NEEDS_SPLIT_CLEANUP (   opaque)    (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)

Definition at line 90 of file hash.h.

◆ HASH_DEFAULT_FILLFACTOR

#define HASH_DEFAULT_FILLFACTOR   75

Definition at line 296 of file hash.h.

◆ HASH_MAGIC

#define HASH_MAGIC   0x6440640

Definition at line 200 of file hash.h.

◆ HASH_MAX_BITMAPS

#define HASH_MAX_BITMAPS   Min(BLCKSZ / 8, 1024)

Definition at line 230 of file hash.h.

◆ HASH_MAX_SPLITPOINT_GROUP

#define HASH_MAX_SPLITPOINT_GROUP   32

Definition at line 238 of file hash.h.

◆ HASH_MAX_SPLITPOINTS

#define HASH_MAX_SPLITPOINTS
Value:
(((HASH_MAX_SPLITPOINT_GROUP - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) * \
  HASH_SPLITPOINT_PHASES_PER_GRP) + \
 HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)

Definition at line 239 of file hash.h.
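
The first HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE splitpoint groups consist of a single phase each, while every later group is divided into HASH_SPLITPOINT_PHASES_PER_GRP phases, so with the constants above the hashm_spares array needs (32 - 10) * 4 + 10 = 98 slots.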

◆ HASH_METAPAGE

#define HASH_METAPAGE   0 /* metapage is always block 0 */

Definition at line 198 of file hash.h.

◆ HASH_MIN_FILLFACTOR

#define HASH_MIN_FILLFACTOR   10

Definition at line 295 of file hash.h.

◆ HASH_NOLOCK

#define HASH_NOLOCK   (-1)

Definition at line 341 of file hash.h.

◆ HASH_READ

#define HASH_READ   BUFFER_LOCK_SHARE

Definition at line 339 of file hash.h.

◆ HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE

#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE   10

Definition at line 235 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_BITS

#define HASH_SPLITPOINT_PHASE_BITS   2

Definition at line 232 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_MASK

#define HASH_SPLITPOINT_PHASE_MASK   (HASH_SPLITPOINT_PHASES_PER_GRP - 1)

Definition at line 234 of file hash.h.

◆ HASH_SPLITPOINT_PHASES_PER_GRP

#define HASH_SPLITPOINT_PHASES_PER_GRP   (1 << HASH_SPLITPOINT_PHASE_BITS)

Definition at line 233 of file hash.h.

◆ HASH_VERSION

#define HASH_VERSION   4

Definition at line 201 of file hash.h.

◆ HASH_WRITE

#define HASH_WRITE   BUFFER_LOCK_EXCLUSIVE

Definition at line 340 of file hash.h.

◆ HASHEXTENDED_PROC

#define HASHEXTENDED_PROC   2

Definition at line 356 of file hash.h.

◆ HashGetFillFactor

#define HashGetFillFactor (   relation)
Value:
(AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
relation->rd_rel->relam == HASH_AM_OID), \
(relation)->rd_options ? \
((HashOptions *) (relation)->rd_options)->fillfactor : \
HASH_DEFAULT_FILLFACTOR)

Definition at line 275 of file hash.h.

◆ HashGetMaxBitmapSize

#define HashGetMaxBitmapSize (   page)
Value:
(PageGetPageSize((Page) page) - \
 (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))

Definition at line 319 of file hash.h.

◆ HashGetTargetPageUsage

#define HashGetTargetPageUsage (   relation)     (BLCKSZ * HashGetFillFactor(relation) / 100)

Definition at line 281 of file hash.h.
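
For example, with the default BLCKSZ of 8192 bytes and the default fill factor of 75, the target page usage works out to 8192 * 75 / 100 = 6144 bytes per page.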

◆ HashMaxItemSize

#define HashMaxItemSize (   page)
Value:
MAXALIGN_DOWN(PageGetPageSize(page) - \
              SizeOfPageHeaderData - \
              sizeof(ItemIdData) - \
              MAXALIGN(sizeof(HashPageOpaqueData)))

Definition at line 287 of file hash.h.

◆ HASHNProcs

#define HASHNProcs   3

Definition at line 358 of file hash.h.

◆ HASHO_PAGE_ID

#define HASHO_PAGE_ID   0xFF80

Definition at line 101 of file hash.h.

◆ HASHOPTIONS_PROC

#define HASHOPTIONS_PROC   3

Definition at line 357 of file hash.h.

◆ HashPageGetBitmap

#define HashPageGetBitmap (   page)     ((uint32 *) PageGetContents(page))

Definition at line 316 of file hash.h.

◆ HashPageGetMeta

#define HashPageGetMeta (   page)     ((HashMetaPage) PageGetContents(page))

Definition at line 323 of file hash.h.

◆ HashPageGetOpaque

#define HashPageGetOpaque (   page)    ((HashPageOpaque) PageGetSpecialPointer(page))

Definition at line 88 of file hash.h.

◆ HashScanPosInvalidate

#define HashScanPosInvalidate (   scanpos)
Value:
do { \
(scanpos).buf = InvalidBuffer; \
(scanpos).currPage = InvalidBlockNumber; \
(scanpos).nextPage = InvalidBlockNumber; \
(scanpos).prevPage = InvalidBlockNumber; \
(scanpos).firstItem = 0; \
(scanpos).lastItem = 0; \
(scanpos).itemIndex = 0; \
} while (0)

Definition at line 144 of file hash.h.

◆ HashScanPosIsPinned

#define HashScanPosIsPinned (   scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BufferIsValid((scanpos).buf) \
)

Definition at line 130 of file hash.h.

◆ HashScanPosIsValid

#define HashScanPosIsValid (   scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BlockNumberIsValid((scanpos).currPage) \
)

Definition at line 137 of file hash.h.
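
The three scan-position macros are used together when a scan drops and later checks its saved position. A minimal sketch of the usual pattern (so is a HashScanOpaque; the surrounding control flow is hypothetical):

    if (HashScanPosIsPinned(so->currPos))
        _hash_dropbuf(rel, so->currPos.buf);   /* drop the pin still held */
    HashScanPosInvalidate(so->currPos);        /* forget the saved position */

    /* ... later, when trying to resume the scan ... */
    if (!HashScanPosIsValid(so->currPos))
        return false;                          /* no position to resume from */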

◆ HASHSTANDARD_PROC

#define HASHSTANDARD_PROC   1

Definition at line 355 of file hash.h.

◆ INDEX_MOVED_BY_SPLIT_MASK

#define INDEX_MOVED_BY_SPLIT_MASK   INDEX_AM_RESERVED_BIT

Definition at line 293 of file hash.h.

◆ InvalidBucket

#define InvalidBucket   ((Bucket) 0xFFFFFFFF)

Definition at line 37 of file hash.h.

◆ ISSET

#define ISSET (   A, N )    ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))

Definition at line 334 of file hash.h.

◆ LH_BITMAP_PAGE

#define LH_BITMAP_PAGE   (1 << 2)

Definition at line 56 of file hash.h.

◆ LH_BUCKET_BEING_POPULATED

#define LH_BUCKET_BEING_POPULATED   (1 << 4)

Definition at line 58 of file hash.h.

◆ LH_BUCKET_BEING_SPLIT

#define LH_BUCKET_BEING_SPLIT   (1 << 5)

Definition at line 59 of file hash.h.

◆ LH_BUCKET_NEEDS_SPLIT_CLEANUP

#define LH_BUCKET_NEEDS_SPLIT_CLEANUP   (1 << 6)

Definition at line 60 of file hash.h.

◆ LH_BUCKET_PAGE

#define LH_BUCKET_PAGE   (1 << 1)

Definition at line 55 of file hash.h.

◆ LH_META_PAGE

#define LH_META_PAGE   (1 << 3)

Definition at line 57 of file hash.h.

◆ LH_OVERFLOW_PAGE

#define LH_OVERFLOW_PAGE   (1 << 0)

Definition at line 54 of file hash.h.

◆ LH_PAGE_HAS_DEAD_TUPLES

#define LH_PAGE_HAS_DEAD_TUPLES   (1 << 7)

Definition at line 61 of file hash.h.

◆ LH_PAGE_TYPE

#define LH_PAGE_TYPE    (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)

Definition at line 63 of file hash.h.

◆ LH_UNUSED_PAGE

#define LH_UNUSED_PAGE   (0)

Definition at line 53 of file hash.h.

◆ SETBIT

#define SETBIT (   A, N )    ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))

Definition at line 333 of file hash.h.
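
SETBIT, CLRBIT and ISSET treat an array of uint32 words as one flat bit array, using BITS_PER_MAP to split a bit number into a word index and a bit offset. A small illustrative sketch (page, freep and bitnum are hypothetical locals; page stands for a bitmap page):

    uint32 *freep  = HashPageGetBitmap(page);  /* bit array on a bitmap page */
    uint32  bitnum = 42;                       /* bit number within this page */

    if (!ISSET(freep, bitnum))
        SETBIT(freep, bitnum);                 /* mark the overflow page "in use" */
    /* ... and when the overflow page is recycled ... */
    CLRBIT(freep, bitnum);                     /* mark it free again */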

Typedef Documentation

◆ Bucket

typedef uint32 Bucket

Definition at line 35 of file hash.h.

◆ HashMetaPage

typedef HashMetaPageData * HashMetaPage

Definition at line 267 of file hash.h.

◆ HashMetaPageData

◆ HashOptions

typedef struct HashOptions HashOptions

◆ HashPageOpaque

typedef HashPageOpaqueData * HashPageOpaque

Definition at line 86 of file hash.h.

◆ HashPageOpaqueData

◆ HashScanOpaque

typedef HashScanOpaqueData * HashScanOpaque

Definition at line 192 of file hash.h.

◆ HashScanOpaqueData

◆ HashScanPosData

◆ HashScanPosItem

◆ HSpool

typedef struct HSpool HSpool

Definition at line 452 of file hash.h.

Function Documentation

◆ _h_indexbuild()

void _h_indexbuild ( HSpool * hspool,
Relation  heapRel 
)

Definition at line 120 of file hashsort.c.

121{
122 IndexTuple itup;
123 int64 tups_done = 0;
124#ifdef USE_ASSERT_CHECKING
125 uint32 hashkey = 0;
126#endif
127
129
130 while ((itup = tuplesort_getindextuple(hspool->sortstate, true)) != NULL)
131 {
132 /*
133 * Technically, it isn't critical that hash keys be found in sorted
134 * order, since this sorting is only used to increase locality of
135 * access as a performance optimization. It still seems like a good
136 * idea to test tuplesort.c's handling of hash index tuple sorts
137 * through an assertion, though.
138 */
139#ifdef USE_ASSERT_CHECKING
140 uint32 lasthashkey = hashkey;
141
143 hspool->max_buckets, hspool->high_mask,
144 hspool->low_mask);
145 Assert(hashkey >= lasthashkey);
146#endif
147
148 /* the tuples are sorted by hashkey, so pass 'sorted' as true */
149 _hash_doinsert(hspool->index, itup, heapRel, true);
150
151 /* allow insertion phase to be interrupted, and track progress */
153
155 ++tups_done);
156 }
157}

References _hash_doinsert(), _hash_get_indextuple_hashkey(), _hash_hashkey2bucket(), Assert(), CHECK_FOR_INTERRUPTS, HSpool::high_mask, HSpool::index, HSpool::low_mask, HSpool::max_buckets, pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_DONE, HSpool::sortstate, tuplesort_getindextuple(), and tuplesort_performsort().

Referenced by hashbuild().

◆ _h_spool()

void _h_spool ( HSpool * hspool,
ItemPointer  self,
const Datum * values,
const bool *  isnull 
)

Definition at line 109 of file hashsort.c.

110{
112 self, values, isnull);
113}

References HSpool::index, HSpool::sortstate, tuplesort_putindextuplevalues(), and values.

Referenced by hashbuildCallback().

◆ _h_spooldestroy()

void _h_spooldestroy ( HSpool * hspool)

Definition at line 99 of file hashsort.c.

100{
101 tuplesort_end(hspool->sortstate);
102 pfree(hspool);
103}

References pfree(), HSpool::sortstate, and tuplesort_end().

Referenced by hashbuild().

◆ _h_spoolinit()

HSpool * _h_spoolinit ( Relation  heap,
Relation  index,
uint32  num_buckets 
)

Definition at line 60 of file hashsort.c.

61{
62 HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
63
64 hspool->index = index;
65
66 /*
67 * Determine the bitmask for hash code values. Since there are currently
68 * num_buckets buckets in the index, the appropriate mask can be computed
69 * as follows.
70 *
71 * NOTE : This hash mask calculation should be in sync with similar
72 * calculation in _hash_init_metabuffer.
73 */
74 hspool->high_mask = pg_nextpower2_32(num_buckets + 1) - 1;
75 hspool->low_mask = (hspool->high_mask >> 1);
76 hspool->max_buckets = num_buckets - 1;
77
78 /*
79 * We size the sort area as maintenance_work_mem rather than work_mem to
80 * speed index creation. This should be OK since a single backend can't
81 * run multiple index creations in parallel.
82 */
84 index,
85 hspool->high_mask,
86 hspool->low_mask,
87 hspool->max_buckets,
89 NULL,
91
92 return hspool;
93}

References HSpool::high_mask, HSpool::index, HSpool::low_mask, maintenance_work_mem, HSpool::max_buckets, palloc0(), pg_nextpower2_32(), HSpool::sortstate, tuplesort_begin_index_hash(), and TUPLESORT_NONE.

Referenced by hashbuild().
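
For example, with num_buckets = 100 the computation above yields high_mask = pg_nextpower2_32(101) - 1 = 127, low_mask = 63 and max_buckets = 99; per the comment in the code, this mask calculation must stay in sync with the one in _hash_init_metabuffer.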

◆ _hash_addovflpage()

Buffer _hash_addovflpage ( Relation  rel,
Buffer  metabuf,
Buffer  buf,
bool  retain_pin 
)

Definition at line 112 of file hashovfl.c.

113{
114 Buffer ovflbuf;
115 Page page;
116 Page ovflpage;
117 HashPageOpaque pageopaque;
118 HashPageOpaque ovflopaque;
119 HashMetaPage metap;
120 Buffer mapbuf = InvalidBuffer;
121 Buffer newmapbuf = InvalidBuffer;
122 BlockNumber blkno;
123 uint32 orig_firstfree;
124 uint32 splitnum;
125 uint32 *freep = NULL;
126 uint32 max_ovflpg;
127 uint32 bit;
128 uint32 bitmap_page_bit;
129 uint32 first_page;
130 uint32 last_bit;
131 uint32 last_page;
132 uint32 i,
133 j;
134 bool page_found = false;
135
136 /*
137 * Write-lock the tail page. Here, we need to maintain locking order such
138 * that, first acquire the lock on tail page of bucket, then on meta page
139 * to find and lock the bitmap page and if it is found, then lock on meta
140 * page is released, then finally acquire the lock on new overflow buffer.
141 * We need this locking order to avoid deadlock with backends that are
142 * doing inserts.
143 *
144 * Note: We could have avoided locking many buffers here if we made two
145 * WAL records for acquiring an overflow page (one to allocate an overflow
146 * page and another to add it to overflow bucket chain). However, doing
147 * so can leak an overflow page, if the system crashes after allocation.
148 * Needless to say, it is better to have a single record from a
149 * performance point of view as well.
150 */
152
153 /* probably redundant... */
155
156 /* loop to find current tail page, in case someone else inserted too */
157 for (;;)
158 {
159 BlockNumber nextblkno;
160
161 page = BufferGetPage(buf);
162 pageopaque = HashPageGetOpaque(page);
163 nextblkno = pageopaque->hasho_nextblkno;
164
165 if (!BlockNumberIsValid(nextblkno))
166 break;
167
168 /* we assume we do not need to write the unmodified page */
169 if (retain_pin)
170 {
171 /* pin will be retained only for the primary bucket page */
172 Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
174 }
175 else
176 _hash_relbuf(rel, buf);
177
178 retain_pin = false;
179
180 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
181 }
182
183 /* Get exclusive lock on the meta page */
185
186 _hash_checkpage(rel, metabuf, LH_META_PAGE);
187 metap = HashPageGetMeta(BufferGetPage(metabuf));
188
189 /* start search at hashm_firstfree */
190 orig_firstfree = metap->hashm_firstfree;
191 first_page = orig_firstfree >> BMPG_SHIFT(metap);
192 bit = orig_firstfree & BMPG_MASK(metap);
193 i = first_page;
194 j = bit / BITS_PER_MAP;
195 bit &= ~(BITS_PER_MAP - 1);
196
197 /* outer loop iterates once per bitmap page */
198 for (;;)
199 {
200 BlockNumber mapblkno;
201 Page mappage;
202 uint32 last_inpage;
203
204 /* want to end search with the last existing overflow page */
205 splitnum = metap->hashm_ovflpoint;
206 max_ovflpg = metap->hashm_spares[splitnum] - 1;
207 last_page = max_ovflpg >> BMPG_SHIFT(metap);
208 last_bit = max_ovflpg & BMPG_MASK(metap);
209
210 if (i > last_page)
211 break;
212
213 Assert(i < metap->hashm_nmaps);
214 mapblkno = metap->hashm_mapp[i];
215
216 if (i == last_page)
217 last_inpage = last_bit;
218 else
219 last_inpage = BMPGSZ_BIT(metap) - 1;
220
221 /* Release exclusive lock on metapage while reading bitmap page */
223
224 mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
225 mappage = BufferGetPage(mapbuf);
226 freep = HashPageGetBitmap(mappage);
227
228 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
229 {
230 if (freep[j] != ALL_SET)
231 {
232 page_found = true;
233
234 /* Reacquire exclusive lock on the meta page */
236
237 /* convert bit to bit number within page */
238 bit += _hash_firstfreebit(freep[j]);
239 bitmap_page_bit = bit;
240
241 /* convert bit to absolute bit number */
242 bit += (i << BMPG_SHIFT(metap));
243 /* Calculate address of the recycled overflow page */
244 blkno = bitno_to_blkno(metap, bit);
245
246 /* Fetch and init the recycled page */
247 ovflbuf = _hash_getinitbuf(rel, blkno);
248
249 goto found;
250 }
251 }
252
253 /* No free space here, try to advance to next map page */
254 _hash_relbuf(rel, mapbuf);
255 mapbuf = InvalidBuffer;
256 i++;
257 j = 0; /* scan from start of next map page */
258 bit = 0;
259
260 /* Reacquire exclusive lock on the meta page */
262 }
263
264 /*
265 * No free pages --- have to extend the relation to add an overflow page.
266 * First, check to see if we have to add a new bitmap page too.
267 */
268 if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
269 {
270 /*
271 * We create the new bitmap page with all pages marked "in use".
272 * Actually two pages in the new bitmap's range will exist
273 * immediately: the bitmap page itself, and the following page which
274 * is the one we return to the caller. Both of these are correctly
275 * marked "in use". Subsequent pages do not exist yet, but it is
276 * convenient to pre-mark them as "in use" too.
277 */
278 bit = metap->hashm_spares[splitnum];
279
280 /* metapage already has a write lock */
281 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
283 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
284 errmsg("out of overflow pages in hash index \"%s\"",
286
287 newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
288 }
289 else
290 {
291 /*
292 * Nothing to do here; since the page will be past the last used page,
293 * we know its bitmap bit was preinitialized to "in use".
294 */
295 }
296
297 /* Calculate address of the new overflow page */
298 bit = BufferIsValid(newmapbuf) ?
299 metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
300 blkno = bitno_to_blkno(metap, bit);
301
302 /*
303 * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
304 * relation length stays in sync with ours. XXX It's annoying to do this
305 * with metapage write lock held; would be better to use a lock that
306 * doesn't block incoming searches.
307 *
308 * It is okay to hold two buffer locks here (one on tail page of bucket
309 * and other on new overflow page) since there cannot be anyone else
310 * contending for access to ovflbuf.
311 */
312 ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
313
314found:
315
316 /*
317 * Do the update. No ereport(ERROR) until changes are logged. We want to
318 * log the changes for bitmap page and overflow page together to avoid
319 * loss of pages in case the new page is added.
320 */
322
323 if (page_found)
324 {
325 Assert(BufferIsValid(mapbuf));
326
327 /* mark page "in use" in the bitmap */
328 SETBIT(freep, bitmap_page_bit);
329 MarkBufferDirty(mapbuf);
330 }
331 else
332 {
333 /* update the count to indicate new overflow page is added */
334 metap->hashm_spares[splitnum]++;
335
336 if (BufferIsValid(newmapbuf))
337 {
338 _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
339 MarkBufferDirty(newmapbuf);
340
341 /* add the new bitmap page to the metapage's list of bitmaps */
342 metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
343 metap->hashm_nmaps++;
344 metap->hashm_spares[splitnum]++;
345 }
346
347 MarkBufferDirty(metabuf);
348
349 /*
350 * for new overflow page, we don't need to explicitly set the bit in
351 * bitmap page, as by default that will be set to "in use".
352 */
353 }
354
355 /*
356 * Adjust hashm_firstfree to avoid redundant searches. But don't risk
357 * changing it if someone moved it while we were searching bitmap pages.
358 */
359 if (metap->hashm_firstfree == orig_firstfree)
360 {
361 metap->hashm_firstfree = bit + 1;
362 MarkBufferDirty(metabuf);
363 }
364
365 /* initialize new overflow page */
366 ovflpage = BufferGetPage(ovflbuf);
367 ovflopaque = HashPageGetOpaque(ovflpage);
370 ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
371 ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
372 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
373
374 MarkBufferDirty(ovflbuf);
375
376 /* logically chain overflow page to previous page */
377 pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
378
380
381 /* XLOG stuff */
382 if (RelationNeedsWAL(rel))
383 {
384 XLogRecPtr recptr;
386
387 xlrec.bmpage_found = page_found;
388 xlrec.bmsize = metap->hashm_bmsize;
389
392
394 XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
395
397
398 if (BufferIsValid(mapbuf))
399 {
401 XLogRegisterBufData(2, &bitmap_page_bit, sizeof(uint32));
402 }
403
404 if (BufferIsValid(newmapbuf))
406
408 XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
409
410 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
411
412 PageSetLSN(BufferGetPage(ovflbuf), recptr);
413 PageSetLSN(BufferGetPage(buf), recptr);
414
415 if (BufferIsValid(mapbuf))
416 PageSetLSN(BufferGetPage(mapbuf), recptr);
417
418 if (BufferIsValid(newmapbuf))
419 PageSetLSN(BufferGetPage(newmapbuf), recptr);
420
421 PageSetLSN(BufferGetPage(metabuf), recptr);
422 }
423
425
426 if (retain_pin)
428 else
429 _hash_relbuf(rel, buf);
430
431 if (BufferIsValid(mapbuf))
432 _hash_relbuf(rel, mapbuf);
433
435
436 if (BufferIsValid(newmapbuf))
437 _hash_relbuf(rel, newmapbuf);
438
439 return ovflbuf;
440}

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert(), bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid(), xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, xl_hash_add_ovfl_page::bmsize, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, HASH_MAX_BITMAPS, HASH_WRITE, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBuffer, j, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().

◆ _hash_binsearch()

OffsetNumber _hash_binsearch ( Page  page,
uint32  hash_value 
)

Definition at line 350 of file hashutil.c.

351{
354
355 /* Loop invariant: lower <= desired place <= upper */
356 upper = PageGetMaxOffsetNumber(page) + 1;
358
359 while (upper > lower)
360 {
361 OffsetNumber off;
362 IndexTuple itup;
363 uint32 hashkey;
364
365 off = (upper + lower) / 2;
367
368 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
369 hashkey = _hash_get_indextuple_hashkey(itup);
370 if (hashkey < hash_value)
371 lower = off + 1;
372 else
373 upper = off;
374 }
375
376 return lower;
377}

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and upper().

Referenced by _hash_pgaddmultitup(), _hash_pgaddtup(), and _hash_readpage().

◆ _hash_binsearch_last()

OffsetNumber _hash_binsearch_last ( Page  page,
uint32  hash_value 
)

Definition at line 388 of file hashutil.c.

389{
392
393 /* Loop invariant: lower <= desired place <= upper */
396
397 while (upper > lower)
398 {
399 IndexTuple itup;
400 OffsetNumber off;
401 uint32 hashkey;
402
403 off = (upper + lower + 1) / 2;
405
406 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
407 hashkey = _hash_get_indextuple_hashkey(itup);
408 if (hashkey > hash_value)
409 upper = off - 1;
410 else
411 lower = off;
412 }
413
414 return lower;
415}

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and upper().

Referenced by _hash_readpage().

◆ _hash_checkpage()

void _hash_checkpage ( Relation  rel,
Buffer  buf,
int  flags 
)

Definition at line 210 of file hashutil.c.

211{
212 Page page = BufferGetPage(buf);
213
214 /*
215 * ReadBuffer verifies that every newly-read page passes
216 * PageHeaderIsValid, which means it either contains a reasonably sane
217 * page header or is all-zero. We have to defend against the all-zero
218 * case, however.
219 */
220 if (PageIsNew(page))
222 (errcode(ERRCODE_INDEX_CORRUPTED),
223 errmsg("index \"%s\" contains unexpected zero page at block %u",
226 errhint("Please REINDEX it.")));
227
228 /*
229 * Additionally check that the special area looks sane.
230 */
231 if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
233 (errcode(ERRCODE_INDEX_CORRUPTED),
234 errmsg("index \"%s\" contains corrupted page at block %u",
237 errhint("Please REINDEX it.")));
238
239 if (flags)
240 {
241 HashPageOpaque opaque = HashPageGetOpaque(page);
242
243 if ((opaque->hasho_flag & flags) == 0)
245 (errcode(ERRCODE_INDEX_CORRUPTED),
246 errmsg("index \"%s\" contains corrupted page at block %u",
249 errhint("Please REINDEX it.")));
250 }
251
252 /*
253 * When checking the metapage, also verify magic number and version.
254 */
255 if (flags == LH_META_PAGE)
256 {
257 HashMetaPage metap = HashPageGetMeta(page);
258
259 if (metap->hashm_magic != HASH_MAGIC)
261 (errcode(ERRCODE_INDEX_CORRUPTED),
262 errmsg("index \"%s\" is not a hash index",
264
265 if (metap->hashm_version != HASH_VERSION)
267 (errcode(ERRCODE_INDEX_CORRUPTED),
268 errmsg("index \"%s\" has wrong hash version",
270 errhint("Please REINDEX it.")));
271 }
272}

References buf, BufferGetBlockNumber(), BufferGetPage(), ereport, errcode(), errhint(), errmsg(), ERROR, HASH_MAGIC, HASH_VERSION, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_flag, HashPageGetMeta, HashPageGetOpaque, LH_META_PAGE, MAXALIGN, PageGetSpecialSize(), PageIsNew(), and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_readpage(), and hashbulkdelete().

◆ _hash_checkqual()

bool _hash_checkqual ( IndexScanDesc  scan,
IndexTuple  itup 
)

Definition at line 31 of file hashutil.c.

32{
33 /*
34 * Currently, we can't check any of the scan conditions since we do not
35 * have the original index entry value to supply to the sk_func. Always
36 * return true; we expect that hashgettuple already set the recheck flag
37 * to make the main indexscan code do it.
38 */
39#ifdef NOT_USED
41 ScanKey key = scan->keyData;
42 int scanKeySize = scan->numberOfKeys;
43
44 while (scanKeySize > 0)
45 {
46 Datum datum;
47 bool isNull;
48 Datum test;
49
50 datum = index_getattr(itup,
51 key->sk_attno,
52 tupdesc,
53 &isNull);
54
55 /* assume sk_func is strict */
56 if (isNull)
57 return false;
58 if (key->sk_flags & SK_ISNULL)
59 return false;
60
61 test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
62 datum, key->sk_argument);
63
64 if (!DatumGetBool(test))
65 return false;
66
67 key++;
68 scanKeySize--;
69 }
70#endif
71
72 return true;
73}

References DatumGetBool(), FunctionCall2Coll(), index_getattr(), IndexScanDescData::indexRelation, sort-test::key, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, RelationGetDescr, SK_ISNULL, and test().

Referenced by _hash_load_qualified_items().

◆ _hash_convert_tuple()

bool _hash_convert_tuple ( Relation  index,
Datum user_values,
bool *  user_isnull,
Datum index_values,
bool *  index_isnull 
)

Definition at line 318 of file hashutil.c.

321{
322 uint32 hashkey;
323
324 /*
325 * We do not insert null values into hash indexes. This is okay because
326 * the only supported search operator is '=', and we assume it is strict.
327 */
328 if (user_isnull[0])
329 return false;
330
331 hashkey = _hash_datum2hashkey(index, user_values[0]);
332 index_values[0] = UInt32GetDatum(hashkey);
333 index_isnull[0] = false;
334 return true;
335}

References _hash_datum2hashkey(), and UInt32GetDatum().

Referenced by hashbuildCallback(), and hashinsert().

◆ _hash_datum2hashkey()

uint32 _hash_datum2hashkey ( Relation  rel,
Datum  key 
)

Definition at line 82 of file hashutil.c.

83{
84 FmgrInfo *procinfo;
85 Oid collation;
86
87 /* XXX assumes index has only one attribute */
88 procinfo = index_getprocinfo(rel, 1, HASHSTANDARD_PROC);
89 collation = rel->rd_indcollation[0];
90
91 return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
92}

References DatumGetUInt32(), FunctionCall1Coll(), HASHSTANDARD_PROC, index_getprocinfo(), sort-test::key, and RelationData::rd_indcollation.

Referenced by _hash_convert_tuple(), and _hash_first().

◆ _hash_datum2hashkey_type()

uint32 _hash_datum2hashkey_type ( Relation  rel,
Datum  key,
Oid  keytype 
)

Definition at line 102 of file hashutil.c.

103{
104 RegProcedure hash_proc;
105 Oid collation;
106
107 /* XXX assumes index has only one attribute */
108 hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
109 keytype,
110 keytype,
112 if (!RegProcedureIsValid(hash_proc))
113 elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
114 HASHSTANDARD_PROC, keytype, keytype,
116 collation = rel->rd_indcollation[0];
117
118 return DatumGetUInt32(OidFunctionCall1Coll(hash_proc, collation, key));
119}

References DatumGetUInt32(), elog, ERROR, get_opfamily_proc(), HASHSTANDARD_PROC, sort-test::key, OidFunctionCall1Coll(), RelationData::rd_indcollation, RelationData::rd_opfamily, RegProcedureIsValid, and RelationGetRelationName.

Referenced by _hash_first().

◆ _hash_doinsert()

void _hash_doinsert ( Relation  rel,
IndexTuple  itup,
Relation  heapRel,
bool  sorted 
)

Definition at line 38 of file hashinsert.c.

39{
41 Buffer bucket_buf;
42 Buffer metabuf;
43 HashMetaPage metap;
44 HashMetaPage usedmetap = NULL;
45 Page metapage;
46 Page page;
47 HashPageOpaque pageopaque;
48 Size itemsz;
49 bool do_expand;
50 uint32 hashkey;
51 Bucket bucket;
52 OffsetNumber itup_off;
53
54 /*
55 * Get the hash key for the item (it's stored in the index tuple itself).
56 */
57 hashkey = _hash_get_indextuple_hashkey(itup);
58
59 /* compute item size too */
60 itemsz = IndexTupleSize(itup);
61 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
62 * need to be consistent */
63
64restart_insert:
65
66 /*
67 * Read the metapage. We don't lock it yet; HashMaxItemSize() will
68 * examine pd_pagesize_version, but that can't change so we can examine it
69 * without a lock.
70 */
72 metapage = BufferGetPage(metabuf);
73
74 /*
75 * Check whether the item can fit on a hash page at all. (Eventually, we
76 * ought to try to apply TOAST methods if not.) Note that at this point,
77 * itemsz doesn't include the ItemId.
78 *
79 * XXX this is useless code if we are only storing hash keys.
80 */
81 if (itemsz > HashMaxItemSize(metapage))
83 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
84 errmsg("index row size %zu exceeds hash maximum %zu",
85 itemsz, HashMaxItemSize(metapage)),
86 errhint("Values larger than a buffer page cannot be indexed.")));
87
88 /* Lock the primary bucket page for the target bucket. */
90 &usedmetap);
91 Assert(usedmetap != NULL);
92
94
95 /* remember the primary bucket buffer to release the pin on it at end. */
96 bucket_buf = buf;
97
98 page = BufferGetPage(buf);
99 pageopaque = HashPageGetOpaque(page);
100 bucket = pageopaque->hasho_bucket;
101
102 /*
103 * If this bucket is in the process of being split, try to finish the
104 * split before inserting, because that might create room for the
105 * insertion to proceed without allocating an additional overflow page.
106 * It's only interesting to finish the split if we're trying to insert
107 * into the bucket from which we're removing tuples (the "old" bucket),
108 * not if we're trying to insert into the bucket into which tuples are
109 * being moved (the "new" bucket).
110 */
111 if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
112 {
113 /* release the lock on bucket buffer, before completing the split. */
115
116 _hash_finish_split(rel, metabuf, buf, bucket,
117 usedmetap->hashm_maxbucket,
118 usedmetap->hashm_highmask,
119 usedmetap->hashm_lowmask);
120
121 /* release the pin on old and meta buffer. retry for insert. */
122 _hash_dropbuf(rel, buf);
123 _hash_dropbuf(rel, metabuf);
124 goto restart_insert;
125 }
126
127 /* Do the insertion */
128 while (PageGetFreeSpace(page) < itemsz)
129 {
130 BlockNumber nextblkno;
131
132 /*
133 * Check if current page has any DEAD tuples. If yes, delete these
134 * tuples and see if we can get a space for the new item to be
135 * inserted before moving to the next page in the bucket chain.
136 */
137 if (H_HAS_DEAD_TUPLES(pageopaque))
138 {
139
141 {
142 _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
143
144 if (PageGetFreeSpace(page) >= itemsz)
145 break; /* OK, now we have enough space */
146 }
147 }
148
149 /*
150 * no space on this page; check for an overflow page
151 */
152 nextblkno = pageopaque->hasho_nextblkno;
153
154 if (BlockNumberIsValid(nextblkno))
155 {
156 /*
157 * ovfl page exists; go get it. if it doesn't have room, we'll
158 * find out next pass through the loop test above. we always
159 * release both the lock and pin if this is an overflow page, but
160 * only the lock if this is the primary bucket page, since the pin
161 * on the primary bucket must be retained throughout the scan.
162 */
163 if (buf != bucket_buf)
164 _hash_relbuf(rel, buf);
165 else
167 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
168 page = BufferGetPage(buf);
169 }
170 else
171 {
172 /*
173 * we're at the end of the bucket chain and we haven't found a
174 * page with enough room. allocate a new overflow page.
175 */
176
177 /* release our write lock without modifying buffer */
179
180 /* chain to a new overflow page */
181 buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
182 page = BufferGetPage(buf);
183
184 /* should fit now, given test above */
185 Assert(PageGetFreeSpace(page) >= itemsz);
186 }
187 pageopaque = HashPageGetOpaque(page);
189 Assert(pageopaque->hasho_bucket == bucket);
190 }
191
192 /*
193 * Write-lock the metapage so we can increment the tuple count. After
194 * incrementing it, check to see if it's time for a split.
195 */
197
198 /* Do the update. No ereport(ERROR) until changes are logged */
200
201 /* found page with enough space, so add the item here */
202 itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
204
205 /* metapage operations */
206 metap = HashPageGetMeta(metapage);
207 metap->hashm_ntuples += 1;
208
209 /* Make sure this stays in sync with _hash_expandtable() */
210 do_expand = metap->hashm_ntuples >
211 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
212
213 MarkBufferDirty(metabuf);
214
215 /* XLOG stuff */
216 if (RelationNeedsWAL(rel))
217 {
218 xl_hash_insert xlrec;
219 XLogRecPtr recptr;
220
221 xlrec.offnum = itup_off;
222
225
227
229 XLogRegisterBufData(0, itup, IndexTupleSize(itup));
230
231 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
232
233 PageSetLSN(BufferGetPage(buf), recptr);
234 PageSetLSN(BufferGetPage(metabuf), recptr);
235 }
236
238
239 /* drop lock on metapage, but keep pin */
241
242 /*
243 * Release the modified page and ensure to release the pin on primary
244 * page.
245 */
246 _hash_relbuf(rel, buf);
247 if (buf != bucket_buf)
248 _hash_dropbuf(rel, bucket_buf);
249
250 /* Attempt to split if a split is needed */
251 if (do_expand)
252 _hash_expandtable(rel, metabuf);
253
254 /* Finally drop our pin on the metapage */
255 _hash_dropbuf(rel, metabuf);
256}

References _hash_addovflpage(), _hash_dropbuf(), _hash_expandtable(), _hash_finish_split(), _hash_get_indextuple_hashkey(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_pgaddtup(), _hash_relbuf(), _hash_vacuum_one_page(), Assert(), BlockNumberIsValid(), buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictIn(), END_CRIT_SECTION, ereport, errcode(), errhint(), errmsg(), ERROR, H_BUCKET_BEING_SPLIT, H_HAS_DEAD_TUPLES, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMaxItemSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetMeta, HashPageGetOpaque, IndexTupleSize(), InvalidBuffer, IsBufferCleanupOK(), LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MarkBufferDirty(), MAXALIGN, xl_hash_insert::offnum, PageGetFreeSpace(), PageSetLSN(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashInsert, START_CRIT_SECTION, XLOG_HASH_INSERT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _h_indexbuild(), hashbuildCallback(), and hashinsert().

◆ _hash_dropbuf()

void _hash_dropbuf ( Relation  rel,
Buffer  buf 
)

◆ _hash_dropscanbuf()

void _hash_dropscanbuf ( Relation  rel,
HashScanOpaque  so 
)

Definition at line 289 of file hashpage.c.

290{
291 /* release pin we hold on primary bucket page */
292 if (BufferIsValid(so->hashso_bucket_buf) &&
293 so->hashso_bucket_buf != so->currPos.buf)
294 _hash_dropbuf(rel, so->hashso_bucket_buf);
295 so->hashso_bucket_buf = InvalidBuffer;
296
297 /* release pin we hold on primary bucket page of bucket being split */
298 if (BufferIsValid(so->hashso_split_bucket_buf) &&
299 so->hashso_split_bucket_buf != so->currPos.buf)
300 _hash_dropbuf(rel, so->hashso_split_bucket_buf);
301 so->hashso_split_bucket_buf = InvalidBuffer;
302
303 /* release any pin we still hold */
304 if (BufferIsValid(so->currPos.buf))
305 _hash_dropbuf(rel, so->currPos.buf);
306 so->currPos.buf = InvalidBuffer;
307
308 /* reset split scan */
309 so->hashso_buc_populated = false;
310 so->hashso_buc_split = false;
311}
bool hashso_buc_split
Definition: hash.h:180
HashScanPosData currPos
Definition: hash.h:189
bool hashso_buc_populated
Definition: hash.h:174
Buffer hashso_split_bucket_buf
Definition: hash.h:171
Buffer hashso_bucket_buf
Definition: hash.h:164
Buffer buf
Definition: hash.h:111

References _hash_dropbuf(), HashScanPosData::buf, BufferIsValid(), HashScanOpaqueData::currPos, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, and InvalidBuffer.

Referenced by _hash_next(), hashendscan(), and hashrescan().

◆ _hash_expandtable()

void _hash_expandtable ( Relation  rel,
Buffer  metabuf 
)

Definition at line 614 of file hashpage.c.

615{
616 HashMetaPage metap;
617 Bucket old_bucket;
618 Bucket new_bucket;
619 uint32 spare_ndx;
620 BlockNumber start_oblkno;
621 BlockNumber start_nblkno;
622 Buffer buf_nblkno;
623 Buffer buf_oblkno;
624 Page opage;
625 Page npage;
626 HashPageOpaque oopaque;
627 HashPageOpaque nopaque;
628 uint32 maxbucket;
629 uint32 highmask;
630 uint32 lowmask;
631 bool metap_update_masks = false;
632 bool metap_update_splitpoint = false;
633
634restart_expand:
635
636 /*
637 * Write-lock the meta page. It used to be necessary to acquire a
638 * heavyweight lock to begin a split, but that is no longer required.
639 */
641
642 _hash_checkpage(rel, metabuf, LH_META_PAGE);
643 metap = HashPageGetMeta(BufferGetPage(metabuf));
644
645 /*
646 * Check to see if split is still needed; someone else might have already
647 * done one while we waited for the lock.
648 *
649 * Make sure this stays in sync with _hash_doinsert()
650 */
651 if (metap->hashm_ntuples <=
652 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
653 goto fail;
654
655 /*
656 * Can't split anymore if maxbucket has reached its maximum possible
657 * value.
658 *
659 * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
660 * the calculation maxbucket+1 mustn't overflow). Currently we restrict
661 * to half that to prevent failure of pg_ceil_log2_32() and insufficient
662 * space in hashm_spares[]. It's moot anyway because an index with 2^32
663 * buckets would certainly overflow BlockNumber and hence
664 * _hash_alloc_buckets() would fail, but if we supported buckets smaller
665 * than a disk block then this would be an independent constraint.
666 *
667 * If you change this, see also the maximum initial number of buckets in
668 * _hash_init().
669 */
670 if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
671 goto fail;
672
673 /*
674 * Determine which bucket is to be split, and attempt to take cleanup lock
675 * on the old bucket. If we can't get the lock, give up.
676 *
677 * The cleanup lock protects us not only against other backends, but
678 * against our own backend as well.
679 *
680 * The cleanup lock is mainly to protect the split from concurrent
681 * inserts. See src/backend/access/hash/README, Lock Definitions for
682 * further details. Due to this locking restriction, if there is any
 683 * pending scan, the split will give up, which is not good but harmless.
684 */
685 new_bucket = metap->hashm_maxbucket + 1;
686
687 old_bucket = (new_bucket & metap->hashm_lowmask);
688
689 start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
690
691 buf_oblkno = _hash_getbuf_with_condlock_cleanup(rel, start_oblkno, LH_BUCKET_PAGE);
692 if (!buf_oblkno)
693 goto fail;
694
695 opage = BufferGetPage(buf_oblkno);
696 oopaque = HashPageGetOpaque(opage);
697
698 /*
 699 * We want to finish any pending split from this bucket: there is no
 700 * apparent benefit in skipping it, and finishing a split that involves
 701 * multiple buckets would complicate the code, especially if the new
 702 * split also failed. We don't need to consider the new bucket for
 703 * completing the split here, because a re-split of the new bucket cannot
 704 * start while there is still a pending split from the old bucket.
705 */
706 if (H_BUCKET_BEING_SPLIT(oopaque))
707 {
708 /*
709 * Copy bucket mapping info now; refer the comment in code below where
710 * we copy this information before calling _hash_splitbucket to see
711 * why this is okay.
712 */
713 maxbucket = metap->hashm_maxbucket;
714 highmask = metap->hashm_highmask;
715 lowmask = metap->hashm_lowmask;
716
717 /*
718 * Release the lock on metapage and old_bucket, before completing the
719 * split.
720 */
722 LockBuffer(buf_oblkno, BUFFER_LOCK_UNLOCK);
723
724 _hash_finish_split(rel, metabuf, buf_oblkno, old_bucket, maxbucket,
725 highmask, lowmask);
726
727 /* release the pin on old buffer and retry for expand. */
728 _hash_dropbuf(rel, buf_oblkno);
729
730 goto restart_expand;
731 }
732
733 /*
 734 * Clean up the tuples remaining from the previous split. This operation
 735 * requires a cleanup lock, and we already have one on the old bucket, so
 736 * let's do it. We also don't want to allow further splits from the bucket
 737 * until the garbage of the previous split is cleaned. This has two
 738 * advantages: first, it helps avoid bloat due to the garbage, and
 739 * second, during cleanup of the bucket we are always sure that the
 740 * garbage tuples belong to the most recently split bucket. If, instead,
 741 * we allowed cleanup of the bucket after the meta page was updated to
 742 * indicate the new split but before the actual split, the cleanup
 743 * operation could not decide whether a tuple had been moved to the newly
 744 * created bucket, and might end up deleting such tuples.
745 */
746 if (H_NEEDS_SPLIT_CLEANUP(oopaque))
747 {
748 /*
749 * Copy bucket mapping info now; refer to the comment in code below
750 * where we copy this information before calling _hash_splitbucket to
751 * see why this is okay.
752 */
753 maxbucket = metap->hashm_maxbucket;
754 highmask = metap->hashm_highmask;
755 lowmask = metap->hashm_lowmask;
756
757 /* Release the metapage lock. */
759
760 hashbucketcleanup(rel, old_bucket, buf_oblkno, start_oblkno, NULL,
761 maxbucket, highmask, lowmask, NULL, NULL, true,
762 NULL, NULL);
763
764 _hash_dropbuf(rel, buf_oblkno);
765
766 goto restart_expand;
767 }
768
769 /*
770 * There shouldn't be any active scan on new bucket.
771 *
772 * Note: it is safe to compute the new bucket's blkno here, even though we
773 * may still need to update the BUCKET_TO_BLKNO mapping. This is because
774 * the current value of hashm_spares[hashm_ovflpoint] correctly shows
775 * where we are going to put a new splitpoint's worth of buckets.
776 */
777 start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
778
779 /*
780 * If the split point is increasing we need to allocate a new batch of
781 * bucket pages.
782 */
783 spare_ndx = _hash_spareindex(new_bucket + 1);
784 if (spare_ndx > metap->hashm_ovflpoint)
785 {
786 uint32 buckets_to_add;
787
788 Assert(spare_ndx == metap->hashm_ovflpoint + 1);
789
790 /*
791 * We treat allocation of buckets as a separate WAL-logged action.
 792 * Even if we fail after this operation, we won't leak bucket pages;
793 * rather, the next split will consume this space. In any case, even
794 * without failure we don't use all the space in one split operation.
795 */
796 buckets_to_add = _hash_get_totalbuckets(spare_ndx) - new_bucket;
797 if (!_hash_alloc_buckets(rel, start_nblkno, buckets_to_add))
798 {
799 /* can't split due to BlockNumber overflow */
800 _hash_relbuf(rel, buf_oblkno);
801 goto fail;
802 }
803 }
804
805 /*
806 * Physically allocate the new bucket's primary page. We want to do this
807 * before changing the metapage's mapping info, in case we can't get the
808 * disk space.
809 *
810 * XXX It doesn't make sense to call _hash_getnewbuf first, zeroing the
811 * buffer, and then only afterwards check whether we have a cleanup lock.
812 * However, since no scan can be accessing the buffer yet, any concurrent
813 * accesses will just be from processes like the bgwriter or checkpointer
814 * which don't care about its contents, so it doesn't really matter.
815 */
816 buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
817 if (!IsBufferCleanupOK(buf_nblkno))
818 {
819 _hash_relbuf(rel, buf_oblkno);
820 _hash_relbuf(rel, buf_nblkno);
821 goto fail;
822 }
823
824 /*
825 * Since we are scribbling on the pages in the shared buffers, establish a
826 * critical section. Any failure in this next code leaves us with a big
827 * problem: the metapage is effectively corrupt but could get written back
828 * to disk.
829 */
831
832 /*
833 * Okay to proceed with split. Update the metapage bucket mapping info.
834 */
835 metap->hashm_maxbucket = new_bucket;
836
837 if (new_bucket > metap->hashm_highmask)
838 {
839 /* Starting a new doubling */
840 metap->hashm_lowmask = metap->hashm_highmask;
841 metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
842 metap_update_masks = true;
843 }
844
845 /*
846 * If the split point is increasing we need to adjust the hashm_spares[]
847 * array and hashm_ovflpoint so that future overflow pages will be created
848 * beyond this new batch of bucket pages.
849 */
850 if (spare_ndx > metap->hashm_ovflpoint)
851 {
852 metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
853 metap->hashm_ovflpoint = spare_ndx;
854 metap_update_splitpoint = true;
855 }
856
857 MarkBufferDirty(metabuf);
858
859 /*
860 * Copy bucket mapping info now; this saves re-accessing the meta page
861 * inside _hash_splitbucket's inner loop. Note that once we drop the
862 * split lock, other splits could begin, so these values might be out of
863 * date before _hash_splitbucket finishes. That's okay, since all it
864 * needs is to tell which of these two buckets to map hashkeys into.
865 */
866 maxbucket = metap->hashm_maxbucket;
867 highmask = metap->hashm_highmask;
868 lowmask = metap->hashm_lowmask;
869
870 opage = BufferGetPage(buf_oblkno);
871 oopaque = HashPageGetOpaque(opage);
872
873 /*
874 * Mark the old bucket to indicate that split is in progress. (At
875 * operation end, we will clear the split-in-progress flag.) Also, for a
876 * primary bucket page, hasho_prevblkno stores the number of buckets that
877 * existed as of the last split, so we must update that value here.
878 */
880 oopaque->hasho_prevblkno = maxbucket;
881
882 MarkBufferDirty(buf_oblkno);
883
884 npage = BufferGetPage(buf_nblkno);
885
886 /*
887 * initialize the new bucket's primary page and mark it to indicate that
888 * split is in progress.
889 */
890 nopaque = HashPageGetOpaque(npage);
891 nopaque->hasho_prevblkno = maxbucket;
893 nopaque->hasho_bucket = new_bucket;
895 nopaque->hasho_page_id = HASHO_PAGE_ID;
896
897 MarkBufferDirty(buf_nblkno);
898
899 /* XLOG stuff */
900 if (RelationNeedsWAL(rel))
901 {
903 XLogRecPtr recptr;
904
905 xlrec.new_bucket = maxbucket;
906 xlrec.old_bucket_flag = oopaque->hasho_flag;
907 xlrec.new_bucket_flag = nopaque->hasho_flag;
908 xlrec.flags = 0;
909
911
912 XLogRegisterBuffer(0, buf_oblkno, REGBUF_STANDARD);
913 XLogRegisterBuffer(1, buf_nblkno, REGBUF_WILL_INIT);
915
916 if (metap_update_masks)
917 {
919 XLogRegisterBufData(2, &metap->hashm_lowmask, sizeof(uint32));
920 XLogRegisterBufData(2, &metap->hashm_highmask, sizeof(uint32));
921 }
922
923 if (metap_update_splitpoint)
924 {
927 sizeof(uint32));
929 &metap->hashm_spares[metap->hashm_ovflpoint],
930 sizeof(uint32));
931 }
932
934
935 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_ALLOCATE_PAGE);
936
937 PageSetLSN(BufferGetPage(buf_oblkno), recptr);
938 PageSetLSN(BufferGetPage(buf_nblkno), recptr);
939 PageSetLSN(BufferGetPage(metabuf), recptr);
940 }
941
943
944 /* drop lock, but keep pin */
946
947 /* Relocate records to the new bucket */
948 _hash_splitbucket(rel, metabuf,
949 old_bucket, new_bucket,
950 buf_oblkno, buf_nblkno, NULL,
951 maxbucket, highmask, lowmask);
952
953 /* all done, now release the pins on primary buckets. */
954 _hash_dropbuf(rel, buf_oblkno);
955 _hash_dropbuf(rel, buf_nblkno);
956
957 return;
958
 959 /* Here if we decide not to split, or fail to acquire the old bucket lock */
960fail:
961
962 /* We didn't write the metapage, so just drop lock */
964}
void hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy, uint32 maxbucket, uint32 highmask, uint32 lowmask, double *tuples_removed, double *num_index_tuples, bool split_cleanup, IndexBulkDeleteCallback callback, void *callback_state)
Definition: hash.c:690
#define LH_BUCKET_BEING_POPULATED
Definition: hash.h:58
#define BUCKET_TO_BLKNO(metap, B)
Definition: hash.h:39
#define H_NEEDS_SPLIT_CLEANUP(opaque)
Definition: hash.h:90
#define LH_BUCKET_BEING_SPLIT
Definition: hash.h:59
#define XLOG_HASH_SPLIT_ALLOCATE_PAGE
Definition: hash_xlog.h:31
#define SizeOfHashSplitAllocPage
Definition: hash_xlog.h:100
#define XLH_SPLIT_META_UPDATE_SPLITPOINT
Definition: hash_xlog.h:46
#define XLH_SPLIT_META_UPDATE_MASKS
Definition: hash_xlog.h:45
Buffer _hash_getbuf_with_condlock_cleanup(Relation rel, BlockNumber blkno, int flags)
Definition: hashpage.c:96
static void _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, Buffer obuf, Buffer nbuf, HTAB *htab, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition: hashpage.c:1073
static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
Definition: hashpage.c:992
uint32 _hash_spareindex(uint32 num_bucket)
Definition: hashutil.c:142
uint32 _hash_get_totalbuckets(uint32 splitpoint_phase)
Definition: hashutil.c:174

References _hash_alloc_buckets(), _hash_checkpage(), _hash_dropbuf(), _hash_finish_split(), _hash_get_totalbuckets(), _hash_getbuf_with_condlock_cleanup(), _hash_getnewbuf(), _hash_relbuf(), _hash_spareindex(), _hash_splitbucket(), Assert(), BUCKET_TO_BLKNO, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, xl_hash_split_allocate_page::flags, H_BUCKET_BEING_SPLIT, H_NEEDS_SPLIT_CLEANUP, hashbucketcleanup(), HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, InvalidBlockNumber, IsBufferCleanupOK(), LH_BUCKET_BEING_POPULATED, LH_BUCKET_BEING_SPLIT, LH_BUCKET_PAGE, LH_META_PAGE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), xl_hash_split_allocate_page::new_bucket, xl_hash_split_allocate_page::new_bucket_flag, xl_hash_split_allocate_page::old_bucket_flag, PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, SizeOfHashSplitAllocPage, START_CRIT_SECTION, XLH_SPLIT_META_UPDATE_MASKS, XLH_SPLIT_META_UPDATE_SPLITPOINT, XLOG_HASH_SPLIT_ALLOCATE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert().
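To see the metapage mask bookkeeping in action, the standalone sketch below (plain C, not backend code; the starting values are illustrative, matching a freshly built four-bucket index) replays a few splits: each split picks old_bucket = new_bucket & lowmask, and once new_bucket outgrows highmask a new doubling begins, exactly as in the metapage update above.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* assumed starting state: buckets 0..3 exist, lowmask 3, highmask 7 */
    uint32_t maxbucket = 3;
    uint32_t lowmask = 3;
    uint32_t highmask = 7;

    for (int split = 0; split < 6; split++)
    {
        uint32_t new_bucket = maxbucket + 1;
        uint32_t old_bucket = new_bucket & lowmask;     /* bucket being split */

        if (new_bucket > highmask)
        {
            /* starting a new doubling, as in _hash_expandtable() */
            lowmask = highmask;
            highmask = new_bucket | lowmask;
        }
        maxbucket = new_bucket;
        printf("split bucket %u -> bucket %u (lowmask=%u, highmask=%u)\n",
               old_bucket, new_bucket, lowmask, highmask);
    }
    return 0;
}

The fifth iteration creates bucket 8, which is the point where the masks double from 3/7 to 7/15.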

◆ _hash_finish_split()

void _hash_finish_split ( Relation  rel,
Buffer  metabuf,
Buffer  obuf,
Bucket  obucket,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask 
)

Definition at line 1356 of file hashpage.c.

1358{
1359 HASHCTL hash_ctl;
1360 HTAB *tidhtab;
1361 Buffer bucket_nbuf = InvalidBuffer;
1362 Buffer nbuf;
1363 Page npage;
1364 BlockNumber nblkno;
1365 BlockNumber bucket_nblkno;
1366 HashPageOpaque npageopaque;
1367 Bucket nbucket;
1368 bool found;
1369
1370 /* Initialize hash tables used to track TIDs */
1371 hash_ctl.keysize = sizeof(ItemPointerData);
1372 hash_ctl.entrysize = sizeof(ItemPointerData);
1373 hash_ctl.hcxt = CurrentMemoryContext;
1374
1375 tidhtab =
1376 hash_create("bucket ctids",
1377 256, /* arbitrary initial size */
1378 &hash_ctl,
1380
1381 bucket_nblkno = nblkno = _hash_get_newblock_from_oldbucket(rel, obucket);
1382
1383 /*
1384 * Scan the new bucket and build hash table of TIDs
1385 */
1386 for (;;)
1387 {
1388 OffsetNumber noffnum;
1389 OffsetNumber nmaxoffnum;
1390
1391 nbuf = _hash_getbuf(rel, nblkno, HASH_READ,
1393
1394 /* remember the primary bucket buffer to acquire cleanup lock on it. */
1395 if (nblkno == bucket_nblkno)
1396 bucket_nbuf = nbuf;
1397
1398 npage = BufferGetPage(nbuf);
1399 npageopaque = HashPageGetOpaque(npage);
1400
1401 /* Scan each tuple in new page */
1402 nmaxoffnum = PageGetMaxOffsetNumber(npage);
1403 for (noffnum = FirstOffsetNumber;
1404 noffnum <= nmaxoffnum;
1405 noffnum = OffsetNumberNext(noffnum))
1406 {
1407 IndexTuple itup;
1408
1409 /* Fetch the item's TID and insert it in hash table. */
1410 itup = (IndexTuple) PageGetItem(npage,
1411 PageGetItemId(npage, noffnum));
1412
1413 (void) hash_search(tidhtab, &itup->t_tid, HASH_ENTER, &found);
1414
1415 Assert(!found);
1416 }
1417
1418 nblkno = npageopaque->hasho_nextblkno;
1419
1420 /*
1421 * release our lock without modifying the buffer, making sure to
1422 * retain the pin on the primary bucket.
1423 */
1424 if (nbuf == bucket_nbuf)
1426 else
1427 _hash_relbuf(rel, nbuf);
1428
1429 /* Exit loop if no more overflow pages in new bucket */
1430 if (!BlockNumberIsValid(nblkno))
1431 break;
1432 }
1433
1434 /*
1435 * Conditionally get the cleanup lock on old and new buckets to perform
1436 * the split operation. If we don't get the cleanup locks, silently give
1437 * up; the next insertion on the old bucket will try again to complete the
1438 * split.
1439 */
1441 {
1442 hash_destroy(tidhtab);
1443 return;
1444 }
1445 if (!ConditionalLockBufferForCleanup(bucket_nbuf))
1446 {
1448 hash_destroy(tidhtab);
1449 return;
1450 }
1451
1452 npage = BufferGetPage(bucket_nbuf);
1453 npageopaque = HashPageGetOpaque(npage);
1454 nbucket = npageopaque->hasho_bucket;
1455
1456 _hash_splitbucket(rel, metabuf, obucket,
1457 nbucket, obuf, bucket_nbuf, tidhtab,
1458 maxbucket, highmask, lowmask);
1459
1460 _hash_dropbuf(rel, bucket_nbuf);
1461 hash_destroy(tidhtab);
1462}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5785
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_READ
Definition: hash.h:339
BlockNumber _hash_get_newblock_from_oldbucket(Relation rel, Bucket old_bucket)
Definition: hashutil.c:461
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
struct ItemPointerData ItemPointerData
MemoryContext CurrentMemoryContext
Definition: mcxt.c:159
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
Definition: dynahash.c:220
ItemPointerData t_tid
Definition: itup.h:37

References _hash_dropbuf(), _hash_get_newblock_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), _hash_splitbucket(), Assert(), BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), CurrentMemoryContext, HASHCTL::entrysize, FirstOffsetNumber, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, HASH_READ, hash_search(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, HASHCTL::hcxt, InvalidBuffer, HASHCTL::keysize, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, LockBuffer(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and IndexTupleData::t_tid.

Referenced by _hash_doinsert(), and _hash_expandtable().
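The TID table built in the first half of the function is ordinary dynahash usage. The fragment below is a condensed sketch of that setup; it assumes the backend environment (utils/hsearch.h, a valid CurrentMemoryContext) rather than being a standalone program, and the tid variable is only a placeholder for a TID read from the new bucket's pages.

HASHCTL         hash_ctl;
HTAB           *tidhtab;
bool            found;
ItemPointerData tid;    /* placeholder: a TID found in the new bucket */

/* key and entry are both just the TID itself, so HASH_BLOBS hashing suffices */
hash_ctl.keysize = sizeof(ItemPointerData);
hash_ctl.entrysize = sizeof(ItemPointerData);
hash_ctl.hcxt = CurrentMemoryContext;

tidhtab = hash_create("bucket ctids", 256, &hash_ctl,
                      HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

/* remember a TID already present in the new bucket; _hash_splitbucket()
 * later skips old-bucket tuples whose TIDs are found in this table */
(void) hash_search(tidhtab, &tid, HASH_ENTER, &found);

/* once the split has been completed */
hash_destroy(tidhtab);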

◆ _hash_first()

bool _hash_first ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 288 of file hashsearch.c.

289{
290 Relation rel = scan->indexRelation;
292 ScanKey cur;
293 uint32 hashkey;
294 Bucket bucket;
295 Buffer buf;
296 Page page;
297 HashPageOpaque opaque;
298 HashScanPosItem *currItem;
299
301 if (scan->instrument)
302 scan->instrument->nsearches++;
303
304 /*
305 * We do not support hash scans with no index qualification, because we
306 * would have to read the whole index rather than just one bucket. That
307 * creates a whole raft of problems, since we haven't got a practical way
308 * to lock all the buckets against splits or compactions.
309 */
310 if (scan->numberOfKeys < 1)
312 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313 errmsg("hash indexes do not support whole-index scans")));
314
315 /* There may be more than one index qual, but we hash only the first */
316 cur = &scan->keyData[0];
317
318 /* We support only single-column hash indexes */
319 Assert(cur->sk_attno == 1);
320 /* And there's only one operator strategy, too */
321 Assert(cur->sk_strategy == HTEqualStrategyNumber);
322
323 /*
324 * If the constant in the index qual is NULL, assume it cannot match any
325 * items in the index.
326 */
327 if (cur->sk_flags & SK_ISNULL)
328 return false;
329
330 /*
331 * Okay to compute the hash key. We want to do this before acquiring any
332 * locks, in case a user-defined hash function happens to be slow.
333 *
334 * If scankey operator is not a cross-type comparison, we can use the
335 * cached hash function; otherwise gotta look it up in the catalogs.
336 *
337 * We support the convention that sk_subtype == InvalidOid means the
338 * opclass input type; this is a hack to simplify life for ScanKeyInit().
339 */
340 if (cur->sk_subtype == rel->rd_opcintype[0] ||
341 cur->sk_subtype == InvalidOid)
342 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
343 else
344 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
345 cur->sk_subtype);
346
347 so->hashso_sk_hash = hashkey;
348
349 buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
351 page = BufferGetPage(buf);
352 opaque = HashPageGetOpaque(page);
353 bucket = opaque->hasho_bucket;
354
356
357 /*
358 * If a bucket split is in progress, then while scanning the bucket being
359 * populated, we need to skip tuples that were copied from bucket being
360 * split. We also need to maintain a pin on the bucket being split to
361 * ensure that split-cleanup work done by vacuum doesn't remove tuples
362 * from it till this scan is done. We need to maintain a pin on the
363 * bucket being populated to ensure that vacuum doesn't squeeze that
364 * bucket till this scan is complete; otherwise, the ordering of tuples
365 * can't be maintained during forward and backward scans. Here, we have
366 * to be cautious about locking order: first, acquire the lock on bucket
367 * being split; then, release the lock on it but not the pin; then,
368 * acquire a lock on bucket being populated and again re-verify whether
369 * the bucket split is still in progress. Acquiring the lock on bucket
370 * being split first ensures that the vacuum waits for this scan to
371 * finish.
372 */
373 if (H_BUCKET_BEING_POPULATED(opaque))
374 {
375 BlockNumber old_blkno;
376 Buffer old_buf;
377
378 old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
379
380 /*
381 * release the lock on new bucket and re-acquire it after acquiring
382 * the lock on old bucket.
383 */
385
386 old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
387
388 /*
389 * remember the split bucket buffer so as to use it later for
390 * scanning.
391 */
392 so->hashso_split_bucket_buf = old_buf;
394
396 page = BufferGetPage(buf);
397 opaque = HashPageGetOpaque(page);
398 Assert(opaque->hasho_bucket == bucket);
399
400 if (H_BUCKET_BEING_POPULATED(opaque))
401 so->hashso_buc_populated = true;
402 else
403 {
406 }
407 }
408
409 /* If a backwards scan is requested, move to the end of the chain */
411 {
412 /*
 413 * Backward scans that start during a split need to start from the end of
 414 * the bucket being split.
415 */
416 while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
418 _hash_readnext(scan, &buf, &page, &opaque);
419 }
420
421 /* remember which buffer we have pinned, if any */
423 so->currPos.buf = buf;
424
425 /* Now find all the tuples satisfying the qualification from a page */
426 if (!_hash_readpage(scan, &buf, dir))
427 return false;
428
429 /* OK, itemIndex says what to return */
430 currItem = &so->currPos.items[so->currPos.itemIndex];
431 scan->xs_heaptid = currItem->heapTid;
432
 433 /* if we're here, _hash_readpage found a valid tuple */
434 return true;
435}
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:197
#define H_BUCKET_BEING_POPULATED(opaque)
Definition: hash.h:92
HashScanOpaqueData * HashScanOpaque
Definition: hash.h:192
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
Definition: hashsearch.c:131
static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
Definition: hashsearch.c:448
BlockNumber _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
Definition: hashutil.c:422
uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
Definition: hashutil.c:102
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:694
#define InvalidOid
Definition: postgres_ext.h:35
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2599
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
#define HTEqualStrategyNumber
Definition: stratnum.h:41
uint32 hashso_sk_hash
Definition: hash.h:161
HashScanPosItem items[MaxIndexTuplesPerPage]
Definition: hash.h:127
int itemIndex
Definition: hash.h:125
struct IndexScanInstrumentation * instrument
Definition: relscan.h:159
ItemPointerData xs_heaptid
Definition: relscan.h:172
struct SnapshotData * xs_snapshot
Definition: relscan.h:138
Oid * rd_opcintype
Definition: rel.h:208

References _hash_datum2hashkey(), _hash_datum2hashkey_type(), _hash_dropbuf(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_readnext(), _hash_readpage(), Assert(), BlockNumberIsValid(), buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsInvalid, cur, HashScanOpaqueData::currPos, ereport, errcode(), errmsg(), ERROR, H_BUCKET_BEING_POPULATED, HASH_READ, HashPageGetOpaque, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_sk_hash, HashScanOpaqueData::hashso_split_bucket_buf, HTEqualStrategyNumber, IndexScanDescData::indexRelation, IndexScanDescData::instrument, InvalidBuffer, InvalidOid, HashScanPosData::itemIndex, HashScanPosData::items, IndexScanDescData::keyData, LH_BUCKET_PAGE, LockBuffer(), IndexScanInstrumentation::nsearches, IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, pgstat_count_index_scan, PredicateLockPage(), RelationData::rd_opcintype, ScanDirectionIsBackward, SK_ISNULL, IndexScanDescData::xs_heaptid, and IndexScanDescData::xs_snapshot.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_freeovflpage()

BlockNumber _hash_freeovflpage ( Relation  rel,
Buffer  bucketbuf,
Buffer  ovflbuf,
Buffer  wbuf,
IndexTuple itups,
OffsetNumber itup_offsets,
Size tups_size,
uint16  nitups,
BufferAccessStrategy  bstrategy 
)

Definition at line 490 of file hashovfl.c.

494{
495 HashMetaPage metap;
496 Buffer metabuf;
497 Buffer mapbuf;
498 BlockNumber ovflblkno;
499 BlockNumber prevblkno;
500 BlockNumber blkno;
501 BlockNumber nextblkno;
502 BlockNumber writeblkno;
503 HashPageOpaque ovflopaque;
504 Page ovflpage;
505 Page mappage;
506 uint32 *freep;
507 uint32 ovflbitno;
508 int32 bitmappage,
509 bitmapbit;
511 Buffer prevbuf = InvalidBuffer;
512 Buffer nextbuf = InvalidBuffer;
513 bool update_metap = false;
514
515 /* Get information from the doomed page */
516 _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
517 ovflblkno = BufferGetBlockNumber(ovflbuf);
518 ovflpage = BufferGetPage(ovflbuf);
519 ovflopaque = HashPageGetOpaque(ovflpage);
520 nextblkno = ovflopaque->hasho_nextblkno;
521 prevblkno = ovflopaque->hasho_prevblkno;
522 writeblkno = BufferGetBlockNumber(wbuf);
523 bucket = ovflopaque->hasho_bucket;
524
525 /*
 526 * Fix up the bucket chain. This is a doubly-linked list, so we must fix
527 * up the bucket chain members behind and ahead of the overflow page being
528 * deleted. Concurrency issues are avoided by using lock chaining as
529 * described atop hashbucketcleanup.
530 */
531 if (BlockNumberIsValid(prevblkno))
532 {
533 if (prevblkno == writeblkno)
534 prevbuf = wbuf;
535 else
536 prevbuf = _hash_getbuf_with_strategy(rel,
537 prevblkno,
540 bstrategy);
541 }
542 if (BlockNumberIsValid(nextblkno))
543 nextbuf = _hash_getbuf_with_strategy(rel,
544 nextblkno,
547 bstrategy);
548
549 /* Note: bstrategy is intentionally not used for metapage and bitmap */
550
551 /* Read the metapage so we can determine which bitmap page to use */
553 metap = HashPageGetMeta(BufferGetPage(metabuf));
554
555 /* Identify which bit to set */
556 ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
557
558 bitmappage = ovflbitno >> BMPG_SHIFT(metap);
559 bitmapbit = ovflbitno & BMPG_MASK(metap);
560
561 if (bitmappage >= metap->hashm_nmaps)
562 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
563 blkno = metap->hashm_mapp[bitmappage];
564
565 /* Release metapage lock while we access the bitmap page */
567
568 /* read the bitmap page to clear the bitmap bit */
569 mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
570 mappage = BufferGetPage(mapbuf);
571 freep = HashPageGetBitmap(mappage);
572 Assert(ISSET(freep, bitmapbit));
573
574 /* Get write-lock on metapage to update firstfree */
576
577 /* This operation needs to log multiple tuples, prepare WAL for that */
578 if (RelationNeedsWAL(rel))
580
582
583 /*
584 * we have to insert tuples on the "write" page, being careful to preserve
585 * hashkey ordering. (If we insert many tuples into the same "write" page
586 * it would be worth qsort'ing them).
587 */
588 if (nitups > 0)
589 {
590 _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
591 MarkBufferDirty(wbuf);
592 }
593
594 /*
595 * Reinitialize the freed overflow page. Just zeroing the page won't
596 * work, because WAL replay routines expect pages to be initialized. See
597 * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. We are
598 * careful to make the special space valid here so that tools like
599 * pageinspect won't get confused.
600 */
601 _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
602
603 ovflopaque = HashPageGetOpaque(ovflpage);
604
607 ovflopaque->hasho_bucket = InvalidBucket;
608 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
609 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
610
611 MarkBufferDirty(ovflbuf);
612
613 if (BufferIsValid(prevbuf))
614 {
615 Page prevpage = BufferGetPage(prevbuf);
616 HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
617
618 Assert(prevopaque->hasho_bucket == bucket);
619 prevopaque->hasho_nextblkno = nextblkno;
620 MarkBufferDirty(prevbuf);
621 }
622 if (BufferIsValid(nextbuf))
623 {
624 Page nextpage = BufferGetPage(nextbuf);
625 HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
626
627 Assert(nextopaque->hasho_bucket == bucket);
628 nextopaque->hasho_prevblkno = prevblkno;
629 MarkBufferDirty(nextbuf);
630 }
631
632 /* Clear the bitmap bit to indicate that this overflow page is free */
633 CLRBIT(freep, bitmapbit);
634 MarkBufferDirty(mapbuf);
635
636 /* if this is now the first free page, update hashm_firstfree */
637 if (ovflbitno < metap->hashm_firstfree)
638 {
639 metap->hashm_firstfree = ovflbitno;
640 update_metap = true;
641 MarkBufferDirty(metabuf);
642 }
643
644 /* XLOG stuff */
645 if (RelationNeedsWAL(rel))
646 {
648 XLogRecPtr recptr;
649 int i;
650 bool mod_wbuf = false;
651
652 xlrec.prevblkno = prevblkno;
653 xlrec.nextblkno = nextblkno;
654 xlrec.ntups = nitups;
655 xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
656 xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
657
660
661 /*
662 * bucket buffer was not changed, but still needs to be registered to
663 * ensure that we can acquire a cleanup lock on it during replay.
664 */
665 if (!xlrec.is_prim_bucket_same_wrt)
666 {
668
669 XLogRegisterBuffer(0, bucketbuf, flags);
670 }
671
672 if (xlrec.ntups > 0)
673 {
675
676 /* Remember that wbuf is modified. */
677 mod_wbuf = true;
678
679 XLogRegisterBufData(1, itup_offsets,
680 nitups * sizeof(OffsetNumber));
681 for (i = 0; i < nitups; i++)
682 XLogRegisterBufData(1, itups[i], tups_size[i]);
683 }
684 else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
685 {
686 uint8 wbuf_flags;
687
688 /*
689 * A write buffer needs to be registered even if no tuples are
 690 * added to it, to ensure that we can acquire a cleanup lock on it
 691 * if it is the same as the primary bucket buffer, or update the
 692 * nextblkno if it is the same as the previous bucket buffer.
693 */
694 Assert(xlrec.ntups == 0);
695
696 wbuf_flags = REGBUF_STANDARD;
697 if (!xlrec.is_prev_bucket_same_wrt)
698 {
699 wbuf_flags |= REGBUF_NO_CHANGE;
700 }
701 else
702 {
703 /* Remember that wbuf is modified. */
704 mod_wbuf = true;
705 }
706 XLogRegisterBuffer(1, wbuf, wbuf_flags);
707 }
708
710
711 /*
712 * If prevpage and the writepage (block in which we are moving tuples
 713 * from overflow) are the same, there is no need to separately register
714 * prevpage. During replay, we can directly update the nextblock in
715 * writepage.
716 */
717 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
719
720 if (BufferIsValid(nextbuf))
722
724 XLogRegisterBufData(5, &bitmapbit, sizeof(uint32));
725
726 if (update_metap)
727 {
729 XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
730 }
731
732 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
733
734 /* Set LSN iff wbuf is modified. */
735 if (mod_wbuf)
736 PageSetLSN(BufferGetPage(wbuf), recptr);
737
738 PageSetLSN(BufferGetPage(ovflbuf), recptr);
739
740 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
741 PageSetLSN(BufferGetPage(prevbuf), recptr);
742 if (BufferIsValid(nextbuf))
743 PageSetLSN(BufferGetPage(nextbuf), recptr);
744
745 PageSetLSN(BufferGetPage(mapbuf), recptr);
746
747 if (update_metap)
748 PageSetLSN(BufferGetPage(metabuf), recptr);
749 }
750
752
 753 /* release the previous bucket if it is not the same as the write bucket */
754 if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
755 _hash_relbuf(rel, prevbuf);
756
757 if (BufferIsValid(ovflbuf))
758 _hash_relbuf(rel, ovflbuf);
759
760 if (BufferIsValid(nextbuf))
761 _hash_relbuf(rel, nextbuf);
762
763 _hash_relbuf(rel, mapbuf);
764 _hash_relbuf(rel, metabuf);
765
766 return nextblkno;
767}
#define CLRBIT(x, i)
Definition: blutils.c:28
static Size BufferGetPageSize(Buffer buffer)
Definition: bufmgr.h:403
uint8_t uint8
Definition: c.h:500
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:224
int32_t int32
Definition: c.h:498
#define LH_UNUSED_PAGE
Definition: hash.h:53
#define ISSET(A, N)
Definition: hash.h:334
#define InvalidBucket
Definition: hash.h:37
#define HASH_XLOG_FREE_OVFL_BUFS
Definition: hash_xlog.h:22
#define XLOG_HASH_SQUEEZE_PAGE
Definition: hash_xlog.h:35
#define SizeOfHashSqueezePage
Definition: hash_xlog.h:167
void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)
Definition: hashinsert.c:331
uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Definition: hashovfl.c:62
void _hash_pageinit(Page page, Size size)
Definition: hashpage.c:596
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition: hashpage.c:239
BlockNumber prevblkno
Definition: hash_xlog.h:155
bool is_prim_bucket_same_wrt
Definition: hash_xlog.h:158
bool is_prev_bucket_same_wrt
Definition: hash_xlog.h:161
BlockNumber nextblkno
Definition: hash_xlog.h:156
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition: xloginsert.c:175
#define REGBUF_NO_CHANGE
Definition: xloginsert.h:37
#define REGBUF_NO_IMAGE
Definition: xloginsert.h:33

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid(), BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CLRBIT, elog, END_CRIT_SECTION, ERROR, HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBucket, InvalidBuffer, xl_hash_squeeze_page::is_prev_bucket_same_wrt, xl_hash_squeeze_page::is_prim_bucket_same_wrt, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), xl_hash_squeeze_page::nextblkno, xl_hash_squeeze_page::ntups, PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().
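The bitmap addressing above (BMPG_SHIFT, BMPG_MASK, CLRBIT) is plain bit arithmetic. The standalone sketch below (plain C, not backend code) walks through it with values assumed to match the default 8kB block size, for which a bitmap page typically carries 4096 bytes of map data and hashm_bmshift is 15; those numbers are stated assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_MAP 32                  /* bits per uint32 word of the map */

int
main(void)
{
    uint32_t bmsize = 4096;              /* assumed hashm_bmsize for 8kB blocks */
    uint32_t bmbits = bmsize << 3;       /* bits per bitmap page: 32768 */
    uint32_t bmshift = 15;               /* assumed hashm_bmshift: log2(bmbits) */

    uint32_t ovflbitno = 40000;          /* bit number of the freed overflow page */
    uint32_t bitmappage = ovflbitno >> bmshift;       /* which bitmap page */
    uint32_t bitmapbit = ovflbitno & (bmbits - 1);    /* which bit on that page */

    printf("bitmap page %u, bit %u\n", bitmappage, bitmapbit);  /* page 1, bit 7232 */

    /* set then clear the bit, as SETBIT()/CLRBIT() do on the page contents */
    uint32_t freep[1024] = {0};          /* 4096 bytes of bitmap, one page's worth */
    freep[bitmapbit / BITS_PER_MAP] |= (uint32_t) 1 << (bitmapbit % BITS_PER_MAP);
    freep[bitmapbit / BITS_PER_MAP] &= ~((uint32_t) 1 << (bitmapbit % BITS_PER_MAP));

    return 0;
}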

◆ _hash_get_indextuple_hashkey()

uint32 _hash_get_indextuple_hashkey ( IndexTuple  itup)

Definition at line 291 of file hashutil.c.

292{
293 char *attp;
294
295 /*
296 * We assume the hash key is the first attribute and can't be null, so
297 * this can be done crudely but very very cheaply ...
298 */
299 attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
300 return *((uint32 *) attp);
301}
static Size IndexInfoFindDataOffset(unsigned short t_info)
Definition: itup.h:112
unsigned short t_info
Definition: itup.h:49

References IndexInfoFindDataOffset(), and IndexTupleData::t_info.

Referenced by _h_indexbuild(), _hash_binsearch(), _hash_binsearch_last(), _hash_doinsert(), _hash_load_qualified_items(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_splitbucket(), hash_page_items(), and hashbucketcleanup().

◆ _hash_get_newblock_from_oldbucket()

BlockNumber _hash_get_newblock_from_oldbucket ( Relation  rel,
Bucket  old_bucket 
)

Definition at line 461 of file hashutil.c.

462{
463 Bucket new_bucket;
464 Buffer metabuf;
465 HashMetaPage metap;
466 BlockNumber blkno;
467
469 metap = HashPageGetMeta(BufferGetPage(metabuf));
470
471 new_bucket = _hash_get_newbucket_from_oldbucket(rel, old_bucket,
472 metap->hashm_lowmask,
473 metap->hashm_maxbucket);
474 blkno = BUCKET_TO_BLKNO(metap, new_bucket);
475
476 _hash_relbuf(rel, metabuf);
477
478 return blkno;
479}
Bucket _hash_get_newbucket_from_oldbucket(Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)
Definition: hashutil.c:494

References _hash_get_newbucket_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage(), HASH_METAPAGE, HASH_READ, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageGetMeta, and LH_META_PAGE.

Referenced by _hash_finish_split().

◆ _hash_get_newbucket_from_oldbucket()

Bucket _hash_get_newbucket_from_oldbucket ( Relation  rel,
Bucket  old_bucket,
uint32  lowmask,
uint32  maxbucket 
)

Definition at line 494 of file hashutil.c.

496{
497 Bucket new_bucket;
498
499 new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
500 if (new_bucket > maxbucket)
501 {
502 lowmask = lowmask >> 1;
503 new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
504 }
505
506 return new_bucket;
507}
#define CALC_NEW_BUCKET(old_bucket, lowmask)
Definition: hashutil.c:24

References CALC_NEW_BUCKET.

Referenced by _hash_get_newblock_from_oldbucket(), and hashbucketcleanup().
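The body of CALC_NEW_BUCKET is not shown above; assuming it sets the bit just above lowmask (old_bucket | (lowmask + 1)), which is what the lowmask-halving fallback implies, the mapping can be demonstrated standalone (plain C, not backend code; the helper name and sample numbers are illustrative).

#include <stdint.h>
#include <stdio.h>

static uint32_t
new_bucket_from_old(uint32_t old_bucket, uint32_t lowmask, uint32_t maxbucket)
{
    /* assumed expansion of CALC_NEW_BUCKET: set the bit just above lowmask */
    uint32_t new_bucket = old_bucket | (lowmask + 1);

    if (new_bucket > maxbucket)
    {
        /* that bucket doesn't exist yet, so the most recent split of
         * old_bucket happened during the previous doubling */
        lowmask = lowmask >> 1;
        new_bucket = old_bucket | (lowmask + 1);
    }
    return new_bucket;
}

int
main(void)
{
    /* maxbucket = 9, lowmask = 7: bucket 1 last split into bucket 9, while
     * bucket 2's most recent split (into bucket 6) predates the doubling */
    printf("%u\n", new_bucket_from_old(1, 7, 9));   /* 9 */
    printf("%u\n", new_bucket_from_old(2, 7, 9));   /* 6 */
    return 0;
}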

◆ _hash_get_oldblock_from_newbucket()

BlockNumber _hash_get_oldblock_from_newbucket ( Relation  rel,
Bucket  new_bucket 
)

Definition at line 422 of file hashutil.c.

423{
424 Bucket old_bucket;
425 uint32 mask;
426 Buffer metabuf;
427 HashMetaPage metap;
428 BlockNumber blkno;
429
430 /*
 431 * To get the old bucket from the current bucket, we need a mask to fold
 432 * the bucket number into the lower half of the table. That mask is stored
 433 * in the meta page as hashm_lowmask, but we can't rely on it here, because
 434 * we need the value of lowmask that was in effect when this bucket's split
 435 * started. Masking off the most significant bit of the new bucket gives us
 436 * the old bucket.
437 */
438 mask = (((uint32) 1) << pg_leftmost_one_pos32(new_bucket)) - 1;
439 old_bucket = new_bucket & mask;
440
442 metap = HashPageGetMeta(BufferGetPage(metabuf));
443
444 blkno = BUCKET_TO_BLKNO(metap, old_bucket);
445
446 _hash_relbuf(rel, metabuf);
447
448 return blkno;
449}
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41

References _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage(), HASH_METAPAGE, HASH_READ, HashPageGetMeta, LH_META_PAGE, and pg_leftmost_one_pos32().

Referenced by _hash_first().
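A standalone restatement of the arithmetic (plain C, not backend code; the loop stands in for pg_leftmost_one_pos32()): clearing the most significant set bit of the new bucket number recovers the bucket it was split from, without consulting the metapage's current lowmask.

#include <stdint.h>
#include <stdio.h>

static uint32_t
old_bucket_from_new(uint32_t new_bucket)
{
    uint32_t msb = 0;

    /* position of the leftmost one bit (pg_leftmost_one_pos32 in the backend) */
    for (uint32_t v = new_bucket >> 1; v != 0; v >>= 1)
        msb++;

    return new_bucket & ((((uint32_t) 1) << msb) - 1);
}

int
main(void)
{
    printf("%u\n", old_bucket_from_new(5));    /* 1: bucket 5 was split from 1 */
    printf("%u\n", old_bucket_from_new(12));   /* 4: bucket 12 was split from 4 */
    return 0;
}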

◆ _hash_get_totalbuckets()

uint32 _hash_get_totalbuckets ( uint32  splitpoint_phase)

Definition at line 174 of file hashutil.c.

175{
176 uint32 splitpoint_group;
177 uint32 total_buckets;
178 uint32 phases_within_splitpoint_group;
179
180 if (splitpoint_phase < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
181 return (1 << splitpoint_phase);
182
183 /* get splitpoint's group */
184 splitpoint_group = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;
185 splitpoint_group +=
186 ((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) >>
188
189 /* account for buckets before splitpoint_group */
190 total_buckets = (1 << (splitpoint_group - 1));
191
192 /* account for buckets within splitpoint_group */
193 phases_within_splitpoint_group =
194 (((splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) &
195 HASH_SPLITPOINT_PHASE_MASK) + 1); /* from 0-based to 1-based */
196 total_buckets +=
197 (((1 << (splitpoint_group - 1)) >> HASH_SPLITPOINT_PHASE_BITS) *
198 phases_within_splitpoint_group);
199
200 return total_buckets;
201}
#define HASH_SPLITPOINT_PHASE_MASK
Definition: hash.h:234
#define HASH_SPLITPOINT_PHASE_BITS
Definition: hash.h:232

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, and HASH_SPLITPOINT_PHASE_MASK.

Referenced by _hash_expandtable(), _hash_init_metabuffer(), _hash_ovflblkno_to_bitno(), and bitno_to_blkno().
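The phase-to-bucket-count arithmetic is easiest to follow with concrete numbers. The standalone restatement below (plain C, not backend code) mirrors the computation above using the constants from hash.h: the first 10 splitpoint groups have a single phase each, and every later group is divided into 4 phases, each adding a quarter of the previous group's bucket count.

#include <stdint.h>
#include <stdio.h>

#define GROUPS_WITH_ONE_PHASE 10   /* HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE */
#define PHASE_BITS            2    /* HASH_SPLITPOINT_PHASE_BITS */
#define PHASE_MASK            ((1 << PHASE_BITS) - 1)

static uint32_t
total_buckets(uint32_t phase)
{
    uint32_t group, total, phases_done;

    if (phase < GROUPS_WITH_ONE_PHASE)
        return (uint32_t) 1 << phase;

    group = GROUPS_WITH_ONE_PHASE + ((phase - GROUPS_WITH_ONE_PHASE) >> PHASE_BITS);
    total = (uint32_t) 1 << (group - 1);        /* buckets before this group */
    phases_done = ((phase - GROUPS_WITH_ONE_PHASE) & PHASE_MASK) + 1;
    total += (((uint32_t) 1 << (group - 1)) >> PHASE_BITS) * phases_done;
    return total;
}

int
main(void)
{
    /* phases 9..13 yield 512, 640, 768, 896, 1024 buckets */
    for (uint32_t p = 9; p <= 13; p++)
        printf("phase %u: %u buckets\n", p, total_buckets(p));
    return 0;
}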

◆ _hash_getbucketbuf_from_hashkey()

Buffer _hash_getbucketbuf_from_hashkey ( Relation  rel,
uint32  hashkey,
int  access,
HashMetaPage cachedmetap 
)

Definition at line 1559 of file hashpage.c.

1561{
1562 HashMetaPage metap;
1563 Buffer buf;
1564 Buffer metabuf = InvalidBuffer;
1565 Page page;
1566 Bucket bucket;
1567 BlockNumber blkno;
1568 HashPageOpaque opaque;
1569
1570 /* We read from the target bucket buffer, hence locking is a must. */
1572
1573 metap = _hash_getcachedmetap(rel, &metabuf, false);
1574 Assert(metap != NULL);
1575
1576 /*
1577 * Loop until we get a lock on the correct target bucket.
1578 */
1579 for (;;)
1580 {
1581 /*
1582 * Compute the target bucket number, and convert to block number.
1583 */
1584 bucket = _hash_hashkey2bucket(hashkey,
1585 metap->hashm_maxbucket,
1586 metap->hashm_highmask,
1587 metap->hashm_lowmask);
1588
1589 blkno = BUCKET_TO_BLKNO(metap, bucket);
1590
1591 /* Fetch the primary bucket page for the bucket */
1592 buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
1593 page = BufferGetPage(buf);
1594 opaque = HashPageGetOpaque(page);
1595 Assert(opaque->hasho_bucket == bucket);
1597
1598 /*
1599 * If this bucket hasn't been split, we're done.
1600 */
1601 if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
1602 break;
1603
1604 /* Drop lock on this buffer, update cached metapage, and retry. */
1605 _hash_relbuf(rel, buf);
1606 metap = _hash_getcachedmetap(rel, &metabuf, true);
1607 Assert(metap != NULL);
1608 }
1609
1610 if (BufferIsValid(metabuf))
1611 _hash_dropbuf(rel, metabuf);
1612
1613 if (cachedmetap)
1614 *cachedmetap = metap;
1615
1616 return buf;
1617}
HashMetaPage _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
Definition: hashpage.c:1501

References _hash_dropbuf(), _hash_getbuf(), _hash_getcachedmetap(), _hash_hashkey2bucket(), _hash_relbuf(), Assert(), BUCKET_TO_BLKNO, buf, BufferGetPage(), BufferIsValid(), HASH_READ, HASH_WRITE, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, InvalidBlockNumber, InvalidBuffer, and LH_BUCKET_PAGE.

Referenced by _hash_doinsert(), and _hash_first().
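The retry loop depends on the convention, noted in _hash_expandtable() above, that a primary bucket page's hasho_prevblkno records hashm_maxbucket as of that bucket's last split. The standalone sketch below (plain C, not backend code; the helper name and numbers are illustrative) shows the staleness test: if the page records a larger table size than the cached metapage knows about, the cache predates a split and must be refreshed.

#include <stdint.h>
#include <stdio.h>

static int
cached_metap_is_usable(uint32_t page_prevblkno, uint32_t cached_maxbucket)
{
    /* same test as: opaque->hasho_prevblkno <= metap->hashm_maxbucket */
    return page_prevblkno <= cached_maxbucket;
}

int
main(void)
{
    uint32_t page_prevblkno = 7;    /* bucket last split when maxbucket was 7 */

    printf("%d\n", cached_metap_is_usable(page_prevblkno, 7));  /* 1: cache is fresh enough */
    printf("%d\n", cached_metap_is_usable(page_prevblkno, 3));  /* 0: stale, refresh and retry */
    return 0;
}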

◆ _hash_getbuf()

Buffer _hash_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags 
)

Definition at line 70 of file hashpage.c.

71{
72 Buffer buf;
73
74 if (blkno == P_NEW)
75 elog(ERROR, "hash AM does not use P_NEW");
76
77 buf = ReadBuffer(rel, blkno);
78
79 if (access != HASH_NOLOCK)
81
82 /* ref count and lock type are correct */
83
84 _hash_checkpage(rel, buf, flags);
85
86 return buf;
87}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:751
#define P_NEW
Definition: bufmgr.h:191

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), P_NEW, and ReadBuffer().

Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_kill_items(), _hash_next(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), hash_bitmap_info(), hashbulkdelete(), and pgstathashindex().

◆ _hash_getbuf_with_condlock_cleanup()

Buffer _hash_getbuf_with_condlock_cleanup ( Relation  rel,
BlockNumber  blkno,
int  flags 
)

Definition at line 96 of file hashpage.c.

97{
98 Buffer buf;
99
100 if (blkno == P_NEW)
101 elog(ERROR, "hash AM does not use P_NEW");
102
103 buf = ReadBuffer(rel, blkno);
104
106 {
108 return InvalidBuffer;
109 }
110
111 /* ref count and lock type are correct */
112
113 _hash_checkpage(rel, buf, flags);
114
115 return buf;
116}

References _hash_checkpage(), buf, ConditionalLockBufferForCleanup(), elog, ERROR, InvalidBuffer, P_NEW, ReadBuffer(), and ReleaseBuffer().

Referenced by _hash_expandtable().

◆ _hash_getbuf_with_strategy()

Buffer _hash_getbuf_with_strategy ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags,
BufferAccessStrategy  bstrategy 
)

Definition at line 239 of file hashpage.c.

242{
243 Buffer buf;
244
245 if (blkno == P_NEW)
246 elog(ERROR, "hash AM does not use P_NEW");
247
248 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
249
250 if (access != HASH_NOLOCK)
252
253 /* ref count and lock type are correct */
254
255 _hash_checkpage(rel, buf, flags);
256
257 return buf;
258}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:798
@ RBM_NORMAL
Definition: bufmgr.h:46

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), MAIN_FORKNUM, P_NEW, RBM_NORMAL, and ReadBufferExtended().

Referenced by _hash_freeovflpage(), _hash_squeezebucket(), hashbucketcleanup(), and pgstat_hash_page().

◆ _hash_getcachedmetap()

HashMetaPage _hash_getcachedmetap ( Relation  rel,
Buffer metabuf,
bool  force_refresh 
)

Definition at line 1501 of file hashpage.c.

1502{
1503 Page page;
1504
1505 Assert(metabuf);
1506 if (force_refresh || rel->rd_amcache == NULL)
1507 {
1508 char *cache = NULL;
1509
1510 /*
1511 * It's important that we don't set rd_amcache to an invalid value.
1512 * Either MemoryContextAlloc or _hash_getbuf could fail, so don't
1513 * install a pointer to the newly-allocated storage in the actual
1514 * relcache entry until both have succeeded.
1515 */
1516 if (rel->rd_amcache == NULL)
1517 cache = MemoryContextAlloc(rel->rd_indexcxt,
1518 sizeof(HashMetaPageData));
1519
1520 /* Read the metapage. */
1521 if (BufferIsValid(*metabuf))
1522 LockBuffer(*metabuf, BUFFER_LOCK_SHARE);
1523 else
1524 *metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ,
1525 LH_META_PAGE);
1526 page = BufferGetPage(*metabuf);
1527
1528 /* Populate the cache. */
1529 if (rel->rd_amcache == NULL)
1530 rel->rd_amcache = cache;
1531 memcpy(rel->rd_amcache, HashPageGetMeta(page),
1532 sizeof(HashMetaPageData));
1533
1534 /* Release metapage lock, but keep the pin. */
1535 LockBuffer(*metabuf, BUFFER_LOCK_UNLOCK);
1536 }
1537
1538 return (HashMetaPage) rel->rd_amcache;
1539}
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1256
void * rd_amcache
Definition: rel.h:229
MemoryContext rd_indexcxt
Definition: rel.h:204

References _hash_getbuf(), Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), HASH_METAPAGE, HASH_READ, HashPageGetMeta, LH_META_PAGE, LockBuffer(), MemoryContextAlloc(), RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _hash_getbucketbuf_from_hashkey(), and hashbulkdelete().
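Callers follow a small protocol around this function. The fragment below (assuming the backend environment, and mirroring the pattern visible in _hash_getbucketbuf_from_hashkey() above) sketches it: start with metabuf set to InvalidBuffer, call again with force_refresh = true if the cached copy turns out to be stale, and drop any pin that was acquired along the way.

Buffer       metabuf = InvalidBuffer;
HashMetaPage metap;

metap = _hash_getcachedmetap(rel, &metabuf, false);   /* use the cache if present */
/* ... consult metap->hashm_maxbucket etc.; if the copy proves stale ... */
metap = _hash_getcachedmetap(rel, &metabuf, true);    /* force a re-read */

if (BufferIsValid(metabuf))
    _hash_dropbuf(rel, metabuf);                      /* release the leftover pin */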

◆ _hash_getinitbuf()

Buffer _hash_getinitbuf ( Relation  rel,
BlockNumber  blkno 
)

Definition at line 135 of file hashpage.c.

136{
137 Buffer buf;
138
139 if (blkno == P_NEW)
140 elog(ERROR, "hash AM does not use P_NEW");
141
143 NULL);
144
145 /* ref count and lock type are correct */
146
147 /* initialize the page */
149
150 return buf;
151}
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:47

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), elog, ERROR, MAIN_FORKNUM, P_NEW, RBM_ZERO_AND_LOCK, and ReadBufferExtended().

Referenced by _hash_addovflpage().

◆ _hash_getnewbuf()

Buffer _hash_getnewbuf ( Relation  rel,
BlockNumber  blkno,
ForkNumber  forkNum 
)

Definition at line 198 of file hashpage.c.

199{
200 BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum);
201 Buffer buf;
202
203 if (blkno == P_NEW)
204 elog(ERROR, "hash AM does not use P_NEW");
205 if (blkno > nblocks)
206 elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
208
209 /* smgr insists we explicitly extend the relation */
210 if (blkno == nblocks)
211 {
212 buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
214 if (BufferGetBlockNumber(buf) != blkno)
215 elog(ERROR, "unexpected hash relation size: %u, should be %u",
216 BufferGetBlockNumber(buf), blkno);
217 }
218 else
219 {
220 buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
221 NULL);
222 }
223
224 /* ref count and lock type are correct */
225
226 /* initialize the page */
228
229 return buf;
230}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:851
BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
Definition: bufmgr.c:4361
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:75
@ EB_LOCK_FIRST
Definition: bufmgr.h:87
#define BMR_REL(p_rel)
Definition: bufmgr.h:108

References _hash_pageinit(), BMR_REL, buf, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, elog, ERROR, ExtendBufferedRel(), P_NEW, RBM_ZERO_AND_LOCK, ReadBufferExtended(), RelationGetNumberOfBlocksInFork(), and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), and _hash_init().

◆ _hash_hashkey2bucket()

Bucket _hash_hashkey2bucket ( uint32  hashkey,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask 
)

Definition at line 125 of file hashutil.c.

127{
128 Bucket bucket;
129
130 bucket = hashkey & highmask;
131 if (bucket > maxbucket)
132 bucket = bucket & lowmask;
133
134 return bucket;
135}

Referenced by _h_indexbuild(), _hash_getbucketbuf_from_hashkey(), _hash_splitbucket(), comparetup_index_hash(), and hashbucketcleanup().
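A standalone restatement with concrete numbers (plain C, not backend code; the mask values are illustrative) makes the fallback clear: masking with highmask may name a bucket that has not been created yet, in which case dropping the top bit via lowmask finds the bucket that still holds those keys.

#include <stdint.h>
#include <stdio.h>

static uint32_t
hashkey2bucket(uint32_t hashkey, uint32_t maxbucket,
               uint32_t highmask, uint32_t lowmask)
{
    uint32_t bucket = hashkey & highmask;

    if (bucket > maxbucket)
        bucket = bucket & lowmask;      /* bucket not split off yet; fold back */
    return bucket;
}

int
main(void)
{
    /* a table with buckets 0..9: maxbucket = 9, highmask = 15, lowmask = 7 */
    printf("%u\n", hashkey2bucket(0xdeadbee9, 9, 15, 7));  /* 9: bucket exists */
    printf("%u\n", hashkey2bucket(0xdeadbeef, 9, 15, 7));  /* 7: 15 folds back to 7 */
    return 0;
}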

◆ _hash_init()

uint32 _hash_init ( Relation  rel,
double  num_tuples,
ForkNumber  forkNum 
)

Definition at line 327 of file hashpage.c.

328{
329 Buffer metabuf;
330 Buffer buf;
331 Buffer bitmapbuf;
332 Page pg;
333 HashMetaPage metap;
334 RegProcedure procid;
335 int32 data_width;
336 int32 item_width;
337 int32 ffactor;
338 uint32 num_buckets;
339 uint32 i;
340 bool use_wal;
341
342 /* safety check */
343 if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
344 elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
346
347 /*
348 * WAL log creation of pages if the relation is persistent, or this is the
349 * init fork. Init forks for unlogged relations always need to be WAL
350 * logged.
351 */
352 use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
353
354 /*
355 * Determine the target fill factor (in tuples per bucket) for this index.
356 * The idea is to make the fill factor correspond to pages about as full
357 * as the user-settable fillfactor parameter says. We can compute it
358 * exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
359 */
360 data_width = sizeof(uint32);
361 item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
362 sizeof(ItemIdData); /* include the line pointer */
363 ffactor = HashGetTargetPageUsage(rel) / item_width;
364 /* keep to a sane range */
365 if (ffactor < 10)
366 ffactor = 10;
367
368 procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);
369
370 /*
371 * We initialize the metapage, the first N bucket pages, and the first
372 * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
373 * calls to occur. This ensures that the smgr level has the right idea of
374 * the physical index length.
375 *
376 * Critical section not required, because on error the creation of the
377 * whole relation will be rolled back.
378 */
379 metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
380 _hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
381 MarkBufferDirty(metabuf);
382
383 pg = BufferGetPage(metabuf);
384 metap = HashPageGetMeta(pg);
385
386 /* XLOG stuff */
387 if (use_wal)
388 {
390 XLogRecPtr recptr;
391
392 xlrec.num_tuples = num_tuples;
393 xlrec.procid = metap->hashm_procid;
394 xlrec.ffactor = metap->hashm_ffactor;
395
399
400 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE);
401
402 PageSetLSN(BufferGetPage(metabuf), recptr);
403 }
404
405 num_buckets = metap->hashm_maxbucket + 1;
406
407 /*
408 * Release buffer lock on the metapage while we initialize buckets.
409 * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
410 * won't accomplish anything. It's a bad idea to hold buffer locks for
411 * long intervals in any case, since that can block the bgwriter.
412 */
414
415 /*
416 * Initialize and WAL Log the first N buckets
417 */
418 for (i = 0; i < num_buckets; i++)
419 {
420 BlockNumber blkno;
421
422 /* Allow interrupts, in case N is huge */
424
425 blkno = BUCKET_TO_BLKNO(metap, i);
426 buf = _hash_getnewbuf(rel, blkno, forkNum);
429
430 if (use_wal)
432 forkNum,
433 blkno,
435 true);
436 _hash_relbuf(rel, buf);
437 }
438
439 /* Now reacquire buffer lock on metapage */
441
442 /*
443 * Initialize bitmap page
444 */
445 bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
446 _hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
447 MarkBufferDirty(bitmapbuf);
448
449 /* add the new bitmap page to the metapage's list of bitmaps */
450 /* metapage already has a write lock */
451 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
453 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
454 errmsg("out of overflow pages in hash index \"%s\"",
456
457 metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
458
459 metap->hashm_nmaps++;
460 MarkBufferDirty(metabuf);
461
462 /* XLOG stuff */
463 if (use_wal)
464 {
466 XLogRecPtr recptr;
467
468 xlrec.bmsize = metap->hashm_bmsize;
469
473
474 /*
475 * This is safe only because nobody else can be modifying the index at
476 * this stage; it's only visible to the transaction that is creating
477 * it.
478 */
480
481 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_BITMAP_PAGE);
482
483 PageSetLSN(BufferGetPage(bitmapbuf), recptr);
484 PageSetLSN(BufferGetPage(metabuf), recptr);
485 }
486
487 /* all done */
488 _hash_relbuf(rel, bitmapbuf);
489 _hash_relbuf(rel, metabuf);
490
491 return num_buckets;
492}
#define HashGetTargetPageUsage(relation)
Definition: hash.h:281
#define SizeOfHashInitBitmapPage
Definition: hash_xlog.h:233
#define XLOG_HASH_INIT_BITMAP_PAGE
Definition: hash_xlog.h:28
#define XLOG_HASH_INIT_META_PAGE
Definition: hash_xlog.h:27
#define SizeOfHashInitMetaPage
Definition: hash_xlog.h:217
void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, bool initpage)
Definition: hashpage.c:157
void _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, uint16 ffactor, bool initpage)
Definition: hashpage.c:498
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:873
struct ItemIdData ItemIdData
@ INIT_FORKNUM
Definition: relpath.h:61
RegProcedure hashm_procid
Definition: hash.h:261
RelFileLocator rd_locator
Definition: rel.h:57
RegProcedure procid
Definition: hash_xlog.h:213
XLogRecPtr log_newpage(RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:1143

References _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), _hash_relbuf(), xl_hash_init_bitmap_page::bmsize, BUCKET_TO_BLKNO, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), CHECK_FOR_INTERRUPTS, elog, ereport, errcode(), errmsg(), ERROR, xl_hash_init_meta_page::ffactor, HASH_MAX_BITMAPS, HASH_METAPAGE, HashGetTargetPageUsage, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_procid, HashPageGetMeta, HASHSTANDARD_PROC, i, index_getprocid(), INIT_FORKNUM, LH_BUCKET_PAGE, LockBuffer(), log_newpage(), MarkBufferDirty(), MAXALIGN, xl_hash_init_meta_page::num_tuples, PageSetLSN(), xl_hash_init_meta_page::procid, RelationData::rd_locator, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetNumberOfBlocksInFork(), RelationGetRelationName, RelationNeedsWAL, SizeOfHashInitBitmapPage, SizeOfHashInitMetaPage, XLOG_HASH_INIT_BITMAP_PAGE, XLOG_HASH_INIT_META_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbuild(), and hashbuildempty().
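The fill-factor arithmetic near the top of _hash_init (lines 360-366 above) can be reproduced outside the server. The standalone C sketch below is illustrative only: MY_BLCKSZ, MY_INDEX_TUPLE_SIZE and MY_ITEM_ID_SIZE are assumed values for a typical 64-bit build with 8 kB pages, not the server's own definitions.

/*
 * Standalone sketch (not PostgreSQL code): reproduces the tuples-per-bucket
 * computation from _hash_init under assumed sizes.
 */
#include <stdio.h>
#include <stdint.h>

#define MY_MAXALIGN(x)      (((x) + 7) & ~((size_t) 7))   /* assume 8-byte MAXALIGN */
#define MY_BLCKSZ           8192                          /* assume default page size */
#define MY_INDEX_TUPLE_SIZE 8                             /* assumed sizeof(IndexTupleData) */
#define MY_ITEM_ID_SIZE     4                             /* assumed sizeof(ItemIdData) */

int
main(void)
{
    int     fillfactor = 75;                  /* default hash fillfactor */
    size_t  data_width = sizeof(uint32_t);    /* the hash key is always a uint32 */
    size_t  item_width = MY_MAXALIGN(MY_INDEX_TUPLE_SIZE) +
                         MY_MAXALIGN(data_width) +
                         MY_ITEM_ID_SIZE;     /* include the line pointer */
    size_t  target = (size_t) MY_BLCKSZ * fillfactor / 100;  /* target page usage */
    size_t  ffactor = target / item_width;

    if (ffactor < 10)                         /* keep to a sane range, as in _hash_init */
        ffactor = 10;

    printf("item_width = %zu bytes, target page usage = %zu bytes\n",
           item_width, target);
    printf("ffactor (tuples per bucket page) = %zu\n", ffactor);
    return 0;
}

With the default fillfactor of 75 this prints 307 tuples per bucket page, which is what the server should arrive at for an 8 kB block size if the assumed struct sizes above are right.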

◆ _hash_init_metabuffer()

void _hash_init_metabuffer ( Buffer  buf,
double  num_tuples,
RegProcedure  procid,
uint16  ffactor,
bool  initpage 
)

Definition at line 498 of file hashpage.c.

500{
501 HashMetaPage metap;
502 HashPageOpaque pageopaque;
503 Page page;
504 double dnumbuckets;
505 uint32 num_buckets;
506 uint32 spare_index;
507 uint32 lshift;
508
509 /*
510 * Choose the number of initial bucket pages to match the fill factor
511 * given the estimated number of tuples. We round up the result to the
512 * total number of buckets which has to be allocated before using its
513 * hashm_spares element. However always force at least 2 bucket pages. The
514 * upper limit is determined by considerations explained in
515 * _hash_expandtable().
516 */
517 dnumbuckets = num_tuples / ffactor;
518 if (dnumbuckets <= 2.0)
519 num_buckets = 2;
520 else if (dnumbuckets >= (double) 0x40000000)
521 num_buckets = 0x40000000;
522 else
523 num_buckets = _hash_get_totalbuckets(_hash_spareindex(dnumbuckets));
524
525 spare_index = _hash_spareindex(num_buckets);
526 Assert(spare_index < HASH_MAX_SPLITPOINTS);
527
528 page = BufferGetPage(buf);
529 if (initpage)
531
532 pageopaque = HashPageGetOpaque(page);
535 pageopaque->hasho_bucket = InvalidBucket;
536 pageopaque->hasho_flag = LH_META_PAGE;
537 pageopaque->hasho_page_id = HASHO_PAGE_ID;
538
539 metap = HashPageGetMeta(page);
540
541 metap->hashm_magic = HASH_MAGIC;
543 metap->hashm_ntuples = 0;
544 metap->hashm_nmaps = 0;
545 metap->hashm_ffactor = ffactor;
546 metap->hashm_bsize = HashGetMaxBitmapSize(page);
547
548 /* find largest bitmap array size that will fit in page size */
549 lshift = pg_leftmost_one_pos32(metap->hashm_bsize);
550 Assert(lshift > 0);
551 metap->hashm_bmsize = 1 << lshift;
552 metap->hashm_bmshift = lshift + BYTE_TO_BIT;
553 Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
554
555 /*
556 * Label the index with its primary hash support function's OID. This is
557 * pretty useless for normal operation (in fact, hashm_procid is not used
558 * anywhere), but it might be handy for forensic purposes so we keep it.
559 */
560 metap->hashm_procid = procid;
561
562 /*
563 * We initialize the index with N buckets, 0 .. N-1, occupying physical
564 * blocks 1 to N. The first freespace bitmap page is in block N+1.
565 */
566 metap->hashm_maxbucket = num_buckets - 1;
567
568 /*
569 * Set highmask as next immediate ((2 ^ x) - 1), which should be
570 * sufficient to cover num_buckets.
571 */
572 metap->hashm_highmask = pg_nextpower2_32(num_buckets + 1) - 1;
573 metap->hashm_lowmask = (metap->hashm_highmask >> 1);
574
575 MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
576 MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
577
578 /* Set up mapping for one spare page after the initial splitpoints */
579 metap->hashm_spares[spare_index] = 1;
580 metap->hashm_ovflpoint = spare_index;
581 metap->hashm_firstfree = 0;
582
583 /*
584 * Set pd_lower just past the end of the metadata. This is essential,
585 * because without doing so, metadata will be lost if xlog.c compresses
586 * the page.
587 */
588 ((PageHeader) page)->pd_lower =
589 ((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
590}
PageHeaderData * PageHeader
Definition: bufpage.h:174
#define MemSet(start, val, len)
Definition: c.h:991
#define HASH_MAX_SPLITPOINTS
Definition: hash.h:239
#define BYTE_TO_BIT
Definition: hash.h:301
#define HashGetMaxBitmapSize(page)
Definition: hash.h:319
uint16 hashm_bsize
Definition: hash.h:250
uint16 hashm_bmshift
Definition: hash.h:253

References _hash_get_totalbuckets(), _hash_pageinit(), _hash_spareindex(), Assert(), BMPG_MASK, BMPG_SHIFT, buf, BufferGetPage(), BufferGetPageSize(), BYTE_TO_BIT, HASH_MAGIC, HASH_MAX_SPLITPOINTS, HASH_VERSION, HashGetMaxBitmapSize, HashMetaPageData::hashm_bmshift, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_bsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_procid, HashMetaPageData::hashm_spares, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, LH_META_PAGE, MemSet, pg_leftmost_one_pos32(), and pg_nextpower2_32().

Referenced by _hash_init(), and hash_xlog_init_meta_page().
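As a rough standalone illustration (not server code) of how the mask fields relate to the bucket count: next_pow2() below stands in for pg_nextpower2_32(), and the bucket count of 6 is hypothetical; in the server the count comes from the _hash_spareindex()/_hash_get_totalbuckets() rounding shown above.

/*
 * Standalone sketch: derive hashm_maxbucket/hashm_highmask/hashm_lowmask
 * the way _hash_init_metabuffer does, for an assumed bucket count.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t
next_pow2(uint32_t n)              /* smallest power of 2 >= n, for n >= 1 */
{
    uint32_t p = 1;
    while (p < n)
        p <<= 1;
    return p;
}

int
main(void)
{
    uint32_t num_buckets = 6;      /* hypothetical initial bucket count */
    uint32_t maxbucket = num_buckets - 1;
    uint32_t highmask = next_pow2(num_buckets + 1) - 1;
    uint32_t lowmask = highmask >> 1;

    printf("maxbucket=%u highmask=0x%x lowmask=0x%x\n",
           (unsigned) maxbucket, (unsigned) highmask, (unsigned) lowmask);
    /* A hash value h maps to bucket (h & highmask), reduced with lowmask
     * when that exceeds maxbucket -- compare _hash_hashkey2bucket(). */
    return 0;
}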

◆ _hash_initbitmapbuffer()

void _hash_initbitmapbuffer ( Buffer  buf,
uint16  bmsize,
bool  initpage 
)

Definition at line 777 of file hashovfl.c.

778{
779 Page pg;
781 uint32 *freep;
782
783 pg = BufferGetPage(buf);
784
785 /* initialize the page */
786 if (initpage)
788
789 /* initialize the page's special space */
790 op = HashPageGetOpaque(pg);
796
797 /* set all of the bits to 1 */
798 freep = HashPageGetBitmap(pg);
799 memset(freep, 0xFF, bmsize);
800
801 /*
802 * Set pd_lower just past the end of the bitmap page data. We could even
803 * set pd_lower equal to pd_upper, but this is more precise and makes the
804 * page look compressible to xlog.c.
805 */
806 ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
807}

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, and LH_BITMAP_PAGE.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().
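Reading _hash_initbitmapbuffer together with the rest of the overflow-page code, a set bit appears to mean "overflow page in use" and a clear bit "free for reuse", so filling the map with 0xFF pre-marks pages that do not physically exist yet. The standalone sketch below is illustrative only; the byte-wise map and the bmsize/bit values are assumptions, not the server's word-wise representation.

/*
 * Standalone sketch: a bitmap page tracks one bit per overflow page.
 * Start with everything marked "in use"; freeing a page clears its bit.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
    uint16_t bmsize = 4096;              /* hypothetical hashm_bmsize, in bytes */
    uint8_t  map[4096];
    uint32_t bit = 37;                   /* hypothetical overflow-page bit number */

    memset(map, 0xFF, bmsize);           /* all pages pre-marked "in use" */
    printf("bits per bitmap page: %u\n", ((unsigned) bmsize) << 3);

    map[bit / 8] &= (uint8_t) ~(1u << (bit % 8));   /* free the page: clear its bit */
    printf("bit %u free? %s\n", (unsigned) bit,
           (map[bit / 8] & (1u << (bit % 8))) ? "no" : "yes");
    return 0;
}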

◆ _hash_initbuf()

void _hash_initbuf ( Buffer  buf,
uint32  max_bucket,
uint32  num_bucket,
uint32  flag,
bool  initpage 
)

Definition at line 157 of file hashpage.c.

159{
160 HashPageOpaque pageopaque;
161 Page page;
162
163 page = BufferGetPage(buf);
164
165 /* initialize the page */
166 if (initpage)
168
169 pageopaque = HashPageGetOpaque(page);
170
171 /*
172 * Set hasho_prevblkno with current hashm_maxbucket. This value will be
173 * used to validate cached HashMetaPageData. See
174 * _hash_getbucketbuf_from_hashkey().
175 */
176 pageopaque->hasho_prevblkno = max_bucket;
178 pageopaque->hasho_bucket = num_bucket;
179 pageopaque->hasho_flag = flag;
180 pageopaque->hasho_page_id = HASHO_PAGE_ID;
181}


References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), flag(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, and InvalidBlockNumber.

Referenced by _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_split_allocate_page().

◆ _hash_kill_items()

void _hash_kill_items ( IndexScanDesc  scan)

Definition at line 536 of file hashutil.c.

537{
539 Relation rel = scan->indexRelation;
540 BlockNumber blkno;
541 Buffer buf;
542 Page page;
543 HashPageOpaque opaque;
544 OffsetNumber offnum,
545 maxoff;
546 int numKilled = so->numKilled;
547 int i;
548 bool killedsomething = false;
549 bool havePin = false;
550
551 Assert(so->numKilled > 0);
552 Assert(so->killedItems != NULL);
554
555 /*
556 * Always reset the scan state, so we don't look for same items on other
557 * pages.
558 */
559 so->numKilled = 0;
560
561 blkno = so->currPos.currPage;
563 {
564 /*
565 * We already have pin on this buffer, so, all we need to do is
566 * acquire lock on it.
567 */
568 havePin = true;
569 buf = so->currPos.buf;
571 }
572 else
574
575 page = BufferGetPage(buf);
576 opaque = HashPageGetOpaque(page);
577 maxoff = PageGetMaxOffsetNumber(page);
578
579 for (i = 0; i < numKilled; i++)
580 {
581 int itemIndex = so->killedItems[i];
582 HashScanPosItem *currItem = &so->currPos.items[itemIndex];
583
584 offnum = currItem->indexOffset;
585
586 Assert(itemIndex >= so->currPos.firstItem &&
587 itemIndex <= so->currPos.lastItem);
588
589 while (offnum <= maxoff)
590 {
591 ItemId iid = PageGetItemId(page, offnum);
592 IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
593
594 if (ItemPointerEquals(&ituple->t_tid, &currItem->heapTid))
595 {
596 /* found the item */
597 ItemIdMarkDead(iid);
598 killedsomething = true;
599 break; /* out of inner search loop */
600 }
601 offnum = OffsetNumberNext(offnum);
602 }
603 }
604
605 /*
606 * Since this can be redone later if needed, mark as dirty hint. Whenever
607 * we mark anything LP_DEAD, we also set the page's
608 * LH_PAGE_HAS_DEAD_TUPLES flag, which is likewise just a hint.
609 */
610 if (killedsomething)
611 {
612 opaque->hasho_flag |= LH_PAGE_HAS_DEAD_TUPLES;
614 }
615
616 if (so->hashso_bucket_buf == so->currPos.buf ||
617 havePin)
619 else
620 _hash_relbuf(rel, buf);
621}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5367
#define HashScanPosIsPinned(scanpos)
Definition: hash.h:130
#define HashScanPosIsValid(scanpos)
Definition: hash.h:137
#define LH_PAGE_HAS_DEAD_TUPLES
Definition: hash.h:61
#define ItemIdMarkDead(itemId)
Definition: itemid.h:179
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
int * killedItems
Definition: hash.h:182
BlockNumber currPage
Definition: hash.h:112
int firstItem
Definition: hash.h:123
ItemPointerData heapTid
Definition: hash.h:105
OffsetNumber indexOffset
Definition: hash.h:106

References _hash_getbuf(), _hash_relbuf(), Assert(), buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HashScanPosData::currPage, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashPageGetOpaque, HashScanPosIsPinned, HashScanPosIsValid, HashScanOpaqueData::hashso_bucket_buf, HashScanPosItem::heapTid, i, HashScanPosItem::indexOffset, IndexScanDescData::indexRelation, ItemIdMarkDead, ItemPointerEquals(), HashScanPosData::items, HashScanOpaqueData::killedItems, LH_OVERFLOW_PAGE, LH_PAGE_HAS_DEAD_TUPLES, LockBuffer(), MarkBufferDirtyHint(), HashScanOpaqueData::numKilled, OffsetNumberNext, IndexScanDescData::opaque, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and IndexTupleData::t_tid.

Referenced by _hash_next(), _hash_readpage(), hashendscan(), and hashrescan().


◆ _hash_next()

bool _hash_next ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 48 of file hashsearch.c.

49{
50 Relation rel = scan->indexRelation;
52 HashScanPosItem *currItem;
53 BlockNumber blkno;
54 Buffer buf;
55 bool end_of_scan = false;
56
57 /*
58 * Advance to the next tuple on the current page; or if done, try to read
59 * data from the next or previous page based on the scan direction. Before
60 * moving to the next or previous page make sure that we deal with all the
61 * killed items.
62 */
64 {
65 if (++so->currPos.itemIndex > so->currPos.lastItem)
66 {
67 if (so->numKilled > 0)
68 _hash_kill_items(scan);
69
70 blkno = so->currPos.nextPage;
71 if (BlockNumberIsValid(blkno))
72 {
74 if (!_hash_readpage(scan, &buf, dir))
75 end_of_scan = true;
76 }
77 else
78 end_of_scan = true;
79 }
80 }
81 else
82 {
83 if (--so->currPos.itemIndex < so->currPos.firstItem)
84 {
85 if (so->numKilled > 0)
86 _hash_kill_items(scan);
87
88 blkno = so->currPos.prevPage;
89 if (BlockNumberIsValid(blkno))
90 {
91 buf = _hash_getbuf(rel, blkno, HASH_READ,
93
94 /*
95 * We always maintain the pin on bucket page for whole scan
96 * operation, so releasing the additional pin we have acquired
97 * here.
98 */
99 if (buf == so->hashso_bucket_buf ||
101 _hash_dropbuf(rel, buf);
102
103 if (!_hash_readpage(scan, &buf, dir))
104 end_of_scan = true;
105 }
106 else
107 end_of_scan = true;
108 }
109 }
110
111 if (end_of_scan)
112 {
113 _hash_dropscanbuf(rel, so);
115 return false;
116 }
117
118 /* OK, itemIndex says what to return */
119 currItem = &so->currPos.items[so->currPos.itemIndex];
120 scan->xs_heaptid = currItem->heapTid;
121
122 return true;
123}
#define HashScanPosInvalidate(scanpos)
Definition: hash.h:144
void _hash_dropscanbuf(Relation rel, HashScanOpaque so)
Definition: hashpage.c:289
void _hash_kill_items(IndexScanDesc scan)
Definition: hashutil.c:536
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
BlockNumber prevPage
Definition: hash.h:114
BlockNumber nextPage
Definition: hash.h:113
int lastItem
Definition: hash.h:124

References _hash_dropbuf(), _hash_dropscanbuf(), _hash_getbuf(), _hash_kill_items(), _hash_readpage(), BlockNumberIsValid(), buf, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashScanPosInvalidate, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, if(), IndexScanDescData::indexRelation, HashScanPosData::itemIndex, HashScanPosData::items, HashScanPosData::lastItem, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, HashScanPosData::nextPage, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, HashScanPosData::prevPage, ScanDirectionIsForward, and IndexScanDescData::xs_heaptid.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_ovflblkno_to_bitno()

uint32 _hash_ovflblkno_to_bitno ( HashMetaPage  metap,
BlockNumber  ovflblkno 
)

Definition at line 62 of file hashovfl.c.

63{
64 uint32 splitnum = metap->hashm_ovflpoint;
65 uint32 i;
66 uint32 bitnum;
67
68 /* Determine the split number containing this page */
69 for (i = 1; i <= splitnum; i++)
70 {
71 if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
72 break; /* oops */
73 bitnum = ovflblkno - _hash_get_totalbuckets(i);
74
75 /*
76 * bitnum has to be greater than number of overflow page added in
77 * previous split point. The overflow page at this splitnum (i) if any
78 * should start from (_hash_get_totalbuckets(i) +
79 * metap->hashm_spares[i - 1] + 1).
80 */
81 if (bitnum > metap->hashm_spares[i - 1] &&
82 bitnum <= metap->hashm_spares[i])
83 return bitnum - 1; /* -1 to convert 1-based to 0-based */
84 }
85
87 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
88 errmsg("invalid overflow block number %u", ovflblkno)));
89 return 0; /* keep compiler quiet */
90}

References _hash_get_totalbuckets(), ereport, errcode(), errmsg(), ERROR, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().
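The mapping can be exercised standalone with toy numbers. In the sketch below (not server code), totalbuckets[] stands in for _hash_get_totalbuckets(), spares[] for hashm_spares[], and block number 5 corresponds to a single overflow page allocated at splitpoint 2 under those assumed values.

/*
 * Standalone sketch: block number -> overflow bit number, mirroring the
 * loop in _hash_ovflblkno_to_bitno with a toy layout.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint32_t totalbuckets[] = {1, 2, 4};   /* stand-in for _hash_get_totalbuckets(i) */
    uint32_t spares[]       = {0, 0, 1};   /* stand-in for hashm_spares[i] */
    uint32_t ovflpoint = 2;                /* stand-in for hashm_ovflpoint */
    uint32_t ovflblkno = 5;                /* hypothetical overflow block number */

    for (uint32_t i = 1; i <= ovflpoint; i++)
    {
        uint32_t bitnum;

        if (ovflblkno <= totalbuckets[i])
            break;                         /* cannot be an overflow block */
        bitnum = ovflblkno - totalbuckets[i];
        if (bitnum > spares[i - 1] && bitnum <= spares[i])
        {
            printf("block %u is overflow bit %u\n",
                   (unsigned) ovflblkno, (unsigned) (bitnum - 1));
            return 0;
        }
    }
    printf("block %u is not a recognized overflow block\n", (unsigned) ovflblkno);
    return 0;
}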

◆ _hash_pageinit()

void _hash_pageinit ( Page  page,
Size  size 
)

Definition at line 596 of file hashpage.c.

597{
598 PageInit(page, size, sizeof(HashPageOpaqueData));
599}
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

References PageInit().

Referenced by _hash_alloc_buckets(), _hash_freeovflpage(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), and hash_xlog_squeeze_page().

◆ _hash_pgaddmultitup()

void _hash_pgaddmultitup ( Relation  rel,
Buffer  buf,
IndexTuple itups,
OffsetNumber itup_offsets,
uint16  nitups 
)

Definition at line 331 of file hashinsert.c.

333{
334 OffsetNumber itup_off;
335 Page page;
336 uint32 hashkey;
337 int i;
338
340 page = BufferGetPage(buf);
341
342 for (i = 0; i < nitups; i++)
343 {
344 Size itemsize;
345
346 itemsize = IndexTupleSize(itups[i]);
347 itemsize = MAXALIGN(itemsize);
348
349 /* Find where to insert the tuple (preserving page's hashkey ordering) */
350 hashkey = _hash_get_indextuple_hashkey(itups[i]);
351 itup_off = _hash_binsearch(page, hashkey);
352
353 itup_offsets[i] = itup_off;
354
355 if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
357 elog(ERROR, "failed to add index item to \"%s\"",
359 }
360}
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:472
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition: hashutil.c:350
Pointer Item
Definition: item.h:17
#define InvalidOffsetNumber
Definition: off.h:26

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), buf, BufferGetPage(), elog, ERROR, i, IndexTupleSize(), InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAXALIGN, PageAddItem, and RelationGetRelationName.

Referenced by _hash_freeovflpage(), _hash_splitbucket(), and _hash_squeezebucket().

◆ _hash_pgaddtup()

OffsetNumber _hash_pgaddtup ( Relation  rel,
Buffer  buf,
Size  itemsize,
IndexTuple  itup,
bool  appendtup 
)

Definition at line 274 of file hashinsert.c.

276{
277 OffsetNumber itup_off;
278 Page page;
279
281 page = BufferGetPage(buf);
282
283 /*
284 * Find where to insert the tuple (preserving page's hashkey ordering). If
285 * 'appendtup' is true then we just insert it at the end.
286 */
287 if (appendtup)
288 {
289 itup_off = PageGetMaxOffsetNumber(page) + 1;
290
291#ifdef USE_ASSERT_CHECKING
292 /* ensure this tuple's hashkey is >= the final existing tuple */
293 if (PageGetMaxOffsetNumber(page) > 0)
294 {
295 IndexTuple lasttup;
296 ItemId itemid;
297
298 itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
299 lasttup = (IndexTuple) PageGetItem(page, itemid);
300
303 }
304#endif
305 }
306 else
307 {
308 uint32 hashkey = _hash_get_indextuple_hashkey(itup);
309
310 itup_off = _hash_binsearch(page, hashkey);
311 }
312
313 if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
315 elog(ERROR, "failed to add index item to \"%s\"",
317
318 return itup_off;
319}

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), Assert(), buf, BufferGetPage(), elog, ERROR, InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, PageAddItem, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and RelationGetRelationName.

Referenced by _hash_doinsert().
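The "keep the page ordered by hash key" rule boils down to a lower-bound binary search. The sketch below is plain C, not the server's _hash_binsearch; it shows the search that determines the insertion point, and why the appendtup path can skip it when the new key is known to be the largest.

/*
 * Standalone sketch: find the position of the first element >= key in a
 * sorted array of hash codes; that position is also the insertion point.
 */
#include <stdio.h>
#include <stdint.h>

static size_t
lower_bound(const uint32_t *keys, size_t n, uint32_t key)
{
    size_t lo = 0, hi = n;          /* search the half-open range [lo, hi) */

    while (lo < hi)
    {
        size_t mid = lo + (hi - lo) / 2;

        if (keys[mid] < key)
            lo = mid + 1;
        else
            hi = mid;
    }
    return lo;                      /* first index with keys[index] >= key */
}

int
main(void)
{
    uint32_t keys[] = {10, 20, 20, 40, 90};   /* hash codes already on the page */
    uint32_t newkey = 25;

    printf("insert hash %u at position %zu\n",
           (unsigned) newkey, lower_bound(keys, 5, newkey));
    /* If the caller already knows newkey >= every existing key (the sorted
     * build path), it can simply append at position n, as appendtup=true does. */
    return 0;
}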

◆ _hash_relbuf()

◆ _hash_spareindex()

uint32 _hash_spareindex ( uint32  num_bucket)

Definition at line 142 of file hashutil.c.

143{
144 uint32 splitpoint_group;
145 uint32 splitpoint_phases;
146
147 splitpoint_group = pg_ceil_log2_32(num_bucket);
148
149 if (splitpoint_group < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
150 return splitpoint_group;
151
152 /* account for single-phase groups */
153 splitpoint_phases = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;
154
155 /* account for multi-phase groups before splitpoint_group */
156 splitpoint_phases +=
157 ((splitpoint_group - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) <<
159
160 /* account for phases within current group */
161 splitpoint_phases +=
162 (((num_bucket - 1) >>
163 (splitpoint_group - (HASH_SPLITPOINT_PHASE_BITS + 1))) &
164 HASH_SPLITPOINT_PHASE_MASK); /* to 0-based value. */
165
166 return splitpoint_phases;
167}
static uint32 pg_ceil_log2_32(uint32 num)
Definition: pg_bitutils.h:258

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, HASH_SPLITPOINT_PHASE_MASK, and pg_ceil_log2_32().

Referenced by _hash_expandtable(), and _hash_init_metabuffer().
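Since the whole body is arithmetic, it can be replayed standalone. In the sketch below, ceil_log2() is a portable stand-in for pg_ceil_log2_32(), the constants mirror the HASH_SPLITPOINT_* macros, and the sample bucket counts are arbitrary.

/*
 * Standalone sketch: splitpoint-phase arithmetic as in _hash_spareindex.
 */
#include <stdio.h>
#include <stdint.h>

#define GROUPS_WITH_ONE_PHASE 10
#define PHASE_BITS            2
#define PHASE_MASK            ((1 << PHASE_BITS) - 1)

static uint32_t
ceil_log2(uint32_t n)              /* smallest k with 2^k >= n, for n >= 1 */
{
    uint32_t k = 0;
    while (((uint32_t) 1 << k) < n)
        k++;
    return k;
}

static uint32_t
spareindex(uint32_t num_bucket)
{
    uint32_t group = ceil_log2(num_bucket);
    uint32_t phases;

    if (group < GROUPS_WITH_ONE_PHASE)
        return group;                                        /* early groups: one phase each */

    phases = GROUPS_WITH_ONE_PHASE;                          /* the single-phase groups */
    phases += (group - GROUPS_WITH_ONE_PHASE) << PHASE_BITS; /* full four-phase groups */
    phases += ((num_bucket - 1) >> (group - (PHASE_BITS + 1))) & PHASE_MASK;
    return phases;
}

int
main(void)
{
    uint32_t samples[] = {2, 300, 513, 1024, 1500, 100000};

    for (int i = 0; i < 6; i++)
        printf("num_bucket=%u -> spare index %u\n",
               (unsigned) samples[i], (unsigned) spareindex(samples[i]));
    return 0;
}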

◆ _hash_squeezebucket()

void _hash_squeezebucket ( Relation  rel,
Bucket  bucket,
BlockNumber  bucket_blkno,
Buffer  bucket_buf,
BufferAccessStrategy  bstrategy 
)

Definition at line 842 of file hashovfl.c.

847{
848 BlockNumber wblkno;
849 BlockNumber rblkno;
850 Buffer wbuf;
851 Buffer rbuf;
852 Page wpage;
853 Page rpage;
854 HashPageOpaque wopaque;
855 HashPageOpaque ropaque;
856
857 /*
858 * start squeezing into the primary bucket page.
859 */
860 wblkno = bucket_blkno;
861 wbuf = bucket_buf;
862 wpage = BufferGetPage(wbuf);
863 wopaque = HashPageGetOpaque(wpage);
864
865 /*
866 * if there aren't any overflow pages, there's nothing to squeeze. caller
867 * is responsible for releasing the pin on primary bucket page.
868 */
869 if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
870 {
872 return;
873 }
874
875 /*
876 * Find the last page in the bucket chain by starting at the base bucket
877 * page and working forward. Note: we assume that a hash bucket chain is
878 * usually smaller than the buffer ring being used by VACUUM, else using
879 * the access strategy here would be counterproductive.
880 */
881 rbuf = InvalidBuffer;
882 ropaque = wopaque;
883 do
884 {
885 rblkno = ropaque->hasho_nextblkno;
886 if (rbuf != InvalidBuffer)
887 _hash_relbuf(rel, rbuf);
889 rblkno,
892 bstrategy);
893 rpage = BufferGetPage(rbuf);
894 ropaque = HashPageGetOpaque(rpage);
895 Assert(ropaque->hasho_bucket == bucket);
896 } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
897
898 /*
899 * squeeze the tuples.
900 */
901 for (;;)
902 {
903 OffsetNumber roffnum;
904 OffsetNumber maxroffnum;
905 OffsetNumber deletable[MaxOffsetNumber];
907 Size tups_size[MaxIndexTuplesPerPage];
909 uint16 ndeletable = 0;
910 uint16 nitups = 0;
911 Size all_tups_size = 0;
912 int i;
913 bool retain_pin = false;
914
915readpage:
916 /* Scan each tuple in "read" page */
917 maxroffnum = PageGetMaxOffsetNumber(rpage);
918 for (roffnum = FirstOffsetNumber;
919 roffnum <= maxroffnum;
920 roffnum = OffsetNumberNext(roffnum))
921 {
922 IndexTuple itup;
923 Size itemsz;
924
925 /* skip dead tuples */
926 if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
927 continue;
928
929 itup = (IndexTuple) PageGetItem(rpage,
930 PageGetItemId(rpage, roffnum));
931 itemsz = IndexTupleSize(itup);
932 itemsz = MAXALIGN(itemsz);
933
934 /*
935 * Walk up the bucket chain, looking for a page big enough for
936 * this item and all other accumulated items. Exit if we reach
937 * the read page.
938 */
939 while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
940 {
941 Buffer next_wbuf = InvalidBuffer;
942 bool tups_moved = false;
943
944 Assert(!PageIsEmpty(wpage));
945
946 if (wblkno == bucket_blkno)
947 retain_pin = true;
948
949 wblkno = wopaque->hasho_nextblkno;
950 Assert(BlockNumberIsValid(wblkno));
951
952 /* don't need to move to next page if we reached the read page */
953 if (wblkno != rblkno)
954 next_wbuf = _hash_getbuf_with_strategy(rel,
955 wblkno,
958 bstrategy);
959
960 if (nitups > 0)
961 {
962 Assert(nitups == ndeletable);
963
964 /*
965 * This operation needs to log multiple tuples, prepare
966 * WAL for that.
967 */
968 if (RelationNeedsWAL(rel))
969 XLogEnsureRecordSpace(0, 3 + nitups);
970
972
973 /*
974 * we have to insert tuples on the "write" page, being
975 * careful to preserve hashkey ordering. (If we insert
976 * many tuples into the same "write" page it would be
977 * worth qsort'ing them).
978 */
979 _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
980 MarkBufferDirty(wbuf);
981
982 /* Delete tuples we already moved off read page */
983 PageIndexMultiDelete(rpage, deletable, ndeletable);
984 MarkBufferDirty(rbuf);
985
986 /* XLOG stuff */
987 if (RelationNeedsWAL(rel))
988 {
989 XLogRecPtr recptr;
991
992 xlrec.ntups = nitups;
993 xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
994
997
998 /*
999 * bucket buffer was not changed, but still needs to
1000 * be registered to ensure that we can acquire a
1001 * cleanup lock on it during replay.
1002 */
1003 if (!xlrec.is_prim_bucket_same_wrt)
1004 {
1006
1007 XLogRegisterBuffer(0, bucket_buf, flags);
1008 }
1009
1011 XLogRegisterBufData(1, itup_offsets,
1012 nitups * sizeof(OffsetNumber));
1013 for (i = 0; i < nitups; i++)
1014 XLogRegisterBufData(1, itups[i], tups_size[i]);
1015
1017 XLogRegisterBufData(2, deletable,
1018 ndeletable * sizeof(OffsetNumber));
1019
1020 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
1021
1022 PageSetLSN(BufferGetPage(wbuf), recptr);
1023 PageSetLSN(BufferGetPage(rbuf), recptr);
1024 }
1025
1027
1028 tups_moved = true;
1029 }
1030
1031 /*
1032 * release the lock on previous page after acquiring the lock
1033 * on next page
1034 */
1035 if (retain_pin)
1037 else
1038 _hash_relbuf(rel, wbuf);
1039
1040 /* nothing more to do if we reached the read page */
1041 if (rblkno == wblkno)
1042 {
1043 _hash_relbuf(rel, rbuf);
1044 return;
1045 }
1046
1047 wbuf = next_wbuf;
1048 wpage = BufferGetPage(wbuf);
1049 wopaque = HashPageGetOpaque(wpage);
1050 Assert(wopaque->hasho_bucket == bucket);
1051 retain_pin = false;
1052
1053 /* be tidy */
1054 for (i = 0; i < nitups; i++)
1055 pfree(itups[i]);
1056 nitups = 0;
1057 all_tups_size = 0;
1058 ndeletable = 0;
1059
1060 /*
1061 * after moving the tuples, rpage would have been compacted,
1062 * so we need to rescan it.
1063 */
1064 if (tups_moved)
1065 goto readpage;
1066 }
1067
1068 /* remember tuple for deletion from "read" page */
1069 deletable[ndeletable++] = roffnum;
1070
1071 /*
1072 * we need a copy of index tuples as they can be freed as part of
1073 * overflow page, however we need them to write a WAL record in
1074 * _hash_freeovflpage.
1075 */
1076 itups[nitups] = CopyIndexTuple(itup);
1077 tups_size[nitups++] = itemsz;
1078 all_tups_size += itemsz;
1079 }
1080
1081 /*
1082 * If we reach here, there are no live tuples on the "read" page ---
1083 * it was empty when we got to it, or we moved them all. So we can
1084 * just free the page without bothering with deleting tuples
1085 * individually. Then advance to the previous "read" page.
1086 *
1087 * Tricky point here: if our read and write pages are adjacent in the
1088 * bucket chain, our write lock on wbuf will conflict with
1089 * _hash_freeovflpage's attempt to update the sibling links of the
1090 * removed page. In that case, we don't need to lock it again.
1091 */
1092 rblkno = ropaque->hasho_prevblkno;
1093 Assert(BlockNumberIsValid(rblkno));
1094
1095 /* free this overflow page (releases rbuf) */
1096 _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
1097 tups_size, nitups, bstrategy);
1098
1099 /* be tidy */
1100 for (i = 0; i < nitups; i++)
1101 pfree(itups[i]);
1102
1103 /* are we freeing the page adjacent to wbuf? */
1104 if (rblkno == wblkno)
1105 {
1106 /* retain the pin on primary bucket page till end of bucket scan */
1107 if (wblkno == bucket_blkno)
1109 else
1110 _hash_relbuf(rel, wbuf);
1111 return;
1112 }
1113
1114 rbuf = _hash_getbuf_with_strategy(rel,
1115 rblkno,
1116 HASH_WRITE,
1118 bstrategy);
1119 rpage = BufferGetPage(rbuf);
1120 ropaque = HashPageGetOpaque(rpage);
1121 Assert(ropaque->hasho_bucket == bucket);
1122 }
1123
1124 /* NOTREACHED */
1125}
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition: bufpage.c:933
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1160
static bool PageIsEmpty(const PageData *page)
Definition: bufpage.h:224
uint16_t uint16
Definition: c.h:501
#define SizeOfHashMovePageContents
Definition: hash_xlog.h:138
#define XLOG_HASH_MOVE_PAGE_CONTENTS
Definition: hash_xlog.h:34
BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)
Definition: hashovfl.c:490
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:547
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define MaxIndexTuplesPerPage
Definition: itup.h:181
#define MaxOffsetNumber
Definition: off.h:28

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), CopyIndexTuple(), END_CRIT_SECTION, FirstOffsetNumber, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, i, IndexTupleSize(), InvalidBuffer, xl_hash_move_page_contents::is_prim_bucket_same_wrt, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageIsEmpty(), PageSetLSN(), pfree(), REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().
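The squeeze itself is easiest to see with a toy model. The sketch below (not server code) treats a bucket as a small array of fixed-capacity pages, pulls items from the tail ("read") page into the earliest page with room (the "write" page), and frees emptied tail pages; locking, WAL logging and hash-key ordering are deliberately omitted.

/*
 * Standalone toy model of the squeeze idea in _hash_squeezebucket.
 */
#include <stdio.h>

#define NPAGES   4
#define CAPACITY 4

int
main(void)
{
    int used[NPAGES] = {4, 1, 2, 2};   /* hypothetical tuples per page in the chain */
    int w = 0, r = NPAGES - 1;

    while (w < r)
    {
        if (used[w] == CAPACITY)       /* write page full: advance to the next page */
        {
            w++;
            continue;
        }
        if (used[r] == 0)              /* read page drained: free it from the tail */
        {
            printf("freeing overflow page %d\n", r);
            r--;
            continue;
        }
        used[r]--;                     /* move one tuple from read page to write page */
        used[w]++;
    }

    for (int i = 0; i <= r; i++)
        printf("page %d now holds %d tuples\n", i, used[i]);
    return 0;
}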

◆ hashadjustmembers()

void hashadjustmembers ( Oid  opfamilyoid,
Oid  opclassoid,
List operators,
List functions 
)

Definition at line 263 of file hashvalidate.c.

267{
268 Oid opcintype;
269 ListCell *lc;
270
271 /*
272 * Hash operators and required support functions are always "loose"
273 * members of the opfamily if they are cross-type. If they are not
274 * cross-type, we prefer to tie them to the appropriate opclass ... but if
275 * the user hasn't created one, we can't do that, and must fall back to
276 * using the opfamily dependency. (We mustn't force creation of an
277 * opclass in such a case, as leaving an incomplete opclass laying about
278 * would be bad. Throwing an error is another undesirable alternative.)
279 *
280 * This behavior results in a bit of a dump/reload hazard, in that the
281 * order of restoring objects could affect what dependencies we end up
282 * with. pg_dump's existing behavior will preserve the dependency choices
283 * in most cases, but not if a cross-type operator has been bound tightly
284 * into an opclass. That's a mistake anyway, so silently "fixing" it
285 * isn't awful.
286 *
287 * Optional support functions are always "loose" family members.
288 *
289 * To avoid repeated lookups, we remember the most recently used opclass's
290 * input type.
291 */
292 if (OidIsValid(opclassoid))
293 {
294 /* During CREATE OPERATOR CLASS, need CCI to see the pg_opclass row */
296 opcintype = get_opclass_input_type(opclassoid);
297 }
298 else
299 opcintype = InvalidOid;
300
301 /*
302 * We handle operators and support functions almost identically, so rather
303 * than duplicate this code block, just join the lists.
304 */
305 foreach(lc, list_concat_copy(operators, functions))
306 {
308
309 if (op->is_func && op->number != HASHSTANDARD_PROC)
310 {
311 /* Optional support proc, so always a soft family dependency */
312 op->ref_is_hard = false;
313 op->ref_is_family = true;
314 op->refobjid = opfamilyoid;
315 }
316 else if (op->lefttype != op->righttype)
317 {
318 /* Cross-type, so always a soft family dependency */
319 op->ref_is_hard = false;
320 op->ref_is_family = true;
321 op->refobjid = opfamilyoid;
322 }
323 else
324 {
325 /* Not cross-type; is there a suitable opclass? */
326 if (op->lefttype != opcintype)
327 {
328 /* Avoid repeating this expensive lookup, even if it fails */
329 opcintype = op->lefttype;
330 opclassoid = opclass_for_family_datatype(HASH_AM_OID,
331 opfamilyoid,
332 opcintype);
333 }
334 if (OidIsValid(opclassoid))
335 {
336 /* Hard dependency on opclass */
337 op->ref_is_hard = true;
338 op->ref_is_family = false;
339 op->refobjid = opclassoid;
340 }
341 else
342 {
343 /* We're stuck, so make a soft dependency on the opfamily */
344 op->ref_is_hard = false;
345 op->ref_is_family = true;
346 op->refobjid = opfamilyoid;
347 }
348 }
349 }
350}
Oid opclass_for_family_datatype(Oid amoid, Oid opfamilyoid, Oid datatypeoid)
Definition: amvalidate.c:236
#define OidIsValid(objectId)
Definition: c.h:746
List * list_concat_copy(const List *list1, const List *list2)
Definition: list.c:598
Oid get_opclass_input_type(Oid opclass)
Definition: lsyscache.c:1304
#define lfirst(lc)
Definition: pg_list.h:172
Oid refobjid
Definition: amapi.h:96
Oid lefttype
Definition: amapi.h:91
bool ref_is_family
Definition: amapi.h:95
Oid righttype
Definition: amapi.h:92
int number
Definition: amapi.h:90
bool is_func
Definition: amapi.h:88
bool ref_is_hard
Definition: amapi.h:94
void CommandCounterIncrement(void)
Definition: xact.c:1100

References CommandCounterIncrement(), functions, get_opclass_input_type(), HASHSTANDARD_PROC, InvalidOid, OpFamilyMember::is_func, OpFamilyMember::lefttype, lfirst, list_concat_copy(), OpFamilyMember::number, OidIsValid, opclass_for_family_datatype(), OpFamilyMember::ref_is_family, OpFamilyMember::ref_is_hard, OpFamilyMember::refobjid, and OpFamilyMember::righttype.

Referenced by hashhandler().

◆ hashbeginscan()

IndexScanDesc hashbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 374 of file hash.c.

375{
376 IndexScanDesc scan;
378
379 /* no order by operators allowed */
380 Assert(norderbys == 0);
381
382 scan = RelationGetIndexScan(rel, nkeys, norderbys);
383
388
389 so->hashso_buc_populated = false;
390 so->hashso_buc_split = false;
391
392 so->killedItems = NULL;
393 so->numKilled = 0;
394
395 scan->opaque = so;
396
397 return scan;
398}
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80
void * palloc(Size size)
Definition: mcxt.c:1940

References Assert(), HashScanOpaqueData::currPos, HashScanPosInvalidate, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, InvalidBuffer, HashScanOpaqueData::killedItems, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and RelationGetIndexScan().

Referenced by hashhandler().

◆ hashbucketcleanup()

void hashbucketcleanup ( Relation  rel,
Bucket  cur_bucket,
Buffer  bucket_buf,
BlockNumber  bucket_blkno,
BufferAccessStrategy  bstrategy,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask,
double *  tuples_removed,
double *  num_index_tuples,
bool  split_cleanup,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 690 of file hash.c.

696{
697 BlockNumber blkno;
698 Buffer buf;
700 bool bucket_dirty = false;
701
702 blkno = bucket_blkno;
703 buf = bucket_buf;
704
705 if (split_cleanup)
706 new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
707 lowmask, maxbucket);
708
709 /* Scan each page in bucket */
710 for (;;)
711 {
712 HashPageOpaque opaque;
713 OffsetNumber offno;
714 OffsetNumber maxoffno;
715 Buffer next_buf;
716 Page page;
717 OffsetNumber deletable[MaxOffsetNumber];
718 int ndeletable = 0;
719 bool retain_pin = false;
720 bool clear_dead_marking = false;
721
722 vacuum_delay_point(false);
723
724 page = BufferGetPage(buf);
725 opaque = HashPageGetOpaque(page);
726
727 /* Scan each tuple in page */
728 maxoffno = PageGetMaxOffsetNumber(page);
729 for (offno = FirstOffsetNumber;
730 offno <= maxoffno;
731 offno = OffsetNumberNext(offno))
732 {
733 ItemPointer htup;
734 IndexTuple itup;
735 Bucket bucket;
736 bool kill_tuple = false;
737
738 itup = (IndexTuple) PageGetItem(page,
739 PageGetItemId(page, offno));
740 htup = &(itup->t_tid);
741
742 /*
743 * To remove the dead tuples, we strictly want to rely on results
744 * of callback function. refer btvacuumpage for detailed reason.
745 */
746 if (callback && callback(htup, callback_state))
747 {
748 kill_tuple = true;
749 if (tuples_removed)
750 *tuples_removed += 1;
751 }
752 else if (split_cleanup)
753 {
754 /* delete the tuples that are moved by split. */
756 maxbucket,
757 highmask,
758 lowmask);
759 /* mark the item for deletion */
760 if (bucket != cur_bucket)
761 {
762 /*
763 * We expect tuples to either belong to current bucket or
764 * new_bucket. This is ensured because we don't allow
765 * further splits from bucket that contains garbage. See
766 * comments in _hash_expandtable.
767 */
768 Assert(bucket == new_bucket);
769 kill_tuple = true;
770 }
771 }
772
773 if (kill_tuple)
774 {
775 /* mark the item for deletion */
776 deletable[ndeletable++] = offno;
777 }
778 else
779 {
780 /* we're keeping it, so count it */
781 if (num_index_tuples)
782 *num_index_tuples += 1;
783 }
784 }
785
786 /* retain the pin on primary bucket page till end of bucket scan */
787 if (blkno == bucket_blkno)
788 retain_pin = true;
789 else
790 retain_pin = false;
791
792 blkno = opaque->hasho_nextblkno;
793
794 /*
795 * Apply deletions, advance to next page and write page if needed.
796 */
797 if (ndeletable > 0)
798 {
799 /* No ereport(ERROR) until changes are logged */
801
802 PageIndexMultiDelete(page, deletable, ndeletable);
803 bucket_dirty = true;
804
805 /*
806 * Let us mark the page as clean if vacuum removes the DEAD tuples
807 * from an index page. We do this by clearing
808 * LH_PAGE_HAS_DEAD_TUPLES flag.
809 */
810 if (tuples_removed && *tuples_removed > 0 &&
811 H_HAS_DEAD_TUPLES(opaque))
812 {
813 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
814 clear_dead_marking = true;
815 }
816
818
819 /* XLOG stuff */
820 if (RelationNeedsWAL(rel))
821 {
822 xl_hash_delete xlrec;
823 XLogRecPtr recptr;
824
825 xlrec.clear_dead_marking = clear_dead_marking;
826 xlrec.is_primary_bucket_page = (buf == bucket_buf);
827
830
831 /*
832 * bucket buffer was not changed, but still needs to be
833 * registered to ensure that we can acquire a cleanup lock on
834 * it during replay.
835 */
836 if (!xlrec.is_primary_bucket_page)
837 {
839
840 XLogRegisterBuffer(0, bucket_buf, flags);
841 }
842
844 XLogRegisterBufData(1, deletable,
845 ndeletable * sizeof(OffsetNumber));
846
847 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
848 PageSetLSN(BufferGetPage(buf), recptr);
849 }
850
852 }
853
854 /* bail out if there are no more pages to scan. */
855 if (!BlockNumberIsValid(blkno))
856 break;
857
858 next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
860 bstrategy);
861
862 /*
863 * release the lock on previous page after acquiring the lock on next
864 * page
865 */
866 if (retain_pin)
868 else
869 _hash_relbuf(rel, buf);
870
871 buf = next_buf;
872 }
873
874 /*
875 * lock the bucket page to clear the garbage flag and squeeze the bucket.
876 * if the current buffer is same as bucket buffer, then we already have
877 * lock on bucket page.
878 */
879 if (buf != bucket_buf)
880 {
881 _hash_relbuf(rel, buf);
883 }
884
885 /*
886 * Clear the garbage flag from bucket after deleting the tuples that are
887 * moved by split. We purposefully clear the flag before squeeze bucket,
888 * so that after restart, vacuum shouldn't again try to delete the moved
889 * by split tuples.
890 */
891 if (split_cleanup)
892 {
893 HashPageOpaque bucket_opaque;
894 Page page;
895
896 page = BufferGetPage(bucket_buf);
897 bucket_opaque = HashPageGetOpaque(page);
898
899 /* No ereport(ERROR) until changes are logged */
901
902 bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
903 MarkBufferDirty(bucket_buf);
904
905 /* XLOG stuff */
906 if (RelationNeedsWAL(rel))
907 {
908 XLogRecPtr recptr;
909
911 XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
912
913 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
914 PageSetLSN(page, recptr);
915 }
916
918 }
919
920 /*
921 * If we have deleted anything, try to compact free space. For squeezing
922 * the bucket, we must have a cleanup lock, else it can impact the
923 * ordering of tuples for a scan that has started before it.
924 */
925 if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
926 _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
927 bstrategy);
928 else
929 LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
930}
#define XLOG_HASH_SPLIT_CLEANUP
Definition: hash_xlog.h:37
#define SizeOfHashDelete
Definition: hash_xlog.h:186
#define XLOG_HASH_DELETE
Definition: hash_xlog.h:36
void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)
Definition: hashovfl.c:842
bool clear_dead_marking
Definition: hash_xlog.h:180
bool is_primary_bucket_page
Definition: hash_xlog.h:182
void vacuum_delay_point(bool is_analyze)
Definition: vacuum.c:2402

References _hash_get_indextuple_hashkey(), _hash_get_newbucket_from_oldbucket(), _hash_getbuf_with_strategy(), _hash_hashkey2bucket(), _hash_relbuf(), _hash_squeezebucket(), Assert(), BlockNumberIsValid(), buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), callback(), xl_hash_delete::clear_dead_marking, END_CRIT_SECTION, FirstOffsetNumber, H_HAS_DEAD_TUPLES, HASH_WRITE, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, InvalidBucket, xl_hash_delete::is_primary_bucket_page, IsBufferCleanupOK(), LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MaxOffsetNumber, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashDelete, START_CRIT_SECTION, IndexTupleData::t_tid, vacuum_delay_point(), XLOG_HASH_DELETE, XLOG_HASH_SPLIT_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_expandtable(), _hash_splitbucket(), and hashbulkdelete().

◆ hashbuild()

IndexBuildResult * hashbuild ( Relation  heap,
Relation  index,
struct IndexInfo indexInfo 
)

Definition at line 122 of file hash.c.

123{
124 IndexBuildResult *result;
125 BlockNumber relpages;
126 double reltuples;
127 double allvisfrac;
128 uint32 num_buckets;
129 Size sort_threshold;
130 HashBuildState buildstate;
131
132 /*
133 * We expect to be called exactly once for any index relation. If that's
134 * not the case, big trouble's what we have.
135 */
137 elog(ERROR, "index \"%s\" already contains data",
139
140 /* Estimate the number of rows currently present in the table */
141 estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
142
143 /* Initialize the hash index metadata page and initial buckets */
144 num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
145
146 /*
147 * If we just insert the tuples into the index in scan order, then
148 * (assuming their hash codes are pretty random) there will be no locality
149 * of access to the index, and if the index is bigger than available RAM
150 * then we'll thrash horribly. To prevent that scenario, we can sort the
151 * tuples by (expected) bucket number. However, such a sort is useless
152 * overhead when the index does fit in RAM. We choose to sort if the
153 * initial index size exceeds maintenance_work_mem, or the number of
154 * buffers usable for the index, whichever is less. (Limiting by the
155 * number of buffers should reduce thrashing between PG buffers and kernel
156 * buffers, which seems useful even if no physical I/O results. Limiting
157 * by maintenance_work_mem is useful to allow easy testing of the sort
158 * code path, and may be useful to DBAs as an additional control knob.)
159 *
160 * NOTE: this test will need adjustment if a bucket is ever different from
161 * one page. Also, "initial index size" accounting does not include the
162 * metapage, nor the first bitmap page.
163 */
164 sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
165 if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
166 sort_threshold = Min(sort_threshold, NBuffers);
167 else
168 sort_threshold = Min(sort_threshold, NLocBuffer);
169
170 if (num_buckets >= sort_threshold)
171 buildstate.spool = _h_spoolinit(heap, index, num_buckets);
172 else
173 buildstate.spool = NULL;
174
175 /* prepare to build the index */
176 buildstate.indtuples = 0;
177 buildstate.heapRel = heap;
178
179 /* do the heap scan */
180 reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
182 &buildstate, NULL);
184 buildstate.indtuples);
185
186 if (buildstate.spool)
187 {
188 /* sort the tuples and insert them into the index */
189 _h_indexbuild(buildstate.spool, buildstate.heapRel);
190 _h_spooldestroy(buildstate.spool);
191 }
192
193 /*
194 * Return statistics
195 */
196 result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
197
198 result->heap_tuples = reltuples;
199 result->index_tuples = buildstate.indtuples;
200
201 return result;
202}
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:280
#define Min(x, y)
Definition: c.h:975
int NBuffers
Definition: globals.c:143
static void hashbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: hash.c:217
uint32 _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
Definition: hashpage.c:327
void _h_indexbuild(HSpool *hspool, Relation heapRel)
Definition: hashsort.c:120
HSpool * _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
Definition: hashsort.c:60
void _h_spooldestroy(HSpool *hspool)
Definition: hashsort.c:99
int NLocBuffer
Definition: localbuf.c:44
void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: plancat.c:1069
#define PROGRESS_CREATEIDX_TUPLES_TOTAL
Definition: progress.h:89
HSpool * spool
Definition: hash.c:40
Relation heapRel
Definition: hash.c:42
double indtuples
Definition: hash.c:41
double heap_tuples
Definition: genam.h:55
double index_tuples
Definition: genam.h:56
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1735

References _h_indexbuild(), _h_spooldestroy(), _h_spoolinit(), _hash_init(), elog, ERROR, estimate_rel_size(), hashbuildCallback(), IndexBuildResult::heap_tuples, HashBuildState::heapRel, IndexBuildResult::index_tuples, HashBuildState::indtuples, MAIN_FORKNUM, maintenance_work_mem, Min, NBuffers, NLocBuffer, palloc(), pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_TOTAL, RelationGetNumberOfBlocks, RelationGetRelationName, HashBuildState::spool, and table_index_build_scan().

Referenced by hashhandler().
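The sort-or-not decision is simple arithmetic and can be sketched standalone. In the example below (not server code), the maintenance_work_mem and shared_buffers figures are assumed defaults (64 MB and 128 MB), the bucket count is hypothetical, and the index is taken to be non-temporary (a temporary index would be capped by NLocBuffer instead).

/*
 * Standalone sketch: hashbuild's threshold for spooling and sorting tuples
 * by bucket number before loading them.
 */
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
    size_t maintenance_work_mem_kb = 65536;   /* assumed 64 MB default */
    size_t nbuffers = 16384;                  /* assumed 128 MB shared_buffers, 8 kB pages */
    size_t blcksz = 8192;
    size_t num_buckets = 20000;               /* hypothetical initial bucket count */

    size_t sort_threshold = (maintenance_work_mem_kb * 1024) / blcksz;
    if (sort_threshold > nbuffers)            /* Min(sort_threshold, NBuffers) */
        sort_threshold = nbuffers;

    printf("sort_threshold = %zu buckets\n", sort_threshold);
    printf("%s\n", num_buckets >= sort_threshold
           ? "index likely exceeds RAM: spool and sort by bucket number"
           : "index fits: insert tuples in heap-scan order");
    return 0;
}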

◆ hashbuildempty()

void hashbuildempty ( Relation  index)

Definition at line 208 of file hash.c.

209{
211}

References _hash_init(), and INIT_FORKNUM.

Referenced by hashhandler().

◆ hashbulkdelete()

IndexBulkDeleteResult * hashbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 465 of file hash.c.

467{
468 Relation rel = info->index;
469 double tuples_removed;
470 double num_index_tuples;
471 double orig_ntuples;
472 Bucket orig_maxbucket;
473 Bucket cur_maxbucket;
474 Bucket cur_bucket;
475 Buffer metabuf = InvalidBuffer;
476 HashMetaPage metap;
477 HashMetaPage cachedmetap;
478
479 tuples_removed = 0;
480 num_index_tuples = 0;
481
482 /*
483 * We need a copy of the metapage so that we can use its hashm_spares[]
484 * values to compute bucket page addresses, but a cached copy should be
485 * good enough. (If not, we'll detect that further down and refresh the
486 * cache as necessary.)
487 */
488 cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
489 Assert(cachedmetap != NULL);
490
491 orig_maxbucket = cachedmetap->hashm_maxbucket;
492 orig_ntuples = cachedmetap->hashm_ntuples;
493
494 /* Scan the buckets that we know exist */
495 cur_bucket = 0;
496 cur_maxbucket = orig_maxbucket;
497
498loop_top:
499 while (cur_bucket <= cur_maxbucket)
500 {
501 BlockNumber bucket_blkno;
502 BlockNumber blkno;
503 Buffer bucket_buf;
504 Buffer buf;
505 HashPageOpaque bucket_opaque;
506 Page page;
507 bool split_cleanup = false;
508
509 /* Get address of bucket's start page */
510 bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
511
512 blkno = bucket_blkno;
513
514 /*
515 * We need to acquire a cleanup lock on the primary bucket page to out
516 * wait concurrent scans before deleting the dead tuples.
517 */
521
522 page = BufferGetPage(buf);
523 bucket_opaque = HashPageGetOpaque(page);
524
525 /*
526 * If the bucket contains tuples that are moved by split, then we need
527 * to delete such tuples. We can't delete such tuples if the split
528 * operation on bucket is not finished as those are needed by scans.
529 */
530 if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
531 H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
532 {
533 split_cleanup = true;
534
535 /*
536 * This bucket might have been split since we last held a lock on
537 * the metapage. If so, hashm_maxbucket, hashm_highmask and
538 * hashm_lowmask might be old enough to cause us to fail to remove
539 * tuples left behind by the most recent split. To prevent that,
540 * now that the primary page of the target bucket has been locked
541 * (and thus can't be further split), check whether we need to
542 * update our cached metapage data.
543 */
544 Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
545 if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
546 {
547 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
548 Assert(cachedmetap != NULL);
549 }
550 }
551
552 bucket_buf = buf;
553
554 hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
555 cachedmetap->hashm_maxbucket,
556 cachedmetap->hashm_highmask,
557 cachedmetap->hashm_lowmask, &tuples_removed,
558 &num_index_tuples, split_cleanup,
559 callback, callback_state);
560
561 _hash_dropbuf(rel, bucket_buf);
562
563 /* Advance to next bucket */
564 cur_bucket++;
565 }
566
567 if (BufferIsInvalid(metabuf))
569
570 /* Write-lock metapage and check for split since we started */
572 metap = HashPageGetMeta(BufferGetPage(metabuf));
573
574 if (cur_maxbucket != metap->hashm_maxbucket)
575 {
576 /* There's been a split, so process the additional bucket(s) */
578 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
579 Assert(cachedmetap != NULL);
580 cur_maxbucket = cachedmetap->hashm_maxbucket;
581 goto loop_top;
582 }
583
584 /* Okay, we're really done. Update tuple count in metapage. */
586
587 if (orig_maxbucket == metap->hashm_maxbucket &&
588 orig_ntuples == metap->hashm_ntuples)
589 {
590 /*
591 * No one has split or inserted anything since start of scan, so
592 * believe our count as gospel.
593 */
594 metap->hashm_ntuples = num_index_tuples;
595 }
596 else
597 {
598 /*
599 * Otherwise, our count is untrustworthy since we may have
600 * double-scanned tuples in split buckets. Proceed by dead-reckoning.
601 * (Note: we still return estimated_count = false, because using this
602 * count is better than not updating reltuples at all.)
603 */
604 if (metap->hashm_ntuples > tuples_removed)
605 metap->hashm_ntuples -= tuples_removed;
606 else
607 metap->hashm_ntuples = 0;
608 num_index_tuples = metap->hashm_ntuples;
609 }
610
611 MarkBufferDirty(metabuf);
612
613 /* XLOG stuff */
614 if (RelationNeedsWAL(rel))
615 {
617 XLogRecPtr recptr;
618
619 xlrec.ntuples = metap->hashm_ntuples;
620
623
625
626 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
627 PageSetLSN(BufferGetPage(metabuf), recptr);
628 }
629
631
632 _hash_relbuf(rel, metabuf);
633
634 /* return statistics */
635 if (stats == NULL)
637 stats->estimated_count = false;
638 stats->num_index_tuples = num_index_tuples;
639 stats->tuples_removed += tuples_removed;
640 /* hashvacuumcleanup will fill in num_pages */
641
642 return stats;
643}
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5617
#define XLOG_HASH_UPDATE_META_PAGE
Definition: hash_xlog.h:38
#define SizeOfHashUpdateMetaPage
Definition: hash_xlog.h:200
double tuples_removed
Definition: genam.h:103
double num_index_tuples
Definition: genam.h:102
Relation index
Definition: genam.h:69
BufferAccessStrategy strategy
Definition: genam.h:76

References _hash_checkpage(), _hash_dropbuf(), _hash_getbuf(), _hash_getcachedmetap(), _hash_relbuf(), Assert(), BUCKET_TO_BLKNO, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsInvalid, callback(), END_CRIT_SECTION, IndexBulkDeleteResult::estimated_count, H_BUCKET_BEING_SPLIT, H_NEEDS_SPLIT_CLEANUP, HASH_METAPAGE, HASH_NOLOCK, hashbucketcleanup(), HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, IndexVacuumInfo::index, InvalidBlockNumber, InvalidBuffer, LH_BUCKET_PAGE, LH_META_PAGE, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirty(), xl_hash_update_meta_page::ntuples, IndexBulkDeleteResult::num_index_tuples, PageSetLSN(), palloc0(), RBM_NORMAL, ReadBufferExtended(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashUpdateMetaPage, START_CRIT_SECTION, IndexVacuumInfo::strategy, IndexBulkDeleteResult::tuples_removed, XLOG_HASH_UPDATE_META_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashhandler().
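The final metapage update in the "untrustworthy count" branch is just a clamped subtraction, sketched standalone below with hypothetical numbers.

/*
 * Standalone sketch: dead-reckoning update of hashm_ntuples, as at the end
 * of hashbulkdelete when a concurrent split makes the scan's count unusable.
 */
#include <stdio.h>

int
main(void)
{
    double ntuples = 1000.0;       /* hypothetical hashm_ntuples */
    double removed = 1250.0;       /* tuples_removed; may exceed the stored count */

    if (ntuples > removed)
        ntuples -= removed;
    else
        ntuples = 0;               /* never let the count go negative */

    printf("new hashm_ntuples estimate: %.0f\n", ntuples);
    return 0;
}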

◆ hashendscan()

void hashendscan ( IndexScanDesc  scan)

Definition at line 434 of file hash.c.

435{
437 Relation rel = scan->indexRelation;
438
440 {
441 /* Before leaving current page, deal with any killed items */
442 if (so->numKilled > 0)
443 _hash_kill_items(scan);
444 }
445
446 _hash_dropscanbuf(rel, so);
447
448 if (so->killedItems != NULL)
449 pfree(so->killedItems);
450 pfree(so);
451 scan->opaque = NULL;
452}

References _hash_dropscanbuf(), _hash_kill_items(), HashScanOpaqueData::currPos, HashScanPosIsValid, if(), IndexScanDescData::indexRelation, HashScanOpaqueData::killedItems, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by hashhandler().

◆ hashgetbitmap()

int64 hashgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 342 of file hash.c.

343{
345 bool res;
346 int64 ntids = 0;
347 HashScanPosItem *currItem;
348
350
351 while (res)
352 {
353 currItem = &so->currPos.items[so->currPos.itemIndex];
354
355 /*
356 * _hash_first and _hash_next eliminate dead index entries
357 * whenever scan->ignore_killed_tuples is true. Therefore, there's
358 * nothing to do here except add the results to the TIDBitmap.
359 */
360 tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
361 ntids++;
362
363 res = _hash_next(scan, ForwardScanDirection);
364 }
365
366 return ntids;
367}
bool _hash_first(IndexScanDesc scan, ScanDirection dir)
Definition: hashsearch.c:288
bool _hash_next(IndexScanDesc scan, ScanDirection dir)
Definition: hashsearch.c:48
@ ForwardScanDirection
Definition: sdir.h:28
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:366

References _hash_first(), _hash_next(), HashScanOpaqueData::currPos, ForwardScanDirection, HashScanPosData::itemIndex, HashScanPosData::items, IndexScanDescData::opaque, and tbm_add_tuples().

Referenced by hashhandler().

◆ hashgettuple()

bool hashgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 290 of file hash.c.

291{
292 HashScanOpaque so = (HashScanOpaque) scan->opaque;
293 bool res;
294
295 /* Hash indexes are always lossy since we store only the hash code */
296 scan->xs_recheck = true;
297
298 /*
299 * If we've already initialized this scan, we can just advance it in the
300 * appropriate direction. If we haven't done so yet, we call a routine to
301 * get the first item in the scan.
302 */
303 if (!HashScanPosIsValid(so->currPos))
304 res = _hash_first(scan, dir);
305 else
306 {
307 /*
308 * Check to see if we should kill the previously-fetched tuple.
309 */
310 if (scan->kill_prior_tuple)
311 {
312 /*
313 * Yes, so remember it for later. (We'll deal with all such tuples
314 * at once right after leaving the index page or at end of scan.)
315 * If the caller reverses the indexscan direction, it is quite
316 * possible that the same item might get entered multiple times.
317 * But, we don't detect that; instead, we just forget any excess
318 * entries.
319 */
320 if (so->killedItems == NULL)
321 so->killedItems = (int *)
322 palloc(MaxIndexTuplesPerPage * sizeof(int));
323
324 if (so->numKilled < MaxIndexTuplesPerPage)
325 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
326 }
327
328 /*
329 * Now continue the scan.
330 */
331 res = _hash_next(scan, dir);
332 }
333
334 return res;
335}
bool kill_prior_tuple
Definition: relscan.h:147

References _hash_first(), _hash_next(), HashScanOpaqueData::currPos, HashScanPosIsValid, if(), HashScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, HashScanOpaqueData::killedItems, MaxIndexTuplesPerPage, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by hashhandler().
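The killed-items bookkeeping above can be shown in isolation. Below is a minimal standalone sketch (not PostgreSQL code): item indexes are remembered in a lazily allocated array capped at a per-page maximum, excess entries are simply forgotten, and the deferred "kill" happens only when the page is left or the scan ends. MAX_ITEMS_PER_PAGE, remember_killed(), and kill_page_items() are hypothetical stand-ins for MaxIndexTuplesPerPage, the killedItems handling in hashgettuple(), and _hash_kill_items().

#include <stdio.h>
#include <stdlib.h>

#define MAX_ITEMS_PER_PAGE 8            /* stand-in for MaxIndexTuplesPerPage */

typedef struct ScanState
{
    int        *killedItems;            /* lazily allocated, like so->killedItems */
    int         numKilled;
} ScanState;

static void
remember_killed(ScanState *so, int itemIndex)
{
    if (so->killedItems == NULL)
        so->killedItems = malloc(MAX_ITEMS_PER_PAGE * sizeof(int));

    /* forget excess entries rather than overflowing the array */
    if (so->numKilled < MAX_ITEMS_PER_PAGE)
        so->killedItems[so->numKilled++] = itemIndex;
}

static void
kill_page_items(ScanState *so)
{
    /* stand-in for _hash_kill_items(): process the list, then reset it */
    for (int i = 0; i < so->numKilled; i++)
        printf("would mark item %d dead\n", so->killedItems[i]);
    so->numKilled = 0;
}

int
main(void)
{
    ScanState   so = {NULL, 0};

    /* entries beyond MAX_ITEMS_PER_PAGE are silently dropped */
    for (int item = 0; item < 12; item++)
        remember_killed(&so, item);

    kill_page_items(&so);               /* done when leaving the page or ending the scan */
    free(so.killedItems);
    return 0;
}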

◆ hashinsert()

bool hashinsert ( Relation  rel,
Datum values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
struct IndexInfo indexInfo 
)

Definition at line 258 of file hash.c.

263{
264 Datum index_values[1];
265 bool index_isnull[1];
266 IndexTuple itup;
267
268 /* convert data to a hash key; on failure, do not insert anything */
269 if (!_hash_convert_tuple(rel,
270 values, isnull,
271 index_values, index_isnull))
272 return false;
273
274 /* form an index tuple and point it at the heap tuple */
275 itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
276 itup->t_tid = *ht_ctid;
277
278 _hash_doinsert(rel, itup, heapRel, false);
279
280 pfree(itup);
281
282 return false;
283}
bool _hash_convert_tuple(Relation index, Datum *user_values, bool *user_isnull, Datum *index_values, bool *index_isnull)
Definition: hashutil.c:318
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44

References _hash_convert_tuple(), _hash_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by hashhandler().
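As the listing shows, hashinsert() stores only the hash code of the user datum (produced by _hash_convert_tuple()), not the datum itself, which is also why hash scans always set xs_recheck. A minimal standalone sketch (not PostgreSQL code) of that idea, with toy_hash() as a hypothetical stand-in for the opclass hash support function:

#include <stdint.h>
#include <stdio.h>

/* hypothetical hash function, purely illustrative (FNV-1a over a C string);
 * the real support function is looked up per datatype in the opclass */
static uint32_t
toy_hash(const char *key)
{
    uint32_t    h = 2166136261u;

    for (; *key; key++)
    {
        h ^= (unsigned char) *key;
        h *= 16777619u;
    }
    return h;
}

int
main(void)
{
    const char *value = "some user datum";
    uint32_t    index_key = toy_hash(value);

    /* only index_key would be stored in the index entry, alongside the heap TID;
     * equal hash codes do not guarantee equal values, hence the recheck */
    printf("value \"%s\" -> stored key 0x%08x\n", value, (unsigned) index_key);
    return 0;
}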

◆ hashoptions()

bytea * hashoptions ( Datum  reloptions,
bool  validate 
)

Definition at line 275 of file hashutil.c.

276{
277 static const relopt_parse_elt tab[] = {
278 {"fillfactor", RELOPT_TYPE_INT, offsetof(HashOptions, fillfactor)},
279 };
280
281 return (bytea *) build_reloptions(reloptions, validate,
282 RELOPT_KIND_HASH,
283 sizeof(HashOptions),
284 tab, lengthof(tab));
285}
#define lengthof(array)
Definition: c.h:759
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1934
@ RELOPT_KIND_HASH
Definition: reloptions.h:45
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
Definition: c.h:658

References build_reloptions(), fillfactor, lengthof, RELOPT_KIND_HASH, RELOPT_TYPE_INT, and validate().

Referenced by hashhandler().
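fillfactor is the only reloption the hash access method accepts (default 75, minimum 10). Below is a minimal standalone sketch (not PostgreSQL code) of the arithmetic it feeds, under the assumption that the target page usage is block size * fillfactor / 100; BLOCK_SIZE and target_page_usage() are illustrative stand-ins, and the AM uses this target when deciding how densely bucket pages should be filled before more buckets are needed.

#include <stdio.h>

#define BLOCK_SIZE          8192        /* default block size in bytes */
#define DEFAULT_FILLFACTOR  75          /* hash default */
#define MIN_FILLFACTOR      10          /* smallest accepted value */

static int
target_page_usage(int fillfactor)
{
    if (fillfactor < MIN_FILLFACTOR)
        fillfactor = MIN_FILLFACTOR;
    return BLOCK_SIZE * fillfactor / 100;
}

int
main(void)
{
    printf("fillfactor %d -> %d target bytes per page\n",
           DEFAULT_FILLFACTOR, target_page_usage(DEFAULT_FILLFACTOR));   /* 6144 */
    printf("fillfactor 90 -> %d target bytes per page\n",
           target_page_usage(90));                                       /* 7372 */
    return 0;
}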

◆ hashrescan()

void hashrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 404 of file hash.c.

406{
407 HashScanOpaque so = (HashScanOpaque) scan->opaque;
408 Relation rel = scan->indexRelation;
409
410 if (HashScanPosIsValid(so->currPos))
411 {
412 /* Before leaving current page, deal with any killed items */
413 if (so->numKilled > 0)
414 _hash_kill_items(scan);
415 }
416
417 _hash_dropscanbuf(rel, so);
418
419 /* set position invalid (this will cause _hash_first call) */
420 HashScanPosInvalidate(so->currPos);
421
422 /* Update scan key, if a new one is given */
423 if (scankey && scan->numberOfKeys > 0)
424 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
425
426 so->hashso_buc_populated = false;
427 so->hashso_buc_split = false;
428}

References _hash_dropscanbuf(), _hash_kill_items(), HashScanOpaqueData::currPos, HashScanPosInvalidate, HashScanPosIsValid, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, if(), IndexScanDescData::indexRelation, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, HashScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by hashhandler().

◆ hashtranslatecmptype()

StrategyNumber hashtranslatecmptype ( CompareType  cmptype,
Oid  opfamily 
)

Definition at line 941 of file hash.c.

942{
943 if (cmptype == COMPARE_EQ)
944 return HTEqualStrategyNumber;
945 return InvalidStrategy;
946}
@ COMPARE_EQ
Definition: cmptype.h:36
#define InvalidStrategy
Definition: stratnum.h:24

References COMPARE_EQ, HTEqualStrategyNumber, and InvalidStrategy.

Referenced by hashhandler().

◆ hashtranslatestrategy()

CompareType hashtranslatestrategy ( StrategyNumber  strategy,
Oid  opfamily 
)

Definition at line 933 of file hash.c.

934{
935 if (strategy == HTEqualStrategyNumber)
936 return COMPARE_EQ;
937 return COMPARE_INVALID;
938}
@ COMPARE_INVALID
Definition: cmptype.h:33

References COMPARE_EQ, COMPARE_INVALID, and HTEqualStrategyNumber.

Referenced by hashhandler().
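Taken together, hashtranslatestrategy() and hashtranslatecmptype() form a two-way mapping with a single meaningful entry, since hash supports only equality. A minimal standalone sketch (not PostgreSQL code) of that round trip, using illustrative enums in place of StrategyNumber and CompareType:

#include <stdio.h>

/* illustrative stand-ins for CompareType and StrategyNumber values */
enum compare_type { CMP_INVALID = 0, CMP_EQ = 1 };
enum { INVALID_STRATEGY = 0, EQUAL_STRATEGY = 1 };

static enum compare_type
strategy_to_cmptype(int strategy)
{
    return (strategy == EQUAL_STRATEGY) ? CMP_EQ : CMP_INVALID;
}

static int
cmptype_to_strategy(enum compare_type cmptype)
{
    return (cmptype == CMP_EQ) ? EQUAL_STRATEGY : INVALID_STRATEGY;
}

int
main(void)
{
    /* equality round-trips; every other strategy or compare type is invalid */
    printf("strategy 1 -> cmptype %d -> strategy %d\n",
           strategy_to_cmptype(1), cmptype_to_strategy(strategy_to_cmptype(1)));
    printf("strategy 2 -> cmptype %d\n", strategy_to_cmptype(2));
    return 0;
}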

◆ hashvacuumcleanup()

IndexBulkDeleteResult * hashvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 651 of file hash.c.

652{
653 Relation rel = info->index;
654 BlockNumber num_pages;
655
656 /* If hashbulkdelete wasn't called, return NULL signifying no change */
657 /* Note: this covers the analyze_only case too */
658 if (stats == NULL)
659 return NULL;
660
661 /* update statistics */
662 num_pages = RelationGetNumberOfBlocks(rel);
663 stats->num_pages = num_pages;
664
665 return stats;
666}
BlockNumber num_pages
Definition: genam.h:100

References IndexVacuumInfo::index, IndexBulkDeleteResult::num_pages, and RelationGetNumberOfBlocks.

Referenced by hashhandler().

◆ hashvalidate()

bool hashvalidate ( Oid  opclassoid)

Definition at line 40 of file hashvalidate.c.

41{
42 bool result = true;
43 HeapTuple classtup;
44 Form_pg_opclass classform;
45 Oid opfamilyoid;
46 Oid opcintype;
47 char *opclassname;
48 char *opfamilyname;
49 CatCList *proclist,
50 *oprlist;
51 List *grouplist;
52 OpFamilyOpFuncGroup *opclassgroup;
53 List *hashabletypes = NIL;
54 int i;
55 ListCell *lc;
56
57 /* Fetch opclass information */
58 classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
59 if (!HeapTupleIsValid(classtup))
60 elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
61 classform = (Form_pg_opclass) GETSTRUCT(classtup);
62
63 opfamilyoid = classform->opcfamily;
64 opcintype = classform->opcintype;
65 opclassname = NameStr(classform->opcname);
66
67 /* Fetch opfamily information */
68 opfamilyname = get_opfamily_name(opfamilyoid, false);
69
70 /* Fetch all operators and support functions of the opfamily */
71 oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
72 proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
73
74 /* Check individual support functions */
75 for (i = 0; i < proclist->n_members; i++)
76 {
77 HeapTuple proctup = &proclist->members[i]->tuple;
78 Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
79 bool ok;
80
81 /*
82 * All hash functions should be registered with matching left/right
83 * types
84 */
85 if (procform->amproclefttype != procform->amprocrighttype)
86 {
87 ereport(INFO,
88 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
89 errmsg("operator family \"%s\" of access method %s contains support function %s with different left and right input types",
90 opfamilyname, "hash",
91 format_procedure(procform->amproc))));
92 result = false;
93 }
94
95 /* Check procedure numbers and function signatures */
96 switch (procform->amprocnum)
97 {
98 case HASHSTANDARD_PROC:
99 ok = check_amproc_signature(procform->amproc, INT4OID, true,
100 1, 1, procform->amproclefttype);
101 break;
102 case HASHEXTENDED_PROC:
103 ok = check_amproc_signature(procform->amproc, INT8OID, true,
104 2, 2, procform->amproclefttype, INT8OID);
105 break;
106 case HASHOPTIONS_PROC:
107 ok = check_amoptsproc_signature(procform->amproc);
108 break;
109 default:
110 ereport(INFO,
111 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
112 errmsg("operator family \"%s\" of access method %s contains function %s with invalid support number %d",
113 opfamilyname, "hash",
114 format_procedure(procform->amproc),
115 procform->amprocnum)));
116 result = false;
117 continue; /* don't want additional message */
118 }
119
120 if (!ok)
121 {
122 ereport(INFO,
123 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
124 errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
125 opfamilyname, "hash",
126 format_procedure(procform->amproc),
127 procform->amprocnum)));
128 result = false;
129 }
130
131 /* Remember which types we can hash */
132 if (ok && (procform->amprocnum == HASHSTANDARD_PROC || procform->amprocnum == HASHEXTENDED_PROC))
133 {
134 hashabletypes = list_append_unique_oid(hashabletypes, procform->amproclefttype);
135 }
136 }
137
138 /* Check individual operators */
139 for (i = 0; i < oprlist->n_members; i++)
140 {
141 HeapTuple oprtup = &oprlist->members[i]->tuple;
142 Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
143
144 /* Check that only allowed strategy numbers exist */
145 if (oprform->amopstrategy < 1 ||
146 oprform->amopstrategy > HTMaxStrategyNumber)
147 {
148 ereport(INFO,
149 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
150 errmsg("operator family \"%s\" of access method %s contains operator %s with invalid strategy number %d",
151 opfamilyname, "hash",
152 format_operator(oprform->amopopr),
153 oprform->amopstrategy)));
154 result = false;
155 }
156
157 /* hash doesn't support ORDER BY operators */
158 if (oprform->amoppurpose != AMOP_SEARCH ||
159 OidIsValid(oprform->amopsortfamily))
160 {
161 ereport(INFO,
162 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
163 errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
164 opfamilyname, "hash",
165 format_operator(oprform->amopopr))));
166 result = false;
167 }
168
169 /* Check operator signature --- same for all hash strategies */
170 if (!check_amop_signature(oprform->amopopr, BOOLOID,
171 oprform->amoplefttype,
172 oprform->amoprighttype))
173 {
174 ereport(INFO,
175 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
176 errmsg("operator family \"%s\" of access method %s contains operator %s with wrong signature",
177 opfamilyname, "hash",
178 format_operator(oprform->amopopr))));
179 result = false;
180 }
181
182 /* There should be relevant hash functions for each datatype */
183 if (!list_member_oid(hashabletypes, oprform->amoplefttype) ||
184 !list_member_oid(hashabletypes, oprform->amoprighttype))
185 {
186 ereport(INFO,
187 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
188 errmsg("operator family \"%s\" of access method %s lacks support function for operator %s",
189 opfamilyname, "hash",
190 format_operator(oprform->amopopr))));
191 result = false;
192 }
193 }
194
195 /* Now check for inconsistent groups of operators/functions */
196 grouplist = identify_opfamily_groups(oprlist, proclist);
197 opclassgroup = NULL;
198 foreach(lc, grouplist)
199 {
200 OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
201
202 /* Remember the group exactly matching the test opclass */
203 if (thisgroup->lefttype == opcintype &&
204 thisgroup->righttype == opcintype)
205 opclassgroup = thisgroup;
206
207 /*
208 * Complain if there seems to be an incomplete set of operators for
209 * this datatype pair (implying that we have a hash function but no
210 * operator).
211 */
212 if (thisgroup->operatorset != (1 << HTEqualStrategyNumber))
213 {
214 ereport(INFO,
215 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
216 errmsg("operator family \"%s\" of access method %s is missing operator(s) for types %s and %s",
217 opfamilyname, "hash",
218 format_type_be(thisgroup->lefttype),
219 format_type_be(thisgroup->righttype))));
220 result = false;
221 }
222 }
223
224 /* Check that the originally-named opclass is supported */
225 /* (if group is there, we already checked it adequately above) */
226 if (!opclassgroup)
227 {
228 ereport(INFO,
229 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
230 errmsg("operator class \"%s\" of access method %s is missing operator(s)",
231 opclassname, "hash")));
232 result = false;
233 }
234
235 /*
236 * Complain if the opfamily doesn't have entries for all possible
237 * combinations of its supported datatypes. While missing cross-type
238 * operators are not fatal, it seems reasonable to insist that all
239 * built-in hash opfamilies be complete.
240 */
241 if (list_length(grouplist) !=
242 list_length(hashabletypes) * list_length(hashabletypes))
243 {
244 ereport(INFO,
245 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
246 errmsg("operator family \"%s\" of access method %s is missing cross-type operator(s)",
247 opfamilyname, "hash")));
248 result = false;
249 }
250
251 ReleaseCatCacheList(proclist);
252 ReleaseCatCacheList(oprlist);
253 ReleaseSysCache(classtup);
254
255 return result;
256}
bool check_amproc_signature(Oid funcid, Oid restype, bool exact, int minargs, int maxargs,...)
Definition: amvalidate.c:152
bool check_amop_signature(Oid opno, Oid restype, Oid lefttype, Oid righttype)
Definition: amvalidate.c:206
List * identify_opfamily_groups(CatCList *oprlist, CatCList *proclist)
Definition: amvalidate.c:43
bool check_amoptsproc_signature(Oid funcid)
Definition: amvalidate.c:192
#define NameStr(name)
Definition: c.h:717
void ReleaseCatCacheList(CatCList *list)
Definition: catcache.c:2071
#define INFO
Definition: elog.h:34
char * format_type_be(Oid type_oid)
Definition: format_type.c:343
#define HASHEXTENDED_PROC
Definition: hash.h:356
#define HASHOPTIONS_PROC
Definition: hash.h:357
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
List * list_append_unique_oid(List *list, Oid datum)
Definition: list.c:1380
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:722
char * get_opfamily_name(Oid opfid, bool missing_ok)
Definition: lsyscache.c:1393
FormData_pg_amop * Form_pg_amop
Definition: pg_amop.h:88
FormData_pg_amproc * Form_pg_amproc
Definition: pg_amproc.h:68
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
FormData_pg_opclass * Form_pg_opclass
Definition: pg_opclass.h:83
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
char * format_procedure(Oid procedure_oid)
Definition: regproc.c:299
char * format_operator(Oid operator_oid)
Definition: regproc.c:793
#define HTMaxStrategyNumber
Definition: stratnum.h:43
Definition: pg_list.h:54
CatCTup * members[FLEXIBLE_ARRAY_MEMBER]
Definition: catcache.h:180
int n_members
Definition: catcache.h:178
HeapTupleData tuple
Definition: catcache.h:123
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
#define SearchSysCacheList1(cacheId, key1)
Definition: syscache.h:127

References check_amop_signature(), check_amoptsproc_signature(), check_amproc_signature(), elog, ereport, errcode(), errmsg(), ERROR, format_operator(), format_procedure(), format_type_be(), get_opfamily_name(), GETSTRUCT(), HASHEXTENDED_PROC, HASHOPTIONS_PROC, HASHSTANDARD_PROC, HeapTupleIsValid, HTEqualStrategyNumber, HTMaxStrategyNumber, i, identify_opfamily_groups(), INFO, OpFamilyOpFuncGroup::lefttype, lfirst, list_append_unique_oid(), list_length(), list_member_oid(), catclist::members, catclist::n_members, NameStr, NIL, ObjectIdGetDatum(), OidIsValid, OpFamilyOpFuncGroup::operatorset, ReleaseCatCacheList(), ReleaseSysCache(), OpFamilyOpFuncGroup::righttype, SearchSysCache1(), SearchSysCacheList1, and catctup::tuple.

Referenced by hashhandler().
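The final completeness check in hashvalidate() is simple arithmetic: if the opfamily has n hashable datatypes, a complete family needs an equality operator for every ordered pair, i.e. n * n operator/function groups. A minimal standalone sketch (not PostgreSQL code) of that check, with illustrative counts:

#include <stdio.h>

int
main(void)
{
    int     n_hashable_types = 2;       /* e.g. an opfamily covering int4 and int8 */
    int     n_groups_found = 3;         /* e.g. (int4,int4), (int8,int8), (int4,int8) */
    int     n_groups_expected = n_hashable_types * n_hashable_types;

    if (n_groups_found != n_groups_expected)
        printf("missing cross-type operator(s): found %d groups, expected %d\n",
               n_groups_found, n_groups_expected);
    return 0;
}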