#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "catalog/pg_am_d.h"
#include "common/hashfn.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/lockdefs.h"
#include "utils/hsearch.h"
#include "utils/relcache.h"

Include dependency graph for hash.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures
struct	HashPageOpaqueData

struct	HashScanPosItem

struct	HashScanPosData

struct	HashScanOpaqueData

struct	HashMetaPageData

struct	HashOptions

Macros
#define	InvalidBucket ((Bucket) 0xFFFFFFFF)

#define	BUCKET_TO_BLKNO(metap, B) ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)

#define	LH_UNUSED_PAGE (0)

#define	LH_OVERFLOW_PAGE (1 << 0)

#define	LH_BUCKET_PAGE (1 << 1)

#define	LH_BITMAP_PAGE (1 << 2)

#define	LH_META_PAGE (1 << 3)

#define	LH_BUCKET_BEING_POPULATED (1 << 4)

#define	LH_BUCKET_BEING_SPLIT (1 << 5)

#define	LH_BUCKET_NEEDS_SPLIT_CLEANUP (1 << 6)

#define	LH_PAGE_HAS_DEAD_TUPLES (1 << 7)

#define	LH_PAGE_TYPE (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)

#define	HashPageGetOpaque(page) ((HashPageOpaque) PageGetSpecialPointer(page))

#define	H_NEEDS_SPLIT_CLEANUP(opaque) (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)

#define	H_BUCKET_BEING_SPLIT(opaque) (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)

#define	H_BUCKET_BEING_POPULATED(opaque) (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)

#define	H_HAS_DEAD_TUPLES(opaque) (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)

#define	HASHO_PAGE_ID 0xFF80

#define	HashScanPosIsPinned(scanpos)

#define	HashScanPosIsValid(scanpos)

#define	HashScanPosInvalidate(scanpos)

#define	HASH_METAPAGE 0 /* metapage is always block 0 */

#define	HASH_MAGIC 0x6440640

#define	HASH_VERSION 4

#define	HASH_MAX_BITMAPS Min(BLCKSZ / 8, 1024)

#define	HASH_SPLITPOINT_PHASE_BITS 2

#define	HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS)

#define	HASH_SPLITPOINT_PHASE_MASK (HASH_SPLITPOINT_PHASES_PER_GRP - 1)

#define	HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE 10

#define	HASH_MAX_SPLITPOINT_GROUP 32

#define	HASH_MAX_SPLITPOINTS

#define	HashGetFillFactor(relation)

#define	HashGetTargetPageUsage(relation) (BLCKSZ * HashGetFillFactor(relation) / 100)

#define	HashMaxItemSize(page)

#define	INDEX_MOVED_BY_SPLIT_MASK INDEX_AM_RESERVED_BIT

#define	HASH_MIN_FILLFACTOR 10

#define	HASH_DEFAULT_FILLFACTOR 75

#define	BYTE_TO_BIT 3 /* 2^3 bits/byte */

#define	ALL_SET ((uint32) ~0)

#define	BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize)

#define	BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT)

#define	BMPG_SHIFT(metap) ((metap)->hashm_bmshift)

#define	BMPG_MASK(metap) (BMPGSZ_BIT(metap) - 1)

#define	HashPageGetBitmap(page) ((uint32 *) PageGetContents(page))

#define	HashGetMaxBitmapSize(page)

#define	HashPageGetMeta(page) ((HashMetaPage) PageGetContents(page))

#define	BITS_PER_MAP 32 /* Number of bits in uint32 */

#define	CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))

#define	SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))

#define	ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))

#define	HASH_READ BUFFER_LOCK_SHARE

#define	HASH_WRITE BUFFER_LOCK_EXCLUSIVE

#define	HASH_NOLOCK (-1)

#define	HASHSTANDARD_PROC 1

#define	HASHEXTENDED_PROC 2

#define	HASHOPTIONS_PROC 3

#define	HASHNProcs 3

Typedefs
typedef uint32	Bucket

typedef struct HashPageOpaqueData	HashPageOpaqueData

typedef HashPageOpaqueData *	HashPageOpaque

typedef struct HashScanPosItem	HashScanPosItem

typedef struct HashScanPosData	HashScanPosData

typedef struct HashScanOpaqueData	HashScanOpaqueData

typedef HashScanOpaqueData *	HashScanOpaque

typedef struct HashMetaPageData	HashMetaPageData

typedef HashMetaPageData *	HashMetaPage

typedef struct HashOptions	HashOptions

typedef struct HSpool	HSpool

Functions
IndexBuildResult *	hashbuild (Relation heap, Relation index, struct IndexInfo *indexInfo)

void	hashbuildempty (Relation index)

bool	hashinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo)

bool	hashgettuple (IndexScanDesc scan, ScanDirection dir)

int64	hashgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)

IndexScanDesc	hashbeginscan (Relation rel, int nkeys, int norderbys)

void	hashrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)

void	hashendscan (IndexScanDesc scan)

IndexBulkDeleteResult *	hashbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)

IndexBulkDeleteResult *	hashvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)

bytea *	hashoptions (Datum reloptions, bool validate)

bool	hashvalidate (Oid opclassoid)

void	hashadjustmembers (Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)

CompareType	hashtranslatestrategy (StrategyNumber strategy, Oid opfamily)

StrategyNumber	hashtranslatecmptype (CompareType cmptype, Oid opfamily)

void	_hash_doinsert (Relation rel, IndexTuple itup, Relation heapRel, bool sorted)

OffsetNumber	_hash_pgaddtup (Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool appendtup)

void	_hash_pgaddmultitup (Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups)

Buffer	_hash_addovflpage (Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)

BlockNumber	_hash_freeovflpage (Relation rel, Buffer bucketbuf, Buffer ovflbuf, Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy)

void	_hash_initbitmapbuffer (Buffer buf, uint16 bmsize, bool initpage)

void	_hash_squeezebucket (Relation rel, Bucket bucket, BlockNumber bucket_blkno, Buffer bucket_buf, BufferAccessStrategy bstrategy)

uint32	_hash_ovflblkno_to_bitno (HashMetaPage metap, BlockNumber ovflblkno)

Buffer	_hash_getbuf (Relation rel, BlockNumber blkno, int access, int flags)

Buffer	_hash_getbuf_with_condlock_cleanup (Relation rel, BlockNumber blkno, int flags)

HashMetaPage	_hash_getcachedmetap (Relation rel, Buffer *metabuf, bool force_refresh)

Buffer	_hash_getbucketbuf_from_hashkey (Relation rel, uint32 hashkey, int access, HashMetaPage *cachedmetap)

Buffer	_hash_getinitbuf (Relation rel, BlockNumber blkno)

void	_hash_initbuf (Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, bool initpage)

Buffer	_hash_getnewbuf (Relation rel, BlockNumber blkno, ForkNumber forkNum)

Buffer	_hash_getbuf_with_strategy (Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)

void	_hash_relbuf (Relation rel, Buffer buf)

void	_hash_dropbuf (Relation rel, Buffer buf)

void	_hash_dropscanbuf (Relation rel, HashScanOpaque so)

uint32	_hash_init (Relation rel, double num_tuples, ForkNumber forkNum)

void	_hash_init_metabuffer (Buffer buf, double num_tuples, RegProcedure procid, uint16 ffactor, bool initpage)

void	_hash_pageinit (Page page, Size size)

void	_hash_expandtable (Relation rel, Buffer metabuf)

void	_hash_finish_split (Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, uint32 maxbucket, uint32 highmask, uint32 lowmask)

bool	_hash_next (IndexScanDesc scan, ScanDirection dir)

bool	_hash_first (IndexScanDesc scan, ScanDirection dir)

HSpool *	_h_spoolinit (Relation heap, Relation index, uint32 num_buckets)

void	_h_spooldestroy (HSpool *hspool)

void	_h_spool (HSpool *hspool, ItemPointer self, const Datum *values, const bool *isnull)

void	_h_indexbuild (HSpool *hspool, Relation heapRel)

bool	_hash_checkqual (IndexScanDesc scan, IndexTuple itup)

uint32	_hash_datum2hashkey (Relation rel, Datum key)

uint32	_hash_datum2hashkey_type (Relation rel, Datum key, Oid keytype)

Bucket	_hash_hashkey2bucket (uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask)

uint32	_hash_spareindex (uint32 num_bucket)

uint32	_hash_get_totalbuckets (uint32 splitpoint_phase)

void	_hash_checkpage (Relation rel, Buffer buf, int flags)

uint32	_hash_get_indextuple_hashkey (IndexTuple itup)

bool	_hash_convert_tuple (Relation index, Datum *user_values, bool *user_isnull, Datum *index_values, bool *index_isnull)

OffsetNumber	_hash_binsearch (Page page, uint32 hash_value)

OffsetNumber	_hash_binsearch_last (Page page, uint32 hash_value)

BlockNumber	_hash_get_oldblock_from_newbucket (Relation rel, Bucket new_bucket)

BlockNumber	_hash_get_newblock_from_oldbucket (Relation rel, Bucket old_bucket)

Bucket	_hash_get_newbucket_from_oldbucket (Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)

void	_hash_kill_items (IndexScanDesc scan)

void	hashbucketcleanup (Relation rel, Bucket cur_bucket, Buffer bucket_buf, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy, uint32 maxbucket, uint32 highmask, uint32 lowmask, double *tuples_removed, double *num_index_tuples, bool split_cleanup, IndexBulkDeleteCallback callback, void *callback_state)

Macro Definition Documentation

◆ ALL_SET

#define ALL_SET ((uint32) ~0)

Definition at line 302 of file hash.h.

◆ BITS_PER_MAP

#define BITS_PER_MAP 32 /* Number of bits in uint32 */

Definition at line 329 of file hash.h.

◆ BMPG_MASK

#define BMPG_MASK ( metap ) (BMPGSZ_BIT(metap) - 1)

Definition at line 314 of file hash.h.

◆ BMPG_SHIFT

#define BMPG_SHIFT ( metap ) ((metap)->hashm_bmshift)

Definition at line 313 of file hash.h.

◆ BMPGSZ_BIT

#define BMPGSZ_BIT ( metap ) ((metap)->hashm_bmsize << BYTE_TO_BIT)

Definition at line 312 of file hash.h.

◆ BMPGSZ_BYTE

#define BMPGSZ_BYTE ( metap ) ((metap)->hashm_bmsize)

Definition at line 311 of file hash.h.

◆ BUCKET_TO_BLKNO

#define BUCKET_TO_BLKNO	(	metap,
		B
	)	((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)

Definition at line 39 of file hash.h.

◆ BYTE_TO_BIT

#define BYTE_TO_BIT 3 /* 2^3 bits/byte */

Definition at line 301 of file hash.h.

◆ CLRBIT

#define CLRBIT	(	A,
		N
	)	((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))

Definition at line 332 of file hash.h.

◆ H_BUCKET_BEING_POPULATED

#define H_BUCKET_BEING_POPULATED ( opaque ) (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0)

Definition at line 92 of file hash.h.

◆ H_BUCKET_BEING_SPLIT

#define H_BUCKET_BEING_SPLIT ( opaque ) (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0)

Definition at line 91 of file hash.h.

◆ H_HAS_DEAD_TUPLES

#define H_HAS_DEAD_TUPLES ( opaque ) (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0)

Definition at line 93 of file hash.h.

◆ H_NEEDS_SPLIT_CLEANUP

#define H_NEEDS_SPLIT_CLEANUP ( opaque ) (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0)

Definition at line 90 of file hash.h.

◆ HASH_DEFAULT_FILLFACTOR

#define HASH_DEFAULT_FILLFACTOR 75

Definition at line 296 of file hash.h.

◆ HASH_MAGIC

#define HASH_MAGIC 0x6440640

Definition at line 200 of file hash.h.

◆ HASH_MAX_BITMAPS

#define HASH_MAX_BITMAPS Min(BLCKSZ / 8, 1024)

Definition at line 230 of file hash.h.

◆ HASH_MAX_SPLITPOINT_GROUP

#define HASH_MAX_SPLITPOINT_GROUP 32

Definition at line 238 of file hash.h.

◆ HASH_MAX_SPLITPOINTS

#define HASH_MAX_SPLITPOINTS

Value:

    (((HASH_MAX_SPLITPOINT_GROUP - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) * \
      HASH_SPLITPOINT_PHASES_PER_GRP) + \
     HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)

Definition at line 239 of file hash.h.

◆ HASH_METAPAGE

#define HASH_METAPAGE 0 /* metapage is always block 0 */

Definition at line 198 of file hash.h.

◆ HASH_MIN_FILLFACTOR

#define HASH_MIN_FILLFACTOR 10

Definition at line 295 of file hash.h.

◆ HASH_NOLOCK

#define HASH_NOLOCK (-1)

Definition at line 341 of file hash.h.

◆ HASH_READ

#define HASH_READ BUFFER_LOCK_SHARE

Definition at line 339 of file hash.h.

◆ HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE

#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE 10

Definition at line 235 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_BITS

#define HASH_SPLITPOINT_PHASE_BITS 2

Definition at line 232 of file hash.h.

◆ HASH_SPLITPOINT_PHASE_MASK

#define HASH_SPLITPOINT_PHASE_MASK (HASH_SPLITPOINT_PHASES_PER_GRP - 1)

Definition at line 234 of file hash.h.

◆ HASH_SPLITPOINT_PHASES_PER_GRP

#define HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS)

Definition at line 233 of file hash.h.

◆ HASH_VERSION

#define HASH_VERSION 4

Definition at line 201 of file hash.h.

◆ HASH_WRITE

#define HASH_WRITE BUFFER_LOCK_EXCLUSIVE

Definition at line 340 of file hash.h.

◆ HASHEXTENDED_PROC

#define HASHEXTENDED_PROC 2

Definition at line 356 of file hash.h.

◆ HashGetFillFactor

#define HashGetFillFactor ( relation )

Value:

    (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
                 relation->rd_rel->relam == HASH_AM_OID), \
     (relation)->rd_options ? \
     ((HashOptions *) (relation)->rd_options)->fillfactor : \
     HASH_DEFAULT_FILLFACTOR)

Definition at line 275 of file hash.h.

◆ HashGetMaxBitmapSize

#define HashGetMaxBitmapSize ( page )

Value:

    (PageGetPageSize((Page) page) - \
     (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))

PageGetPageSize

static Size PageGetPageSize(const PageData *page)

Definition: bufpage.h:277

SizeOfPageHeaderData

#define SizeOfPageHeaderData

Definition: bufpage.h:217

Page

PageData * Page

Definition: bufpage.h:82

MAXALIGN

#define MAXALIGN(LEN)

Definition: c.h:782

HashPageOpaqueData

Definition: hash.h:78

Definition at line 319 of file hash.h.

◆ HashGetTargetPageUsage

#define HashGetTargetPageUsage ( relation ) (BLCKSZ * HashGetFillFactor(relation) / 100)

Definition at line 281 of file hash.h.

◆ HashMaxItemSize

#define HashMaxItemSize ( page )

Value:

    MAXALIGN_DOWN(PageGetPageSize(page) - \
                  SizeOfPageHeaderData - \
                  sizeof(ItemIdData) - \
                  MAXALIGN(sizeof(HashPageOpaqueData)))

Definition at line 287 of file hash.h.

◆ HASHNProcs

#define HASHNProcs 3

Definition at line 358 of file hash.h.

◆ HASHO_PAGE_ID

#define HASHO_PAGE_ID 0xFF80

Definition at line 101 of file hash.h.

◆ HASHOPTIONS_PROC

#define HASHOPTIONS_PROC 3

Definition at line 357 of file hash.h.

◆ HashPageGetBitmap

#define HashPageGetBitmap ( page ) ((uint32 *) PageGetContents(page))

Definition at line 316 of file hash.h.

◆ HashPageGetMeta

#define HashPageGetMeta ( page ) ((HashMetaPage) PageGetContents(page))

Definition at line 323 of file hash.h.

◆ HashPageGetOpaque

#define HashPageGetOpaque ( page ) ((HashPageOpaque) PageGetSpecialPointer(page))

Definition at line 88 of file hash.h.

◆ HashScanPosInvalidate

#define HashScanPosInvalidate ( scanpos )

Value:

    do { \
        (scanpos).buf = InvalidBuffer; \
        (scanpos).currPage = InvalidBlockNumber; \
        (scanpos).nextPage = InvalidBlockNumber; \
        (scanpos).prevPage = InvalidBlockNumber; \
        (scanpos).firstItem = 0; \
        (scanpos).lastItem = 0; \
        (scanpos).itemIndex = 0; \
    } while (0)

Definition at line 144 of file hash.h.

◆ HashScanPosIsPinned

#define HashScanPosIsPinned ( scanpos )

Value:

( \
    AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                !BufferIsValid((scanpos).buf)), \
    BufferIsValid((scanpos).buf) \
)

Definition at line 130 of file hash.h.

◆ HashScanPosIsValid

#define HashScanPosIsValid ( scanpos )

Value:

( \
    AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                !BufferIsValid((scanpos).buf)), \
    BlockNumberIsValid((scanpos).currPage) \
)

Definition at line 137 of file hash.h.

◆ HASHSTANDARD_PROC

#define HASHSTANDARD_PROC 1

Definition at line 355 of file hash.h.

◆ INDEX_MOVED_BY_SPLIT_MASK

#define INDEX_MOVED_BY_SPLIT_MASK INDEX_AM_RESERVED_BIT

Definition at line 293 of file hash.h.

◆ InvalidBucket

#define InvalidBucket ((Bucket) 0xFFFFFFFF)

Definition at line 37 of file hash.h.

◆ ISSET

#define ISSET	(	A,
		N
	)	((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))

Definition at line 334 of file hash.h.

◆ LH_BITMAP_PAGE

#define LH_BITMAP_PAGE (1 << 2)

Definition at line 56 of file hash.h.

◆ LH_BUCKET_BEING_POPULATED

#define LH_BUCKET_BEING_POPULATED (1 << 4)

Definition at line 58 of file hash.h.

◆ LH_BUCKET_BEING_SPLIT

#define LH_BUCKET_BEING_SPLIT (1 << 5)

Definition at line 59 of file hash.h.

◆ LH_BUCKET_NEEDS_SPLIT_CLEANUP

#define LH_BUCKET_NEEDS_SPLIT_CLEANUP (1 << 6)

Definition at line 60 of file hash.h.

◆ LH_BUCKET_PAGE

#define LH_BUCKET_PAGE (1 << 1)

Definition at line 55 of file hash.h.

◆ LH_META_PAGE

#define LH_META_PAGE (1 << 3)

Definition at line 57 of file hash.h.

◆ LH_OVERFLOW_PAGE

#define LH_OVERFLOW_PAGE (1 << 0)

Definition at line 54 of file hash.h.

◆ LH_PAGE_HAS_DEAD_TUPLES

#define LH_PAGE_HAS_DEAD_TUPLES (1 << 7)

Definition at line 61 of file hash.h.

◆ LH_PAGE_TYPE

#define LH_PAGE_TYPE (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)

Definition at line 63 of file hash.h.

◆ LH_UNUSED_PAGE

#define LH_UNUSED_PAGE (0)

Definition at line 53 of file hash.h.

◆ SETBIT

#define SETBIT	(	A,
		N
	)	((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))

Definition at line 333 of file hash.h.

Typedef Documentation

◆ Bucket

typedef uint32 Bucket

Definition at line 35 of file hash.h.

◆ HashMetaPage

typedef HashMetaPageData* HashMetaPage

Definition at line 267 of file hash.h.

◆ HashMetaPageData

typedef struct HashMetaPageData HashMetaPageData

◆ HashOptions

typedef struct HashOptions HashOptions

◆ HashPageOpaque

typedef HashPageOpaqueData* HashPageOpaque

Definition at line 86 of file hash.h.

◆ HashPageOpaqueData

typedef struct HashPageOpaqueData HashPageOpaqueData

◆ HashScanOpaque

typedef HashScanOpaqueData* HashScanOpaque

Definition at line 192 of file hash.h.

◆ HashScanOpaqueData

typedef struct HashScanOpaqueData HashScanOpaqueData

◆ HashScanPosData

typedef struct HashScanPosData HashScanPosData

◆ HashScanPosItem

typedef struct HashScanPosItem HashScanPosItem

◆ HSpool

typedef struct HSpool HSpool

Definition at line 452 of file hash.h.

Function Documentation

◆ _h_indexbuild()

void _h_indexbuild	(	HSpool *	hspool,
		Relation	heapRel
	)

Definition at line 120 of file hashsort.c.

{
    /*
     * Finish the spool's sort, then feed every tuple it yields into the
     * index through _hash_doinsert(), reporting progress after each one.
     */
    int64       ninserted = 0;
#ifdef USE_ASSERT_CHECKING
    uint32      prev_bucket = 0;
#endif

    tuplesort_performsort(hspool->sortstate);

    for (;;)
    {
        IndexTuple  tuple;

        tuple = tuplesort_getindextuple(hspool->sortstate, true);
        if (tuple == NULL)
            break;              /* sort output exhausted */

#ifdef USE_ASSERT_CHECKING
        {
            /*
             * Sorted order is only a locality optimization, not a
             * correctness requirement, but checking it here exercises
             * tuplesort.c's handling of hash index tuple sorts.
             */
            uint32      this_bucket;

            this_bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(tuple),
                                               hspool->max_buckets,
                                               hspool->high_mask,
                                               hspool->low_mask);
            Assert(this_bucket >= prev_bucket);
            prev_bucket = this_bucket;
        }
#endif

        /* input arrives in sorted order, hence 'sorted' = true */
        _hash_doinsert(hspool->index, tuple, heapRel, true);

        /* stay interruptible during the insertion phase */
        CHECK_FOR_INTERRUPTS();

        /* publish the running count of tuples inserted so far */
        ninserted++;
        pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
                                     ninserted);
    }
}

References _hash_doinsert(), _hash_get_indextuple_hashkey(), _hash_hashkey2bucket(), Assert(), CHECK_FOR_INTERRUPTS, HSpool::high_mask, HSpool::index, HSpool::low_mask, HSpool::max_buckets, pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_DONE, HSpool::sortstate, tuplesort_getindextuple(), and tuplesort_performsort().

Referenced by hashbuild().

◆ _h_spool()

void _h_spool	(	HSpool *	hspool,
		ItemPointer	self,
		const Datum *	values,
		const bool *	isnull
	)

Definition at line 109 of file hashsort.c.

{
    /*
     * Hand one index entry (heap TID plus its values/isnull arrays) to the
     * spool's tuplesort for the spool's target index.
     */
    Relation    target_index = hspool->index;

    tuplesort_putindextuplevalues(hspool->sortstate,
                                  target_index,
                                  self,
                                  values,
                                  isnull);
}

References HSpool::index, HSpool::sortstate, tuplesort_putindextuplevalues(), and values.

Referenced by hashbuildCallback().

◆ _h_spooldestroy()

void _h_spooldestroy ( HSpool * hspool )

Definition at line 99 of file hashsort.c.

{
    /*
     * Tear down the spool: end its tuplesort first, then free the spool
     * structure itself.
     */
    Tuplesortstate *sortstate = hspool->sortstate;

    tuplesort_end(sortstate);

    pfree(hspool);
}

References pfree(), HSpool::sortstate, and tuplesort_end().

Referenced by hashbuild().

◆ _h_spoolinit()

HSpool * _h_spoolinit	(	Relation	heap,
		Relation	index,
		uint32	num_buckets
	)

Definition at line 60 of file hashsort.c.

{
    /*
     * Build a zero-initialized spool for 'index', derive the bucket masks
     * from num_buckets, and start a hash-index tuplesort; the new spool is
     * returned to the caller.
     */
    HSpool     *spool;
    uint32      hi_mask;
    uint32      lo_mask;
    uint32      last_bucket;

    spool = (HSpool *) palloc0(sizeof(HSpool));
    spool->index = index;

    /*
     * Bitmasks for hash code values, given that the index currently has
     * num_buckets buckets.
     *
     * NOTE: keep this computation in sync with the similar one in
     * _hash_init_metabuffer.
     */
    hi_mask = pg_nextpower2_32(num_buckets + 1) - 1;
    lo_mask = hi_mask >> 1;
    last_bucket = num_buckets - 1;

    spool->high_mask = hi_mask;
    spool->low_mask = lo_mask;
    spool->max_buckets = last_bucket;

    /*
     * The sort is sized by maintenance_work_mem rather than work_mem to
     * speed up index creation; that is acceptable because one backend
     * cannot run several index creations in parallel.
     */
    spool->sortstate = tuplesort_begin_index_hash(heap,
                                                  index,
                                                  hi_mask,
                                                  lo_mask,
                                                  last_bucket,
                                                  maintenance_work_mem,
                                                  NULL,
                                                  TUPLESORT_NONE);

    return spool;
}

References HSpool::high_mask, HSpool::index, HSpool::low_mask, maintenance_work_mem, HSpool::max_buckets, palloc0(), pg_nextpower2_32(), HSpool::sortstate, tuplesort_begin_index_hash(), and TUPLESORT_NONE.

Referenced by hashbuild().

◆ _hash_addovflpage()

Buffer _hash_addovflpage	(	Relation	rel,
		Buffer	metabuf,
		Buffer	buf,
		bool	retain_pin
	)

Definition at line 112 of file hashovfl.c.

{
    /*
     * Overview: append a new overflow page to the end of the bucket chain
     * that 'buf' belongs to and return the buffer holding that new page.
     *
     * Steps, in order:
     *   1. Lock 'buf' and walk hasho_nextblkno links to the chain's tail.
     *   2. Lock the metapage and scan the bitmap pages, starting at
     *      hashm_firstfree, for a word that is not ALL_SET (a reusable
     *      overflow page).
     *   3. If none is found, extend the relation, adding a new bitmap page
     *      first when the last existing bitmap page is full.
     *   4. Inside a critical section, update bitmap/metapage bookkeeping,
     *      initialize the overflow page's opaque data, link it after the
     *      tail page, and emit one WAL record covering all touched buffers.
     *
     * On return, 'buf' (which by then refers to the tail page) is unlocked,
     * keeping its pin only if retain_pin is still true; metabuf is unlocked
     * but not released; any bitmap buffers are released.  ovflbuf is
     * returned as obtained -- this function neither unlocks nor releases it.
     */
    Buffer      ovflbuf;
    Page        page;
    Page        ovflpage;
    HashPageOpaque pageopaque;
    HashPageOpaque ovflopaque;
    HashMetaPage metap;
    Buffer      mapbuf = InvalidBuffer;
    Buffer      newmapbuf = InvalidBuffer;
    BlockNumber blkno;
    uint32      orig_firstfree;
    uint32      splitnum;
    uint32     *freep = NULL;
    uint32      max_ovflpg;
    uint32      bit;
    uint32      bitmap_page_bit;
    uint32      first_page;
    uint32      last_bit;
    uint32      last_page;
    uint32      i,
                j;
    bool        page_found = false;

    /*
     * Write-lock the tail page.  Here, we need to maintain locking order such
     * that, first acquire the lock on tail page of bucket, then on meta page
     * to find and lock the bitmap page and if it is found, then lock on meta
     * page is released, then finally acquire the lock on new overflow buffer.
     * We need this locking order to avoid deadlock with backends that are
     * doing inserts.
     *
     * Note: We could have avoided locking many buffers here if we made two
     * WAL records for acquiring an overflow page (one to allocate an overflow
     * page and another to add it to overflow bucket chain).  However, doing
     * so can leak an overflow page, if the system crashes after allocation.
     * Needless to say, it is better to have a single record from a
     * performance point of view as well.
     */
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    /* probably redundant... */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

    /* loop to find current tail page, in case someone else inserted too */
    for (;;)
    {
        BlockNumber nextblkno;

        page = BufferGetPage(buf);
        pageopaque = HashPageGetOpaque(page);
        nextblkno = pageopaque->hasho_nextblkno;

        if (!BlockNumberIsValid(nextblkno))
            break;

        /* we assume we do not need to write the unmodified page */
        if (retain_pin)
        {
            /* pin will be retained only for the primary bucket page */
            Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
        else
            _hash_relbuf(rel, buf);

        /*
         * Every page fetched below is an overflow page, so from here on the
         * buffer in 'buf' is never one whose pin must be kept.
         */
        retain_pin = false;

        buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
    }

    /* Get exclusive lock on the meta page */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /* start search at hashm_firstfree */
    orig_firstfree = metap->hashm_firstfree;
    first_page = orig_firstfree >> BMPG_SHIFT(metap);
    bit = orig_firstfree & BMPG_MASK(metap);
    i = first_page;
    /* j = index of the uint32 word containing 'bit' within the bitmap page */
    j = bit / BITS_PER_MAP;
    /* round 'bit' down to the first bit of that word */
    bit &= ~(BITS_PER_MAP - 1);

    /* outer loop iterates once per bitmap page */
    for (;;)
    {
        BlockNumber mapblkno;
        Page        mappage;
        uint32      last_inpage;

        /*
         * want to end search with the last existing overflow page
         *
         * NOTE(review): these limits are re-read from the metapage on every
         * pass; presumably because the metapage lock is dropped while each
         * bitmap page is examined, so the values may have changed -- confirm.
         */
        splitnum = metap->hashm_ovflpoint;
        max_ovflpg = metap->hashm_spares[splitnum] - 1;
        last_page = max_ovflpg >> BMPG_SHIFT(metap);
        last_bit = max_ovflpg & BMPG_MASK(metap);

        /* leaves the loop with the metapage still exclusively locked */
        if (i > last_page)
            break;

        Assert(i < metap->hashm_nmaps);
        mapblkno = metap->hashm_mapp[i];

        /* only the final bitmap page is scanned partially (up to last_bit) */
        if (i == last_page)
            last_inpage = last_bit;
        else
            last_inpage = BMPGSZ_BIT(metap) - 1;

        /* Release exclusive lock on metapage while reading bitmap page */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

        mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
        mappage = BufferGetPage(mapbuf);
        freep = HashPageGetBitmap(mappage);

        /* scan one 32-bit word at a time; any word != ALL_SET has a free bit */
        for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
        {
            if (freep[j] != ALL_SET)
            {
                page_found = true;

                /* Reacquire exclusive lock on the meta page */
                LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

                /* convert bit to bit number within page */
                bit += _hash_firstfreebit(freep[j]);
                /* remembered for SETBIT and for the WAL record below */
                bitmap_page_bit = bit;

                /* convert bit to absolute bit number */
                bit += (i << BMPG_SHIFT(metap));
                /* Calculate address of the recycled overflow page */
                blkno = bitno_to_blkno(metap, bit);

                /* Fetch and init the recycled page */
                ovflbuf = _hash_getinitbuf(rel, blkno);

                /* mapbuf stays held here; it is released after the update */
                goto found;
            }
        }

        /* No free space here, try to advance to next map page */
        _hash_relbuf(rel, mapbuf);
        mapbuf = InvalidBuffer;
        i++;
        j = 0;                  /* scan from start of next map page */
        bit = 0;

        /* Reacquire exclusive lock on the meta page */
        LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
    }

    /*
     * No free pages --- have to extend the relation to add an overflow page.
     * First, check to see if we have to add a new bitmap page too.
     */
    if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
    {
        /*
         * We create the new bitmap page with all pages marked "in use".
         * Actually two pages in the new bitmap's range will exist
         * immediately: the bitmap page itself, and the following page which
         * is the one we return to the caller.  Both of these are correctly
         * marked "in use".  Subsequent pages do not exist yet, but it is
         * convenient to pre-mark them as "in use" too.
         */
        bit = metap->hashm_spares[splitnum];

        /* metapage already has a write lock */
        if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
            ereport(ERROR,
                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                     errmsg("out of overflow pages in hash index \"%s\"",
                            RelationGetRelationName(rel))));

        newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
    }
    else
    {
        /*
         * Nothing to do here; since the page will be past the last used page,
         * we know its bitmap bit was preinitialized to "in use".
         */
    }

    /*
     * Calculate address of the new overflow page.  When a new bitmap page
     * was just obtained it occupies bit hashm_spares[splitnum], so the
     * overflow page takes the bit after it.
     */
    bit = BufferIsValid(newmapbuf) ?
        metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     * relation length stays in sync with ours.  XXX It's annoying to do this
     * with metapage write lock held; would be better to use a lock that
     * doesn't block incoming searches.
     *
     * It is okay to hold two buffer locks here (one on tail page of bucket
     * and other on new overflow page) since there cannot be anyone else
     * contending for access to ovflbuf.
     */
    ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);

found:

    /*
     * Do the update.  No ereport(ERROR) until changes are logged. We want to
     * log the changes for bitmap page and overflow page together to avoid
     * loss of pages in case the new page is added.
     */
    START_CRIT_SECTION();

    if (page_found)
    {
        Assert(BufferIsValid(mapbuf));

        /* mark page "in use" in the bitmap */
        SETBIT(freep, bitmap_page_bit);
        MarkBufferDirty(mapbuf);
    }
    else
    {
        /* update the count to indicate new overflow page is added */
        metap->hashm_spares[splitnum]++;

        if (BufferIsValid(newmapbuf))
        {
            _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
            MarkBufferDirty(newmapbuf);

            /* add the new bitmap page to the metapage's list of bitmaps */
            metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
            metap->hashm_nmaps++;
            /* second increment: the new bitmap page itself uses a slot too */
            metap->hashm_spares[splitnum]++;
        }

        MarkBufferDirty(metabuf);

        /*
         * for new overflow page, we don't need to explicitly set the bit in
         * bitmap page, as by default that will be set to "in use".
         */
    }

    /*
     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
    {
        /* 'bit' is the absolute bit number of the page just allocated */
        metap->hashm_firstfree = bit + 1;
        MarkBufferDirty(metabuf);
    }

    /* initialize new overflow page */
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = HashPageGetOpaque(ovflpage);
    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
    ovflopaque->hasho_page_id = HASHO_PAGE_ID;

    MarkBufferDirty(ovflbuf);

    /* logically chain overflow page to previous page */
    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);

    MarkBufferDirty(buf);

    /*
     * XLOG stuff
     *
     * Registered block ids: 0 = new overflow page, 1 = old tail page,
     * 2 = existing bitmap page (only when one is held), 3 = new bitmap page
     * (only when one was created), 4 = metapage.
     */
    if (RelationNeedsWAL(rel))
    {
        XLogRecPtr  recptr;
        xl_hash_add_ovfl_page xlrec;

        xlrec.bmpage_found = page_found;
        xlrec.bmsize = metap->hashm_bmsize;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashAddOvflPage);

        XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
        XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));

        XLogRegisterBuffer(1, buf, REGBUF_STANDARD);

        if (BufferIsValid(mapbuf))
        {
            XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
            XLogRegisterBufData(2, &bitmap_page_bit, sizeof(uint32));
        }

        if (BufferIsValid(newmapbuf))
            XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);

        XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
        XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);

        /* stamp every page covered by the record with its LSN */
        PageSetLSN(BufferGetPage(ovflbuf), recptr);
        PageSetLSN(BufferGetPage(buf), recptr);

        if (BufferIsValid(mapbuf))
            PageSetLSN(BufferGetPage(mapbuf), recptr);

        if (BufferIsValid(newmapbuf))
            PageSetLSN(BufferGetPage(newmapbuf), recptr);

        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    /*
     * retain_pin can still be true only if the tail-search loop never
     * advanced, i.e. 'buf' is still the buffer the caller passed in.
     */
    if (retain_pin)
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    else
        _hash_relbuf(rel, buf);

    if (BufferIsValid(mapbuf))
        _hash_relbuf(rel, mapbuf);

    /* metapage: drop only the lock; the buffer is not released here */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    if (BufferIsValid(newmapbuf))
        _hash_relbuf(rel, newmapbuf);

    return ovflbuf;
}

References _hash_checkpage(), _hash_firstfreebit(), _hash_getbuf(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_initbitmapbuffer(), _hash_relbuf(), ALL_SET, Assert(), bit(), bitno_to_blkno(), BITS_PER_MAP, BlockNumberIsValid(), xl_hash_add_ovfl_page::bmpage_found, BMPG_MASK, BMPG_SHIFT, BMPGSZ_BIT, xl_hash_add_ovfl_page::bmsize, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, HASH_MAX_BITMAPS, HASH_WRITE, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBuffer, j, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, SETBIT, SizeOfHashAddOvflPage, START_CRIT_SECTION, XLOG_HASH_ADD_OVFL_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert(), and _hash_splitbucket().

◆ _hash_binsearch()

OffsetNumber _hash_binsearch	(	Page	page,
		uint32	hash_value
	)

Definition at line 350 of file hashutil.c.

{
    /*
     * Binary-search 'page' for 'hash_value'.
     *
     * Returns the lowest offset whose tuple hash key is >= hash_value.  If
     * every tuple on the page has a smaller key (or the page has no items),
     * the result is PageGetMaxOffsetNumber(page) + 1, i.e. the slot just
     * past the last item.
     *
     * NOTE(review): like any binary search, this presumes the items on the
     * page are ordered by hash key -- confirm against the callers.
     */
    OffsetNumber lo = FirstOffsetNumber;
    OffsetNumber hi = PageGetMaxOffsetNumber(page) + 1;

    /* Invariant: lo <= answer <= hi; the range shrinks on every pass. */
    while (lo < hi)
    {
        OffsetNumber mid = (hi + lo) / 2;
        IndexTuple  tuple;

        Assert(OffsetNumberIsValid(mid));

        tuple = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));

        if (_hash_get_indextuple_hashkey(tuple) < hash_value)
            lo = mid + 1;       /* answer lies strictly right of mid */
        else
            hi = mid;           /* mid itself is still a candidate */
    }

    return lo;
}

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and upper().

Referenced by _hash_pgaddmultitup(), _hash_pgaddtup(), and _hash_readpage().

◆ _hash_binsearch_last()

OffsetNumber _hash_binsearch_last	(	Page	page,
		uint32	hash_value
	)

Definition at line 388 of file hashutil.c.

{
    /*
     * Mirror image of _hash_binsearch: binary-search 'page' for the highest
     * offset whose tuple hash key is <= hash_value.
     *
     * If every tuple on the page has a larger key (or the page has no
     * items), the result is FirstOffsetNumber - 1.
     *
     * NOTE(review): presumes the items on the page are ordered by hash key
     * -- confirm against the callers.
     */
    OffsetNumber lo = FirstOffsetNumber - 1;
    OffsetNumber hi = PageGetMaxOffsetNumber(page);

    /* Invariant: lo <= answer <= hi; the range shrinks on every pass. */
    while (lo < hi)
    {
        /* round the midpoint up so that "lo = mid" always makes progress */
        OffsetNumber mid = (hi + lo + 1) / 2;
        IndexTuple  tuple;

        Assert(OffsetNumberIsValid(mid));

        tuple = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));

        if (_hash_get_indextuple_hashkey(tuple) > hash_value)
            hi = mid - 1;       /* answer lies strictly left of mid */
        else
            lo = mid;           /* mid itself is still a candidate */
    }

    return lo;
}

References _hash_get_indextuple_hashkey(), Assert(), FirstOffsetNumber, lower(), OffsetNumberIsValid, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and upper().

Referenced by _hash_readpage().

◆ _hash_checkpage()

void _hash_checkpage	(	Relation	rel,
		Buffer	buf,
		int	flags
	)

Definition at line 210 of file hashutil.c.

{
    /*
     * Sanity-check the hash index page held in 'buf'.
     *
     * Raises ERROR (ERRCODE_INDEX_CORRUPTED) when:
     *   - the page is new (all-zero),
     *   - the special space is not MAXALIGN(sizeof(HashPageOpaqueData)),
     *   - 'flags' is nonzero and the page has none of those flag bits set,
     *   - 'flags' is exactly LH_META_PAGE and the metapage's magic number or
     *     version does not match HASH_MAGIC / HASH_VERSION.
     *
     * Passing flags == 0 skips the page-type test.  Returns normally when
     * every applicable check passes.
     */
    Page        pg = BufferGetPage(buf);
    HashMetaPage meta;

    /*
     * ReadBuffer lets a page through if its header is sane or if the page is
     * all zeroes; the all-zero case has to be rejected here.
     */
    if (PageIsNew(pg))
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" contains unexpected zero page at block %u",
                        RelationGetRelationName(rel),
                        BufferGetBlockNumber(buf)),
                 errhint("Please REINDEX it.")));

    /* The special area must be exactly the size of the hash opaque data. */
    if (PageGetSpecialSize(pg) != MAXALIGN(sizeof(HashPageOpaqueData)))
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" contains corrupted page at block %u",
                        RelationGetRelationName(rel),
                        BufferGetBlockNumber(buf)),
                 errhint("Please REINDEX it.")));

    /* If the caller named acceptable page types, at least one must match. */
    if (flags != 0 &&
        (HashPageGetOpaque(pg)->hasho_flag & flags) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" contains corrupted page at block %u",
                        RelationGetRelationName(rel),
                        BufferGetBlockNumber(buf)),
                 errhint("Please REINDEX it.")));

    /* The remaining checks apply only when verifying the metapage. */
    if (flags != LH_META_PAGE)
        return;

    meta = HashPageGetMeta(pg);

    if (meta->hashm_magic != HASH_MAGIC)
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" is not a hash index",
                        RelationGetRelationName(rel))));

    if (meta->hashm_version != HASH_VERSION)
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" has wrong hash version",
                        RelationGetRelationName(rel)),
                 errhint("Please REINDEX it.")));
}

References buf, BufferGetBlockNumber(), BufferGetPage(), ereport, errcode(), errhint(), errmsg(), ERROR, HASH_MAGIC, HASH_VERSION, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_flag, HashPageGetMeta, HashPageGetOpaque, LH_META_PAGE, MAXALIGN, PageGetSpecialSize(), PageIsNew(), and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_readpage(), and hashbulkdelete().

◆ _hash_checkqual()

bool _hash_checkqual	(	IndexScanDesc	scan,
		IndexTuple	itup
	)

Definition at line 31 of file hashutil.c.

{
    /*
     * Report whether 'itup' satisfies the scan keys of 'scan'.
     *
     * As compiled (NOT_USED undefined) this always returns true: the
     * original indexed value is not available to hand to sk_func, so the
     * scan conditions cannot be evaluated here.  The expectation is that
     * hashgettuple has already set the recheck flag so the main indexscan
     * code performs the real test.
     *
     * The NOT_USED section shows the per-key evaluation that would apply if
     * the value were available.
     */
#ifdef NOT_USED
    TupleDesc   desc = RelationGetDescr(scan->indexRelation);
    ScanKey     cur = scan->keyData;
    int         remaining;

    for (remaining = scan->numberOfKeys; remaining > 0; remaining--, cur++)
    {
        bool        attIsNull;
        Datum       attValue;
        Datum       matched;

        attValue = index_getattr(itup,
                                 cur->sk_attno,
                                 desc,
                                 &attIsNull);

        /* assume sk_func is strict: a NULL on either side cannot match */
        if (attIsNull)
            return false;
        if (cur->sk_flags & SK_ISNULL)
            return false;

        matched = FunctionCall2Coll(&cur->sk_func, cur->sk_collation,
                                    attValue, cur->sk_argument);

        if (!DatumGetBool(matched))
            return false;
    }
#endif

    return true;
}

References DatumGetBool(), FunctionCall2Coll(), index_getattr(), IndexScanDescData::indexRelation, sort-test::key, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, RelationGetDescr, SK_ISNULL, and test().

Referenced by _hash_load_qualified_items().

◆ _hash_convert_tuple()

bool _hash_convert_tuple	(	Relation	index,
		Datum *	user_values,
		bool *	user_isnull,
		Datum *	index_values,
		bool *	index_isnull
	)

Definition at line 318 of file hashutil.c.

{
    /*
     * Turn the user-supplied column value into the single-column form stored
     * in the index: the uint32 hash key of user_values[0].
     *
     * Returns false, leaving the output arrays untouched, when the user
     * value is NULL; otherwise fills index_values[0] / index_isnull[0] and
     * returns true.
     */

    /*
     * NULLs are never stored in hash indexes.  That is acceptable because
     * '=' is the only supported search operator and it is assumed strict.
     */
    if (user_isnull[0])
        return false;

    index_values[0] = UInt32GetDatum(_hash_datum2hashkey(index, user_values[0]));
    index_isnull[0] = false;

    return true;
}

References _hash_datum2hashkey(), and UInt32GetDatum().

Referenced by hashbuildCallback(), and hashinsert().

◆ _hash_datum2hashkey()

uint32 _hash_datum2hashkey	(	Relation	rel,
		Datum	key
	)

Definition at line 82 of file hashutil.c.

{
    /*
     * Compute the uint32 hash key for 'key' using the index's own
     * HASHSTANDARD_PROC support function for attribute 1 and the collation
     * of the first index column.
     *
     * XXX assumes index has only one attribute
     */
    FmgrInfo   *hashfn = index_getprocinfo(rel, 1, HASHSTANDARD_PROC);
    Oid         coll = rel->rd_indcollation[0];
    Datum       hashed;

    hashed = FunctionCall1Coll(hashfn, coll, key);

    return DatumGetUInt32(hashed);
}

References DatumGetUInt32(), FunctionCall1Coll(), HASHSTANDARD_PROC, index_getprocinfo(), sort-test::key, and RelationData::rd_indcollation.

Referenced by _hash_convert_tuple(), and _hash_first().

◆ _hash_datum2hashkey_type()

uint32 _hash_datum2hashkey_type	(	Relation	rel,
		Datum	key,
		Oid	keytype
	)

Definition at line 102 of file hashutil.c.

{
    /*
     * Compute the uint32 hash key for 'key', whose type is 'keytype'.
     *
     * The hash function is looked up in the opfamily of the first index
     * column as the HASHSTANDARD_PROC support function for
     * (keytype, keytype).  Raises ERROR via elog if the opfamily has no such
     * function.
     *
     * XXX assumes index has only one attribute
     */
    RegProcedure procid;
    Datum       hashed;

    procid = get_opfamily_proc(rel->rd_opfamily[0],
                               keytype,
                               keytype,
                               HASHSTANDARD_PROC);

    if (!RegProcedureIsValid(procid))
        elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
             HASHSTANDARD_PROC, keytype, keytype,
             RelationGetRelationName(rel));

    /* hash under the collation of the first index column */
    hashed = OidFunctionCall1Coll(procid, rel->rd_indcollation[0], key);

    return DatumGetUInt32(hashed);
}

References DatumGetUInt32(), elog, ERROR, get_opfamily_proc(), HASHSTANDARD_PROC, sort-test::key, OidFunctionCall1Coll(), RelationData::rd_indcollation, RelationData::rd_opfamily, RegProcedureIsValid, and RelationGetRelationName.

Referenced by _hash_first().

◆ _hash_doinsert()

void _hash_doinsert	(	Relation	rel,
		IndexTuple	itup,
		Relation	heapRel,
		bool	sorted
	)

Definition at line 38 of file hashinsert.c.

{
    /*
     * Insert index tuple 'itup' into hash index 'rel'.
     *
     * Outline:
     *   1. Pin the metapage and reject tuples larger than HashMaxItemSize.
     *   2. Write-lock the primary page of the bucket selected by the tuple's
     *      hash key; if that bucket is mid-split and we hold a cleanup lock,
     *      finish the split and start over.
     *   3. Walk the bucket chain until a page with enough free space is
     *      found, removing dead tuples or adding an overflow page as needed.
     *   4. Inside a critical section: add the tuple, bump hashm_ntuples in
     *      the metapage, and emit an XLOG_HASH_INSERT record when the
     *      relation needs WAL.
     *   5. Release buffers and, if the fill factor is exceeded, call
     *      _hash_expandtable().
     *
     * 'heapRel' is only passed through to _hash_vacuum_one_page(); 'sorted'
     * is only passed through to _hash_pgaddtup().
     */
    Buffer      buf = InvalidBuffer;
    Buffer      bucket_buf;
    Buffer      metabuf;
    HashMetaPage metap;
    HashMetaPage usedmetap = NULL;
    Page        metapage;
    Page        page;
    HashPageOpaque pageopaque;
    Size        itemsz;
    bool        do_expand;
    uint32      hashkey;
    Bucket      bucket;
    OffsetNumber itup_off;

    /*
     * Get the hash key for the item (it's stored in the index tuple itself).
     */
    hashkey = _hash_get_indextuple_hashkey(itup);

    /* compute item size too */
    itemsz = IndexTupleSize(itup);
    itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
                                 * need to be consistent */

    /* Re-entered from below after completing a pending bucket split. */
restart_insert:

    /*
     * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
     * examine pd_pagesize_version, but that can't change so we can examine it
     * without a lock.
     */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
    metapage = BufferGetPage(metabuf);

    /*
     * Check whether the item can fit on a hash page at all. (Eventually, we
     * ought to try to apply TOAST methods if not.)  Note that at this point,
     * itemsz doesn't include the ItemId.
     *
     * XXX this is useless code if we are only storing hash keys.
     */
    if (itemsz > HashMaxItemSize(metapage))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %zu exceeds hash maximum %zu",
                        itemsz, HashMaxItemSize(metapage)),
                 errhint("Values larger than a buffer page cannot be indexed.")));

    /* Lock the primary bucket page for the target bucket. */
    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
                                          &usedmetap);
    Assert(usedmetap != NULL);

    CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));

    /* remember the primary bucket buffer to release the pin on it at end. */
    bucket_buf = buf;

    page = BufferGetPage(buf);
    pageopaque = HashPageGetOpaque(page);
    bucket = pageopaque->hasho_bucket;

    /*
     * If this bucket is in the process of being split, try to finish the
     * split before inserting, because that might create room for the
     * insertion to proceed without allocating an additional overflow page.
     * It's only interesting to finish the split if we're trying to insert
     * into the bucket from which we're removing tuples (the "old" bucket),
     * not if we're trying to insert into the bucket into which tuples are
     * being moved (the "new" bucket).
     */
    if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
    {
        /* release the lock on bucket buffer, before completing the split. */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        /* bucket mapping values come from the metapage copy in usedmetap */
        _hash_finish_split(rel, metabuf, buf, bucket,
                           usedmetap->hashm_maxbucket,
                           usedmetap->hashm_highmask,
                           usedmetap->hashm_lowmask);

        /* release the pin on old and meta buffer.  retry for insert. */
        _hash_dropbuf(rel, buf);
        _hash_dropbuf(rel, metabuf);
        goto restart_insert;
    }

    /* Do the insertion */
    /* Loop until 'page' (locked via 'buf') has room for the new item. */
    while (PageGetFreeSpace(page) < itemsz)
    {
        BlockNumber nextblkno;

        /*
         * Check if current page has any DEAD tuples. If yes, delete these
         * tuples and see if we can get a space for the new item to be
         * inserted before moving to the next page in the bucket chain.
         */
        if (H_HAS_DEAD_TUPLES(pageopaque))
        {

            /* dead-tuple removal is attempted only with a cleanup lock */
            if (IsBufferCleanupOK(buf))
            {
                _hash_vacuum_one_page(rel, heapRel, metabuf, buf);

                if (PageGetFreeSpace(page) >= itemsz)
                    break;      /* OK, now we have enough space */
            }
        }

        /*
         * no space on this page; check for an overflow page
         */
        nextblkno = pageopaque->hasho_nextblkno;

        if (BlockNumberIsValid(nextblkno))
        {
            /*
             * ovfl page exists; go get it.  if it doesn't have room, we'll
             * find out next pass through the loop test above.  we always
             * release both the lock and pin if this is an overflow page, but
             * only the lock if this is the primary bucket page, since the pin
             * on the primary bucket must be retained throughout the scan.
             */
            if (buf != bucket_buf)
                _hash_relbuf(rel, buf);
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            page = BufferGetPage(buf);
        }
        else
        {
            /*
             * we're at the end of the bucket chain and we haven't found a
             * page with enough room.  allocate a new overflow page.
             */

            /* release our write lock without modifying buffer */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /*
             * chain to a new overflow page; the last argument asks
             * _hash_addovflpage to retain the pin when 'buf' is the primary
             * bucket page
             */
            buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
            page = BufferGetPage(buf);

            /* should fit now, given test above */
            Assert(PageGetFreeSpace(page) >= itemsz);
        }

        /* refresh the opaque pointer for the page we just moved to */
        pageopaque = HashPageGetOpaque(page);
        Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    /*
     * Write-lock the metapage so we can increment the tuple count. After
     * incrementing it, check to see if it's time for a split.
     */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    /* Do the update.  No ereport(ERROR) until changes are logged */
    START_CRIT_SECTION();

    /* found page with enough space, so add the item here */
    itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
    MarkBufferDirty(buf);

    /* metapage operations */
    metap = HashPageGetMeta(metapage);
    metap->hashm_ntuples += 1;

    /* Make sure this stays in sync with _hash_expandtable() */
    do_expand = metap->hashm_ntuples >
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);

    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashInsert);

        /* block 1 is the metapage; block 0 is the page receiving the tuple */
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, itup, IndexTupleSize(itup));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        /* stamp both modified pages with the LSN of the record */
        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    /* drop lock on metapage, but keep pin */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    /*
     * Release the modified page and ensure to release the pin on primary
     * page.
     */
    _hash_relbuf(rel, buf);
    if (buf != bucket_buf)
        _hash_dropbuf(rel, bucket_buf);

    /* Attempt to split if a split is needed */
    if (do_expand)
        _hash_expandtable(rel, metabuf);

    /* Finally drop our pin on the metapage */
    _hash_dropbuf(rel, metabuf);
}

References _hash_addovflpage(), _hash_dropbuf(), _hash_expandtable(), _hash_finish_split(), _hash_get_indextuple_hashkey(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_pgaddtup(), _hash_relbuf(), _hash_vacuum_one_page(), Assert(), BlockNumberIsValid(), buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictIn(), END_CRIT_SECTION, ereport, errcode(), errhint(), errmsg(), ERROR, H_BUCKET_BEING_SPLIT, H_HAS_DEAD_TUPLES, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMaxItemSize, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetMeta, HashPageGetOpaque, IndexTupleSize(), InvalidBuffer, IsBufferCleanupOK(), LH_META_PAGE, LH_OVERFLOW_PAGE, LH_PAGE_TYPE, LockBuffer(), MarkBufferDirty(), MAXALIGN, xl_hash_insert::offnum, PageGetFreeSpace(), PageSetLSN(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashInsert, START_CRIT_SECTION, XLOG_HASH_INSERT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _h_indexbuild(), hashbuildCallback(), and hashinsert().

◆ _hash_dropbuf()

void _hash_dropbuf	(	Relation	rel,
		Buffer	buf
	)

Definition at line 277 of file hashpage.c.

{
    /*
     * Thin wrapper: hand 'buf' to ReleaseBuffer().  'rel' is not used by
     * this body.
     *
     * NOTE(review): callers in this file describe this as dropping a pin
     * without touching any lock -- confirm against ReleaseBuffer()'s
     * contract.
     */
    ReleaseBuffer(buf);
}

References buf, and ReleaseBuffer().

Referenced by _hash_doinsert(), _hash_dropscanbuf(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_getbucketbuf_from_hashkey(), _hash_next(), _hash_readprev(), and hashbulkdelete().

◆ _hash_dropscanbuf()

void _hash_dropscanbuf	(	Relation	rel,
		HashScanOpaque	so
	)

Definition at line 289 of file hashpage.c.

{
    /*
     * Drop every buffer pin recorded in scan state 'so' and reset the
     * split-scan flags.
     *
     * The bucket and split-bucket buffers may be the same buffer as
     * so->currPos.buf; in that case they are skipped here and the buffer is
     * dropped exactly once, through currPos.buf, at the end.
     */
    Buffer      curbuf = so->currPos.buf;
    Buffer      bucketbuf = so->hashso_bucket_buf;
    Buffer      splitbuf = so->hashso_split_bucket_buf;

    /* pin on the primary bucket page */
    if (BufferIsValid(bucketbuf) && bucketbuf != curbuf)
        _hash_dropbuf(rel, bucketbuf);
    so->hashso_bucket_buf = InvalidBuffer;

    /* pin on the primary bucket page of the bucket being split */
    if (BufferIsValid(splitbuf) && splitbuf != curbuf)
        _hash_dropbuf(rel, splitbuf);
    so->hashso_split_bucket_buf = InvalidBuffer;

    /* whatever pin the current scan position still holds */
    if (BufferIsValid(curbuf))
        _hash_dropbuf(rel, curbuf);
    so->currPos.buf = InvalidBuffer;

    /* the scan is no longer in the middle of a split bucket */
    so->hashso_buc_populated = false;
    so->hashso_buc_split = false;
}

References _hash_dropbuf(), HashScanPosData::buf, BufferIsValid(), HashScanOpaqueData::currPos, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, and InvalidBuffer.

Referenced by _hash_next(), hashendscan(), and hashrescan().

◆ _hash_expandtable()

void _hash_expandtable	(	Relation	rel,
		Buffer	metabuf
	)

Definition at line 614 of file hashpage.c.

{
    HashMetaPage metap;
    Bucket      old_bucket;
    Bucket      new_bucket;
    uint32      spare_ndx;
    BlockNumber start_oblkno;
    BlockNumber start_nblkno;
    Buffer      buf_nblkno;
    Buffer      buf_oblkno;
    Page        opage;
    Page        npage;
    HashPageOpaque oopaque;
    HashPageOpaque nopaque;
    uint32      maxbucket;
    uint32      highmask;
    uint32      lowmask;
    bool        metap_update_masks = false;
    bool        metap_update_splitpoint = false;
 
restart_expand:
 
    /*
     * Write-lock the meta page.  It used to be necessary to acquire a
     * heavyweight lock to begin a split, but that is no longer required.
     */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));
 
    /*
     * Check to see if split is still needed; someone else might have already
     * done one while we waited for the lock.
     *
     * Make sure this stays in sync with _hash_doinsert()
     */
    if (metap->hashm_ntuples <=
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
        goto fail;
 
    /*
     * Can't split anymore if maxbucket has reached its maximum possible
     * value.
     *
     * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
     * the calculation maxbucket+1 mustn't overflow).  Currently we restrict
     * to half that to prevent failure of pg_ceil_log2_32() and insufficient
     * space in hashm_spares[].  It's moot anyway because an index with 2^32
     * buckets would certainly overflow BlockNumber and hence
     * _hash_alloc_buckets() would fail, but if we supported buckets smaller
     * than a disk block then this would be an independent constraint.
     *
     * If you change this, see also the maximum initial number of buckets in
     * _hash_init().
     */
    if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
        goto fail;
 
    /*
     * Determine which bucket is to be split, and attempt to take cleanup lock
     * on the old bucket.  If we can't get the lock, give up.
     *
     * The cleanup lock protects us not only against other backends, but
     * against our own backend as well.
     *
     * The cleanup lock is mainly to protect the split from concurrent
     * inserts. See src/backend/access/hash/README, Lock Definitions for
     * further details.  Due to this locking restriction, if there is any
     * pending scan, the split will give up which is not good, but harmless.
     */
    new_bucket = metap->hashm_maxbucket + 1;
 
    old_bucket = (new_bucket & metap->hashm_lowmask);
 
    start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
 
    buf_oblkno = _hash_getbuf_with_condlock_cleanup(rel, start_oblkno, LH_BUCKET_PAGE);
    if (!buf_oblkno)
        goto fail;
 
    opage = BufferGetPage(buf_oblkno);
    oopaque = HashPageGetOpaque(opage);
 
    /*
     * We want to finish the split from a bucket as there is no apparent
     * benefit by not doing so and it will make the code complicated to finish
     * the split that involves multiple buckets considering the case where new
     * split also fails.  We don't need to consider the new bucket for
     * completing the split here as it is not possible that a re-split of new
     * bucket starts when there is still a pending split from old bucket.
     */
    if (H_BUCKET_BEING_SPLIT(oopaque))
    {
        /*
         * Copy bucket mapping info now; refer the comment in code below where
         * we copy this information before calling _hash_splitbucket to see
         * why this is okay.
         */
        maxbucket = metap->hashm_maxbucket;
        highmask = metap->hashm_highmask;
        lowmask = metap->hashm_lowmask;
 
        /*
         * Release the lock on metapage and old_bucket, before completing the
         * split.
         */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
        LockBuffer(buf_oblkno, BUFFER_LOCK_UNLOCK);
 
        _hash_finish_split(rel, metabuf, buf_oblkno, old_bucket, maxbucket,
                           highmask, lowmask);
 
        /* release the pin on old buffer and retry for expand. */
        _hash_dropbuf(rel, buf_oblkno);
 
        goto restart_expand;
    }
 
    /*
     * Clean the tuples remained from the previous split.  This operation
     * requires cleanup lock and we already have one on the old bucket, so
     * let's do it. We also don't want to allow further splits from the bucket
     * till the garbage of previous split is cleaned.  This has two
     * advantages; first, it helps in avoiding the bloat due to garbage and
     * second is, during cleanup of bucket, we are always sure that the
     * garbage tuples belong to most recently split bucket.  On the contrary,
     * if we allow cleanup of bucket after meta page is updated to indicate
     * the new split and before the actual split, the cleanup operation won't
     * be able to decide whether the tuple has been moved to the newly created
     * bucket and ended up deleting such tuples.
     */
    if (H_NEEDS_SPLIT_CLEANUP(oopaque))
    {
        /*
         * Copy bucket mapping info now; refer to the comment in code below
         * where we copy this information before calling _hash_splitbucket to
         * see why this is okay.
         */
        maxbucket = metap->hashm_maxbucket;
        highmask = metap->hashm_highmask;
        lowmask = metap->hashm_lowmask;
 
        /* Release the metapage lock. */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 
        hashbucketcleanup(rel, old_bucket, buf_oblkno, start_oblkno, NULL,
                          maxbucket, highmask, lowmask, NULL, NULL, true,
                          NULL, NULL);
 
        _hash_dropbuf(rel, buf_oblkno);
 
        goto restart_expand;
    }
 
    /*
     * There shouldn't be any active scan on new bucket.
     *
     * Note: it is safe to compute the new bucket's blkno here, even though we
     * may still need to update the BUCKET_TO_BLKNO mapping.  This is because
     * the current value of hashm_spares[hashm_ovflpoint] correctly shows
     * where we are going to put a new splitpoint's worth of buckets.
     */
    start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
 
    /*
     * If the split point is increasing we need to allocate a new batch of
     * bucket pages.
     */
    spare_ndx = _hash_spareindex(new_bucket + 1);
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        uint32      buckets_to_add;
 
        Assert(spare_ndx == metap->hashm_ovflpoint + 1);
 
        /*
         * We treat allocation of buckets as a separate WAL-logged action.
         * Even if we fail after this operation, won't leak bucket pages;
         * rather, the next split will consume this space. In any case, even
         * without failure we don't use all the space in one split operation.
         */
        buckets_to_add = _hash_get_totalbuckets(spare_ndx) - new_bucket;
        if (!_hash_alloc_buckets(rel, start_nblkno, buckets_to_add))
        {
            /* can't split due to BlockNumber overflow */
            _hash_relbuf(rel, buf_oblkno);
            goto fail;
        }
    }
 
    /*
     * Physically allocate the new bucket's primary page.  We want to do this
     * before changing the metapage's mapping info, in case we can't get the
     * disk space.
     *
     * XXX It doesn't make sense to call _hash_getnewbuf first, zeroing the
     * buffer, and then only afterwards check whether we have a cleanup lock.
     * However, since no scan can be accessing the buffer yet, any concurrent
     * accesses will just be from processes like the bgwriter or checkpointer
     * which don't care about its contents, so it doesn't really matter.
     */
    buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
    if (!IsBufferCleanupOK(buf_nblkno))
    {
        _hash_relbuf(rel, buf_oblkno);
        _hash_relbuf(rel, buf_nblkno);
        goto fail;
    }
 
    /*
     * Since we are scribbling on the pages in the shared buffers, establish a
     * critical section.  Any failure in this next code leaves us with a big
     * problem: the metapage is effectively corrupt but could get written back
     * to disk.
     */
    START_CRIT_SECTION();
 
    /*
     * Okay to proceed with split.  Update the metapage bucket mapping info.
     */
    metap->hashm_maxbucket = new_bucket;
 
    if (new_bucket > metap->hashm_highmask)
    {
        /* Starting a new doubling */
        metap->hashm_lowmask = metap->hashm_highmask;
        metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
        metap_update_masks = true;
    }
 
    /*
     * If the split point is increasing we need to adjust the hashm_spares[]
     * array and hashm_ovflpoint so that future overflow pages will be created
     * beyond this new batch of bucket pages.
     */
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
        metap->hashm_ovflpoint = spare_ndx;
        metap_update_splitpoint = true;
    }
 
    MarkBufferDirty(metabuf);
 
    /*
     * Copy bucket mapping info now; this saves re-accessing the meta page
     * inside _hash_splitbucket's inner loop.  Note that once we drop the
     * split lock, other splits could begin, so these values might be out of
     * date before _hash_splitbucket finishes.  That's okay, since all it
     * needs is to tell which of these two buckets to map hashkeys into.
     */
    maxbucket = metap->hashm_maxbucket;
    highmask = metap->hashm_highmask;
    lowmask = metap->hashm_lowmask;
 
    opage = BufferGetPage(buf_oblkno);
    oopaque = HashPageGetOpaque(opage);
 
    /*
     * Mark the old bucket to indicate that split is in progress.  (At
     * operation end, we will clear the split-in-progress flag.)  Also, for a
     * primary bucket page, hasho_prevblkno stores the number of buckets that
     * existed as of the last split, so we must update that value here.
     */
    oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT;
    oopaque->hasho_prevblkno = maxbucket;
 
    MarkBufferDirty(buf_oblkno);
 
    npage = BufferGetPage(buf_nblkno);
 
    /*
     * initialize the new bucket's primary page and mark it to indicate that
     * split is in progress.
     */
    nopaque = HashPageGetOpaque(npage);
    nopaque->hasho_prevblkno = maxbucket;
    nopaque->hasho_nextblkno = InvalidBlockNumber;
    nopaque->hasho_bucket = new_bucket;
    nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED;
    nopaque->hasho_page_id = HASHO_PAGE_ID;
 
    MarkBufferDirty(buf_nblkno);
 
    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_split_allocate_page xlrec;
        XLogRecPtr  recptr;
 
        xlrec.new_bucket = maxbucket;
        xlrec.old_bucket_flag = oopaque->hasho_flag;
        xlrec.new_bucket_flag = nopaque->hasho_flag;
        xlrec.flags = 0;
 
        XLogBeginInsert();
 
        XLogRegisterBuffer(0, buf_oblkno, REGBUF_STANDARD);
        XLogRegisterBuffer(1, buf_nblkno, REGBUF_WILL_INIT);
        XLogRegisterBuffer(2, metabuf, REGBUF_STANDARD);
 
        if (metap_update_masks)
        {
            xlrec.flags |= XLH_SPLIT_META_UPDATE_MASKS;
            XLogRegisterBufData(2, &metap->hashm_lowmask, sizeof(uint32));
            XLogRegisterBufData(2, &metap->hashm_highmask, sizeof(uint32));
        }
 
        if (metap_update_splitpoint)
        {
            xlrec.flags |= XLH_SPLIT_META_UPDATE_SPLITPOINT;
            XLogRegisterBufData(2, &metap->hashm_ovflpoint,
                                sizeof(uint32));
            XLogRegisterBufData(2,
                                &metap->hashm_spares[metap->hashm_ovflpoint],
                                sizeof(uint32));
        }
 
        XLogRegisterData(&xlrec, SizeOfHashSplitAllocPage);
 
        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_ALLOCATE_PAGE);
 
        PageSetLSN(BufferGetPage(buf_oblkno), recptr);
        PageSetLSN(BufferGetPage(buf_nblkno), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }
 
    END_CRIT_SECTION();
 
    /* drop lock, but keep pin */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 
    /* Relocate records to the new bucket */
    _hash_splitbucket(rel, metabuf,
                      old_bucket, new_bucket,
                      buf_oblkno, buf_nblkno, NULL,
                      maxbucket, highmask, lowmask);
 
    /* all done, now release the pins on primary buckets. */
    _hash_dropbuf(rel, buf_oblkno);
    _hash_dropbuf(rel, buf_nblkno);
 
    return;
 
    /* Here if decide not to split or fail to acquire old bucket lock */
fail:
 
    /* We didn't write the metapage, so just drop lock */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
}

References _hash_alloc_buckets(), _hash_checkpage(), _hash_dropbuf(), _hash_finish_split(), _hash_get_totalbuckets(), _hash_getbuf_with_condlock_cleanup(), _hash_getnewbuf(), _hash_relbuf(), _hash_spareindex(), _hash_splitbucket(), Assert(), BUCKET_TO_BLKNO, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, xl_hash_split_allocate_page::flags, H_BUCKET_BEING_SPLIT, H_NEEDS_SPLIT_CLEANUP, hashbucketcleanup(), HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, InvalidBlockNumber, IsBufferCleanupOK(), LH_BUCKET_BEING_POPULATED, LH_BUCKET_BEING_SPLIT, LH_BUCKET_PAGE, LH_META_PAGE, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), xl_hash_split_allocate_page::new_bucket, xl_hash_split_allocate_page::new_bucket_flag, xl_hash_split_allocate_page::old_bucket_flag, PageSetLSN(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, SizeOfHashSplitAllocPage, START_CRIT_SECTION, XLH_SPLIT_META_UPDATE_MASKS, XLH_SPLIT_META_UPDATE_SPLITPOINT, XLOG_HASH_SPLIT_ALLOCATE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_doinsert().

◆ _hash_finish_split()

void _hash_finish_split	(	Relation	rel,
		Buffer	metabuf,
		Buffer	obuf,
		Bucket	obucket,
		uint32	maxbucket,
		uint32	highmask,
		uint32	lowmask
	)

Definition at line 1356 of file hashpage.c.

{
    /*
     * Try to complete a split of bucket 'obucket' that was left unfinished.
     *
     * Every heap TID already present in the new bucket's page chain is
     * collected into a hash table, which is then handed to
     * _hash_splitbucket() (presumably so that tuples already copied are not
     * copied again -- that routine is not visible here).
     *
     * rel                          - the hash index relation
     * metabuf                      - metapage buffer, passed through to
     *                                _hash_splitbucket()
     * obuf                         - primary page of the old bucket; this
     *                                function never releases the pin on it
     * obucket                      - bucket number of the old bucket
     * maxbucket, highmask, lowmask - bucket mapping info, passed through to
     *                                _hash_splitbucket()
     *
     * Cleanup locks on both primary pages are taken conditionally.  If
     * either cannot be obtained the function gives up silently, after
     * releasing everything it acquired itself, including the pin on the new
     * bucket's primary page.
     */
    HASHCTL     hash_ctl;
    HTAB       *tidhtab;
    Buffer      bucket_nbuf = InvalidBuffer;
    Buffer      nbuf;
    Page        npage;
    BlockNumber nblkno;
    BlockNumber bucket_nblkno;
    HashPageOpaque npageopaque;
    Bucket      nbucket;
    bool        found;

    /* Initialize hash tables used to track TIDs */
    hash_ctl.keysize = sizeof(ItemPointerData);
    hash_ctl.entrysize = sizeof(ItemPointerData);
    hash_ctl.hcxt = CurrentMemoryContext;

    tidhtab =
        hash_create("bucket ctids",
                    256,        /* arbitrary initial size */
                    &hash_ctl,
                    HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

    bucket_nblkno = nblkno = _hash_get_newblock_from_oldbucket(rel, obucket);

    /*
     * Scan the new bucket and build hash table of TIDs
     */
    for (;;)
    {
        OffsetNumber noffnum;
        OffsetNumber nmaxoffnum;

        nbuf = _hash_getbuf(rel, nblkno, HASH_READ,
                            LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

        /* remember the primary bucket buffer to acquire cleanup lock on it. */
        if (nblkno == bucket_nblkno)
            bucket_nbuf = nbuf;

        npage = BufferGetPage(nbuf);
        npageopaque = HashPageGetOpaque(npage);

        /* Scan each tuple in new page */
        nmaxoffnum = PageGetMaxOffsetNumber(npage);
        for (noffnum = FirstOffsetNumber;
             noffnum <= nmaxoffnum;
             noffnum = OffsetNumberNext(noffnum))
        {
            IndexTuple  itup;

            /* Fetch the item's TID and insert it in hash table. */
            itup = (IndexTuple) PageGetItem(npage,
                                            PageGetItemId(npage, noffnum));

            (void) hash_search(tidhtab, &itup->t_tid, HASH_ENTER, &found);

            /* each heap TID is expected to appear only once in the bucket */
            Assert(!found);
        }

        /* fetch the link to the next page before giving up the lock */
        nblkno = npageopaque->hasho_nextblkno;

        /*
         * Release the lock we took with HASH_READ; the page was not
         * modified.  For the primary bucket page only the lock is released
         * so that the pin is retained; other pages are released entirely.
         */
        if (nbuf == bucket_nbuf)
            LockBuffer(nbuf, BUFFER_LOCK_UNLOCK);
        else
            _hash_relbuf(rel, nbuf);

        /* Exit loop if no more overflow pages in new bucket */
        if (!BlockNumberIsValid(nblkno))
            break;
    }

    /*
     * Conditionally get the cleanup lock on old and new buckets to perform
     * the split operation.  If we don't get the cleanup locks, silently give
     * up and next insertion on old bucket will try again to complete the
     * split.
     *
     * On either bail-out path we still hold the pin on the new bucket's
     * primary page that was kept by the loop above.  bucket_nbuf is local to
     * this function, so nobody else can release it; drop it here to avoid
     * leaking the pin.
     */
    if (!ConditionalLockBufferForCleanup(obuf))
    {
        _hash_dropbuf(rel, bucket_nbuf);
        hash_destroy(tidhtab);
        return;
    }
    if (!ConditionalLockBufferForCleanup(bucket_nbuf))
    {
        /* undo the lock just taken on the old bucket; its pin stays */
        LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
        _hash_dropbuf(rel, bucket_nbuf);
        hash_destroy(tidhtab);
        return;
    }

    npage = BufferGetPage(bucket_nbuf);
    npageopaque = HashPageGetOpaque(npage);
    nbucket = npageopaque->hasho_bucket;

    _hash_splitbucket(rel, metabuf, obucket,
                      nbucket, obuf, bucket_nbuf, tidhtab,
                      maxbucket, highmask, lowmask);

    /* release the pin on the new bucket's primary page */
    _hash_dropbuf(rel, bucket_nbuf);
    hash_destroy(tidhtab);
}

References _hash_dropbuf(), _hash_get_newblock_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), _hash_splitbucket(), Assert(), BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), CurrentMemoryContext, HASHCTL::entrysize, FirstOffsetNumber, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, HASH_READ, hash_search(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, HASHCTL::hcxt, InvalidBuffer, HASHCTL::keysize, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, LockBuffer(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and IndexTupleData::t_tid.

Referenced by _hash_doinsert(), and _hash_expandtable().

◆ _hash_first()

bool _hash_first	(	IndexScanDesc	scan,
		ScanDirection	dir
	)

Definition at line 288 of file hashsearch.c.

{
    /*
     * Position a hash index scan on its first matching item.
     *
     * Hashes the first scan key, locks and pins the primary page of the
     * target bucket, handles the case where that bucket is still being
     * populated by a split, and then loads the matching items of one page
     * through _hash_readpage().
     *
     * Returns false if the scan key is NULL or if _hash_readpage() reports
     * no match; otherwise sets scan->xs_heaptid to the current item's heap
     * TID and returns true.  Raises an ERROR if the scan has no keys.
     */
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    ScanKey     cur;
    uint32      hashkey;
    Bucket      bucket;
    Buffer      buf;
    Page        page;
    HashPageOpaque opaque;
    HashScanPosItem *currItem;
 
    /* account for this index scan in the statistics counters */
    pgstat_count_index_scan(rel);
    if (scan->instrument)
        scan->instrument->nsearches++;
 
    /*
     * We do not support hash scans with no index qualification, because we
     * would have to read the whole index rather than just one bucket. That
     * creates a whole raft of problems, since we haven't got a practical way
     * to lock all the buckets against splits or compactions.
     */
    if (scan->numberOfKeys < 1)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("hash indexes do not support whole-index scans")));
 
    /* There may be more than one index qual, but we hash only the first */
    cur = &scan->keyData[0];
 
    /* We support only single-column hash indexes */
    Assert(cur->sk_attno == 1);
    /* And there's only one operator strategy, too */
    Assert(cur->sk_strategy == HTEqualStrategyNumber);
 
    /*
     * If the constant in the index qual is NULL, assume it cannot match any
     * items in the index.
     */
    if (cur->sk_flags & SK_ISNULL)
        return false;
 
    /*
     * Okay to compute the hash key.  We want to do this before acquiring any
     * locks, in case a user-defined hash function happens to be slow.
     *
     * If scankey operator is not a cross-type comparison, we can use the
     * cached hash function; otherwise gotta look it up in the catalogs.
     *
     * We support the convention that sk_subtype == InvalidOid means the
     * opclass input type; this is a hack to simplify life for ScanKeyInit().
     */
    if (cur->sk_subtype == rel->rd_opcintype[0] ||
        cur->sk_subtype == InvalidOid)
        hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
    else
        hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
                                           cur->sk_subtype);
 
    /* remember the hash value in the scan state */
    so->hashso_sk_hash = hashkey;
 
    /* fetch the primary page of the target bucket, requested with HASH_READ */
    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
    PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
    page = BufferGetPage(buf);
    opaque = HashPageGetOpaque(page);
    bucket = opaque->hasho_bucket;
 
    so->hashso_bucket_buf = buf;
 
    /*
     * If a bucket split is in progress, then while scanning the bucket being
     * populated, we need to skip tuples that were copied from bucket being
     * split.  We also need to maintain a pin on the bucket being split to
     * ensure that split-cleanup work done by vacuum doesn't remove tuples
     * from it till this scan is done.  We need to maintain a pin on the
     * bucket being populated to ensure that vacuum doesn't squeeze that
     * bucket till this scan is complete; otherwise, the ordering of tuples
     * can't be maintained during forward and backward scans.  Here, we have
     * to be cautious about locking order: first, acquire the lock on bucket
     * being split; then, release the lock on it but not the pin; then,
     * acquire a lock on bucket being populated and again re-verify whether
     * the bucket split is still in progress.  Acquiring the lock on bucket
     * being split first ensures that the vacuum waits for this scan to
     * finish.
     */
    if (H_BUCKET_BEING_POPULATED(opaque))
    {
        BlockNumber old_blkno;
        Buffer      old_buf;
 
        old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
 
        /*
         * release the lock on new bucket and re-acquire it after acquiring
         * the lock on old bucket.
         */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 
        old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
 
        /*
         * remember the split bucket buffer so as to use it later for
         * scanning.
         */
        so->hashso_split_bucket_buf = old_buf;
        /* drop only the lock on the old bucket; the pin is kept */
        LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
 
        LockBuffer(buf, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buf);
        opaque = HashPageGetOpaque(page);
        Assert(opaque->hasho_bucket == bucket);
 
        /*
         * Re-check the flag now that the lock is held again: if the split
         * finished in the meantime, the old bucket's pin is not needed.
         */
        if (H_BUCKET_BEING_POPULATED(opaque))
            so->hashso_buc_populated = true;
        else
        {
            _hash_dropbuf(rel, so->hashso_split_bucket_buf);
            so->hashso_split_bucket_buf = InvalidBuffer;
        }
    }
 
    /* If a backwards scan is requested, move to the end of the chain */
    if (ScanDirectionIsBackward(dir))
    {
        /*
         * Backward scans that start during a split need to start from the
         * end of the bucket being split.
         */
        while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
               (so->hashso_buc_populated && !so->hashso_buc_split))
            _hash_readnext(scan, &buf, &page, &opaque);
    }
 
    /* remember which buffer we have pinned, if any */
    Assert(BufferIsInvalid(so->currPos.buf));
    so->currPos.buf = buf;
 
    /* Now find all the tuples satisfying the qualification from a page */
    if (!_hash_readpage(scan, &buf, dir))
        return false;
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_heaptid = currItem->heapTid;
 
    /* if we're here, _hash_readpage found at least one valid tuple */
    return true;
}

References _hash_datum2hashkey(), _hash_datum2hashkey_type(), _hash_dropbuf(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getbuf(), _hash_readnext(), _hash_readpage(), Assert(), BlockNumberIsValid(), buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsInvalid, cur, HashScanOpaqueData::currPos, ereport, errcode(), errmsg(), ERROR, H_BUCKET_BEING_POPULATED, HASH_READ, HashPageGetOpaque, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_sk_hash, HashScanOpaqueData::hashso_split_bucket_buf, HTEqualStrategyNumber, IndexScanDescData::indexRelation, IndexScanDescData::instrument, InvalidBuffer, InvalidOid, HashScanPosData::itemIndex, HashScanPosData::items, IndexScanDescData::keyData, LH_BUCKET_PAGE, LockBuffer(), IndexScanInstrumentation::nsearches, IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, pgstat_count_index_scan, PredicateLockPage(), RelationData::rd_opcintype, ScanDirectionIsBackward, SK_ISNULL, IndexScanDescData::xs_heaptid, and IndexScanDescData::xs_snapshot.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_freeovflpage()

BlockNumber _hash_freeovflpage	(	Relation	rel,
		Buffer	bucketbuf,
		Buffer	ovflbuf,
		Buffer	wbuf,
		IndexTuple *	itups,
		OffsetNumber *	itup_offsets,
		Size *	tups_size,
		uint16	nitups,
		BufferAccessStrategy	bstrategy
	)

Definition at line 490 of file hashovfl.c.

{
    /*
     * Remove the overflow page in 'ovflbuf' from its bucket chain and mark
     * it free in the overflow bitmap.
     *
     * rel          - the hash index relation
     * bucketbuf    - primary bucket page buffer; only registered in the WAL
     *                record here (when it differs from wbuf), not modified
     * ovflbuf      - the overflow page being freed; released before return
     * wbuf         - the "write" page that receives the tuples in itups[];
     *                not released by this function
     * itups, itup_offsets, tups_size, nitups
     *              - the tuples to add to wbuf, the offsets array handed to
     *                _hash_pgaddmultitup(), the per-tuple sizes used for
     *                WAL registration, and the tuple count
     * bstrategy    - buffer access strategy used when reading the
     *                neighbouring chain pages
     *
     * Returns the block number that followed the freed page in the chain,
     * as read from the page before it was reinitialized.
     */
    HashMetaPage metap;
    Buffer      metabuf;
    Buffer      mapbuf;
    BlockNumber ovflblkno;
    BlockNumber prevblkno;
    BlockNumber blkno;
    BlockNumber nextblkno;
    BlockNumber writeblkno;
    HashPageOpaque ovflopaque;
    Page        ovflpage;
    Page        mappage;
    uint32     *freep;
    uint32      ovflbitno;
    int32       bitmappage,
                bitmapbit;
    Bucket      bucket PG_USED_FOR_ASSERTS_ONLY;
    Buffer      prevbuf = InvalidBuffer;
    Buffer      nextbuf = InvalidBuffer;
    bool        update_metap = false;
 
    /* Get information from the doomed page */
    _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
    ovflblkno = BufferGetBlockNumber(ovflbuf);
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = HashPageGetOpaque(ovflpage);
    nextblkno = ovflopaque->hasho_nextblkno;
    prevblkno = ovflopaque->hasho_prevblkno;
    writeblkno = BufferGetBlockNumber(wbuf);
    bucket = ovflopaque->hasho_bucket;
 
    /*
     * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
     * up the bucket chain members behind and ahead of the overflow page being
     * deleted.  Concurrency issues are avoided by using lock chaining as
     * described atop hashbucketcleanup.
     */
    if (BlockNumberIsValid(prevblkno))
    {
        /* reuse the caller's buffer when the previous page is the write page */
        if (prevblkno == writeblkno)
            prevbuf = wbuf;
        else
            prevbuf = _hash_getbuf_with_strategy(rel,
                                                 prevblkno,
                                                 HASH_WRITE,
                                                 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
                                                 bstrategy);
    }
    if (BlockNumberIsValid(nextblkno))
        nextbuf = _hash_getbuf_with_strategy(rel,
                                             nextblkno,
                                             HASH_WRITE,
                                             LH_OVERFLOW_PAGE,
                                             bstrategy);
 
    /* Note: bstrategy is intentionally not used for metapage and bitmap */
 
    /* Read the metapage so we can determine which bitmap page to use */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));
 
    /* Identify which bitmap bit corresponds to the page being freed */
    ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
 
    /* split the bit number into a bitmap page index and a bit within it */
    bitmappage = ovflbitno >> BMPG_SHIFT(metap);
    bitmapbit = ovflbitno & BMPG_MASK(metap);
 
    if (bitmappage >= metap->hashm_nmaps)
        elog(ERROR, "invalid overflow bit number %u", ovflbitno);
    blkno = metap->hashm_mapp[bitmappage];
 
    /* Release metapage lock while we access the bitmap page */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 
    /* read the bitmap page to clear the bitmap bit */
    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
    mappage = BufferGetPage(mapbuf);
    freep = HashPageGetBitmap(mappage);
    /* the bit must currently be set, i.e. the page is marked as in use */
    Assert(ISSET(freep, bitmapbit));
 
    /* Get write-lock on metapage to update firstfree */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
    /* This operation needs to log multiple tuples, prepare WAL for that */
    if (RelationNeedsWAL(rel))
        XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
 
    START_CRIT_SECTION();
 
    /*
     * we have to insert tuples on the "write" page, being careful to preserve
     * hashkey ordering.  (If we insert many tuples into the same "write" page
     * it would be worth qsort'ing them).
     */
    if (nitups > 0)
    {
        _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
        MarkBufferDirty(wbuf);
    }
 
    /*
     * Reinitialize the freed overflow page.  Just zeroing the page won't
     * work, because WAL replay routines expect pages to be initialized. See
     * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
     * careful to make the special space valid here so that tools like
     * pageinspect won't get confused.
     */
    _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
 
    ovflopaque = HashPageGetOpaque(ovflpage);
 
    ovflopaque->hasho_prevblkno = InvalidBlockNumber;
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_bucket = InvalidBucket;
    ovflopaque->hasho_flag = LH_UNUSED_PAGE;
    ovflopaque->hasho_page_id = HASHO_PAGE_ID;
 
    MarkBufferDirty(ovflbuf);
 
    /* make the previous page point past the freed page */
    if (BufferIsValid(prevbuf))
    {
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
 
        Assert(prevopaque->hasho_bucket == bucket);
        prevopaque->hasho_nextblkno = nextblkno;
        MarkBufferDirty(prevbuf);
    }
    /* make the next page point back past the freed page */
    if (BufferIsValid(nextbuf))
    {
        Page        nextpage = BufferGetPage(nextbuf);
        HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
 
        Assert(nextopaque->hasho_bucket == bucket);
        nextopaque->hasho_prevblkno = prevblkno;
        MarkBufferDirty(nextbuf);
    }
 
    /* Clear the bitmap bit to indicate that this overflow page is free */
    CLRBIT(freep, bitmapbit);
    MarkBufferDirty(mapbuf);
 
    /* if this is now the first free page, update hashm_firstfree */
    if (ovflbitno < metap->hashm_firstfree)
    {
        metap->hashm_firstfree = ovflbitno;
        update_metap = true;
        MarkBufferDirty(metabuf);
    }
 
    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_squeeze_page xlrec;
        XLogRecPtr  recptr;
        int         i;
        bool        mod_wbuf = false;
 
        xlrec.prevblkno = prevblkno;
        xlrec.nextblkno = nextblkno;
        xlrec.ntups = nitups;
        xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
        xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
 
        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashSqueezePage);
 
        /*
         * bucket buffer was not changed, but still needs to be registered to
         * ensure that we can acquire a cleanup lock on it during replay.
         */
        if (!xlrec.is_prim_bucket_same_wrt)
        {
            uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
 
            XLogRegisterBuffer(0, bucketbuf, flags);
        }
 
        if (xlrec.ntups > 0)
        {
            XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
 
            /* Remember that wbuf is modified. */
            mod_wbuf = true;
 
            /* attach the offsets array first, then each tuple's data */
            XLogRegisterBufData(1, itup_offsets,
                                nitups * sizeof(OffsetNumber));
            for (i = 0; i < nitups; i++)
                XLogRegisterBufData(1, itups[i], tups_size[i]);
        }
        else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
        {
            uint8       wbuf_flags;
 
            /*
             * A write buffer needs to be registered even if no tuples are
             * added to it to ensure that we can acquire a cleanup lock on it
             * if it is the same as primary bucket buffer or update the
             * nextblkno if it is same as the previous bucket buffer.
             */
            Assert(xlrec.ntups == 0);
 
            wbuf_flags = REGBUF_STANDARD;
            if (!xlrec.is_prev_bucket_same_wrt)
            {
                wbuf_flags |= REGBUF_NO_CHANGE;
            }
            else
            {
                /* Remember that wbuf is modified. */
                mod_wbuf = true;
            }
            XLogRegisterBuffer(1, wbuf, wbuf_flags);
        }
 
        XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
 
        /*
         * If prevpage and the writepage (block in which we are moving tuples
         * from overflow) are same, then no need to separately register
         * prevpage.  During replay, we can directly update the nextblock in
         * writepage.
         */
        if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
            XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
 
        if (BufferIsValid(nextbuf))
            XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
 
        XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
        XLogRegisterBufData(5, &bitmapbit, sizeof(uint32));
 
        /* the metapage is part of the record only if firstfree changed */
        if (update_metap)
        {
            XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
            XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
        }
 
        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
 
        /* Set LSN iff wbuf is modified. */
        if (mod_wbuf)
            PageSetLSN(BufferGetPage(wbuf), recptr);
 
        PageSetLSN(BufferGetPage(ovflbuf), recptr);
 
        if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
            PageSetLSN(BufferGetPage(prevbuf), recptr);
        if (BufferIsValid(nextbuf))
            PageSetLSN(BufferGetPage(nextbuf), recptr);
 
        PageSetLSN(BufferGetPage(mapbuf), recptr);
 
        if (update_metap)
            PageSetLSN(BufferGetPage(metabuf), recptr);
    }
 
    END_CRIT_SECTION();
 
    /* release previous bucket if it is not same as write bucket */
    if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
        _hash_relbuf(rel, prevbuf);
 
    if (BufferIsValid(ovflbuf))
        _hash_relbuf(rel, ovflbuf);
 
    if (BufferIsValid(nextbuf))
        _hash_relbuf(rel, nextbuf);
 
    _hash_relbuf(rel, mapbuf);
    _hash_relbuf(rel, metabuf);
 
    /* hand back the successor so the caller can continue along the chain */
    return nextblkno;
}

References _hash_checkpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_ovflblkno_to_bitno(), _hash_pageinit(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid(), BMPG_MASK, BMPG_SHIFT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CLRBIT, elog, END_CRIT_SECTION, ERROR, HASH_METAPAGE, HASH_READ, HASH_WRITE, HASH_XLOG_FREE_OVFL_BUFS, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_nmaps, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetMeta, HashPageGetOpaque, i, InvalidBlockNumber, InvalidBucket, InvalidBuffer, xl_hash_squeeze_page::is_prev_bucket_same_wrt, xl_hash_squeeze_page::is_prim_bucket_same_wrt, ISSET, LH_BITMAP_PAGE, LH_BUCKET_PAGE, LH_META_PAGE, LH_OVERFLOW_PAGE, LH_UNUSED_PAGE, LockBuffer(), MarkBufferDirty(), xl_hash_squeeze_page::nextblkno, xl_hash_squeeze_page::ntups, PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, xl_hash_squeeze_page::prevblkno, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashSqueezePage, START_CRIT_SECTION, XLOG_HASH_SQUEEZE_PAGE, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_squeezebucket().

◆ _hash_get_indextuple_hashkey()

uint32 _hash_get_indextuple_hashkey ( IndexTuple itup )

Definition at line 291 of file hashutil.c.

{
    /*
     * Return the uint32 stored at the start of the tuple's data area.
     *
     * The hash key is assumed to be the first attribute and never null,
     * which is what makes this direct read valid; no attribute decoding is
     * attempted.
     */
    char       *key_start = (char *) itup;

    /* skip over the tuple header to reach the first attribute */
    key_start += IndexInfoFindDataOffset(itup->t_info);

    return *((uint32 *) key_start);
}

References IndexInfoFindDataOffset(), and IndexTupleData::t_info.

Referenced by _h_indexbuild(), _hash_binsearch(), _hash_binsearch_last(), _hash_doinsert(), _hash_load_qualified_items(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_splitbucket(), hash_page_items(), and hashbucketcleanup().

◆ _hash_get_newblock_from_oldbucket()

BlockNumber _hash_get_newblock_from_oldbucket	(	Relation	rel,
		Bucket	old_bucket
	)

Definition at line 461 of file hashutil.c.

{
    /*
     * Return the block number of the primary page of the bucket that
     * 'old_bucket' splits into, using the mapping info currently stored in
     * the metapage.  The metapage is read under HASH_READ and released
     * before returning.
     */
    Buffer      meta_buffer;
    HashMetaPage meta;
    Bucket      split_target;
    BlockNumber target_blkno;

    meta_buffer = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    meta = HashPageGetMeta(BufferGetPage(meta_buffer));

    /* work out the new bucket number, then translate it to a block */
    split_target = _hash_get_newbucket_from_oldbucket(rel, old_bucket,
                                                      meta->hashm_lowmask,
                                                      meta->hashm_maxbucket);
    target_blkno = BUCKET_TO_BLKNO(meta, split_target);

    /* the result was computed while the metapage was still held */
    _hash_relbuf(rel, meta_buffer);

    return target_blkno;
}

References _hash_get_newbucket_from_oldbucket(), _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage(), HASH_METAPAGE, HASH_READ, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageGetMeta, and LH_META_PAGE.

Referenced by _hash_finish_split().

◆ _hash_get_newbucket_from_oldbucket()

Bucket _hash_get_newbucket_from_oldbucket	(	Relation	rel,
		Bucket	old_bucket,
		uint32	lowmask,
		uint32	maxbucket
	)

Definition at line 494 of file hashutil.c.

{
    /*
     * Compute the bucket that 'old_bucket' splits into.
     *
     * The candidate is first derived with the given lowmask.  If that
     * candidate lies beyond maxbucket, it is derived again with lowmask
     * shifted right by one bit.
     *
     * Only plain identifiers are passed to CALC_NEW_BUCKET and its result is
     * always stored before use: the macro's definition is not visible here,
     * so no assumption is made about how it parenthesizes its arguments.
     */
    Bucket      candidate;
    uint32      halved_mask;

    candidate = CALC_NEW_BUCKET(old_bucket, lowmask);
    if (candidate <= maxbucket)
        return candidate;

    /* candidate does not exist yet: fall back to the halved mask */
    halved_mask = lowmask >> 1;
    candidate = CALC_NEW_BUCKET(old_bucket, halved_mask);

    return candidate;
}

References CALC_NEW_BUCKET.

Referenced by _hash_get_newblock_from_oldbucket(), and hashbucketcleanup().

◆ _hash_get_oldblock_from_newbucket()

BlockNumber _hash_get_oldblock_from_newbucket	(	Relation	rel,
		Bucket	new_bucket
	)

Definition at line 422 of file hashutil.c.

{
    /*
     * Return the block number of the primary page of the bucket from which
     * 'new_bucket' is being (or was) split.
     *
     * The metapage's hashm_lowmask cannot be used to find the old bucket,
     * because what is needed is the lowmask that was in effect when the
     * split started.  Clearing the most significant set bit of the new
     * bucket number yields the old bucket number instead.
     */
    uint32      below_top_bit;
    Bucket      source_bucket;
    Buffer      meta_buffer;
    HashMetaPage meta;
    BlockNumber source_blkno;

    /* all bits strictly below the highest set bit of new_bucket */
    below_top_bit = (((uint32) 1) << pg_leftmost_one_pos32(new_bucket)) - 1;
    source_bucket = new_bucket & below_top_bit;

    /* the metapage is needed only for the bucket-to-block translation */
    meta_buffer = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    meta = HashPageGetMeta(BufferGetPage(meta_buffer));

    source_blkno = BUCKET_TO_BLKNO(meta, source_bucket);

    _hash_relbuf(rel, meta_buffer);

    return source_blkno;
}

References _hash_getbuf(), _hash_relbuf(), BUCKET_TO_BLKNO, BufferGetPage(), HASH_METAPAGE, HASH_READ, HashPageGetMeta, LH_META_PAGE, and pg_leftmost_one_pos32().

Referenced by _hash_first().

◆ _hash_get_totalbuckets()

uint32 _hash_get_totalbuckets ( uint32 splitpoint_phase )

Definition at line 174 of file hashutil.c.

{
    /*
     * Return the total number of buckets that exist once the given
     * splitpoint phase has been fully allocated.
     *
     * Phases below HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE each form a group
     * of their own, so the answer is simply 2^phase.  Beyond that, a group
     * spans several phases: the result is the bucket count before the group
     * plus an equal share of the group for every phase completed within it.
     */
    uint32      phase_offset;
    uint32      group;
    uint32      buckets_before_group;
    uint32      buckets_per_phase;
    uint32      phases_completed;

    if (splitpoint_phase < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
        return (1 << splitpoint_phase);

    /* distance of this phase from the first multi-phase group */
    phase_offset = splitpoint_phase - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;

    /* get splitpoint's group */
    group = HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE;
    group += (phase_offset >> HASH_SPLITPOINT_PHASE_BITS);

    /* account for buckets before the group */
    buckets_before_group = (1 << (group - 1));

    /* each phase of the group contributes the same number of buckets */
    buckets_per_phase = ((1 << (group - 1)) >> HASH_SPLITPOINT_PHASE_BITS);

    /* phases finished inside the group, converted from 0-based to 1-based */
    phases_completed = ((phase_offset & HASH_SPLITPOINT_PHASE_MASK) + 1);

    return buckets_before_group + (buckets_per_phase * phases_completed);
}

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, and HASH_SPLITPOINT_PHASE_MASK.

Referenced by _hash_expandtable(), _hash_init_metabuffer(), _hash_ovflblkno_to_bitno(), and bitno_to_blkno().

◆ _hash_getbucketbuf_from_hashkey()

Buffer _hash_getbucketbuf_from_hashkey	(	Relation	rel,
		uint32	hashkey,
		int	access,
		HashMetaPage *	cachedmetap
	)

Definition at line 1559 of file hashpage.c.

{
    /*
     * Map 'hashkey' to its bucket using the cached copy of the metapage,
     * then pin and lock that bucket's primary page with the requested
     * 'access' mode.  If the page shows that the cached metapage is out of
     * date, the cache is refreshed and the lookup is retried.
     *
     * Returns the locked bucket buffer.  If 'cachedmetap' is non-NULL, the
     * metapage copy that was used for the final lookup is stored there.
     */
    HashMetaPage metap;
    Buffer      buf;
    Buffer      metabuf = InvalidBuffer;
    Page        page;
    Bucket      bucket;
    BlockNumber blkno;
    HashPageOpaque opaque;

    /* We read from the target bucket buffer, so a lock mode is mandatory. */
    Assert(access == HASH_READ || access == HASH_WRITE);

    /* Start from whatever is cached; only read the metapage if nothing is. */
    metap = _hash_getcachedmetap(rel, &metabuf, false);
    Assert(metap != NULL);

    /*
     * Loop until we get a lock on the correct target bucket.
     */
    for (;;)
    {
        /*
         * Compute the target bucket number, and convert to block number.
         */
        bucket = _hash_hashkey2bucket(hashkey,
                                      metap->hashm_maxbucket,
                                      metap->hashm_highmask,
                                      metap->hashm_lowmask);

        blkno = BUCKET_TO_BLKNO(metap, bucket);

        /* Fetch the primary bucket page for the bucket */
        buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
        page = BufferGetPage(buf);
        opaque = HashPageGetOpaque(page);
        Assert(opaque->hasho_bucket == bucket);
        Assert(opaque->hasho_prevblkno != InvalidBlockNumber);

        /*
         * If this bucket hasn't been split, we're done.  On a primary bucket
         * page hasho_prevblkno holds a hashm_maxbucket value (see
         * _hash_initbuf()); if it does not exceed the cached maxbucket, the
         * cached metapage is recent enough for this bucket.
         */
        if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
            break;

        /* Drop lock on this buffer, update cached metapage, and retry. */
        _hash_relbuf(rel, buf);
        metap = _hash_getcachedmetap(rel, &metabuf, true);
        Assert(metap != NULL);
    }

    /*
     * _hash_getcachedmetap() leaves a pin on the metapage buffer whenever it
     * had to read it; release that pin now.
     */
    if (BufferIsValid(metabuf))
        _hash_dropbuf(rel, metabuf);

    /* Hand the metapage copy back to the caller if it asked for it. */
    if (cachedmetap)
        *cachedmetap = metap;

    return buf;
}

References _hash_dropbuf(), _hash_getbuf(), _hash_getcachedmetap(), _hash_hashkey2bucket(), _hash_relbuf(), Assert(), BUCKET_TO_BLKNO, buf, BufferGetPage(), BufferIsValid(), HASH_READ, HASH_WRITE, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, InvalidBlockNumber, InvalidBuffer, and LH_BUCKET_PAGE.

Referenced by _hash_doinsert(), and _hash_first().

◆ _hash_getbuf()

Buffer _hash_getbuf	(	Relation	rel,
		BlockNumber	blkno,
		int	access,
		int	flags
	)

Definition at line 70 of file hashpage.c.

{
    Buffer      pinned;
    bool        want_lock = (access != HASH_NOLOCK);

    /* P_NEW is never a legal request for the hash access method */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    /* pin the requested block */
    pinned = ReadBuffer(rel, blkno);

    /* take the requested lock, unless the caller asked for none */
    if (want_lock)
        LockBuffer(pinned, access);

    /* pin and lock are in place; validate the page against 'flags' */
    _hash_checkpage(rel, pinned, flags);

    return pinned;
}

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), P_NEW, and ReadBuffer().

Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_kill_items(), _hash_next(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), hash_bitmap_info(), hashbulkdelete(), and pgstathashindex().

◆ _hash_getbuf_with_condlock_cleanup()

Buffer _hash_getbuf_with_condlock_cleanup	(	Relation	rel,
		BlockNumber	blkno,
		int	flags
	)

Definition at line 96 of file hashpage.c.

{
    Buffer      target;

    /* P_NEW is never a legal request for the hash access method */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    target = ReadBuffer(rel, blkno);

    if (ConditionalLockBufferForCleanup(target))
    {
        /* got the cleanup lock without waiting; validate the page */
        _hash_checkpage(rel, target, flags);
        return target;
    }

    /* lock not immediately available: give the pin back and report failure */
    ReleaseBuffer(target);
    return InvalidBuffer;
}

References _hash_checkpage(), buf, ConditionalLockBufferForCleanup(), elog, ERROR, InvalidBuffer, P_NEW, ReadBuffer(), and ReleaseBuffer().

Referenced by _hash_expandtable().

◆ _hash_getbuf_with_strategy()

Buffer _hash_getbuf_with_strategy	(	Relation	rel,
		BlockNumber	blkno,
		int	access,
		int	flags,
		BufferAccessStrategy	bstrategy
	)

Definition at line 239 of file hashpage.c.

{
    Buffer      pinned;
    bool        want_lock = (access != HASH_NOLOCK);

    /* P_NEW is never a legal request for the hash access method */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    /* pin the block from the main fork, honoring the caller's strategy */
    pinned = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                bstrategy);

    /* take the requested lock, unless the caller asked for none */
    if (want_lock)
        LockBuffer(pinned, access);

    /* pin and lock are in place; validate the page against 'flags' */
    _hash_checkpage(rel, pinned, flags);

    return pinned;
}

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), MAIN_FORKNUM, P_NEW, RBM_NORMAL, and ReadBufferExtended().

Referenced by _hash_freeovflpage(), _hash_squeezebucket(), hashbucketcleanup(), and pgstat_hash_page().

◆ _hash_getcachedmetap()

HashMetaPage _hash_getcachedmetap	(	Relation	rel,
		Buffer *	metabuf,
		bool	force_refresh
	)

Definition at line 1501 of file hashpage.c.

{
    /*
     * Return the copy of the hash metapage cached in rel->rd_amcache,
     * (re)loading it from the metapage when 'force_refresh' is set or when
     * nothing has been cached yet.
     *
     * '*metabuf' is in/out: if it already holds a valid buffer, that buffer
     * is share-locked and reused; otherwise the metapage is read and the new
     * buffer is stored there.  After a refresh the lock is released but the
     * pin is kept, so the caller must drop the pin on '*metabuf' if it is
     * valid on return.
     */
    Page        page;

    Assert(metabuf);
    if (force_refresh || rel->rd_amcache == NULL)
    {
        char       *cache = NULL;

        /*
         * It's important that we don't set rd_amcache to an invalid value.
         * Either MemoryContextAlloc or _hash_getbuf could fail, so don't
         * install a pointer to the newly-allocated storage in the actual
         * relcache entry until both have succeeded.
         */
        if (rel->rd_amcache == NULL)
            cache = MemoryContextAlloc(rel->rd_indexcxt,
                                       sizeof(HashMetaPageData));

        /*
         * Read the metapage.  Reuse the caller's buffer when it supplied
         * one (only a share lock is needed); otherwise pin and lock it here.
         */
        if (BufferIsValid(*metabuf))
            LockBuffer(*metabuf, BUFFER_LOCK_SHARE);
        else
            *metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ,
                                    LH_META_PAGE);
        page = BufferGetPage(*metabuf);

        /*
         * Populate the cache.  Both fallible steps have succeeded by now, so
         * it is safe to install newly allocated storage first.
         */
        if (rel->rd_amcache == NULL)
            rel->rd_amcache = cache;
        memcpy(rel->rd_amcache, HashPageGetMeta(page),
               sizeof(HashMetaPageData));

        /* Release metapage lock, but keep the pin. */
        LockBuffer(*metabuf, BUFFER_LOCK_UNLOCK);
    }

    return (HashMetaPage) rel->rd_amcache;
}

References _hash_getbuf(), Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), HASH_METAPAGE, HASH_READ, HashPageGetMeta, LH_META_PAGE, LockBuffer(), MemoryContextAlloc(), RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _hash_getbucketbuf_from_hashkey(), and hashbulkdelete().

◆ _hash_getinitbuf()

Buffer _hash_getinitbuf	(	Relation	rel,
		BlockNumber	blkno
	)

Definition at line 135 of file hashpage.c.

{
    Buffer      fresh;
    Page        fresh_page;
    Size        fresh_size;

    /* P_NEW is never a legal request for the hash access method */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    /* fetch the main-fork block in RBM_ZERO_AND_LOCK mode */
    fresh = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK,
                               NULL);

    /* with pin and lock in place, lay out an empty hash page */
    fresh_page = BufferGetPage(fresh);
    fresh_size = BufferGetPageSize(fresh);
    _hash_pageinit(fresh_page, fresh_size);

    return fresh;
}

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), elog, ERROR, MAIN_FORKNUM, P_NEW, RBM_ZERO_AND_LOCK, and ReadBufferExtended().

Referenced by _hash_addovflpage().

◆ _hash_getnewbuf()

Buffer _hash_getnewbuf	(	Relation	rel,
		BlockNumber	blkno,
		ForkNumber	forkNum
	)

Definition at line 198 of file hashpage.c.

{
    BlockNumber fork_len = RelationGetNumberOfBlocksInFork(rel, forkNum);
    Buffer      newbuf;

    /* reject requests the hash access method never makes */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");
    if (blkno > fork_len)
        elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
             RelationGetRelationName(rel));

    if (blkno != fork_len)
    {
        /* block lies inside the fork: read it in RBM_ZERO_AND_LOCK mode */
        newbuf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
                                    NULL);
    }
    else
    {
        BlockNumber extended_to;

        /* block is just past the end: smgr insists we extend explicitly */
        newbuf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
                                   EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);

        /* the extension must have produced exactly the block we wanted */
        extended_to = BufferGetBlockNumber(newbuf);
        if (extended_to != blkno)
            elog(ERROR, "unexpected hash relation size: %u, should be %u",
                 extended_to, blkno);
    }

    /* with pin and lock in place, lay out an empty hash page */
    _hash_pageinit(BufferGetPage(newbuf), BufferGetPageSize(newbuf));

    return newbuf;
}

References _hash_pageinit(), BMR_REL, buf, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, elog, ERROR, ExtendBufferedRel(), P_NEW, RBM_ZERO_AND_LOCK, ReadBufferExtended(), RelationGetNumberOfBlocksInFork(), and RelationGetRelationName.

Referenced by _hash_addovflpage(), _hash_expandtable(), and _hash_init().

◆ _hash_hashkey2bucket()

Bucket _hash_hashkey2bucket	(	uint32	hashkey,
		uint32	maxbucket,
		uint32	highmask,
		uint32	lowmask
	)

Definition at line 125 of file hashutil.c.

{
    /* first try the wider mask */
    Bucket      candidate = hashkey & highmask;

    /*
     * A result beyond maxbucket names a bucket that does not exist yet, so
     * fall back to the narrower mask in that case.
     */
    return (candidate > maxbucket) ? (candidate & lowmask) : candidate;
}

Referenced by _h_indexbuild(), _hash_getbucketbuf_from_hashkey(), _hash_splitbucket(), comparetup_index_hash(), and hashbucketcleanup().

◆ _hash_init()

uint32 _hash_init	(	Relation	rel,
		double	num_tuples,
		ForkNumber	forkNum
	)

Definition at line 327 of file hashpage.c.

{
    /*
     * Build the initial on-disk structure of an empty hash index in fork
     * 'forkNum': the metapage, the initial bucket pages, and the first
     * bitmap page.  'num_tuples' is the estimate handed to
     * _hash_init_metabuffer() to size the initial bucket count.
     *
     * Returns the number of buckets created.  Raises an error if the fork is
     * not empty.
     */
    Buffer      metabuf;
    Buffer      buf;
    Buffer      bitmapbuf;
    Page        pg;
    HashMetaPage metap;
    RegProcedure procid;
    int32       data_width;
    int32       item_width;
    int32       ffactor;
    uint32      num_buckets;
    uint32      i;
    bool        use_wal;

    /* safety check */
    if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
        elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
             RelationGetRelationName(rel));

    /*
     * WAL log creation of pages if the relation is persistent, or this is the
     * init fork.  Init forks for unlogged relations always need to be WAL
     * logged.
     */
    use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;

    /*
     * Determine the target fill factor (in tuples per bucket) for this index.
     * The idea is to make the fill factor correspond to pages about as full
     * as the user-settable fillfactor parameter says.  We can compute it
     * exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
     */
    data_width = sizeof(uint32);
    item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
        sizeof(ItemIdData);     /* include the line pointer */
    ffactor = HashGetTargetPageUsage(rel) / item_width;
    /* keep to a sane range */
    if (ffactor < 10)
        ffactor = 10;

    /* OID of the hash support function; recorded in the metapage below */
    procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);

    /*
     * We initialize the metapage, the first N bucket pages, and the first
     * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
     * calls to occur.  This ensures that the smgr level has the right idea of
     * the physical index length.
     *
     * Critical section not required, because on error the creation of the
     * whole relation will be rolled back.
     */
    metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
    _hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
    MarkBufferDirty(metabuf);

    pg = BufferGetPage(metabuf);
    metap = HashPageGetMeta(pg);

    /* XLOG stuff */
    if (use_wal)
    {
        xl_hash_init_meta_page xlrec;
        XLogRecPtr  recptr;

        xlrec.num_tuples = num_tuples;
        xlrec.procid = metap->hashm_procid;
        xlrec.ffactor = metap->hashm_ffactor;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashInitMetaPage);
        XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE);

        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    /* hashm_maxbucket is the highest bucket number, hence the +1 */
    num_buckets = metap->hashm_maxbucket + 1;

    /*
     * Release buffer lock on the metapage while we initialize buckets.
     * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
     * won't accomplish anything.  It's a bad idea to hold buffer locks for
     * long intervals in any case, since that can block the bgwriter.
     */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    /*
     * Initialize and WAL Log the first N buckets
     */
    for (i = 0; i < num_buckets; i++)
    {
        BlockNumber blkno;

        /* Allow interrupts, in case N is huge */
        CHECK_FOR_INTERRUPTS();

        blkno = BUCKET_TO_BLKNO(metap, i);
        buf = _hash_getnewbuf(rel, blkno, forkNum);
        _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
        MarkBufferDirty(buf);

        /* each bucket page is logged as a whole new page */
        if (use_wal)
            log_newpage(&rel->rd_locator,
                        forkNum,
                        blkno,
                        BufferGetPage(buf),
                        true);
        _hash_relbuf(rel, buf);
    }

    /* Now reacquire buffer lock on metapage */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    /*
     * Initialize bitmap page.  It goes in the block right after the last
     * initial bucket page (block 0 is the metapage, buckets use 1 .. N).
     */
    bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
    _hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
    MarkBufferDirty(bitmapbuf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    /* metapage already has a write lock */
    if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("out of overflow pages in hash index \"%s\"",
                        RelationGetRelationName(rel))));

    metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;

    metap->hashm_nmaps++;
    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (use_wal)
    {
        xl_hash_init_bitmap_page xlrec;
        XLogRecPtr  recptr;

        xlrec.bmsize = metap->hashm_bmsize;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashInitBitmapPage);
        XLogRegisterBuffer(0, bitmapbuf, REGBUF_WILL_INIT);

        /*
         * This is safe only because nobody else can be modifying the index at
         * this stage; it's only visible to the transaction that is creating
         * it.
         */
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_BITMAP_PAGE);

        /* both pages were touched by this record, so both get its LSN */
        PageSetLSN(BufferGetPage(bitmapbuf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    /* all done */
    _hash_relbuf(rel, bitmapbuf);
    _hash_relbuf(rel, metabuf);

    return num_buckets;
}

References _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), _hash_relbuf(), xl_hash_init_bitmap_page::bmsize, BUCKET_TO_BLKNO, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), CHECK_FOR_INTERRUPTS, elog, ereport, errcode(), errmsg(), ERROR, xl_hash_init_meta_page::ffactor, HASH_MAX_BITMAPS, HASH_METAPAGE, HashGetTargetPageUsage, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_procid, HashPageGetMeta, HASHSTANDARD_PROC, i, index_getprocid(), INIT_FORKNUM, LH_BUCKET_PAGE, LockBuffer(), log_newpage(), MarkBufferDirty(), MAXALIGN, xl_hash_init_meta_page::num_tuples, PageSetLSN(), xl_hash_init_meta_page::procid, RelationData::rd_locator, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetNumberOfBlocksInFork(), RelationGetRelationName, RelationNeedsWAL, SizeOfHashInitBitmapPage, SizeOfHashInitMetaPage, XLOG_HASH_INIT_BITMAP_PAGE, XLOG_HASH_INIT_META_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbuild(), and hashbuildempty().

◆ _hash_init_metabuffer()

void _hash_init_metabuffer	(	Buffer	buf,
		double	num_tuples,
		RegProcedure	procid,
		uint16	ffactor,
		bool	initpage
	)

Definition at line 498 of file hashpage.c.

{
    /*
     * Fill in the hash metapage held in 'buf'.
     *
     * 'num_tuples' and 'ffactor' together determine the initial number of
     * buckets; 'procid' is stored in the metapage as a label.  When
     * 'initpage' is true the page is first reset with _hash_pageinit();
     * otherwise the caller must already have initialized it.
     */
    HashMetaPage metap;
    HashPageOpaque pageopaque;
    Page        page;
    double      dnumbuckets;
    uint32      num_buckets;
    uint32      spare_index;
    uint32      lshift;

    /*
     * Choose the number of initial bucket pages to match the fill factor
     * given the estimated number of tuples.  We round up the result to the
     * total number of buckets which has to be allocated before using its
     * hashm_spares element. However always force at least 2 bucket pages. The
     * upper limit is determined by considerations explained in
     * _hash_expandtable().
     */
    dnumbuckets = num_tuples / ffactor;
    if (dnumbuckets <= 2.0)
        num_buckets = 2;
    else if (dnumbuckets >= (double) 0x40000000)
        num_buckets = 0x40000000;
    else
        num_buckets = _hash_get_totalbuckets(_hash_spareindex(dnumbuckets));

    spare_index = _hash_spareindex(num_buckets);
    Assert(spare_index < HASH_MAX_SPLITPOINTS);

    page = BufferGetPage(buf);
    if (initpage)
        _hash_pageinit(page, BufferGetPageSize(buf));

    /* fill in the special space: a metapage belongs to no bucket or chain */
    pageopaque = HashPageGetOpaque(page);
    pageopaque->hasho_prevblkno = InvalidBlockNumber;
    pageopaque->hasho_nextblkno = InvalidBlockNumber;
    pageopaque->hasho_bucket = InvalidBucket;
    pageopaque->hasho_flag = LH_META_PAGE;
    pageopaque->hasho_page_id = HASHO_PAGE_ID;

    metap = HashPageGetMeta(page);

    metap->hashm_magic = HASH_MAGIC;
    metap->hashm_version = HASH_VERSION;
    metap->hashm_ntuples = 0;
    metap->hashm_nmaps = 0;
    metap->hashm_ffactor = ffactor;
    metap->hashm_bsize = HashGetMaxBitmapSize(page);

    /*
     * Find largest bitmap array size that will fit in page size: round
     * hashm_bsize down to a power of two.
     */
    lshift = pg_leftmost_one_pos32(metap->hashm_bsize);
    Assert(lshift > 0);
    metap->hashm_bmsize = 1 << lshift;
    metap->hashm_bmshift = lshift + BYTE_TO_BIT;
    Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

    /*
     * Label the index with its primary hash support function's OID.  This is
     * pretty useless for normal operation (in fact, hashm_procid is not used
     * anywhere), but it might be handy for forensic purposes so we keep it.
     */
    metap->hashm_procid = procid;

    /*
     * We initialize the index with N buckets, 0 .. N-1, occupying physical
     * blocks 1 to N.  The first freespace bitmap page is in block N+1.
     */
    metap->hashm_maxbucket = num_buckets - 1;

    /*
     * Set highmask as next immediate ((2 ^ x) - 1), which should be
     * sufficient to cover num_buckets.  lowmask is the mask one bit narrower.
     */
    metap->hashm_highmask = pg_nextpower2_32(num_buckets + 1) - 1;
    metap->hashm_lowmask = (metap->hashm_highmask >> 1);

    MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
    MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

    /* Set up mapping for one spare page after the initial splitpoints */
    metap->hashm_spares[spare_index] = 1;
    metap->hashm_ovflpoint = spare_index;
    metap->hashm_firstfree = 0;

    /*
     * Set pd_lower just past the end of the metadata.  This is essential,
     * because without doing so, metadata will be lost if xlog.c compresses
     * the page.
     */
    ((PageHeader) page)->pd_lower =
        ((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
}

Referenced by _hash_init(), and hash_xlog_init_meta_page().

◆ _hash_initbitmapbuffer()

void _hash_initbitmapbuffer	(	Buffer	buf,
		uint16	bmsize,
		bool	initpage
	)

Definition at line 777 of file hashovfl.c.

{
    Page        bitmap_page = BufferGetPage(buf);
    HashPageOpaque special;
    uint32     *bits;

    /* optionally reset the page before laying out the bitmap */
    if (initpage)
        _hash_pageinit(bitmap_page, BufferGetPageSize(buf));

    /* fill in the special space: a bitmap page belongs to no bucket or chain */
    special = HashPageGetOpaque(bitmap_page);
    special->hasho_page_id = HASHO_PAGE_ID;
    special->hasho_flag = LH_BITMAP_PAGE;
    special->hasho_bucket = InvalidBucket;
    special->hasho_nextblkno = InvalidBlockNumber;
    special->hasho_prevblkno = InvalidBlockNumber;

    /* turn on every bit in the first bmsize bytes of the bitmap */
    bits = HashPageGetBitmap(bitmap_page);
    memset(bits, 0xFF, bmsize);

    /*
     * Point pd_lower at the first byte after the bitmap data.  Setting it
     * equal to pd_upper would also work, but this is more precise and makes
     * the page look compressible to xlog.c.
     */
    ((PageHeader) bitmap_page)->pd_lower =
        ((char *) bits + bmsize) - (char *) bitmap_page;
}

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetBitmap, HashPageGetOpaque, InvalidBlockNumber, InvalidBucket, and LH_BITMAP_PAGE.

Referenced by _hash_addovflpage(), _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_init_bitmap_page().

◆ _hash_initbuf()

void _hash_initbuf	(	Buffer	buf,
		uint32	max_bucket,
		uint32	num_bucket,
		uint32	flag,
		bool	initpage
	)

Definition at line 157 of file hashpage.c.

{
    Page        target_page = BufferGetPage(buf);
    HashPageOpaque special;

    /* optionally reset the page before filling in the special space */
    if (initpage)
        _hash_pageinit(target_page, BufferGetPageSize(buf));

    special = HashPageGetOpaque(target_page);

    special->hasho_page_id = HASHO_PAGE_ID;
    special->hasho_flag = flag;
    special->hasho_bucket = num_bucket;
    special->hasho_nextblkno = InvalidBlockNumber;

    /*
     * hasho_prevblkno records the hashm_maxbucket value passed in by the
     * caller.  _hash_getbucketbuf_from_hashkey() compares it against the
     * cached HashMetaPageData to decide whether that cache is still usable.
     */
    special->hasho_prevblkno = max_bucket;
}

References _hash_pageinit(), buf, BufferGetPage(), BufferGetPageSize(), flag(), HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HASHO_PAGE_ID, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, and InvalidBlockNumber.

Referenced by _hash_init(), hash_xlog_add_ovfl_page(), and hash_xlog_split_allocate_page().

◆ _hash_kill_items()

void _hash_kill_items ( IndexScanDesc scan )

Definition at line 536 of file hashutil.c.

{
    /*
     * Mark as dead (LP_DEAD) the index entries that the scan recorded in
     * so->killedItems for the page at so->currPos.currPage, and flag the page
     * with LH_PAGE_HAS_DEAD_TUPLES if at least one entry was marked.
     *
     * The caller must have a valid current scan position and at least one
     * killed item.  so->numKilled is reset to zero in all cases.
     */
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;
    BlockNumber blkno;
    Buffer      buf;
    Page        page;
    HashPageOpaque opaque;
    OffsetNumber offnum,
                maxoff;
    int         numKilled = so->numKilled;
    int         i;
    bool        killedsomething = false;
    bool        havePin = false;

    Assert(so->numKilled > 0);
    Assert(so->killedItems != NULL);
    Assert(HashScanPosIsValid(so->currPos));

    /*
     * Always reset the scan state, so we don't look for same items on other
     * pages.  The count was saved in the local numKilled above.
     */
    so->numKilled = 0;

    blkno = so->currPos.currPage;
    if (HashScanPosIsPinned(so->currPos))
    {
        /*
         * We already have pin on this buffer, so, all we need to do is
         * acquire lock on it.
         */
        havePin = true;
        buf = so->currPos.buf;
        LockBuffer(buf, BUFFER_LOCK_SHARE);
    }
    else
        /* no pin held: read the page again, taking both pin and lock */
        buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);

    page = BufferGetPage(buf);
    opaque = HashPageGetOpaque(page);
    maxoff = PageGetMaxOffsetNumber(page);

    for (i = 0; i < numKilled; i++)
    {
        int         itemIndex = so->killedItems[i];
        HashScanPosItem *currItem = &so->currPos.items[itemIndex];

        /* start looking at the offset the item had when it was read */
        offnum = currItem->indexOffset;

        Assert(itemIndex >= so->currPos.firstItem &&
               itemIndex <= so->currPos.lastItem);

        /*
         * Scan forward from the remembered offset until a tuple with the
         * same heap TID is found or the end of the page is reached.
         */
        while (offnum <= maxoff)
        {
            ItemId      iid = PageGetItemId(page, offnum);
            IndexTuple  ituple = (IndexTuple) PageGetItem(page, iid);

            if (ItemPointerEquals(&ituple->t_tid, &currItem->heapTid))
            {
                /* found the item */
                ItemIdMarkDead(iid);
                killedsomething = true;
                break;          /* out of inner search loop */
            }
            offnum = OffsetNumberNext(offnum);
        }
    }

    /*
     * Since this can be redone later if needed, mark as dirty hint. Whenever
     * we mark anything LP_DEAD, we also set the page's
     * LH_PAGE_HAS_DEAD_TUPLES flag, which is likewise just a hint.
     */
    if (killedsomething)
    {
        opaque->hasho_flag |= LH_PAGE_HAS_DEAD_TUPLES;
        MarkBufferDirtyHint(buf, true);
    }

    /*
     * Undo what was acquired above.  If the scan owns the pin, only the lock
     * is released; otherwise lock and pin are both released.  When havePin is
     * set, so->currPos.buf is the same buffer as buf (assigned above).
     */
    if (so->hashso_bucket_buf == so->currPos.buf ||
        havePin)
        LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
    else
        _hash_relbuf(rel, buf);
}

References _hash_getbuf(), _hash_relbuf(), Assert(), buf, HashScanPosData::buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HashScanPosData::currPage, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashPageGetOpaque, HashScanPosIsPinned, HashScanPosIsValid, HashScanOpaqueData::hashso_bucket_buf, HashScanPosItem::heapTid, i, HashScanPosItem::indexOffset, IndexScanDescData::indexRelation, ItemIdMarkDead, ItemPointerEquals(), HashScanPosData::items, HashScanOpaqueData::killedItems, LH_OVERFLOW_PAGE, LH_PAGE_HAS_DEAD_TUPLES, LockBuffer(), MarkBufferDirtyHint(), HashScanOpaqueData::numKilled, OffsetNumberNext, IndexScanDescData::opaque, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and IndexTupleData::t_tid.

Referenced by _hash_next(), _hash_readpage(), hashendscan(), and hashrescan().

◆ _hash_next()

bool _hash_next	(	IndexScanDesc	scan,
		ScanDirection	dir
	)

Definition at line 48 of file hashsearch.c.

{
    /*
     * Advance the scan by one item in direction 'dir'.
     *
     * Returns true and stores the item's heap TID in scan->xs_heaptid when
     * another item is available.  Returns false at the end of the scan,
     * after dropping the scan's buffers and invalidating so->currPos.
     */
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    HashScanPosItem *currItem;
    BlockNumber blkno;
    Buffer      buf;
    bool        end_of_scan = false;

    /*
     * Advance to the next tuple on the current page; or if done, try to read
     * data from the next or previous page based on the scan direction. Before
     * moving to the next or previous page make sure that we deal with all the
     * killed items.
     */
    if (ScanDirectionIsForward(dir))
    {
        /* stepped past the last item saved for the current page? */
        if (++so->currPos.itemIndex > so->currPos.lastItem)
        {
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            blkno = so->currPos.nextPage;
            if (BlockNumberIsValid(blkno))
            {
                /* moving forward, the next page is checked as an overflow page */
                buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
                if (!_hash_readpage(scan, &buf, dir))
                    end_of_scan = true;
            }
            else
                end_of_scan = true;
        }
    }
    else
    {
        /* stepped before the first item saved for the current page? */
        if (--so->currPos.itemIndex < so->currPos.firstItem)
        {
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            blkno = so->currPos.prevPage;
            if (BlockNumberIsValid(blkno))
            {
                /* moving backward, a bucket page or an overflow page is accepted */
                buf = _hash_getbuf(rel, blkno, HASH_READ,
                                   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

                /*
                 * We always maintain the pin on bucket page for whole scan
                 * operation, so releasing the additional pin we have acquired
                 * here.
                 */
                if (buf == so->hashso_bucket_buf ||
                    buf == so->hashso_split_bucket_buf)
                    _hash_dropbuf(rel, buf);

                if (!_hash_readpage(scan, &buf, dir))
                    end_of_scan = true;
            }
            else
                end_of_scan = true;
        }
    }

    if (end_of_scan)
    {
        /* nothing more to return: release scan buffers and reset position */
        _hash_dropscanbuf(rel, so);
        HashScanPosInvalidate(so->currPos);
        return false;
    }

    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_heaptid = currItem->heapTid;

    return true;
}

References _hash_dropbuf(), _hash_dropscanbuf(), _hash_getbuf(), _hash_kill_items(), _hash_readpage(), BlockNumberIsValid(), buf, HashScanOpaqueData::currPos, HashScanPosData::firstItem, HASH_READ, HashScanPosInvalidate, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, if(), IndexScanDescData::indexRelation, HashScanPosData::itemIndex, HashScanPosData::items, HashScanPosData::lastItem, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, HashScanPosData::nextPage, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, HashScanPosData::prevPage, ScanDirectionIsForward, and IndexScanDescData::xs_heaptid.

Referenced by hashgetbitmap(), and hashgettuple().

◆ _hash_ovflblkno_to_bitno()

uint32 _hash_ovflblkno_to_bitno	(	HashMetaPage	metap,
		BlockNumber	ovflblkno
	)

Definition at line 62 of file hashovfl.c.

{
    uint32      last_split = metap->hashm_ovflpoint;
    uint32      split;

    /* Walk the splitpoints looking for the one that contains this page */
    for (split = 1; split <= last_split; split++)
    {
        uint32      buckets_so_far = _hash_get_totalbuckets(split);
        uint32      candidate;

        /* the block is not past this splitpoint's buckets: not an overflow page */
        if (ovflblkno <= (BlockNumber) buckets_so_far)
            break;

        candidate = ovflblkno - buckets_so_far;

        /*
         * The candidate must exceed the number of overflow pages added
         * through the previous splitpoint and must not exceed the number
         * added through this one.  Overflow pages at this splitpoint, if
         * any, start from (_hash_get_totalbuckets(split) +
         * metap->hashm_spares[split - 1] + 1).
         */
        if (candidate > metap->hashm_spares[split - 1] &&
            candidate <= metap->hashm_spares[split])
            return candidate - 1;   /* convert 1-based to 0-based */
    }

    /* no splitpoint claims this block */
    ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
             errmsg("invalid overflow block number %u", ovflblkno)));
    return 0;                   /* keep compiler quiet */
}

References _hash_get_totalbuckets(), ereport, errcode(), errmsg(), ERROR, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, and i.

Referenced by _hash_freeovflpage(), and hash_bitmap_info().

◆ _hash_pageinit()

void _hash_pageinit	(	Page	page,
		Size	size
	)

Definition at line 596 of file hashpage.c.

{
    /* every hash page reserves room for HashPageOpaqueData as special space */
    Size        special_size = sizeof(HashPageOpaqueData);

    PageInit(page, size, special_size);
}

References PageInit().

Referenced by _hash_alloc_buckets(), _hash_freeovflpage(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), and hash_xlog_squeeze_page().

◆ _hash_pgaddmultitup()

void _hash_pgaddmultitup	(	Relation	rel,
		Buffer	buf,
		IndexTuple *	itups,
		OffsetNumber *	itup_offsets,
		uint16	nitups
	)

Definition at line 331 of file hashinsert.c.

{
    Page        target_page;
    int         idx;

    /* the target must be a bucket page or an overflow page */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    target_page = BufferGetPage(buf);

    for (idx = 0; idx < nitups; idx++)
    {
        IndexTuple  tuple = itups[idx];
        Size        aligned_size = MAXALIGN(IndexTupleSize(tuple));
        OffsetNumber slot;

        /* pick the offset that keeps the page ordered by hash key */
        slot = _hash_binsearch(target_page,
                               _hash_get_indextuple_hashkey(tuple));

        /* report the chosen offset back to the caller */
        itup_offsets[idx] = slot;

        if (PageAddItem(target_page, (Item) tuple, aligned_size, slot, false, false)
            == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"",
                 RelationGetRelationName(rel));
    }
}

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), buf, BufferGetPage(), elog, ERROR, i, IndexTupleSize(), InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAXALIGN, PageAddItem, and RelationGetRelationName.

Referenced by _hash_freeovflpage(), _hash_splitbucket(), and _hash_squeezebucket().

◆ _hash_pgaddtup()

OffsetNumber _hash_pgaddtup	(	Relation	rel,
		Buffer	buf,
		Size	itemsize,
		IndexTuple	itup,
		bool	appendtup
	)

Definition at line 274 of file hashinsert.c.

{
    Page        target_page;
    OffsetNumber insert_at;

    /* the target must be a bucket page or an overflow page */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    target_page = BufferGetPage(buf);

    if (!appendtup)
    {
        /* ordered insert: pick the offset that preserves hash key order */
        insert_at = _hash_binsearch(target_page,
                                    _hash_get_indextuple_hashkey(itup));
    }
    else
    {
        /* caller asked for an append: use the slot after the last item */
        OffsetNumber last_off = PageGetMaxOffsetNumber(target_page);

        insert_at = last_off + 1;

#ifdef USE_ASSERT_CHECKING
        /* appending must not break ordering: new key >= last existing key */
        if (last_off > 0)
        {
            ItemId      last_id = PageGetItemId(target_page, last_off);
            IndexTuple  last_tuple = (IndexTuple) PageGetItem(target_page, last_id);

            Assert(_hash_get_indextuple_hashkey(last_tuple) <=
                   _hash_get_indextuple_hashkey(itup));
        }
#endif
    }

    if (PageAddItem(target_page, (Item) itup, itemsize, insert_at, false, false)
        == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(rel));

    return insert_at;
}

References _hash_binsearch(), _hash_checkpage(), _hash_get_indextuple_hashkey(), Assert(), buf, BufferGetPage(), elog, ERROR, InvalidOffsetNumber, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, PageAddItem, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), and RelationGetRelationName.

Referenced by _hash_doinsert().

◆ _hash_relbuf()

void _hash_relbuf	(	Relation	rel,
		Buffer	buf
	)

Definition at line 266 of file hashpage.c.

{
    /*
     * Thin wrapper: hand 'buf' to UnlockReleaseBuffer(), which gives up the
     * buffer's lock together with its pin.  'rel' is not used by this body.
     */
    UnlockReleaseBuffer(buf);
}

References buf, and UnlockReleaseBuffer().

Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), hash_bitmap_info(), hashbucketcleanup(), hashbulkdelete(), pgstat_hash_page(), and pgstathashindex().

◆ _hash_spareindex()

uint32 _hash_spareindex ( uint32 num_bucket )

Definition at line 142 of file hashutil.c.

{
    uint32      group = pg_ceil_log2_32(num_bucket);
    uint32      earlier_phases;
    uint32      phase_in_group;
 
    /* Early groups have exactly one phase, so group number == phase number. */
    if (group < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
        return group;
 
    /*
     * Phases contributed by the multi-phase groups that precede this one:
     * each such group holds (1 << HASH_SPLITPOINT_PHASE_BITS) phases.
     */
    earlier_phases = (group - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) <<
        HASH_SPLITPOINT_PHASE_BITS;
 
    /*
     * Zero-based phase of num_bucket inside its own group, extracted from
     * the bits of (num_bucket - 1) just below the group's leading bit.
     */
    phase_in_group = ((num_bucket - 1) >>
                      (group - (HASH_SPLITPOINT_PHASE_BITS + 1))) &
        HASH_SPLITPOINT_PHASE_MASK;
 
    /* single-phase groups + earlier multi-phase groups + current offset */
    return HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE + earlier_phases + phase_in_group;
}

References HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE, HASH_SPLITPOINT_PHASE_BITS, HASH_SPLITPOINT_PHASE_MASK, and pg_ceil_log2_32().

Referenced by _hash_expandtable(), and _hash_init_metabuffer().

◆ _hash_squeezebucket()

void _hash_squeezebucket	(	Relation	rel,
		Bucket	bucket,
		BlockNumber	bucket_blkno,
		Buffer	bucket_buf,
		BufferAccessStrategy	bstrategy
	)

Definition at line 842 of file hashovfl.c.

{
    /*
     * Compact the overflow chain of one hash bucket.
     *
     * Live tuples are taken off the last page of the chain (the "read" page)
     * and re-inserted into the earliest page that has room (the "write" page,
     * starting with the primary bucket page).  A read page left with no live
     * tuples is handed to _hash_freeovflpage(), after which the read position
     * steps backward via hasho_prevblkno.  The work ends when the write and
     * read positions meet.
     *
     * rel          - the index relation
     * bucket       - bucket number; used here only to cross-check
     *                hasho_bucket in assertions
     * bucket_blkno - block number of the primary bucket page
     * bucket_buf   - buffer holding the primary bucket page; this code
     *                unlocks it without ever locking it, so the caller is
     *                expected to hold the lock on entry
     * bstrategy    - buffer access strategy passed through to
     *                _hash_getbuf_with_strategy() and _hash_freeovflpage()
     *
     * On every return path the primary bucket page is only unlocked
     * (LockBuffer with BUFFER_LOCK_UNLOCK), never released through
     * _hash_relbuf(), so its pin stays with the caller.
     */
    BlockNumber wblkno;
    BlockNumber rblkno;
    Buffer      wbuf;
    Buffer      rbuf;
    Page        wpage;
    Page        rpage;
    HashPageOpaque wopaque;
    HashPageOpaque ropaque;
 
    /*
     * start squeezing into the primary bucket page.
     */
    wblkno = bucket_blkno;
    wbuf = bucket_buf;
    wpage = BufferGetPage(wbuf);
    wopaque = HashPageGetOpaque(wpage);
 
    /*
     * if there aren't any overflow pages, there's nothing to squeeze. caller
     * is responsible for releasing the pin on primary bucket page.
     */
    if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
    {
        LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
        return;
    }
 
    /*
     * Find the last page in the bucket chain by starting at the base bucket
     * page and working forward.  Note: we assume that a hash bucket chain is
     * usually smaller than the buffer ring being used by VACUUM, else using
     * the access strategy here would be counterproductive.
     */
    rbuf = InvalidBuffer;
    ropaque = wopaque;
    do
    {
        rblkno = ropaque->hasho_nextblkno;
        /* drop the overflow page visited on the previous iteration, if any */
        if (rbuf != InvalidBuffer)
            _hash_relbuf(rel, rbuf);
        rbuf = _hash_getbuf_with_strategy(rel,
                                          rblkno,
                                          HASH_WRITE,
                                          LH_OVERFLOW_PAGE,
                                          bstrategy);
        rpage = BufferGetPage(rbuf);
        ropaque = HashPageGetOpaque(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
 
    /*
     * squeeze the tuples.
     */
    for (;;)
    {
        /*
         * Per-read-page batch state: 'deletable' holds read-page offsets of
         * tuples queued for removal, 'itups'/'tups_size' hold palloc'd copies
         * of those tuples and their aligned sizes, and 'itup_offsets'
         * receives the write-page offsets chosen when the batch is inserted.
         */
        OffsetNumber roffnum;
        OffsetNumber maxroffnum;
        OffsetNumber deletable[MaxOffsetNumber];
        IndexTuple  itups[MaxIndexTuplesPerPage];
        Size        tups_size[MaxIndexTuplesPerPage];
        OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
        uint16      ndeletable = 0;
        uint16      nitups = 0;
        Size        all_tups_size = 0;
        int         i;
        bool        retain_pin = false;
 
        /*
         * Besides normal loop entry, control comes back here via "goto
         * readpage" after a batch has been flushed; the batch counters have
         * been reset to zero by then.
         */
readpage:
        /* Scan each tuple in "read" page */
        maxroffnum = PageGetMaxOffsetNumber(rpage);
        for (roffnum = FirstOffsetNumber;
             roffnum <= maxroffnum;
             roffnum = OffsetNumberNext(roffnum))
        {
            IndexTuple  itup;
            Size        itemsz;
 
            /* skip dead tuples */
            if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
                continue;
 
            itup = (IndexTuple) PageGetItem(rpage,
                                            PageGetItemId(rpage, roffnum));
            itemsz = IndexTupleSize(itup);
            itemsz = MAXALIGN(itemsz);
 
            /*
             * Walk up the bucket chain, looking for a page big enough for
             * this item and all other accumulated items.  Exit if we reach
             * the read page.
             */
            while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
            {
                Buffer      next_wbuf = InvalidBuffer;
                bool        tups_moved = false;
 
                Assert(!PageIsEmpty(wpage));
 
                /*
                 * We are about to leave the primary bucket page: remember to
                 * only unlock it below rather than release it.
                 */
                if (wblkno == bucket_blkno)
                    retain_pin = true;
 
                wblkno = wopaque->hasho_nextblkno;
                Assert(BlockNumberIsValid(wblkno));
 
                /* don't need to move to next page if we reached the read page */
                if (wblkno != rblkno)
                    next_wbuf = _hash_getbuf_with_strategy(rel,
                                                           wblkno,
                                                           HASH_WRITE,
                                                           LH_OVERFLOW_PAGE,
                                                           bstrategy);
 
                /*
                 * Flush the batch accumulated so far into the write page we
                 * are leaving, and remove the same tuples from the read page.
                 */
                if (nitups > 0)
                {
                    Assert(nitups == ndeletable);
 
                    /*
                     * This operation needs to log multiple tuples, prepare
                     * WAL for that.  The count 3 + nitups matches the data
                     * chunks registered below: the main record data, the
                     * offsets array, one chunk per tuple, and the deletable
                     * array.  This is done before entering the critical
                     * section.
                     */
                    if (RelationNeedsWAL(rel))
                        XLogEnsureRecordSpace(0, 3 + nitups);
 
                    START_CRIT_SECTION();
 
                    /*
                     * we have to insert tuples on the "write" page, being
                     * careful to preserve hashkey ordering.  (If we insert
                     * many tuples into the same "write" page it would be
                     * worth qsort'ing them).
                     */
                    _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
                    MarkBufferDirty(wbuf);
 
                    /* Delete tuples we already moved off read page */
                    PageIndexMultiDelete(rpage, deletable, ndeletable);
                    MarkBufferDirty(rbuf);
 
                    /* XLOG stuff */
                    if (RelationNeedsWAL(rel))
                    {
                        XLogRecPtr  recptr;
                        xl_hash_move_page_contents xlrec;
 
                        xlrec.ntups = nitups;
                        xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
 
                        XLogBeginInsert();
                        XLogRegisterData(&xlrec, SizeOfHashMovePageContents);
 
                        /*
                         * bucket buffer was not changed, but still needs to
                         * be registered to ensure that we can acquire a
                         * cleanup lock on it during replay.
                         */
                        if (!xlrec.is_prim_bucket_same_wrt)
                        {
                            int         flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
 
                            XLogRegisterBuffer(0, bucket_buf, flags);
                        }
 
                        /* block 1: write page, with target offsets and tuple bodies */
                        XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
                        XLogRegisterBufData(1, itup_offsets,
                                            nitups * sizeof(OffsetNumber));
                        for (i = 0; i < nitups; i++)
                            XLogRegisterBufData(1, itups[i], tups_size[i]);
 
                        /* block 2: read page, with the offsets deleted from it */
                        XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
                        XLogRegisterBufData(2, deletable,
                                            ndeletable * sizeof(OffsetNumber));
 
                        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
 
                        PageSetLSN(BufferGetPage(wbuf), recptr);
                        PageSetLSN(BufferGetPage(rbuf), recptr);
                    }
 
                    END_CRIT_SECTION();
 
                    tups_moved = true;
                }
 
                /*
                 * release the lock on previous page after acquiring the lock
                 * on next page
                 */
                if (retain_pin)
                    LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
                else
                    _hash_relbuf(rel, wbuf);
 
                /* nothing more to do if we reached the read page */
                if (rblkno == wblkno)
                {
                    _hash_relbuf(rel, rbuf);
                    return;
                }
 
                /* advance the write position to the next page in the chain */
                wbuf = next_wbuf;
                wpage = BufferGetPage(wbuf);
                wopaque = HashPageGetOpaque(wpage);
                Assert(wopaque->hasho_bucket == bucket);
                retain_pin = false;
 
                /* be tidy */
                for (i = 0; i < nitups; i++)
                    pfree(itups[i]);
                nitups = 0;
                all_tups_size = 0;
                ndeletable = 0;
 
                /*
                 * after moving the tuples, rpage would have been compacted,
                 * so we need to rescan it.
                 */
                if (tups_moved)
                    goto readpage;
            }
 
            /* remember tuple for deletion from "read" page */
            deletable[ndeletable++] = roffnum;
 
            /*
             * we need a copy of index tuples as they can be freed as part of
             * overflow page, however we need them to write a WAL record in
             * _hash_freeovflpage.
             */
            itups[nitups] = CopyIndexTuple(itup);
            tups_size[nitups++] = itemsz;
            all_tups_size += itemsz;
        }
 
        /*
         * If we reach here, there are no live tuples on the "read" page ---
         * it was empty when we got to it, or we moved them all.  So we can
         * just free the page without bothering with deleting tuples
         * individually.  Then advance to the previous "read" page.
         *
         * Tricky point here: if our read and write pages are adjacent in the
         * bucket chain, our write lock on wbuf will conflict with
         * _hash_freeovflpage's attempt to update the sibling links of the
         * removed page.  In that case, we don't need to lock it again.
         */
        /* capture the predecessor now; rbuf is released by the call below */
        rblkno = ropaque->hasho_prevblkno;
        Assert(BlockNumberIsValid(rblkno));
 
        /* free this overflow page (releases rbuf) */
        _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
                           tups_size, nitups, bstrategy);
 
        /* be tidy */
        for (i = 0; i < nitups; i++)
            pfree(itups[i]);
 
        /* are we freeing the page adjacent to wbuf? */
        if (rblkno == wblkno)
        {
            /* retain the pin on primary bucket page till end of bucket scan */
            if (wblkno == bucket_blkno)
                LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
            else
                _hash_relbuf(rel, wbuf);
            return;
        }
 
        /* step the read position back to the previous page in the chain */
        rbuf = _hash_getbuf_with_strategy(rel,
                                          rblkno,
                                          HASH_WRITE,
                                          LH_OVERFLOW_PAGE,
                                          bstrategy);
        rpage = BufferGetPage(rbuf);
        ropaque = HashPageGetOpaque(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    }
 
    /* NOTREACHED */
}

References _hash_freeovflpage(), _hash_getbuf_with_strategy(), _hash_pgaddmultitup(), _hash_relbuf(), Assert(), BlockNumberIsValid(), BUFFER_LOCK_UNLOCK, BufferGetPage(), CopyIndexTuple(), END_CRIT_SECTION, FirstOffsetNumber, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_prevblkno, HashPageGetOpaque, i, IndexTupleSize(), InvalidBuffer, xl_hash_move_page_contents::is_prim_bucket_same_wrt, ItemIdIsDead, LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, MaxOffsetNumber, xl_hash_move_page_contents::ntups, OffsetNumberNext, PageGetFreeSpaceForMultipleTuples(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageIsEmpty(), PageSetLSN(), pfree(), REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashMovePageContents, START_CRIT_SECTION, XLOG_HASH_MOVE_PAGE_CONTENTS, XLogBeginInsert(), XLogEnsureRecordSpace(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashbucketcleanup().

◆ hashadjustmembers()

void hashadjustmembers	(	Oid	opfamilyoid,
		Oid	opclassoid,
		List *	operators,
		List *	functions
	)

Definition at line 263 of file hashvalidate.c.

{
    Oid         opcintype = InvalidOid;
    ListCell   *lc;
 
    /*
     * Dependency policy for hash opfamily members:
     *
     * - Optional support functions are always "loose" (soft, family-level)
     *   members.
     * - Cross-type operators and required support functions are likewise
     *   always loose members of the opfamily.
     * - Same-type members are preferably tied to the matching opclass.  If
     *   the user never created one we cannot do that and fall back to the
     *   opfamily dependency.  (Forcing creation of an opclass would leave an
     *   incomplete opclass lying about, and raising an error is no better.)
     *
     * This creates a small dump/reload hazard, because the order in which
     * objects are restored can influence which dependency is chosen.
     * pg_dump's existing behavior preserves the choice in most cases, the
     * exception being a cross-type operator bound tightly into an opclass;
     * that is a mistake anyway, so silently "fixing" it isn't awful.
     *
     * opcintype/opclassoid act as a one-entry cache of the most recently
     * resolved opclass so that consecutive members of the same type do not
     * repeat the lookup.
     */
    if (OidIsValid(opclassoid))
    {
        /* During CREATE OPERATOR CLASS, need CCI to see the pg_opclass row */
        CommandCounterIncrement();
        opcintype = get_opclass_input_type(opclassoid);
    }
 
    /*
     * Operators and support functions are treated almost alike, so walk one
     * combined list instead of duplicating the loop.
     */
    foreach(lc, list_concat_copy(operators, functions))
    {
        OpFamilyMember *member = (OpFamilyMember *) lfirst(lc);
        bool        optional_proc;
        bool        cross_type;
        Oid         owning_opclass = InvalidOid;
 
        optional_proc = member->is_func && member->number != HASHSTANDARD_PROC;
        cross_type = member->lefttype != member->righttype;
 
        /* Only required, same-type members are candidates for an opclass. */
        if (!optional_proc && !cross_type)
        {
            if (member->lefttype != opcintype)
            {
                /* Cache the outcome of this expensive lookup, even a miss */
                opcintype = member->lefttype;
                opclassoid = opclass_for_family_datatype(HASH_AM_OID,
                                                         opfamilyoid,
                                                         opcintype);
            }
            owning_opclass = opclassoid;
        }
 
        if (OidIsValid(owning_opclass))
        {
            /* Hard dependency on the opclass */
            member->ref_is_hard = true;
            member->ref_is_family = false;
            member->refobjid = owning_opclass;
        }
        else
        {
            /*
             * Optional proc, cross-type member, or no suitable opclass
             * exists: soft dependency on the opfamily.
             */
            member->ref_is_hard = false;
            member->ref_is_family = true;
            member->refobjid = opfamilyoid;
        }
    }
}

References CommandCounterIncrement(), functions, get_opclass_input_type(), HASHSTANDARD_PROC, InvalidOid, OpFamilyMember::is_func, OpFamilyMember::lefttype, lfirst, list_concat_copy(), OpFamilyMember::number, OidIsValid, opclass_for_family_datatype(), OpFamilyMember::ref_is_family, OpFamilyMember::ref_is_hard, OpFamilyMember::refobjid, and OpFamilyMember::righttype.

Referenced by hashhandler().

◆ hashbeginscan()

IndexScanDesc hashbeginscan	(	Relation	rel,
		int	nkeys,
		int	norderbys
	)

Definition at line 374 of file hash.c.

{
    HashScanOpaque so;
    IndexScanDesc scan;
 
    /* hash indexes cannot evaluate ORDER BY operators */
    Assert(norderbys == 0);
 
    /* build the generic scan descriptor, then attach our private state */
    scan = RelationGetIndexScan(rel, nkeys, norderbys);
 
    so = (HashScanOpaque) palloc(sizeof(*so));
    scan->opaque = so;
 
    /* no bucket buffers are held yet */
    so->hashso_split_bucket_buf = InvalidBuffer;
    so->hashso_bucket_buf = InvalidBuffer;
 
    /* no split-related state recorded yet */
    so->hashso_buc_split = false;
    so->hashso_buc_populated = false;
 
    /* no killed items recorded yet */
    so->numKilled = 0;
    so->killedItems = NULL;
 
    /* the scan starts without a current position */
    HashScanPosInvalidate(so->currPos);
 
    return scan;
}

References Assert(), HashScanOpaqueData::currPos, HashScanPosInvalidate, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, HashScanOpaqueData::hashso_bucket_buf, HashScanOpaqueData::hashso_split_bucket_buf, InvalidBuffer, HashScanOpaqueData::killedItems, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and RelationGetIndexScan().

Referenced by hashhandler().

◆ hashbucketcleanup()

void hashbucketcleanup	(	Relation	rel,
		Bucket	cur_bucket,
		Buffer	bucket_buf,
		BlockNumber	bucket_blkno,
		BufferAccessStrategy	bstrategy,
		uint32	maxbucket,
		uint32	highmask,
		uint32	lowmask,
		double *	tuples_removed,
		double *	num_index_tuples,
		bool	split_cleanup,
		IndexBulkDeleteCallback	callback,
		void *	callback_state
	)

Definition at line 690 of file hash.c.

{
    /*
     * Remove unwanted tuples from every page of one bucket, then try to
     * compact the bucket.
     *
     * A tuple is deleted when 'callback' is non-NULL and returns true for
     * the tuple's t_tid, or, when 'split_cleanup' is set, when its hash key
     * maps to a bucket other than 'cur_bucket' under (maxbucket, highmask,
     * lowmask).
     *
     * rel              - the index relation
     * cur_bucket       - number of the bucket being cleaned
     * bucket_buf       - buffer holding the primary bucket page; the page is
     *                    read and modified here without being locked first,
     *                    so the caller is expected to hold the lock on entry
     * bucket_blkno     - block number of the primary bucket page
     * bstrategy        - buffer access strategy for reading overflow pages
     * maxbucket, highmask, lowmask
     *                  - passed to _hash_hashkey2bucket() (and, for the
     *                    assertion-only new_bucket, to
     *                    _hash_get_newbucket_from_oldbucket())
     * tuples_removed   - if non-NULL, incremented once per tuple the callback
     *                    condemns (tuples removed only because of the split
     *                    are not counted)
     * num_index_tuples - if non-NULL, incremented once per tuple kept
     * split_cleanup    - also remove tuples moved away by a split and clear
     *                    LH_BUCKET_NEEDS_SPLIT_CLEANUP afterwards
     * callback, callback_state
     *                  - deletion test and its opaque argument; callback may
     *                    be NULL
     *
     * At the end bucket_buf is either unlocked here or handed, still locked,
     * to _hash_squeezebucket().
     */
    BlockNumber blkno;
    Buffer      buf;
    Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
    bool        bucket_dirty = false;
 
    blkno = bucket_blkno;
    buf = bucket_buf;
 
    /* new_bucket is only consulted by the Assert in the tuple loop below */
    if (split_cleanup)
        new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
                                                        lowmask, maxbucket);
 
    /* Scan each page in bucket */
    for (;;)
    {
        HashPageOpaque opaque;
        OffsetNumber offno;
        OffsetNumber maxoffno;
        Buffer      next_buf;
        Page        page;
        OffsetNumber deletable[MaxOffsetNumber];
        int         ndeletable = 0;
        bool        retain_pin = false;
        bool        clear_dead_marking = false;
 
        vacuum_delay_point(false);
 
        page = BufferGetPage(buf);
        opaque = HashPageGetOpaque(page);
 
        /* Scan each tuple in page */
        maxoffno = PageGetMaxOffsetNumber(page);
        for (offno = FirstOffsetNumber;
             offno <= maxoffno;
             offno = OffsetNumberNext(offno))
        {
            ItemPointer htup;
            IndexTuple  itup;
            Bucket      bucket;
            bool        kill_tuple = false;
 
            itup = (IndexTuple) PageGetItem(page,
                                            PageGetItemId(page, offno));
            htup = &(itup->t_tid);
 
            /*
             * To remove the dead tuples, we strictly want to rely on results
             * of callback function.  refer btvacuumpage for detailed reason.
             */
            if (callback && callback(htup, callback_state))
            {
                kill_tuple = true;
                if (tuples_removed)
                    *tuples_removed += 1;
            }
            else if (split_cleanup)
            {
                /* delete the tuples that are moved by split. */
                bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
                                              maxbucket,
                                              highmask,
                                              lowmask);
                /* mark the item for deletion */
                if (bucket != cur_bucket)
                {
                    /*
                     * We expect tuples to either belong to current bucket or
                     * new_bucket.  This is ensured because we don't allow
                     * further splits from bucket that contains garbage. See
                     * comments in _hash_expandtable.
                     */
                    Assert(bucket == new_bucket);
                    kill_tuple = true;
                }
            }
 
            if (kill_tuple)
            {
                /* mark the item for deletion */
                deletable[ndeletable++] = offno;
            }
            else
            {
                /* we're keeping it, so count it */
                if (num_index_tuples)
                    *num_index_tuples += 1;
            }
        }
 
        /* retain the pin on primary bucket page till end of bucket scan */
        if (blkno == bucket_blkno)
            retain_pin = true;
        else
            retain_pin = false;
 
        /* from here on blkno names the NEXT page in the chain, not this one */
        blkno = opaque->hasho_nextblkno;
 
        /*
         * Apply deletions, advance to next page and write page if needed.
         */
        if (ndeletable > 0)
        {
            /* No ereport(ERROR) until changes are logged */
            START_CRIT_SECTION();
 
            PageIndexMultiDelete(page, deletable, ndeletable);
            /* remembered so that the bucket is squeezed after the scan */
            bucket_dirty = true;
 
            /*
             * Let us mark the page as clean if vacuum removes the DEAD tuples
             * from an index page. We do this by clearing
             * LH_PAGE_HAS_DEAD_TUPLES flag.
             *
             * Note that *tuples_removed is the caller's running total, not a
             * count for this page alone.
             */
            if (tuples_removed && *tuples_removed > 0 &&
                H_HAS_DEAD_TUPLES(opaque))
            {
                opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
                clear_dead_marking = true;
            }
 
            MarkBufferDirty(buf);
 
            /* XLOG stuff */
            if (RelationNeedsWAL(rel))
            {
                xl_hash_delete xlrec;
                XLogRecPtr  recptr;
 
                xlrec.clear_dead_marking = clear_dead_marking;
                xlrec.is_primary_bucket_page = (buf == bucket_buf);
 
                XLogBeginInsert();
                XLogRegisterData(&xlrec, SizeOfHashDelete);
 
                /*
                 * bucket buffer was not changed, but still needs to be
                 * registered to ensure that we can acquire a cleanup lock on
                 * it during replay.
                 */
                if (!xlrec.is_primary_bucket_page)
                {
                    uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
 
                    XLogRegisterBuffer(0, bucket_buf, flags);
                }
 
                /* block 1: the page we deleted from, plus the deleted offsets */
                XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
                XLogRegisterBufData(1, deletable,
                                    ndeletable * sizeof(OffsetNumber));
 
                recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
                PageSetLSN(BufferGetPage(buf), recptr);
            }
 
            END_CRIT_SECTION();
        }
 
        /* bail out if there are no more pages to scan. */
        if (!BlockNumberIsValid(blkno))
            break;
 
        next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
                                              LH_OVERFLOW_PAGE,
                                              bstrategy);
 
        /*
         * release the lock on previous page after acquiring the lock on next
         * page
         */
        if (retain_pin)
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        else
            _hash_relbuf(rel, buf);
 
        buf = next_buf;
    }
 
    /*
     * lock the bucket page to clear the garbage flag and squeeze the bucket.
     * if the current buffer is same as bucket buffer, then we already have
     * lock on bucket page.
     */
    if (buf != bucket_buf)
    {
        _hash_relbuf(rel, buf);
        LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
    }
 
    /*
     * Clear the garbage flag from bucket after deleting the tuples that are
     * moved by split.  We purposefully clear the flag before squeeze bucket,
     * so that after restart, vacuum shouldn't again try to delete the moved
     * by split tuples.
     */
    if (split_cleanup)
    {
        HashPageOpaque bucket_opaque;
        Page        page;
 
        page = BufferGetPage(bucket_buf);
        bucket_opaque = HashPageGetOpaque(page);
 
        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();
 
        bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
        MarkBufferDirty(bucket_buf);
 
        /* XLOG stuff */
        if (RelationNeedsWAL(rel))
        {
            XLogRecPtr  recptr;
 
            XLogBeginInsert();
            XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
 
            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
            PageSetLSN(page, recptr);
        }
 
        END_CRIT_SECTION();
    }
 
    /*
     * If we have deleted anything, try to compact free space.  For squeezing
     * the bucket, we must have a cleanup lock, else it can impact the
     * ordering of tuples for a scan that has started before it.
     *
     * When we do not squeeze, all that is left to do is unlock the primary
     * bucket page.
     */
    if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
        _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
                            bstrategy);
    else
        LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
}

References _hash_get_indextuple_hashkey(), _hash_get_newbucket_from_oldbucket(), _hash_getbuf_with_strategy(), _hash_hashkey2bucket(), _hash_relbuf(), _hash_squeezebucket(), Assert(), BlockNumberIsValid(), buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), callback(), xl_hash_delete::clear_dead_marking, END_CRIT_SECTION, FirstOffsetNumber, H_HAS_DEAD_TUPLES, HASH_WRITE, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageGetOpaque, InvalidBucket, xl_hash_delete::is_primary_bucket_page, IsBufferCleanupOK(), LH_OVERFLOW_PAGE, LockBuffer(), MarkBufferDirty(), MaxOffsetNumber, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIndexMultiDelete(), PageSetLSN(), PG_USED_FOR_ASSERTS_ONLY, REGBUF_NO_CHANGE, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashDelete, START_CRIT_SECTION, IndexTupleData::t_tid, vacuum_delay_point(), XLOG_HASH_DELETE, XLOG_HASH_SPLIT_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _hash_expandtable(), _hash_splitbucket(), and hashbulkdelete().

◆ hashbuild()

IndexBuildResult * hashbuild	(	Relation	heap,
		Relation	index,
		struct IndexInfo *	indexInfo
	)

Definition at line 122 of file hash.c.

{
    HashBuildState buildstate;
    IndexBuildResult *result;
    BlockNumber est_pages;
    double      est_tuples;
    double      est_allvisfrac;
    double      heap_tuples;
    uint32      num_buckets;
    Size        sort_threshold;
 
    /*
     * A build is expected to happen exactly once per index relation, so the
     * index must still be completely empty at this point.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));
 
    /*
     * Size the initial bucket array (and write the metapage) from an
     * estimate of how many rows the table currently holds.
     */
    estimate_rel_size(heap, NULL, &est_pages, &est_tuples, &est_allvisfrac);
    num_buckets = _hash_init(index, est_tuples, MAIN_FORKNUM);
 
    /*
     * Decide whether to sort before inserting.
     *
     * Feeding tuples to the index in heap order gives no locality of access
     * (hash codes being more or less random), so an index larger than RAM
     * would thrash badly.  Sorting by expected bucket number avoids that,
     * but is wasted effort when the index fits in memory.  We therefore sort
     * when the initial index size reaches the smaller of
     * maintenance_work_mem and the number of buffers the index can use.
     * (The buffer limit should cut down thrashing between PG buffers and
     * kernel buffers even without physical I/O; the maintenance_work_mem
     * limit makes the sort path easy to test and gives DBAs another knob.)
     *
     * NOTE: this test assumes one bucket == one page, and the "initial index
     * size" ignores both the metapage and the first bitmap page.
     */
    sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
    if (index->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
        sort_threshold = Min(sort_threshold, NLocBuffer);
    else
        sort_threshold = Min(sort_threshold, NBuffers);
 
    /* set up the state shared with hashbuildCallback */
    buildstate.heapRel = heap;
    buildstate.indtuples = 0;
    buildstate.spool = (num_buckets >= sort_threshold)
        ? _h_spoolinit(heap, index, num_buckets)
        : NULL;
 
    /* scan the heap, feeding every tuple to hashbuildCallback */
    heap_tuples = table_index_build_scan(heap, index, indexInfo, true, true,
                                         hashbuildCallback,
                                         &buildstate, NULL);
    pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
                                 buildstate.indtuples);
 
    /* if we spooled, now sort the tuples and load them into the index */
    if (buildstate.spool != NULL)
    {
        _h_indexbuild(buildstate.spool, buildstate.heapRel);
        _h_spooldestroy(buildstate.spool);
    }
 
    /* report statistics back to the caller */
    result = (IndexBuildResult *) palloc(sizeof(*result));
    result->index_tuples = buildstate.indtuples;
    result->heap_tuples = heap_tuples;
 
    return result;
}

References _h_indexbuild(), _h_spooldestroy(), _h_spoolinit(), _hash_init(), elog, ERROR, estimate_rel_size(), hashbuildCallback(), IndexBuildResult::heap_tuples, HashBuildState::heapRel, IndexBuildResult::index_tuples, HashBuildState::indtuples, MAIN_FORKNUM, maintenance_work_mem, Min, NBuffers, NLocBuffer, palloc(), pgstat_progress_update_param(), PROGRESS_CREATEIDX_TUPLES_TOTAL, RelationGetNumberOfBlocks, RelationGetRelationName, HashBuildState::spool, and table_index_build_scan().

Referenced by hashhandler().

◆ hashbuildempty()

void hashbuildempty ( Relation index )

Definition at line 208 of file hash.c.

{
    /*
     * Build an empty hash index by delegating to _hash_init() against the
     * init fork (INIT_FORKNUM) of the given index relation.
     *
     * NOTE(review): the second argument (0) is presumably the estimated
     * tuple count used to size the initial index -- confirm against
     * _hash_init().
     */
    _hash_init(index, 0, INIT_FORKNUM);
}

References _hash_init(), and INIT_FORKNUM.

Referenced by hashhandler().

◆ hashbulkdelete()

IndexBulkDeleteResult * hashbulkdelete	(	IndexVacuumInfo *	info,
		IndexBulkDeleteResult *	stats,
		IndexBulkDeleteCallback	callback,
		void *	callback_state
	)

Definition at line 465 of file hash.c.

{
    /*
     * Bulk-delete pass over a hash index.
     *
     * Walks every bucket from 0 up to the current maximum bucket number,
     * taking a cleanup lock on each primary bucket page and handing the
     * bucket to hashbucketcleanup() together with the caller's callback and
     * callback_state.  Once the loop finishes, the metapage is write-locked
     * and re-examined: if hashm_maxbucket has changed since we started, the
     * cached metapage is refreshed and the loop resumes with the additional
     * bucket(s).  Finally the metapage's tuple count is updated inside a
     * critical section (WAL-logged when the relation needs WAL).
     *
     * info           - supplies the index relation and buffer access strategy
     * stats          - running result; allocated here (zeroed) if NULL
     * callback       - passed through to hashbucketcleanup()
     * callback_state - passed through to hashbucketcleanup()
     *
     * Returns stats with estimated_count, num_index_tuples and
     * tuples_removed filled in; num_pages is left for hashvacuumcleanup().
     */
    Relation    rel = info->index;
    double      tuples_removed;
    double      num_index_tuples;
    double      orig_ntuples;
    Bucket      orig_maxbucket;
    Bucket      cur_maxbucket;
    Bucket      cur_bucket;
    Buffer      metabuf = InvalidBuffer;
    HashMetaPage metap;
    HashMetaPage cachedmetap;

    tuples_removed = 0;
    num_index_tuples = 0;

    /*
     * We need a copy of the metapage so that we can use its hashm_spares[]
     * values to compute bucket page addresses, but a cached copy should be
     * good enough.  (If not, we'll detect that further down and refresh the
     * cache as necessary.)
     */
    cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
    Assert(cachedmetap != NULL);

    /* Remember the starting state so we can tell later whether it changed */
    orig_maxbucket = cachedmetap->hashm_maxbucket;
    orig_ntuples = cachedmetap->hashm_ntuples;

    /* Scan the buckets that we know exist */
    cur_bucket = 0;
    cur_maxbucket = orig_maxbucket;

    /*
     * We jump back here (with cur_bucket unchanged and cur_maxbucket raised)
     * if the metapage check below finds that a split added buckets.
     */
loop_top:
    while (cur_bucket <= cur_maxbucket)
    {
        BlockNumber bucket_blkno;
        BlockNumber blkno;
        Buffer      bucket_buf;
        Buffer      buf;
        HashPageOpaque bucket_opaque;
        Page        page;
        bool        split_cleanup = false;

        /* Get address of bucket's start page */
        bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);

        blkno = bucket_blkno;

        /*
         * We need to acquire a cleanup lock on the primary bucket page to
         * out-wait concurrent scans before deleting the dead tuples.
         */
        buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
        LockBufferForCleanup(buf);
        _hash_checkpage(rel, buf, LH_BUCKET_PAGE);

        page = BufferGetPage(buf);
        bucket_opaque = HashPageGetOpaque(page);

        /*
         * If the bucket contains tuples that are moved by split, then we need
         * to delete such tuples.  We can't delete such tuples if the split
         * operation on bucket is not finished as those are needed by scans.
         */
        if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
            H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
        {
            split_cleanup = true;

            /*
             * This bucket might have been split since we last held a lock on
             * the metapage.  If so, hashm_maxbucket, hashm_highmask and
             * hashm_lowmask might be old enough to cause us to fail to remove
             * tuples left behind by the most recent split.  To prevent that,
             * now that the primary page of the target bucket has been locked
             * (and thus can't be further split), check whether we need to
             * update our cached metapage data.
             */
            Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
            if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
            {
                cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
                Assert(cachedmetap != NULL);
            }
        }

        bucket_buf = buf;

        /*
         * Clean the bucket; tuples_removed and num_index_tuples are passed
         * by address so the totals accumulate across all buckets.
         */
        hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
                          cachedmetap->hashm_maxbucket,
                          cachedmetap->hashm_highmask,
                          cachedmetap->hashm_lowmask, &tuples_removed,
                          &num_index_tuples, split_cleanup,
                          callback, callback_state);

        _hash_dropbuf(rel, bucket_buf);

        /* Advance to next bucket */
        cur_bucket++;
    }

    /* The cached-metapage lookups may not have given us the buffer yet */
    if (BufferIsInvalid(metabuf))
        metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);

    /* Write-lock metapage and check for split since we started */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    if (cur_maxbucket != metap->hashm_maxbucket)
    {
        /* There's been a split, so process the additional bucket(s) */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
        cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
        Assert(cachedmetap != NULL);
        cur_maxbucket = cachedmetap->hashm_maxbucket;
        goto loop_top;
    }

    /* Okay, we're really done.  Update tuple count in metapage. */
    START_CRIT_SECTION();

    if (orig_maxbucket == metap->hashm_maxbucket &&
        orig_ntuples == metap->hashm_ntuples)
    {
        /*
         * No one has split or inserted anything since start of scan, so
         * believe our count as gospel.
         */
        metap->hashm_ntuples = num_index_tuples;
    }
    else
    {
        /*
         * Otherwise, our count is untrustworthy since we may have
         * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
         * (Note: we still return estimated_count = false, because using this
         * count is better than not updating reltuples at all.)
         */
        if (metap->hashm_ntuples > tuples_removed)
            metap->hashm_ntuples -= tuples_removed;
        else
            metap->hashm_ntuples = 0;
        num_index_tuples = metap->hashm_ntuples;
    }

    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_update_meta_page xlrec;
        XLogRecPtr  recptr;

        /* The WAL record carries only the new tuple count */
        xlrec.ntuples = metap->hashm_ntuples;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashUpdateMetaPage);

        XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    _hash_relbuf(rel, metabuf);

    /* return statistics */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    stats->estimated_count = false;
    stats->num_index_tuples = num_index_tuples;
    /* accumulate rather than overwrite: stats may carry earlier totals */
    stats->tuples_removed += tuples_removed;
    /* hashvacuumcleanup will fill in num_pages */

    return stats;
}

References _hash_checkpage(), _hash_dropbuf(), _hash_getbuf(), _hash_getcachedmetap(), _hash_relbuf(), Assert(), BUCKET_TO_BLKNO, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsInvalid, callback(), END_CRIT_SECTION, IndexBulkDeleteResult::estimated_count, H_BUCKET_BEING_SPLIT, H_NEEDS_SPLIT_CLEANUP, HASH_METAPAGE, HASH_NOLOCK, hashbucketcleanup(), HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HashPageGetOpaque, IndexVacuumInfo::index, InvalidBlockNumber, InvalidBuffer, LH_BUCKET_PAGE, LH_META_PAGE, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirty(), xl_hash_update_meta_page::ntuples, IndexBulkDeleteResult::num_index_tuples, PageSetLSN(), palloc0(), RBM_NORMAL, ReadBufferExtended(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHashUpdateMetaPage, START_CRIT_SECTION, IndexVacuumInfo::strategy, IndexBulkDeleteResult::tuples_removed, XLOG_HASH_UPDATE_META_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by hashhandler().

◆ hashendscan()

void hashendscan ( IndexScanDesc scan )

Definition at line 434 of file hash.c.

{
    /*
     * Shut down a hash index scan: process any pending killed items, let
     * go of the scan's buffers, and free the scan's private state.
     */
    Relation    indexRel = scan->indexRelation;
    HashScanOpaque scanState = (HashScanOpaque) scan->opaque;

    /*
     * Killed items can only be dealt with while the scan position is still
     * valid, so handle them before leaving the current page.
     */
    if (HashScanPosIsValid(scanState->currPos) && scanState->numKilled > 0)
        _hash_kill_items(scan);

    _hash_dropscanbuf(indexRel, scanState);

    /* The killedItems array exists only if some tuple was ever remembered */
    if (scanState->killedItems != NULL)
        pfree(scanState->killedItems);

    pfree(scanState);
    scan->opaque = NULL;
}

References _hash_dropscanbuf(), _hash_kill_items(), HashScanOpaqueData::currPos, HashScanPosIsValid, if(), IndexScanDescData::indexRelation, HashScanOpaqueData::killedItems, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by hashhandler().

◆ hashgetbitmap()

int64 hashgetbitmap	(	IndexScanDesc	scan,
		TIDBitmap *	tbm
	)

Definition at line 342 of file hash.c.

{
    /*
     * Run the whole scan in the forward direction, adding the heap TID of
     * every item found to the caller's TIDBitmap.  Returns the number of
     * TIDs added.
     */
    HashScanOpaque scanState = (HashScanOpaque) scan->opaque;
    int64       added = 0;
    bool        found;

    for (found = _hash_first(scan, ForwardScanDirection);
         found;
         found = _hash_next(scan, ForwardScanDirection))
    {
        HashScanPosItem *item;

        item = &scanState->currPos.items[scanState->currPos.itemIndex];

        /*
         * _hash_first and _hash_next already eliminate dead index entries
         * whenever scan->ignore_killed_tuples is true, so all that is left
         * to do here is to add the result to the TIDBitmap.
         */
        tbm_add_tuples(tbm, &(item->heapTid), 1, true);
        added++;
    }

    return added;
}

References _hash_first(), _hash_next(), HashScanOpaqueData::currPos, ForwardScanDirection, HashScanPosData::itemIndex, HashScanPosData::items, IndexScanDescData::opaque, and tbm_add_tuples().

Referenced by hashhandler().

◆ hashgettuple()

bool hashgettuple	(	IndexScanDesc	scan,
		ScanDirection	dir
	)

Definition at line 290 of file hash.c.

{
    /*
     * Fetch the next tuple of the scan in direction dir.  Returns the result
     * of _hash_first() on the first call of a scan and of _hash_next() on
     * later calls.
     */
    HashScanOpaque scanState = (HashScanOpaque) scan->opaque;

    /* Only the hash code is stored, so hash indexes are always lossy */
    scan->xs_recheck = true;

    /*
     * An invalid scan position means the scan has not been initialized yet:
     * go find the first item.
     */
    if (!HashScanPosIsValid(scanState->currPos))
        return _hash_first(scan, dir);

    /*
     * The scan is already under way.  If the caller asked us to kill the
     * previously-fetched tuple, just remember it for now; all such tuples
     * are dealt with at once right after leaving the index page or at end
     * of scan.  Should the caller reverse the scan direction, the same item
     * may well be entered more than once.  We make no attempt to detect
     * that; entries beyond the array's capacity are simply forgotten.
     */
    if (scan->kill_prior_tuple)
    {
        if (scanState->killedItems == NULL)
            scanState->killedItems = (int *)
                palloc(MaxIndexTuplesPerPage * sizeof(int));

        if (scanState->numKilled < MaxIndexTuplesPerPage)
        {
            scanState->killedItems[scanState->numKilled] =
                scanState->currPos.itemIndex;
            scanState->numKilled++;
        }
    }

    /* Advance the scan in the requested direction */
    return _hash_next(scan, dir);
}

References _hash_first(), _hash_next(), HashScanOpaqueData::currPos, HashScanPosIsValid, if(), HashScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, HashScanOpaqueData::killedItems, MaxIndexTuplesPerPage, HashScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by hashhandler().

◆ hashinsert()

bool hashinsert	(	Relation	rel,
		Datum *	values,
		bool *	isnull,
		ItemPointer	ht_ctid,
		Relation	heapRel,
		IndexUniqueCheck	checkUnique,
		bool	indexUnchanged,
		struct IndexInfo *	indexInfo
	)

Definition at line 258 of file hash.c.

{
    /*
     * Insert one index entry for the heap tuple at *ht_ctid.  The return
     * value is false on every path, whether or not an entry was inserted.
     */
    Datum       hash_values[1];
    bool        hash_isnull[1];

    /* Nothing is inserted unless the data converts to a hash key */
    if (_hash_convert_tuple(rel, values, isnull, hash_values, hash_isnull))
    {
        IndexTuple  new_itup;

        /* Build the index tuple and make it point at the heap tuple */
        new_itup = index_form_tuple(RelationGetDescr(rel),
                                    hash_values, hash_isnull);
        new_itup->t_tid = *ht_ctid;

        _hash_doinsert(rel, new_itup, heapRel, false);

        pfree(new_itup);
    }

    return false;
}

References _hash_convert_tuple(), _hash_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by hashhandler().

◆ hashoptions()

bytea * hashoptions	(	Datum	reloptions,
		bool	validate
	)

Definition at line 275 of file hashutil.c.

{
    /*
     * Parse reloptions for a hash index.  The only option described in the
     * parse table is "fillfactor", an integer stored in
     * HashOptions.fillfactor.
     */
    static const relopt_parse_elt hash_relopt_tab[] = {
        {"fillfactor", RELOPT_TYPE_INT, offsetof(HashOptions, fillfactor)},
    };
    bytea      *parsed;

    parsed = (bytea *) build_reloptions(reloptions, validate,
                                        RELOPT_KIND_HASH,
                                        sizeof(HashOptions),
                                        hash_relopt_tab,
                                        lengthof(hash_relopt_tab));

    return parsed;
}

References build_reloptions(), fillfactor, lengthof, RELOPT_KIND_HASH, RELOPT_TYPE_INT, and validate().

Referenced by hashhandler().

◆ hashrescan()

void hashrescan	(	IndexScanDesc	scan,
		ScanKey	scankey,
		int	nscankeys,
		ScanKey	orderbys,
		int	norderbys
	)

Definition at line 404 of file hash.c.

{
    /*
     * Restart a hash index scan, optionally with a new scan key: process
     * pending killed items, let go of the scan's buffers, and reset the scan
     * state.
     */
    Relation    indexRel = scan->indexRelation;
    HashScanOpaque scanState = (HashScanOpaque) scan->opaque;

    /*
     * Killed items can only be dealt with while the scan position is still
     * valid, so handle them before leaving the current page.
     */
    if (HashScanPosIsValid(scanState->currPos) && scanState->numKilled > 0)
        _hash_kill_items(scan);

    _hash_dropscanbuf(indexRel, scanState);

    /* An invalid position makes the next fetch start with _hash_first */
    HashScanPosInvalidate(scanState->currPos);

    /* Install the new scan key, if the caller supplied one */
    if (scankey != NULL && scan->numberOfKeys > 0)
        memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));

    scanState->hashso_buc_populated = false;
    scanState->hashso_buc_split = false;
}

References _hash_dropscanbuf(), _hash_kill_items(), HashScanOpaqueData::currPos, HashScanPosInvalidate, HashScanPosIsValid, HashScanOpaqueData::hashso_buc_populated, HashScanOpaqueData::hashso_buc_split, if(), IndexScanDescData::indexRelation, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, HashScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by hashhandler().

◆ hashtranslatecmptype()

StrategyNumber hashtranslatecmptype	(	CompareType	cmptype,
		Oid	opfamily
	)

Definition at line 941 of file hash.c.

{
    /*
     * COMPARE_EQ maps to HTEqualStrategyNumber; every other compare type
     * maps to InvalidStrategy.  The opfamily argument is not consulted.
     */
    return (cmptype == COMPARE_EQ) ? HTEqualStrategyNumber : InvalidStrategy;
}

References COMPARE_EQ, HTEqualStrategyNumber, and InvalidStrategy.

Referenced by hashhandler().

◆ hashtranslatestrategy()

CompareType hashtranslatestrategy	(	StrategyNumber	strategy,
		Oid	opfamily
	)

Definition at line 933 of file hash.c.

{
    /*
     * HTEqualStrategyNumber maps to COMPARE_EQ; every other strategy number
     * maps to COMPARE_INVALID.  The opfamily argument is not consulted.
     */
    return (strategy == HTEqualStrategyNumber) ? COMPARE_EQ : COMPARE_INVALID;
}

References COMPARE_EQ, COMPARE_INVALID, and HTEqualStrategyNumber.

Referenced by hashhandler().

◆ hashvacuumcleanup()

IndexBulkDeleteResult * hashvacuumcleanup	(	IndexVacuumInfo *	info,
		IndexBulkDeleteResult *	stats
	)

Definition at line 651 of file hash.c.

{
    /*
     * Post-vacuum cleanup: record the index's current size in blocks in
     * stats->num_pages and hand stats back.
     *
     * A NULL stats means hashbulkdelete wasn't called, so we return NULL to
     * signify no change.  Note that this covers the analyze_only case too.
     */
    if (stats == NULL)
        return NULL;

    stats->num_pages = RelationGetNumberOfBlocks(info->index);

    return stats;
}

References IndexVacuumInfo::index, IndexBulkDeleteResult::num_pages, and RelationGetNumberOfBlocks.

Referenced by hashhandler().

◆ hashvalidate()

bool hashvalidate ( Oid opclassoid )

Definition at line 40 of file hashvalidate.c.

{
    /*
     * Validate a hash operator class.
     *
     * Looks up the opclass, then checks every support function and operator
     * of its operator family, followed by checks on the groups returned by
     * identify_opfamily_groups().  Each problem found is reported with an
     * INFO-level message and causes the result to become false; checking
     * continues so that all problems are reported.  A failed syscache lookup
     * for the opclass itself raises an ERROR instead.
     *
     * Returns true if no problems were found, false otherwise.
     */
    bool        result = true;
    HeapTuple   classtup;
    Form_pg_opclass classform;
    Oid         opfamilyoid;
    Oid         opcintype;
    char       *opclassname;
    char       *opfamilyname;
    CatCList   *proclist,
               *oprlist;
    List       *grouplist;
    OpFamilyOpFuncGroup *opclassgroup;
    List       *hashabletypes = NIL;    /* types with a valid hash function */
    int         i;
    ListCell   *lc;

    /* Fetch opclass information */
    classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
    if (!HeapTupleIsValid(classtup))
        elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
    classform = (Form_pg_opclass) GETSTRUCT(classtup);

    opfamilyoid = classform->opcfamily;
    opcintype = classform->opcintype;
    opclassname = NameStr(classform->opcname);

    /* Fetch opfamily information */
    opfamilyname = get_opfamily_name(opfamilyoid, false);

    /* Fetch all operators and support functions of the opfamily */
    oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
    proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));

    /* Check individual support functions */
    for (i = 0; i < proclist->n_members; i++)
    {
        HeapTuple   proctup = &proclist->members[i]->tuple;
        Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
        bool        ok;

        /*
         * All hash functions should be registered with matching left/right
         * types
         */
        if (procform->amproclefttype != procform->amprocrighttype)
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s contains support function %s with different left and right input types",
                            opfamilyname, "hash",
                            format_procedure(procform->amproc))));
            result = false;
        }

        /* Check procedure numbers and function signatures */
        switch (procform->amprocnum)
        {
            case HASHSTANDARD_PROC:
                ok = check_amproc_signature(procform->amproc, INT4OID, true,
                                            1, 1, procform->amproclefttype);
                break;
            case HASHEXTENDED_PROC:
                ok = check_amproc_signature(procform->amproc, INT8OID, true,
                                            2, 2, procform->amproclefttype, INT8OID);
                break;
            case HASHOPTIONS_PROC:
                ok = check_amoptsproc_signature(procform->amproc);
                break;
            default:
                ereport(INFO,
                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                         errmsg("operator family \"%s\" of access method %s contains function %s with invalid support number %d",
                                opfamilyname, "hash",
                                format_procedure(procform->amproc),
                                procform->amprocnum)));
                result = false;
                continue;       /* don't want additional message */
        }

        if (!ok)
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
                            opfamilyname, "hash",
                            format_procedure(procform->amproc),
                            procform->amprocnum)));
            result = false;
        }

        /* Remember which types we can hash */
        if (ok && (procform->amprocnum == HASHSTANDARD_PROC || procform->amprocnum == HASHEXTENDED_PROC))
        {
            hashabletypes = list_append_unique_oid(hashabletypes, procform->amproclefttype);
        }
    }

    /* Check individual operators */
    for (i = 0; i < oprlist->n_members; i++)
    {
        HeapTuple   oprtup = &oprlist->members[i]->tuple;
        Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);

        /* Check that only allowed strategy numbers exist */
        if (oprform->amopstrategy < 1 ||
            oprform->amopstrategy > HTMaxStrategyNumber)
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s contains operator %s with invalid strategy number %d",
                            opfamilyname, "hash",
                            format_operator(oprform->amopopr),
                            oprform->amopstrategy)));
            result = false;
        }

        /* hash doesn't support ORDER BY operators */
        if (oprform->amoppurpose != AMOP_SEARCH ||
            OidIsValid(oprform->amopsortfamily))
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
                            opfamilyname, "hash",
                            format_operator(oprform->amopopr))));
            result = false;
        }

        /* Check operator signature --- same for all hash strategies */
        if (!check_amop_signature(oprform->amopopr, BOOLOID,
                                  oprform->amoplefttype,
                                  oprform->amoprighttype))
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s contains operator %s with wrong signature",
                            opfamilyname, "hash",
                            format_operator(oprform->amopopr))));
            result = false;
        }

        /* There should be relevant hash functions for each datatype */
        if (!list_member_oid(hashabletypes, oprform->amoplefttype) ||
            !list_member_oid(hashabletypes, oprform->amoprighttype))
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s lacks support function for operator %s",
                            opfamilyname, "hash",
                            format_operator(oprform->amopopr))));
            result = false;
        }
    }

    /* Now check for inconsistent groups of operators/functions */
    grouplist = identify_opfamily_groups(oprlist, proclist);
    opclassgroup = NULL;
    foreach(lc, grouplist)
    {
        OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);

        /* Remember the group exactly matching the test opclass */
        if (thisgroup->lefttype == opcintype &&
            thisgroup->righttype == opcintype)
            opclassgroup = thisgroup;

        /*
         * Complain if there seems to be an incomplete set of operators for
         * this datatype pair (implying that we have a hash function but no
         * operator).
         */
        if (thisgroup->operatorset != (1 << HTEqualStrategyNumber))
        {
            ereport(INFO,
                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                     errmsg("operator family \"%s\" of access method %s is missing operator(s) for types %s and %s",
                            opfamilyname, "hash",
                            format_type_be(thisgroup->lefttype),
                            format_type_be(thisgroup->righttype))));
            result = false;
        }
    }

    /* Check that the originally-named opclass is supported */
    /* (if group is there, we already checked it adequately above) */
    if (!opclassgroup)
    {
        ereport(INFO,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("operator class \"%s\" of access method %s is missing operator(s)",
                        opclassname, "hash")));
        result = false;
    }

    /*
     * Complain if the opfamily doesn't have entries for all possible
     * combinations of its supported datatypes.  While missing cross-type
     * operators are not fatal, it seems reasonable to insist that all
     * built-in hash opfamilies be complete.
     */
    if (list_length(grouplist) !=
        list_length(hashabletypes) * list_length(hashabletypes))
    {
        ereport(INFO,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("operator family \"%s\" of access method %s is missing cross-type operator(s)",
                        opfamilyname, "hash")));
        result = false;
    }

    /* Release the catalog cache references acquired at the top */
    ReleaseCatCacheList(proclist);
    ReleaseCatCacheList(oprlist);
    ReleaseSysCache(classtup);

    return result;
}

References check_amop_signature(), check_amoptsproc_signature(), check_amproc_signature(), elog, ereport, errcode(), errmsg(), ERROR, format_operator(), format_procedure(), format_type_be(), get_opfamily_name(), GETSTRUCT(), HASHEXTENDED_PROC, HASHOPTIONS_PROC, HASHSTANDARD_PROC, HeapTupleIsValid, HTEqualStrategyNumber, HTMaxStrategyNumber, i, identify_opfamily_groups(), INFO, OpFamilyOpFuncGroup::lefttype, lfirst, list_append_unique_oid(), list_length(), list_member_oid(), catclist::members, catclist::n_members, NameStr, NIL, ObjectIdGetDatum(), OidIsValid, OpFamilyOpFuncGroup::operatorset, ReleaseCatCacheList(), ReleaseSysCache(), OpFamilyOpFuncGroup::righttype, SearchSysCache1(), SearchSysCacheList1, and catctup::tuple.

Referenced by hashhandler().

Data Structures

Macros

Typedefs

Functions

Macro Definition Documentation

◆ ALL_SET

◆ BITS_PER_MAP

◆ BMPG_MASK

◆ BMPG_SHIFT

◆ BMPGSZ_BIT

◆ BMPGSZ_BYTE

◆ BUCKET_TO_BLKNO

◆ BYTE_TO_BIT

◆ CLRBIT

◆ H_BUCKET_BEING_POPULATED

◆ H_BUCKET_BEING_SPLIT

◆ H_HAS_DEAD_TUPLES

◆ H_NEEDS_SPLIT_CLEANUP

◆ HASH_DEFAULT_FILLFACTOR

◆ HASH_MAGIC

◆ HASH_MAX_BITMAPS

◆ HASH_MAX_SPLITPOINT_GROUP

◆ HASH_MAX_SPLITPOINTS

◆ HASH_METAPAGE

◆ HASH_MIN_FILLFACTOR

◆ HASH_NOLOCK

◆ HASH_READ

◆ HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE

◆ HASH_SPLITPOINT_PHASE_BITS

◆ HASH_SPLITPOINT_PHASE_MASK

◆ HASH_SPLITPOINT_PHASES_PER_GRP

◆ HASH_VERSION

◆ HASH_WRITE

◆ HASHEXTENDED_PROC

◆ HashGetFillFactor

◆ HashGetMaxBitmapSize

◆ HashGetTargetPageUsage

◆ HashMaxItemSize

◆ HASHNProcs

◆ HASHO_PAGE_ID

◆ HASHOPTIONS_PROC

◆ HashPageGetBitmap

◆ HashPageGetMeta

◆ HashPageGetOpaque

◆ HashScanPosInvalidate

◆ HashScanPosIsPinned

◆ HashScanPosIsValid

◆ HASHSTANDARD_PROC

◆ INDEX_MOVED_BY_SPLIT_MASK

◆ InvalidBucket

◆ ISSET

◆ LH_BITMAP_PAGE

◆ LH_BUCKET_BEING_POPULATED

◆ LH_BUCKET_BEING_SPLIT

◆ LH_BUCKET_NEEDS_SPLIT_CLEANUP

◆ LH_BUCKET_PAGE

◆ LH_META_PAGE

◆ LH_OVERFLOW_PAGE

◆ LH_PAGE_HAS_DEAD_TUPLES

◆ LH_PAGE_TYPE

◆ LH_UNUSED_PAGE

◆ SETBIT

Typedef Documentation

◆ Bucket

◆ HashMetaPage

◆ HashMetaPageData

◆ HashOptions

◆ HashPageOpaque

◆ HashPageOpaqueData

◆ HashScanOpaque

◆ HashScanOpaqueData

◆ HashScanPosData

◆ HashScanPosItem

◆ HSpool

Function Documentation

◆ _h_indexbuild()

◆ _h_spool()

◆ _h_spooldestroy()

◆ _h_spoolinit()

◆ _hash_addovflpage()