PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
hashutil.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashutil.c
4  * Utility code for Postgres hash implementation.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashutil.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/hash.h"
18 #include "access/reloptions.h"
19 #include "access/relscan.h"
20 #include "utils/lsyscache.h"
21 #include "utils/rel.h"
22 
23 #define CALC_NEW_BUCKET(old_bucket, lowmask) \
24  old_bucket | (lowmask + 1)
25 
26 /*
27  * _hash_checkqual -- does the index tuple satisfy the scan conditions?
28  */
29 bool
31 {
32  /*
33  * Currently, we can't check any of the scan conditions since we do not
34  * have the original index entry value to supply to the sk_func. Always
35  * return true; we expect that hashgettuple already set the recheck flag
36  * to make the main indexscan code do it.
37  */
38 #ifdef NOT_USED
39  TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
40  ScanKey key = scan->keyData;
41  int scanKeySize = scan->numberOfKeys;
42 
43  while (scanKeySize > 0)
44  {
45  Datum datum;
46  bool isNull;
47  Datum test;
48 
49  datum = index_getattr(itup,
50  key->sk_attno,
51  tupdesc,
52  &isNull);
53 
54  /* assume sk_func is strict */
55  if (isNull)
56  return false;
57  if (key->sk_flags & SK_ISNULL)
58  return false;
59 
60  test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
61  datum, key->sk_argument);
62 
63  if (!DatumGetBool(test))
64  return false;
65 
66  key++;
67  scanKeySize--;
68  }
69 #endif
70 
71  return true;
72 }
73 
74 /*
75  * _hash_datum2hashkey -- given a Datum, call the index's hash procedure
76  *
77  * The Datum is assumed to be of the index's column type, so we can use the
78  * "primary" hash procedure that's tracked for us by the generic index code.
79  */
80 uint32
82 {
83  FmgrInfo *procinfo;
84  Oid collation;
85 
86  /* XXX assumes index has only one attribute */
87  procinfo = index_getprocinfo(rel, 1, HASHPROC);
88  collation = rel->rd_indcollation[0];
89 
90  return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
91 }
92 
93 /*
94  * _hash_datum2hashkey_type -- given a Datum of a specified type,
95  * hash it in a fashion compatible with this index
96  *
97  * This is much more expensive than _hash_datum2hashkey, so use it only in
98  * cross-type situations.
99  */
100 uint32
102 {
103  RegProcedure hash_proc;
104  Oid collation;
105 
106  /* XXX assumes index has only one attribute */
107  hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
108  keytype,
109  keytype,
110  HASHPROC);
111  if (!RegProcedureIsValid(hash_proc))
112  elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
113  HASHPROC, keytype, keytype,
115  collation = rel->rd_indcollation[0];
116 
117  return DatumGetUInt32(OidFunctionCall1Coll(hash_proc, collation, key));
118 }
119 
120 /*
121  * _hash_hashkey2bucket -- determine which bucket the hashkey maps to.
122  */
123 Bucket
124 _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
125  uint32 highmask, uint32 lowmask)
126 {
127  Bucket bucket;
128 
129  bucket = hashkey & highmask;
130  if (bucket > maxbucket)
131  bucket = bucket & lowmask;
132 
133  return bucket;
134 }
135 
136 /*
137  * _hash_log2 -- returns ceil(lg2(num))
138  */
139 uint32
141 {
142  uint32 i,
143  limit;
144 
145  limit = 1;
146  for (i = 0; limit < num; limit <<= 1, i++)
147  ;
148  return i;
149 }
150 
151 /*
152  * _hash_checkpage -- sanity checks on the format of all hash pages
153  *
154  * If flags is not zero, it is a bitwise OR of the acceptable values of
155  * hasho_flag.
156  */
157 void
159 {
160  Page page = BufferGetPage(buf);
161 
162  /*
163  * ReadBuffer verifies that every newly-read page passes
164  * PageHeaderIsValid, which means it either contains a reasonably sane
165  * page header or is all-zero. We have to defend against the all-zero
166  * case, however.
167  */
168  if (PageIsNew(page))
169  ereport(ERROR,
170  (errcode(ERRCODE_INDEX_CORRUPTED),
171  errmsg("index \"%s\" contains unexpected zero page at block %u",
173  BufferGetBlockNumber(buf)),
174  errhint("Please REINDEX it.")));
175 
176  /*
177  * Additionally check that the special area looks sane.
178  */
179  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
180  ereport(ERROR,
181  (errcode(ERRCODE_INDEX_CORRUPTED),
182  errmsg("index \"%s\" contains corrupted page at block %u",
184  BufferGetBlockNumber(buf)),
185  errhint("Please REINDEX it.")));
186 
187  if (flags)
188  {
190 
191  if ((opaque->hasho_flag & flags) == 0)
192  ereport(ERROR,
193  (errcode(ERRCODE_INDEX_CORRUPTED),
194  errmsg("index \"%s\" contains corrupted page at block %u",
196  BufferGetBlockNumber(buf)),
197  errhint("Please REINDEX it.")));
198  }
199 
200  /*
201  * When checking the metapage, also verify magic number and version.
202  */
203  if (flags == LH_META_PAGE)
204  {
205  HashMetaPage metap = HashPageGetMeta(page);
206 
207  if (metap->hashm_magic != HASH_MAGIC)
208  ereport(ERROR,
209  (errcode(ERRCODE_INDEX_CORRUPTED),
210  errmsg("index \"%s\" is not a hash index",
211  RelationGetRelationName(rel))));
212 
213  if (metap->hashm_version != HASH_VERSION)
214  ereport(ERROR,
215  (errcode(ERRCODE_INDEX_CORRUPTED),
216  errmsg("index \"%s\" has wrong hash version",
218  errhint("Please REINDEX it.")));
219  }
220 }
221 
222 bytea *
223 hashoptions(Datum reloptions, bool validate)
224 {
225  return default_reloptions(reloptions, validate, RELOPT_KIND_HASH);
226 }
227 
228 /*
229  * _hash_get_indextuple_hashkey - get the hash index tuple's hash key value
230  */
231 uint32
233 {
234  char *attp;
235 
236  /*
237  * We assume the hash key is the first attribute and can't be null, so
238  * this can be done crudely but very very cheaply ...
239  */
240  attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
241  return *((uint32 *) attp);
242 }
243 
244 /*
245  * _hash_convert_tuple - convert raw index data to hash key
246  *
247  * Inputs: values and isnull arrays for the user data column(s)
248  * Outputs: values and isnull arrays for the index tuple, suitable for
249  * passing to index_form_tuple().
250  *
251  * Returns true if successful, false if not (because there are null values).
252  * On a false result, the given data need not be indexed.
253  *
254  * Note: callers know that the index-column arrays are always of length 1.
255  * In principle, there could be more than one input column, though we do not
256  * currently support that.
257  */
258 bool
260  Datum *user_values, bool *user_isnull,
261  Datum *index_values, bool *index_isnull)
262 {
263  uint32 hashkey;
264 
265  /*
266  * We do not insert null values into hash indexes. This is okay because
267  * the only supported search operator is '=', and we assume it is strict.
268  */
269  if (user_isnull[0])
270  return false;
271 
272  hashkey = _hash_datum2hashkey(index, user_values[0]);
273  index_values[0] = UInt32GetDatum(hashkey);
274  index_isnull[0] = false;
275  return true;
276 }
277 
278 /*
279  * _hash_binsearch - Return the offset number in the page where the
280  * specified hash value should be sought or inserted.
281  *
282  * We use binary search, relying on the assumption that the existing entries
283  * are ordered by hash key.
284  *
285  * Returns the offset of the first index entry having hashkey >= hash_value,
286  * or the page's max offset plus one if hash_value is greater than all
287  * existing hash keys in the page. This is the appropriate place to start
288  * a search, or to insert a new item.
289  */
291 _hash_binsearch(Page page, uint32 hash_value)
292 {
295 
296  /* Loop invariant: lower <= desired place <= upper */
297  upper = PageGetMaxOffsetNumber(page) + 1;
298  lower = FirstOffsetNumber;
299 
300  while (upper > lower)
301  {
302  OffsetNumber off;
303  IndexTuple itup;
304  uint32 hashkey;
305 
306  off = (upper + lower) / 2;
308 
309  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
310  hashkey = _hash_get_indextuple_hashkey(itup);
311  if (hashkey < hash_value)
312  lower = off + 1;
313  else
314  upper = off;
315  }
316 
317  return lower;
318 }
319 
320 /*
321  * _hash_binsearch_last
322  *
323  * Same as above, except that if there are multiple matching items in the
324  * page, we return the offset of the last one instead of the first one,
325  * and the possible range of outputs is 0..maxoffset not 1..maxoffset+1.
326  * This is handy for starting a new page in a backwards scan.
327  */
330 {
333 
334  /* Loop invariant: lower <= desired place <= upper */
335  upper = PageGetMaxOffsetNumber(page);
336  lower = FirstOffsetNumber - 1;
337 
338  while (upper > lower)
339  {
340  IndexTuple itup;
341  OffsetNumber off;
342  uint32 hashkey;
343 
344  off = (upper + lower + 1) / 2;
346 
347  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
348  hashkey = _hash_get_indextuple_hashkey(itup);
349  if (hashkey > hash_value)
350  upper = off - 1;
351  else
352  lower = off;
353  }
354 
355  return lower;
356 }
357 
358 /*
359  * _hash_get_oldblock_from_newbucket() -- get the block number of a bucket
360  * from which current (new) bucket is being split.
361  */
364 {
365  Bucket old_bucket;
366  uint32 mask;
367  Buffer metabuf;
368  HashMetaPage metap;
369  BlockNumber blkno;
370 
371  /*
372  * To get the old bucket from the current bucket, we need a mask to modulo
373  * into lower half of table. This mask is stored in meta page as
374  * hashm_lowmask, but here we can't rely on the same, because we need a
375  * value of lowmask that was prevalent at the time when bucket split was
376  * started. Masking the most significant bit of new bucket would give us
377  * old bucket.
378  */
379  mask = (((uint32) 1) << (fls(new_bucket) - 1)) - 1;
380  old_bucket = new_bucket & mask;
381 
383  metap = HashPageGetMeta(BufferGetPage(metabuf));
384 
385  blkno = BUCKET_TO_BLKNO(metap, old_bucket);
386 
387  _hash_relbuf(rel, metabuf);
388 
389  return blkno;
390 }
391 
392 /*
393  * _hash_get_newblock_from_oldbucket() -- get the block number of a bucket
394  * that will be generated after split from old bucket.
395  *
396  * This is used to find the new bucket from old bucket based on current table
397  * half. It is mainly required to finish the incomplete splits where we are
398  * sure that not more than one bucket could have split in progress from old
399  * bucket.
400  */
403 {
404  Bucket new_bucket;
405  Buffer metabuf;
406  HashMetaPage metap;
407  BlockNumber blkno;
408 
410  metap = HashPageGetMeta(BufferGetPage(metabuf));
411 
412  new_bucket = _hash_get_newbucket_from_oldbucket(rel, old_bucket,
413  metap->hashm_lowmask,
414  metap->hashm_maxbucket);
415  blkno = BUCKET_TO_BLKNO(metap, new_bucket);
416 
417  _hash_relbuf(rel, metabuf);
418 
419  return blkno;
420 }
421 
422 /*
423  * _hash_get_newbucket_from_oldbucket() -- get the new bucket that will be
424  * generated after split from current (old) bucket.
425  *
426  * This is used to find the new bucket from old bucket. New bucket can be
427  * obtained by OR'ing old bucket with most significant bit of current table
428  * half (lowmask passed in this function can be used to identify msb of
429  * current table half). There could be multiple buckets that could have
430  * been split from current bucket. We need the first such bucket that exists.
431  * Caller must ensure that no more than one split has happened from old
432  * bucket.
433  */
434 Bucket
436  uint32 lowmask, uint32 maxbucket)
437 {
438  Bucket new_bucket;
439 
440  new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
441  if (new_bucket > maxbucket)
442  {
443  lowmask = lowmask >> 1;
444  new_bucket = CALC_NEW_BUCKET(old_bucket, lowmask);
445  }
446 
447  return new_bucket;
448 }
bytea * hashoptions(Datum reloptions, bool validate)
Definition: hashutil.c:223
#define DatumGetUInt32(X)
Definition: postgres.h:494
Definition: fmgr.h:53
#define IndexInfoFindDataOffset(t_info)
Definition: itup.h:80
int errhint(const char *fmt,...)
Definition: elog.c:987
OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value)
Definition: hashutil.c:329
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:855
static void test(void)
#define RelationGetDescr(relation)
Definition: rel.h:425
Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition: hashutil.c:124
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:43
#define LH_META_PAGE
Definition: hash.h:56
regproc RegProcedure
Definition: c.h:392
uint32 hashm_magic
Definition: hash.h:176
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
uint32 _hash_datum2hashkey(Relation rel, Datum key)
Definition: hashutil.c:81
#define CALC_NEW_BUCKET(old_bucket, lowmask)
Definition: hashutil.c:23
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:74
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition: hashpage.c:76
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1306
#define HASH_VERSION
Definition: hash.h:149
unsigned int Oid
Definition: postgres_ext.h:31
uint32 hashm_lowmask
Definition: hash.h:186
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
#define BUCKET_TO_BLKNO(metap, B)
Definition: hash.h:38
Relation indexRelation
Definition: relscan.h:89
int fls(int mask)
Definition: fls.c:55
uint16 OffsetNumber
Definition: off.h:24
#define HASH_MAGIC
Definition: hash.h:148
Definition: type.h:90
#define HASH_READ
Definition: hash.h:254
uint32 Bucket
Definition: hash.h:34
bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
Definition: hashutil.c:30
Oid * rd_indcollation
Definition: rel.h:189
#define ERROR
Definition: elog.h:43
uint32 _hash_get_indextuple_hashkey(IndexTuple itup)
Definition: hashutil.c:232
uint32 hashm_version
Definition: hash.h:177
bool _hash_convert_tuple(Relation index, Datum *user_values, bool *user_isnull, Datum *index_values, bool *index_isnull)
Definition: hashutil.c:259
static char * buf
Definition: pg_test_fsync.c:65
#define RegProcedureIsValid(p)
Definition: c.h:536
#define FirstOffsetNumber
Definition: off.h:27
IndexTupleData * IndexTuple
Definition: itup.h:53
Bucket _hash_get_newbucket_from_oldbucket(Relation rel, Bucket old_bucket, uint32 lowmask, uint32 maxbucket)
Definition: hashutil.c:435
FmgrInfo sk_func
Definition: skey.h:71
#define DatumGetBool(X)
Definition: postgres.h:401
#define RelationGetRelationName(relation)
Definition: rel.h:433
unsigned int uint32
Definition: c.h:265
#define UInt32GetDatum(X)
Definition: postgres.h:501
Oid * rd_opfamily
Definition: rel.h:178
#define SK_ISNULL
Definition: skey.h:115
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
void _hash_checkpage(Relation rel, Buffer buf, int flags)
Definition: hashutil.c:158
#define HASH_METAPAGE
Definition: hash.h:146
uintptr_t Datum
Definition: postgres.h:374
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1286
void _hash_relbuf(Relation rel, Buffer buf)
Definition: hashpage.c:242
BlockNumber _hash_get_newblock_from_oldbucket(Relation rel, Bucket old_bucket)
Definition: hashutil.c:402
Datum OidFunctionCall1Coll(Oid functionId, Oid collation, Datum arg1)
Definition: fmgr.c:1578
OffsetNumber _hash_binsearch(Page page, uint32 hash_value)
Definition: hashutil.c:291
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:671
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:744
#define PageGetSpecialPointer(page)
Definition: bufpage.h:323
HashPageOpaqueData * HashPageOpaque
Definition: hash.h:84
#define MAXALIGN(LEN)
Definition: c.h:584
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
ScanKey keyData
Definition: relscan.h:93
#define PageGetSpecialSize(page)
Definition: bufpage.h:297
uint32 hashm_maxbucket
Definition: hash.h:184
BlockNumber _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
Definition: hashutil.c:363
uint16 hasho_flag
Definition: hash.h:80
bytea * default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
Definition: reloptions.c:1275
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2588
#define PageIsNew(page)
Definition: bufpage.h:226
#define HashPageGetMeta(page)
Definition: hash.h:238
uint32 _hash_log2(uint32 num)
Definition: hashutil.c:140
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define HASHPROC
Definition: hash.h:269
Oid sk_collation
Definition: skey.h:70
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:40
Definition: c.h:435
uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
Definition: hashutil.c:101
#define elog
Definition: elog.h:219
unsigned short t_info
Definition: itup.h:49
int Buffer
Definition: buf.h:23
Datum sk_argument
Definition: skey.h:72
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
AttrNumber sk_attno
Definition: skey.h:67
Pointer Page
Definition: bufpage.h:74