PostgreSQL Source Code  git master
hashfunc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashfunc.c
4  * Support functions for hash access method.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashfunc.c
12  *
13  * NOTES
14  * These functions are stored in pg_amproc. For each operator class
15  * defined for hash indexes, they compute the hash value of the argument.
16  *
17  * Additional hash functions appear in /utils/adt/ files for various
18  * specialized datatypes.
19  *
20  * It is expected that every bit of a hash function's 32-bit result is
21  * as random as every other; failure to ensure this is likely to lead
22  * to poor performance of hash joins, for example. In most cases a hash
23  * function should use hash_any() or its variant hash_uint32().
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "utils/builtins.h"
32 #include "utils/hashutils.h"
33 #include "utils/pg_locale.h"
34 
35 /*
36  * Datatype-specific hash functions.
37  *
38  * These support both hash indexes and hash joins.
39  *
40  * NOTE: some of these are also used by catcache operations, without
41  * any direct connection to hash indexes. The common hash_any
42  * routine is likewise used by dynahash tables.
43  */
44 
45 /* Note: this is used for both "char" and boolean datatypes */
46 Datum
48 {
49  return hash_uint32((int32) PG_GETARG_CHAR(0));
50 }
51 
52 Datum
54 {
56 }
57 
58 Datum
60 {
61  return hash_uint32((int32) PG_GETARG_INT16(0));
62 }
63 
64 Datum
66 {
68 }
69 
70 Datum
72 {
73  return hash_uint32(PG_GETARG_INT32(0));
74 }
75 
76 Datum
78 {
80 }
81 
82 Datum
84 {
85  /*
86  * The idea here is to produce a hash value compatible with the values
87  * produced by hashint4 and hashint2 for logically equal inputs; this is
88  * necessary to support cross-type hash joins across these input types.
89  * Since all three types are signed, we can xor the high half of the int8
90  * value if the sign is positive, or the complement of the high half when
91  * the sign is negative.
92  */
93  int64 val = PG_GETARG_INT64(0);
94  uint32 lohalf = (uint32) val;
95  uint32 hihalf = (uint32) (val >> 32);
96 
97  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98 
99  return hash_uint32(lohalf);
100 }
101 
102 Datum
104 {
105  /* Same approach as hashint8 */
106  int64 val = PG_GETARG_INT64(0);
107  uint32 lohalf = (uint32) val;
108  uint32 hihalf = (uint32) (val >> 32);
109 
110  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111 
112  return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113 }
114 
115 Datum
117 {
118  return hash_uint32((uint32) PG_GETARG_OID(0));
119 }
120 
121 Datum
123 {
125 }
126 
127 Datum
129 {
130  return hash_uint32((uint32) PG_GETARG_OID(0));
131 }
132 
133 Datum
135 {
137 }
138 
139 Datum
141 {
143  float8 key8;
144 
145  /*
146  * On IEEE-float machines, minus zero and zero have different bit patterns
147  * but should compare as equal. We must ensure that they have the same
148  * hash value, which is most reliably done this way:
149  */
150  if (key == (float4) 0)
151  PG_RETURN_UINT32(0);
152 
153  /*
154  * To support cross-type hashing of float8 and float4, we want to return
155  * the same hash value hashfloat8 would produce for an equal float8 value.
156  * So, widen the value to float8 and hash that. (We must do this rather
157  * than have hashfloat8 try to narrow its value to float4; that could fail
158  * on overflow.)
159  */
160  key8 = key;
161 
162  return hash_any((unsigned char *) &key8, sizeof(key8));
163 }
164 
165 Datum
167 {
169  uint64 seed = PG_GETARG_INT64(1);
170  float8 key8;
171 
172  /* Same approach as hashfloat4 */
173  if (key == (float4) 0)
174  PG_RETURN_UINT64(seed);
175  key8 = key;
176 
177  return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
178 }
179 
180 Datum
182 {
184 
185  /*
186  * On IEEE-float machines, minus zero and zero have different bit patterns
187  * but should compare as equal. We must ensure that they have the same
188  * hash value, which is most reliably done this way:
189  */
190  if (key == (float8) 0)
191  PG_RETURN_UINT32(0);
192 
193  return hash_any((unsigned char *) &key, sizeof(key));
194 }
195 
196 Datum
198 {
200  uint64 seed = PG_GETARG_INT64(1);
201 
202  /* Same approach as hashfloat8 */
203  if (key == (float8) 0)
204  PG_RETURN_UINT64(seed);
205 
206  return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
207 }
208 
209 Datum
211 {
213 
214  return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
215 }
216 
217 Datum
219 {
221 
222  return hash_any_extended((unsigned char *) key->values,
223  key->dim1 * sizeof(Oid),
224  PG_GETARG_INT64(1));
225 }
226 
227 Datum
229 {
230  char *key = NameStr(*PG_GETARG_NAME(0));
231 
232  return hash_any((unsigned char *) key, strlen(key));
233 }
234 
235 Datum
237 {
238  char *key = NameStr(*PG_GETARG_NAME(0));
239 
240  return hash_any_extended((unsigned char *) key, strlen(key),
241  PG_GETARG_INT64(1));
242 }
243 
244 Datum
246 {
247  text *key = PG_GETARG_TEXT_PP(0);
248  Oid collid = PG_GET_COLLATION();
249  pg_locale_t mylocale = 0;
250  Datum result;
251 
252  if (!collid)
253  ereport(ERROR,
254  (errcode(ERRCODE_INDETERMINATE_COLLATION),
255  errmsg("could not determine which collation to use for string hashing"),
256  errhint("Use the COLLATE clause to set the collation explicitly.")));
257 
258  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
259  mylocale = pg_newlocale_from_collation(collid);
260 
261  if (!mylocale || mylocale->deterministic)
262  {
263  result = hash_any((unsigned char *) VARDATA_ANY(key),
264  VARSIZE_ANY_EXHDR(key));
265  }
266  else
267  {
268 #ifdef USE_ICU
269  if (mylocale->provider == COLLPROVIDER_ICU)
270  {
271  int32_t ulen = -1;
272  UChar *uchar = NULL;
273  Size bsize;
274  uint8_t *buf;
275 
276  ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
277 
278  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
279  uchar, ulen, NULL, 0);
280  buf = palloc(bsize);
281  ucol_getSortKey(mylocale->info.icu.ucol,
282  uchar, ulen, buf, bsize);
283 
284  result = hash_any(buf, bsize);
285 
286  pfree(buf);
287  }
288  else
289 #endif
290  /* shouldn't happen */
291  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
292  }
293 
294  /* Avoid leaking memory for toasted inputs */
295  PG_FREE_IF_COPY(key, 0);
296 
297  return result;
298 }
299 
300 Datum
302 {
303  text *key = PG_GETARG_TEXT_PP(0);
304  Oid collid = PG_GET_COLLATION();
305  pg_locale_t mylocale = 0;
306  Datum result;
307 
308  if (!collid)
309  ereport(ERROR,
310  (errcode(ERRCODE_INDETERMINATE_COLLATION),
311  errmsg("could not determine which collation to use for string hashing"),
312  errhint("Use the COLLATE clause to set the collation explicitly.")));
313 
314  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
315  mylocale = pg_newlocale_from_collation(collid);
316 
317  if (!mylocale || mylocale->deterministic)
318  {
319  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
320  VARSIZE_ANY_EXHDR(key),
321  PG_GETARG_INT64(1));
322  }
323  else
324  {
325 #ifdef USE_ICU
326  if (mylocale->provider == COLLPROVIDER_ICU)
327  {
328  int32_t ulen = -1;
329  UChar *uchar = NULL;
330  Size bsize;
331  uint8_t *buf;
332 
333  ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
334 
335  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
336  uchar, ulen, NULL, 0);
337  buf = palloc(bsize);
338  ucol_getSortKey(mylocale->info.icu.ucol,
339  uchar, ulen, buf, bsize);
340 
341  result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
342 
343  pfree(buf);
344  }
345  else
346 #endif
347  /* shouldn't happen */
348  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
349  }
350 
351  PG_FREE_IF_COPY(key, 0);
352 
353  return result;
354 }
355 
356 /*
357  * hashvarlena() can be used for any varlena datatype in which there are
358  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
359  */
360 Datum
362 {
363  struct varlena *key = PG_GETARG_VARLENA_PP(0);
364  Datum result;
365 
366  result = hash_any((unsigned char *) VARDATA_ANY(key),
367  VARSIZE_ANY_EXHDR(key));
368 
369  /* Avoid leaking memory for toasted inputs */
370  PG_FREE_IF_COPY(key, 0);
371 
372  return result;
373 }
374 
375 Datum
377 {
378  struct varlena *key = PG_GETARG_VARLENA_PP(0);
379  Datum result;
380 
381  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
382  VARSIZE_ANY_EXHDR(key),
383  PG_GETARG_INT64(1));
384 
385  PG_FREE_IF_COPY(key, 0);
386 
387  return result;
388 }
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:276
union pg_locale_struct::@144 info
Definition: c.h:589
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
Datum hashoid(PG_FUNCTION_ARGS)
Definition: hashfunc.c:116
int errhint(const char *fmt,...)
Definition: elog.c:1071
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
Datum hashname(PG_FUNCTION_ARGS)
Definition: hashfunc.c:228
Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.c:148
Datum hashint8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:83
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:65
int errcode(int sqlerrcode)
Definition: elog.c:608
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.c:374
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:103
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_RETURN_UINT64(x)
Definition: fmgr.h:358
#define PG_GET_COLLATION()
Definition: fmgr.h:193
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:376
#define PG_RETURN_UINT32(x)
Definition: fmgr.h:345
signed int int32
Definition: c.h:347
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:134
Datum hashchar(PG_FUNCTION_ARGS)
Definition: hashfunc.c:47
void pfree(void *pointer)
Definition: mcxt.c:1056
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition: hashfunc.c:361
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:492
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1176
Datum hash_uint32(uint32 k)
Definition: hashfn.c:613
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:301
Datum hashtext(PG_FUNCTION_ARGS)
Definition: hashfunc.c:245
#define PG_GETARG_VARLENA_PP(n)
Definition: fmgr.h:283
static char * buf
Definition: pg_test_fsync.c:67
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
Oid values[FLEXIBLE_ARRAY_MEMBER]
Definition: c.h:597
int dim1
Definition: c.h:595
unsigned int uint32
Definition: c.h:359
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:218
Datum hashint4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:71
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:140
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:275
#define ereport(elevel, rest)
Definition: elog.h:141
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:181
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:77
float float4
Definition: c.h:491
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1323
uintptr_t Datum
Definition: postgres.h:367
bool deterministic
Definition: pg_locale.h:85
#define PG_GETARG_INT16(n)
Definition: fmgr.h:266
Datum hashenum(PG_FUNCTION_ARGS)
Definition: hashfunc.c:128
size_t Size
Definition: c.h:467
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:255
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:122
Datum hashint2(PG_FUNCTION_ARGS)
Definition: hashfunc.c:59
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:53
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition: hashfunc.c:210
#define elog(elevel,...)
Definition: elog.h:228
#define NameStr(name)
Definition: c.h:610
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:166
Definition: c.h:550
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define PG_GETARG_INT64(n)
Definition: fmgr.h:277
Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition: hashfn.c:634
long val
Definition: informix.c:684
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:236
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:268
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:197