PostgreSQL Source Code  git master
varchar.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varchar.c
4  * Functions for the built-in types char(n) and varchar(n).
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/varchar.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/detoast.h"
18 #include "catalog/pg_collation.h"
19 #include "catalog/pg_type.h"
20 #include "libpq/pqformat.h"
21 #include "mb/pg_wchar.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/supportnodes.h"
24 #include "utils/array.h"
25 #include "utils/builtins.h"
26 #include "utils/hashutils.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
29 #include "utils/varlena.h"
30 
31 /* common code for bpchartypmodin and varchartypmodin */
32 static int32
33 anychar_typmodin(ArrayType *ta, const char *typename)
34 {
35  int32 typmod;
36  int32 *tl;
37  int n;
38 
39  tl = ArrayGetIntegerTypmods(ta, &n);
40 
41  /*
42  * we're not too tense about good error message here because grammar
43  * shouldn't allow wrong number of modifiers for CHAR
44  */
45  if (n != 1)
46  ereport(ERROR,
47  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48  errmsg("invalid type modifier")));
49 
50  if (*tl < 1)
51  ereport(ERROR,
52  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53  errmsg("length for type %s must be at least 1", typename)));
54  if (*tl > MaxAttrSize)
55  ereport(ERROR,
56  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57  errmsg("length for type %s cannot exceed %d",
58  typename, MaxAttrSize)));
59 
60  /*
61  * For largely historical reasons, the typmod is VARHDRSZ plus the number
62  * of characters; there is enough client-side code that knows about that
63  * that we'd better not change it.
64  */
65  typmod = VARHDRSZ + *tl;
66 
67  return typmod;
68 }
69 
70 /* common code for bpchartypmodout and varchartypmodout */
71 static char *
73 {
74  char *res = (char *) palloc(64);
75 
76  if (typmod > VARHDRSZ)
77  snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78  else
79  *res = '\0';
80 
81  return res;
82 }
83 
84 
85 /*
86  * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
87  * is for blank-padded string whose length is specified in CREATE TABLE.
88  * VARCHAR is for storing string whose length is at most the length specified
89  * at CREATE TABLE time.
90  *
91  * It's hard to implement these types because we cannot figure out
92  * the length of the type from the type itself. I changed (hopefully all) the
93  * fmgr calls that invoke input functions of a data type to supply the
94  * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95  * the length of the attributes and hence the exact length of the char() or
96  * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97  * we cannot determine the length, we pass in -1 instead and the input
98  * converter does not enforce any length check.
99  *
100  * We actually implement this as a varlena so that we don't have to pass in
101  * the length for the comparison functions. (The difference between these
102  * types and "text" is that we truncate and possibly blank-pad the string
103  * at insertion time.)
104  *
105  * - ay 6/95
106  */
107 
108 
109 /*****************************************************************************
110  * bpchar - char() *
111  *****************************************************************************/
112 
113 /*
114  * bpchar_input -- common guts of bpcharin and bpcharrecv
115  *
116  * s is the input text of length len (may not be null-terminated)
117  * atttypmod is the typmod value to apply
118  *
119  * Note that atttypmod is measured in characters, which
120  * is not necessarily the same as the number of bytes.
121  *
122  * If the input string is too long, raise an error, unless the extra
123  * characters are spaces, in which case they're truncated. (per SQL)
124  */
125 static BpChar *
126 bpchar_input(const char *s, size_t len, int32 atttypmod)
127 {
128  BpChar *result;
129  char *r;
130  size_t maxlen;
131 
132  /* If typmod is -1 (or invalid), use the actual string length */
133  if (atttypmod < (int32) VARHDRSZ)
134  maxlen = len;
135  else
136  {
137  size_t charlen; /* number of CHARACTERS in the input */
138 
139  maxlen = atttypmod - VARHDRSZ;
140  charlen = pg_mbstrlen_with_len(s, len);
141  if (charlen > maxlen)
142  {
143  /* Verify that extra characters are spaces, and clip them off */
144  size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
145  size_t j;
146 
147  /*
148  * at this point, len is the actual BYTE length of the input
149  * string, maxlen is the max number of CHARACTERS allowed for this
150  * bpchar type, mbmaxlen is the length in BYTES of those chars.
151  */
152  for (j = mbmaxlen; j < len; j++)
153  {
154  if (s[j] != ' ')
155  ereport(ERROR,
156  (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
157  errmsg("value too long for type character(%d)",
158  (int) maxlen)));
159  }
160 
161  /*
162  * Now we set maxlen to the necessary byte length, not the number
163  * of CHARACTERS!
164  */
165  maxlen = len = mbmaxlen;
166  }
167  else
168  {
169  /*
170  * Now we set maxlen to the necessary byte length, not the number
171  * of CHARACTERS!
172  */
173  maxlen = len + (maxlen - charlen);
174  }
175  }
176 
177  result = (BpChar *) palloc(maxlen + VARHDRSZ);
178  SET_VARSIZE(result, maxlen + VARHDRSZ);
179  r = VARDATA(result);
180  memcpy(r, s, len);
181 
182  /* blank pad the string if necessary */
183  if (maxlen > len)
184  memset(r + len, ' ', maxlen - len);
185 
186  return result;
187 }
188 
189 /*
190  * Convert a C string to CHARACTER internal representation. atttypmod
191  * is the declared length of the type plus VARHDRSZ.
192  */
193 Datum
195 {
196  char *s = PG_GETARG_CSTRING(0);
197 
198 #ifdef NOT_USED
199  Oid typelem = PG_GETARG_OID(1);
200 #endif
201  int32 atttypmod = PG_GETARG_INT32(2);
202  BpChar *result;
203 
204  result = bpchar_input(s, strlen(s), atttypmod);
205  PG_RETURN_BPCHAR_P(result);
206 }
207 
208 
209 /*
210  * Convert a CHARACTER value to a C string.
211  *
212  * Uses the text conversion functions, which is only appropriate if BpChar
213  * and text are equivalent types.
214  */
215 Datum
217 {
218  Datum txt = PG_GETARG_DATUM(0);
219 
221 }
222 
223 /*
224  * bpcharrecv - converts external binary format to bpchar
225  */
226 Datum
228 {
230 
231 #ifdef NOT_USED
232  Oid typelem = PG_GETARG_OID(1);
233 #endif
234  int32 atttypmod = PG_GETARG_INT32(2);
235  BpChar *result;
236  char *str;
237  int nbytes;
238 
239  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
240  result = bpchar_input(str, nbytes, atttypmod);
241  pfree(str);
242  PG_RETURN_BPCHAR_P(result);
243 }
244 
245 /*
246  * bpcharsend - converts bpchar to binary format
247  */
248 Datum
250 {
251  /* Exactly the same as textsend, so share code */
252  return textsend(fcinfo);
253 }
254 
255 
256 /*
257  * Converts a CHARACTER type to the specified size.
258  *
259  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
260  * isExplicit is true if this is for an explicit cast to char(N).
261  *
262  * Truncation rules: for an explicit cast, silently truncate to the given
263  * length; for an implicit cast, raise error unless extra characters are
264  * all spaces. (This is sort-of per SQL: the spec would actually have us
265  * raise a "completion condition" for the explicit cast case, but Postgres
266  * hasn't got such a concept.)
267  */
268 Datum
270 {
271  BpChar *source = PG_GETARG_BPCHAR_PP(0);
272  int32 maxlen = PG_GETARG_INT32(1);
273  bool isExplicit = PG_GETARG_BOOL(2);
274  BpChar *result;
275  int32 len;
276  char *r;
277  char *s;
278  int i;
279  int charlen; /* number of characters in the input string +
280  * VARHDRSZ */
281 
282  /* No work if typmod is invalid */
283  if (maxlen < (int32) VARHDRSZ)
284  PG_RETURN_BPCHAR_P(source);
285 
286  maxlen -= VARHDRSZ;
287 
288  len = VARSIZE_ANY_EXHDR(source);
289  s = VARDATA_ANY(source);
290 
291  charlen = pg_mbstrlen_with_len(s, len);
292 
293  /* No work if supplied data matches typmod already */
294  if (charlen == maxlen)
295  PG_RETURN_BPCHAR_P(source);
296 
297  if (charlen > maxlen)
298  {
299  /* Verify that extra characters are spaces, and clip them off */
300  size_t maxmblen;
301 
302  maxmblen = pg_mbcharcliplen(s, len, maxlen);
303 
304  if (!isExplicit)
305  {
306  for (i = maxmblen; i < len; i++)
307  if (s[i] != ' ')
308  ereport(ERROR,
309  (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
310  errmsg("value too long for type character(%d)",
311  maxlen)));
312  }
313 
314  len = maxmblen;
315 
316  /*
317  * At this point, maxlen is the necessary byte length, not the number
318  * of CHARACTERS!
319  */
320  maxlen = len;
321  }
322  else
323  {
324  /*
325  * At this point, maxlen is the necessary byte length, not the number
326  * of CHARACTERS!
327  */
328  maxlen = len + (maxlen - charlen);
329  }
330 
331  Assert(maxlen >= len);
332 
333  result = palloc(maxlen + VARHDRSZ);
334  SET_VARSIZE(result, maxlen + VARHDRSZ);
335  r = VARDATA(result);
336 
337  memcpy(r, s, len);
338 
339  /* blank pad the string if necessary */
340  if (maxlen > len)
341  memset(r + len, ' ', maxlen - len);
342 
343  PG_RETURN_BPCHAR_P(result);
344 }
345 
346 
347 /* char_bpchar()
348  * Convert char to bpchar(1).
349  */
350 Datum
352 {
353  char c = PG_GETARG_CHAR(0);
354  BpChar *result;
355 
356  result = (BpChar *) palloc(VARHDRSZ + 1);
357 
358  SET_VARSIZE(result, VARHDRSZ + 1);
359  *(VARDATA(result)) = c;
360 
361  PG_RETURN_BPCHAR_P(result);
362 }
363 
364 
365 /* bpchar_name()
366  * Converts a bpchar() type to a NameData type.
367  */
368 Datum
370 {
371  BpChar *s = PG_GETARG_BPCHAR_PP(0);
372  char *s_data;
373  Name result;
374  int len;
375 
376  len = VARSIZE_ANY_EXHDR(s);
377  s_data = VARDATA_ANY(s);
378 
379  /* Truncate oversize input */
380  if (len >= NAMEDATALEN)
381  len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
382 
383  /* Remove trailing blanks */
384  while (len > 0)
385  {
386  if (s_data[len - 1] != ' ')
387  break;
388  len--;
389  }
390 
391  /* We use palloc0 here to ensure result is zero-padded */
392  result = (Name) palloc0(NAMEDATALEN);
393  memcpy(NameStr(*result), s_data, len);
394 
395  PG_RETURN_NAME(result);
396 }
397 
398 /* name_bpchar()
399  * Converts a NameData type to a bpchar type.
400  *
401  * Uses the text conversion functions, which is only appropriate if BpChar
402  * and text are equivalent types.
403  */
404 Datum
406 {
407  Name s = PG_GETARG_NAME(0);
408  BpChar *result;
409 
410  result = (BpChar *) cstring_to_text(NameStr(*s));
411  PG_RETURN_BPCHAR_P(result);
412 }
413 
414 Datum
416 {
418 
419  PG_RETURN_INT32(anychar_typmodin(ta, "char"));
420 }
421 
422 Datum
424 {
425  int32 typmod = PG_GETARG_INT32(0);
426 
428 }
429 
430 
431 /*****************************************************************************
432  * varchar - varchar(n)
433  *
434  * Note: varchar piggybacks on type text for most operations, and so has no
435  * C-coded functions except for I/O and typmod checking.
436  *****************************************************************************/
437 
438 /*
439  * varchar_input -- common guts of varcharin and varcharrecv
440  *
441  * s is the input text of length len (may not be null-terminated)
442  * atttypmod is the typmod value to apply
443  *
444  * Note that atttypmod is measured in characters, which
445  * is not necessarily the same as the number of bytes.
446  *
447  * If the input string is too long, raise an error, unless the extra
448  * characters are spaces, in which case they're truncated. (per SQL)
449  *
450  * Uses the C string to text conversion function, which is only appropriate
451  * if VarChar and text are equivalent types.
452  */
453 static VarChar *
454 varchar_input(const char *s, size_t len, int32 atttypmod)
455 {
456  VarChar *result;
457  size_t maxlen;
458 
459  maxlen = atttypmod - VARHDRSZ;
460 
461  if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
462  {
463  /* Verify that extra characters are spaces, and clip them off */
464  size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
465  size_t j;
466 
467  for (j = mbmaxlen; j < len; j++)
468  {
469  if (s[j] != ' ')
470  ereport(ERROR,
471  (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
472  errmsg("value too long for type character varying(%d)",
473  (int) maxlen)));
474  }
475 
476  len = mbmaxlen;
477  }
478 
479  result = (VarChar *) cstring_to_text_with_len(s, len);
480  return result;
481 }
482 
483 /*
484  * Convert a C string to VARCHAR internal representation. atttypmod
485  * is the declared length of the type plus VARHDRSZ.
486  */
487 Datum
489 {
490  char *s = PG_GETARG_CSTRING(0);
491 
492 #ifdef NOT_USED
493  Oid typelem = PG_GETARG_OID(1);
494 #endif
495  int32 atttypmod = PG_GETARG_INT32(2);
496  VarChar *result;
497 
498  result = varchar_input(s, strlen(s), atttypmod);
499  PG_RETURN_VARCHAR_P(result);
500 }
501 
502 
503 /*
504  * Convert a VARCHAR value to a C string.
505  *
506  * Uses the text to C string conversion function, which is only appropriate
507  * if VarChar and text are equivalent types.
508  */
509 Datum
511 {
512  Datum txt = PG_GETARG_DATUM(0);
513 
515 }
516 
517 /*
518  * varcharrecv - converts external binary format to varchar
519  */
520 Datum
522 {
524 
525 #ifdef NOT_USED
526  Oid typelem = PG_GETARG_OID(1);
527 #endif
528  int32 atttypmod = PG_GETARG_INT32(2);
529  VarChar *result;
530  char *str;
531  int nbytes;
532 
533  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
534  result = varchar_input(str, nbytes, atttypmod);
535  pfree(str);
536  PG_RETURN_VARCHAR_P(result);
537 }
538 
539 /*
540  * varcharsend - converts varchar to binary format
541  */
542 Datum
544 {
545  /* Exactly the same as textsend, so share code */
546  return textsend(fcinfo);
547 }
548 
549 
550 /*
551  * varchar_support()
552  *
553  * Planner support function for the varchar() length coercion function.
554  *
555  * Currently, the only interesting thing we can do is flatten calls that set
556  * the new maximum length >= the previous maximum length. We can ignore the
557  * isExplicit argument, since that only affects truncation cases.
558  */
559 Datum
561 {
562  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
563  Node *ret = NULL;
564 
565  if (IsA(rawreq, SupportRequestSimplify))
566  {
568  FuncExpr *expr = req->fcall;
569  Node *typmod;
570 
571  Assert(list_length(expr->args) >= 2);
572 
573  typmod = (Node *) lsecond(expr->args);
574 
575  if (IsA(typmod, Const) &&!((Const *) typmod)->constisnull)
576  {
577  Node *source = (Node *) linitial(expr->args);
578  int32 old_typmod = exprTypmod(source);
579  int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
580  int32 old_max = old_typmod - VARHDRSZ;
581  int32 new_max = new_typmod - VARHDRSZ;
582 
583  if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
584  ret = relabel_to_typmod(source, new_typmod);
585  }
586  }
587 
588  PG_RETURN_POINTER(ret);
589 }
590 
591 /*
592  * Converts a VARCHAR type to the specified size.
593  *
594  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
595  * isExplicit is true if this is for an explicit cast to varchar(N).
596  *
597  * Truncation rules: for an explicit cast, silently truncate to the given
598  * length; for an implicit cast, raise error unless extra characters are
599  * all spaces. (This is sort-of per SQL: the spec would actually have us
600  * raise a "completion condition" for the explicit cast case, but Postgres
601  * hasn't got such a concept.)
602  */
603 Datum
605 {
606  VarChar *source = PG_GETARG_VARCHAR_PP(0);
607  int32 typmod = PG_GETARG_INT32(1);
608  bool isExplicit = PG_GETARG_BOOL(2);
609  int32 len,
610  maxlen;
611  size_t maxmblen;
612  int i;
613  char *s_data;
614 
615  len = VARSIZE_ANY_EXHDR(source);
616  s_data = VARDATA_ANY(source);
617  maxlen = typmod - VARHDRSZ;
618 
619  /* No work if typmod is invalid or supplied data fits it already */
620  if (maxlen < 0 || len <= maxlen)
621  PG_RETURN_VARCHAR_P(source);
622 
623  /* only reach here if string is too long... */
624 
625  /* truncate multibyte string preserving multibyte boundary */
626  maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
627 
628  if (!isExplicit)
629  {
630  for (i = maxmblen; i < len; i++)
631  if (s_data[i] != ' ')
632  ereport(ERROR,
633  (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
634  errmsg("value too long for type character varying(%d)",
635  maxlen)));
636  }
637 
639  maxmblen));
640 }
641 
642 Datum
644 {
646 
647  PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
648 }
649 
650 Datum
652 {
653  int32 typmod = PG_GETARG_INT32(0);
654 
656 }
657 
658 
659 /*****************************************************************************
660  * Exported functions
661  *****************************************************************************/
662 
663 /* "True" length (not counting trailing blanks) of a BpChar */
664 static inline int
666 {
667  return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
668 }
669 
/*
 * bpchartruelen -- length of the len-byte string s with trailing blanks
 * left out
 *
 * Returns the number of leading bytes that remain once every trailing ' '
 * has been discounted; a non-positive len is returned unchanged.
 */
int
bpchartruelen(char *s, int len)
{
    int         n = len;

    /*
     * Scanning byte-wise is safe only because ' ' is assumed to be a
     * single-byte unit in every supported multibyte server encoding.
     */
    while (n > 0 && s[n - 1] == ' ')
        n--;

    return n;
}
686 
687 Datum
689 {
691  int len;
692 
693  /* get number of bytes, ignoring trailing spaces */
694  len = bcTruelen(arg);
695 
696  /* in multibyte encoding, convert to number of characters */
698  len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
699 
700  PG_RETURN_INT32(len);
701 }
702 
703 Datum
705 {
707 
708  /* We need not detoast the input at all */
710 }
711 
712 
713 /*****************************************************************************
714  * Comparison Functions used for bpchar
715  *
716  * Note: btree indexes need these routines not to leak memory; therefore,
717  * be careful to free working copies of toasted datums. Most places don't
718  * need to be so careful.
719  *****************************************************************************/
720 
721 static void
723 {
724  if (!OidIsValid(collid))
725  {
726  /*
727  * This typically means that the parser could not resolve a conflict
728  * of implicit collations, so report it that way.
729  */
730  ereport(ERROR,
731  (errcode(ERRCODE_INDETERMINATE_COLLATION),
732  errmsg("could not determine which collation to use for string comparison"),
733  errhint("Use the COLLATE clause to set the collation explicitly.")));
734  }
735 }
736 
737 Datum
739 {
740  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
741  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
742  int len1,
743  len2;
744  bool result;
745  Oid collid = PG_GET_COLLATION();
746 
747  check_collation_set(collid);
748 
749  len1 = bcTruelen(arg1);
750  len2 = bcTruelen(arg2);
751 
752  if (lc_collate_is_c(collid) ||
753  collid == DEFAULT_COLLATION_OID ||
754  pg_newlocale_from_collation(collid)->deterministic)
755  {
756  /*
757  * Since we only care about equality or not-equality, we can avoid all
758  * the expense of strcoll() here, and just do bitwise comparison.
759  */
760  if (len1 != len2)
761  result = false;
762  else
763  result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
764  }
765  else
766  {
767  result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
768  collid) == 0);
769  }
770 
771  PG_FREE_IF_COPY(arg1, 0);
772  PG_FREE_IF_COPY(arg2, 1);
773 
774  PG_RETURN_BOOL(result);
775 }
776 
777 Datum
779 {
780  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
781  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
782  int len1,
783  len2;
784  bool result;
785  Oid collid = PG_GET_COLLATION();
786 
787  check_collation_set(collid);
788 
789  len1 = bcTruelen(arg1);
790  len2 = bcTruelen(arg2);
791 
792  if (lc_collate_is_c(collid) ||
793  collid == DEFAULT_COLLATION_OID ||
794  pg_newlocale_from_collation(collid)->deterministic)
795  {
796  /*
797  * Since we only care about equality or not-equality, we can avoid all
798  * the expense of strcoll() here, and just do bitwise comparison.
799  */
800  if (len1 != len2)
801  result = true;
802  else
803  result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
804  }
805  else
806  {
807  result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
808  collid) != 0);
809  }
810 
811  PG_FREE_IF_COPY(arg1, 0);
812  PG_FREE_IF_COPY(arg2, 1);
813 
814  PG_RETURN_BOOL(result);
815 }
816 
817 Datum
819 {
820  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
821  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
822  int len1,
823  len2;
824  int cmp;
825 
826  len1 = bcTruelen(arg1);
827  len2 = bcTruelen(arg2);
828 
829  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
830  PG_GET_COLLATION());
831 
832  PG_FREE_IF_COPY(arg1, 0);
833  PG_FREE_IF_COPY(arg2, 1);
834 
835  PG_RETURN_BOOL(cmp < 0);
836 }
837 
838 Datum
840 {
841  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
842  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
843  int len1,
844  len2;
845  int cmp;
846 
847  len1 = bcTruelen(arg1);
848  len2 = bcTruelen(arg2);
849 
850  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
851  PG_GET_COLLATION());
852 
853  PG_FREE_IF_COPY(arg1, 0);
854  PG_FREE_IF_COPY(arg2, 1);
855 
856  PG_RETURN_BOOL(cmp <= 0);
857 }
858 
859 Datum
861 {
862  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
863  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
864  int len1,
865  len2;
866  int cmp;
867 
868  len1 = bcTruelen(arg1);
869  len2 = bcTruelen(arg2);
870 
871  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
872  PG_GET_COLLATION());
873 
874  PG_FREE_IF_COPY(arg1, 0);
875  PG_FREE_IF_COPY(arg2, 1);
876 
877  PG_RETURN_BOOL(cmp > 0);
878 }
879 
880 Datum
882 {
883  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
884  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
885  int len1,
886  len2;
887  int cmp;
888 
889  len1 = bcTruelen(arg1);
890  len2 = bcTruelen(arg2);
891 
892  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
893  PG_GET_COLLATION());
894 
895  PG_FREE_IF_COPY(arg1, 0);
896  PG_FREE_IF_COPY(arg2, 1);
897 
898  PG_RETURN_BOOL(cmp >= 0);
899 }
900 
901 Datum
903 {
904  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
905  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
906  int len1,
907  len2;
908  int cmp;
909 
910  len1 = bcTruelen(arg1);
911  len2 = bcTruelen(arg2);
912 
913  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
914  PG_GET_COLLATION());
915 
916  PG_FREE_IF_COPY(arg1, 0);
917  PG_FREE_IF_COPY(arg2, 1);
918 
919  PG_RETURN_INT32(cmp);
920 }
921 
922 Datum
924 {
926  Oid collid = ssup->ssup_collation;
927  MemoryContext oldcontext;
928 
929  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
930 
931  /* Use generic string SortSupport */
932  varstr_sortsupport(ssup, BPCHAROID, collid);
933 
934  MemoryContextSwitchTo(oldcontext);
935 
936  PG_RETURN_VOID();
937 }
938 
939 Datum
941 {
942  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
943  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
944  int len1,
945  len2;
946  int cmp;
947 
948  len1 = bcTruelen(arg1);
949  len2 = bcTruelen(arg2);
950 
951  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
952  PG_GET_COLLATION());
953 
954  PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
955 }
956 
957 Datum
959 {
960  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
961  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
962  int len1,
963  len2;
964  int cmp;
965 
966  len1 = bcTruelen(arg1);
967  len2 = bcTruelen(arg2);
968 
969  cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
970  PG_GET_COLLATION());
971 
972  PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
973 }
974 
975 
976 /*
977  * bpchar needs a specialized hash function because we want to ignore
978  * trailing blanks in comparisons.
979  */
980 Datum
982 {
984  Oid collid = PG_GET_COLLATION();
985  char *keydata;
986  int keylen;
987  pg_locale_t mylocale = 0;
988  Datum result;
989 
990  if (!collid)
991  ereport(ERROR,
992  (errcode(ERRCODE_INDETERMINATE_COLLATION),
993  errmsg("could not determine which collation to use for string hashing"),
994  errhint("Use the COLLATE clause to set the collation explicitly.")));
995 
996  keydata = VARDATA_ANY(key);
997  keylen = bcTruelen(key);
998 
999  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1000  mylocale = pg_newlocale_from_collation(collid);
1001 
1002  if (!mylocale || mylocale->deterministic)
1003  {
1004  result = hash_any((unsigned char *) keydata, keylen);
1005  }
1006  else
1007  {
1008 #ifdef USE_ICU
1009  if (mylocale->provider == COLLPROVIDER_ICU)
1010  {
1011  int32_t ulen = -1;
1012  UChar *uchar = NULL;
1013  Size bsize;
1014  uint8_t *buf;
1015 
1016  ulen = icu_to_uchar(&uchar, keydata, keylen);
1017 
1018  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1019  uchar, ulen, NULL, 0);
1020  buf = palloc(bsize);
1021  ucol_getSortKey(mylocale->info.icu.ucol,
1022  uchar, ulen, buf, bsize);
1023 
1024  result = hash_any(buf, bsize);
1025 
1026  pfree(buf);
1027  }
1028  else
1029 #endif
1030  /* shouldn't happen */
1031  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1032  }
1033 
1034  /* Avoid leaking memory for toasted inputs */
1035  PG_FREE_IF_COPY(key, 0);
1036 
1037  return result;
1038 }
1039 
1040 Datum
1042 {
1044  Oid collid = PG_GET_COLLATION();
1045  char *keydata;
1046  int keylen;
1047  pg_locale_t mylocale = 0;
1048  Datum result;
1049 
1050  if (!collid)
1051  ereport(ERROR,
1052  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1053  errmsg("could not determine which collation to use for string hashing"),
1054  errhint("Use the COLLATE clause to set the collation explicitly.")));
1055 
1056  keydata = VARDATA_ANY(key);
1057  keylen = bcTruelen(key);
1058 
1059  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1060  mylocale = pg_newlocale_from_collation(collid);
1061 
1062  if (!mylocale || mylocale->deterministic)
1063  {
1064  result = hash_any_extended((unsigned char *) keydata, keylen,
1065  PG_GETARG_INT64(1));
1066  }
1067  else
1068  {
1069 #ifdef USE_ICU
1070  if (mylocale->provider == COLLPROVIDER_ICU)
1071  {
1072  int32_t ulen = -1;
1073  UChar *uchar = NULL;
1074  Size bsize;
1075  uint8_t *buf;
1076 
1077  ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1078 
1079  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1080  uchar, ulen, NULL, 0);
1081  buf = palloc(bsize);
1082  ucol_getSortKey(mylocale->info.icu.ucol,
1083  uchar, ulen, buf, bsize);
1084 
1085  result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1086 
1087  pfree(buf);
1088  }
1089  else
1090 #endif
1091  /* shouldn't happen */
1092  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1093  }
1094 
1095  PG_FREE_IF_COPY(key, 0);
1096 
1097  return result;
1098 }
1099 
1100 /*
1101  * The following operators support character-by-character comparison
1102  * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1103  * Note that the regular bpchareq/bpcharne comparison operators, and
1104  * regular support functions 1 and 2 with "C" collation are assumed to be
1105  * compatible with these!
1106  */
1107 
1108 static int
1110 {
1111  int result;
1112  int len1,
1113  len2;
1114 
1115  len1 = bcTruelen(arg1);
1116  len2 = bcTruelen(arg2);
1117 
1118  result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1119  if (result != 0)
1120  return result;
1121  else if (len1 < len2)
1122  return -1;
1123  else if (len1 > len2)
1124  return 1;
1125  else
1126  return 0;
1127 }
1128 
1129 
1130 Datum
1132 {
1133  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1134  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1135  int result;
1136 
1137  result = internal_bpchar_pattern_compare(arg1, arg2);
1138 
1139  PG_FREE_IF_COPY(arg1, 0);
1140  PG_FREE_IF_COPY(arg2, 1);
1141 
1142  PG_RETURN_BOOL(result < 0);
1143 }
1144 
1145 
1146 Datum
1148 {
1149  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1150  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1151  int result;
1152 
1153  result = internal_bpchar_pattern_compare(arg1, arg2);
1154 
1155  PG_FREE_IF_COPY(arg1, 0);
1156  PG_FREE_IF_COPY(arg2, 1);
1157 
1158  PG_RETURN_BOOL(result <= 0);
1159 }
1160 
1161 
1162 Datum
1164 {
1165  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1166  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1167  int result;
1168 
1169  result = internal_bpchar_pattern_compare(arg1, arg2);
1170 
1171  PG_FREE_IF_COPY(arg1, 0);
1172  PG_FREE_IF_COPY(arg2, 1);
1173 
1174  PG_RETURN_BOOL(result >= 0);
1175 }
1176 
1177 
1178 Datum
1180 {
1181  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1182  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1183  int result;
1184 
1185  result = internal_bpchar_pattern_compare(arg1, arg2);
1186 
1187  PG_FREE_IF_COPY(arg1, 0);
1188  PG_FREE_IF_COPY(arg2, 1);
1189 
1190  PG_RETURN_BOOL(result > 0);
1191 }
1192 
1193 
1194 Datum
1196 {
1197  BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1198  BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1199  int result;
1200 
1201  result = internal_bpchar_pattern_compare(arg1, arg2);
1202 
1203  PG_FREE_IF_COPY(arg1, 0);
1204  PG_FREE_IF_COPY(arg2, 1);
1205 
1206  PG_RETURN_INT32(result);
1207 }
1208 
1209 
1210 Datum
1212 {
1214  MemoryContext oldcontext;
1215 
1216  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1217 
1218  /* Use generic string SortSupport, forcing "C" collation */
1219  varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1220 
1221  MemoryContextSwitchTo(oldcontext);
1222 
1223  PG_RETURN_VOID();
1224 }
Datum bpchar_sortsupport(PG_FUNCTION_ARGS)
Definition: varchar.c:923
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
union pg_locale_struct::@144 info
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:904
Datum bpchargt(PG_FUNCTION_ARGS)
Definition: varchar.c:860
Datum bpchartypmodout(PG_FUNCTION_ARGS)
Definition: varchar.c:423
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define MaxAttrSize
Definition: htup_details.h:585
Datum btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
Definition: varchar.c:1195
Datum name_bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:405
int errhint(const char *fmt,...)
Definition: elog.c:1069
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
static VarChar * varchar_input(const char *s, size_t len, int32 atttypmod)
Definition: varchar.c:454
Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.c:148
Datum char_bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:351
List * args
Definition: primnodes.h:463
#define DatumGetInt32(X)
Definition: postgres.h:472
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:275
Datum varcharin(PG_FUNCTION_ARGS)
Definition: varchar.c:488
Datum bpchar_pattern_gt(PG_FUNCTION_ARGS)
Definition: varchar.c:1179
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define VARHDRSZ
Definition: c.h:562
int32 * ArrayGetIntegerTypmods(ArrayType *arr, int *n)
Definition: arrayutils.c:200
StringInfoData * StringInfo
Definition: stringinfo.h:44
#define Min(x, y)
Definition: c.h:911
Datum bpchar_smaller(PG_FUNCTION_ARGS)
Definition: varchar.c:958
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
Datum bpcharge(PG_FUNCTION_ARGS)
Definition: varchar.c:881
Definition: nodes.h:525
static BpChar * bpchar_input(const char *s, size_t len, int32 atttypmod)
Definition: varchar.c:126
int errcode(int sqlerrcode)
Definition: elog.c:608
Datum varchar(PG_FUNCTION_ARGS)
Definition: varchar.c:604
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.c:374
Datum bpchareq(PG_FUNCTION_ARGS)
Definition: varchar.c:738
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:269
Datum bpcharlt(PG_FUNCTION_ARGS)
Definition: varchar.c:818
unsigned int Oid
Definition: postgres_ext.h:31
Datum bpcharin(PG_FUNCTION_ARGS)
Definition: varchar.c:194
#define OidIsValid(objectId)
Definition: c.h:645
Datum bpcharsend(PG_FUNCTION_ARGS)
Definition: varchar.c:249
#define PG_GET_COLLATION()
Definition: fmgr.h:193
#define lsecond(l)
Definition: pg_list.h:200
signed int int32
Definition: c.h:347
Datum bpchar_pattern_lt(PG_FUNCTION_ARGS)
Definition: varchar.c:1131
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:836
#define NAMEDATALEN
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
static void check_collation_set(Oid collid)
Definition: varchar.c:722
void pfree(void *pointer)
Definition: mcxt.c:1056
#define linitial(l)
Definition: pg_list.h:195
Datum varchartypmodin(PG_FUNCTION_ARGS)
Definition: varchar.c:643
Datum bpcharoctetlen(PG_FUNCTION_ARGS)
Definition: varchar.c:704
#define PG_GETARG_BPCHAR_PP(n)
Definition: fmgr.h:304
#define ERROR
Definition: elog.h:43
Datum hashbpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:981
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1176
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1482
MemoryContext ssup_cxt
Definition: sortsupport.h:66
Datum btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
Definition: varchar.c:1211
Datum bpcharle(PG_FUNCTION_ARGS)
Definition: varchar.c:839
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:862
Datum bpchar_larger(PG_FUNCTION_ARGS)
Definition: varchar.c:940
Definition: c.h:610
#define PG_RETURN_BPCHAR_P(x)
Definition: fmgr.h:362
Datum varcharrecv(PG_FUNCTION_ARGS)
Definition: varchar.c:521
void varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
Definition: varlena.c:1967
#define PG_RETURN_VARCHAR_P(x)
Definition: fmgr.h:363
char * c
#define PG_GETARG_VARCHAR_PP(n)
Definition: fmgr.h:305
static char * buf
Definition: pg_test_fsync.c:67
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:183
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
int pg_database_encoding_max_length(void)
Definition: wchar.c:1881
static char * anychar_typmodout(int32 typmod)
Definition: varchar.c:72
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:806
#define ereport(elevel, rest)
Definition: elog.h:141
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
Datum bpcharne(PG_FUNCTION_ARGS)
Definition: varchar.c:778
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1323
#define TextDatumGetCString(d)
Definition: builtins.h:84
void * palloc0(Size size)
Definition: mcxt.c:980
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
int bpchartruelen(char *s, int len)
Definition: varchar.c:671
bool deterministic
Definition: pg_locale.h:85
Datum varcharout(PG_FUNCTION_ARGS)
Definition: varchar.c:510
Datum varchartypmodout(PG_FUNCTION_ARGS)
Definition: varchar.c:651
#define PG_RETURN_VOID()
Definition: fmgr.h:339
static int internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
Definition: varchar.c:1109
text * cstring_to_text(const char *s)
Definition: varlena.c:171
#define Assert(condition)
Definition: c.h:739
Datum varchar_support(PG_FUNCTION_ARGS)
Definition: varchar.c:560
Datum bpchar_pattern_le(PG_FUNCTION_ARGS)
Definition: varchar.c:1147
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:352
size_t Size
Definition: c.h:467
static int bcTruelen(BpChar *arg)
Definition: varchar.c:665
static int list_length(const List *l)
Definition: pg_list.h:169
Datum textsend(PG_FUNCTION_ARGS)
Definition: varlena.c:570
Datum varcharsend(PG_FUNCTION_ARGS)
Definition: varchar.c:543
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:255
Datum bpcharlen(PG_FUNCTION_ARGS)
Definition: varchar.c:688
Datum bpchar_pattern_ge(PG_FUNCTION_ARGS)
Definition: varchar.c:1163
static int32 anychar_typmodin(ArrayType *ta, const char *typename)
Definition: varchar.c:33
Datum hashbpcharextended(PG_FUNCTION_ARGS)
Definition: varchar.c:1041
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
Datum bpcharrecv(PG_FUNCTION_ARGS)
Definition: varchar.c:227
#define elog(elevel,...)
Definition: elog.h:228
int i
Datum bpchar_name(PG_FUNCTION_ARGS)
Definition: varchar.c:369
#define NameStr(name)
Definition: c.h:616
void * arg
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:272
Definition: c.h:556
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
Datum bpcharcmp(PG_FUNCTION_ARGS)
Definition: varchar.c:902
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
Datum bpchar(PG_FUNCTION_ARGS)
Definition: varchar.c:269
NameData * Name
Definition: c.h:614
Datum bpcharout(PG_FUNCTION_ARGS)
Definition: varchar.c:216
#define PG_GETARG_INT64(n)
Definition: fmgr.h:277
Datum bpchartypmodin(PG_FUNCTION_ARGS)
Definition: varchar.c:415
#define snprintf
Definition: port.h:192
Node * relabel_to_typmod(Node *expr, int32 typmod)
Definition: nodeFuncs.c:587
#define PG_RETURN_NAME(x)
Definition: fmgr.h:353
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:268
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742